diff --git a/ios/Platform/Platform/Features/Reader/Models/ReaderModels.swift b/ios/Platform/Platform/Features/Reader/Models/ReaderModels.swift
index 699d877..167d5f2 100644
--- a/ios/Platform/Platform/Features/Reader/Models/ReaderModels.swift
+++ b/ios/Platform/Platform/Features/Reader/Models/ReaderModels.swift
@@ -18,6 +18,7 @@ struct ReaderEntry: Codable, Identifiable, Hashable {
let status: String
let starred: Bool
let readingTime: Int
+ let thumbnail: String?
let feed: ReaderFeedRef?
var isRead: Bool { status == "read" }
@@ -49,7 +50,12 @@ struct ReaderEntry: Codable, Identifiable, Hashable {
}
var thumbnailURL: URL? {
- ReaderEntry.extractThumbnail(from: content ?? fullContent ?? "")
+        // Use the server-supplied thumbnail when it is a non-empty string that
+        // parses as a URL; otherwise fall back to extracting one from the
+        // entry content.
+        guard let serverValue = thumbnail, !serverValue.isEmpty,
+              let serverURL = URL(string: serverValue) else {
+            return ReaderEntry.extractThumbnail(from: content ?? fullContent ?? "")
+        }
+        return serverURL
}
private static let imgRegex = try! NSRegularExpression(
@@ -68,6 +74,9 @@ struct ReaderEntry: Codable, Identifiable, Hashable {
return nil
}
let src = String(searchRange[srcRange])
+            // The matched attribute value is HTML-escaped. Decode "&amp;" last so
+            // an escaped entity such as "&amp;lt;" becomes "&lt;" rather than "<".
+            .replacingOccurrences(of: "&lt;", with: "<")
+            .replacingOccurrences(of: "&gt;", with: ">")
+            .replacingOccurrences(of: "&amp;", with: "&")
return URL(string: src)
}
diff --git a/services/reader/app/api/entries.py b/services/reader/app/api/entries.py
index b408869..be3ea9e 100644
--- a/services/reader/app/api/entries.py
+++ b/services/reader/app/api/entries.py
@@ -40,6 +40,7 @@ class EntryOut(BaseModel):
status: str = "unread"
starred: bool = False
reading_time: int = 1
+ thumbnail: str | None = None
feed: FeedRef | None = None
class Config:
@@ -49,6 +50,10 @@ class EntryOut(BaseModel):
def from_entry(cls, entry: Entry) -> "EntryOut":
# Use full_content if available, otherwise RSS content
best_content = entry.full_content if entry.full_content else entry.content
+ # Extract thumbnail from stored field, or from content
+ thumb = entry.thumbnail
+ if not thumb:
+ thumb = cls._extract_thumbnail(entry.content or entry.full_content or "")
return cls(
id=entry.id,
title=entry.title,
@@ -60,9 +65,25 @@ class EntryOut(BaseModel):
status=entry.status,
starred=entry.starred,
reading_time=entry.reading_time,
+ thumbnail=thumb,
feed=FeedRef(id=entry.feed.id, title=entry.feed.title) if entry.feed else None,
)
+    @staticmethod
+    def _extract_thumbnail(html: str) -> str | None:
+        """Return the ``src`` of the first ``<img>`` tag in ``html``, or ``None``.
+
+        Only the first 3000 characters are searched. ``None`` is returned when
+        ``html`` is empty, when no ``<img ... src=...>`` is found in that window,
+        or when the first match looks like a tracking pixel or spacer image.
+        """
+        if not html:
+            return None
+        import re
+        match = re.search(r'<img[^>]+src=["\']([^"\']+)["\']', html[:3000], re.IGNORECASE)
+        if match is None:
+            return None
+        # Attribute values are HTML-escaped: "&amp;" in the markup is a literal "&".
+        # Only "&amp;" is decoded; a general unescape could rewrite query
+        # parameters such as "&copy=" inside unescaped URLs.
+        url = match.group(1).replace("&amp;", "&")
+        # Skip tiny tracking pixels and icons
+        if any(skip in url.lower() for skip in ["1x1", "pixel", "tracking", "spacer"]):
+            return None
+        return url
+
class EntryListOut(BaseModel):
total: int
diff --git a/services/reader/app/models.py b/services/reader/app/models.py
index 2aa6f35..9b9258e 100644
--- a/services/reader/app/models.py
+++ b/services/reader/app/models.py
@@ -68,6 +68,7 @@ class Entry(Base):
published_at = Column(DateTime)
status = Column(String(10), default="unread")
starred = Column(Boolean, default=False)
+ thumbnail = Column(Text)
reading_time = Column(Integer, default=1)
created_at = Column(DateTime, default=datetime.utcnow)
diff --git a/services/reader/app/worker/tasks.py b/services/reader/app/worker/tasks.py
index ea92652..cb3af37 100644
--- a/services/reader/app/worker/tasks.py
+++ b/services/reader/app/worker/tasks.py
@@ -79,6 +79,31 @@ def _get_entry_content(entry: dict) -> str:
return ""
+def _extract_thumbnail(entry: dict, content: str) -> str | None:
+    """Pick a thumbnail URL for a feedparser entry.
+
+    Sources are tried in order and the first hit wins:
+
+    1. ``media_thumbnail`` items with a ``url``.
+    2. ``media_content`` items marked as images (``medium == "image"`` or an
+       ``image*`` type) with a ``url``.
+    3. ``enclosures`` with an ``image*`` type and an ``href``.
+    4. The first ``<img>`` ``src`` in the first 3000 characters of ``content``,
+       unless it looks like a tracking pixel or spacer.
+
+    Returns ``None`` when none of the sources yields a URL.
+    """
+    # 1. Check media:thumbnail
+    for mt in entry.get("media_thumbnail", []):
+        if mt.get("url"):
+            return mt["url"]
+    # 2. Check media:content with image type
+    for mc in entry.get("media_content", []):
+        # `or ""` tolerates a "type" key that is present but None.
+        is_image = mc.get("medium") == "image" or (mc.get("type") or "").startswith("image")
+        if is_image and mc.get("url"):
+            return mc["url"]
+    # 3. Check enclosures
+    for enc in entry.get("enclosures", []):
+        if (enc.get("type") or "").startswith("image") and enc.get("href"):
+            return enc["href"]
+    # 4. Extract from content HTML
+    if content:
+        match = re.search(r'<img[^>]+src=["\']([^"\']+)["\']', content[:3000], re.IGNORECASE)
+        if match:
+            # Attribute values are HTML-escaped: "&amp;" in the markup is a literal "&".
+            url = match.group(1).replace("&amp;", "&")
+            if not any(skip in url.lower() for skip in ["1x1", "pixel", "tracking", "spacer"]):
+                return url
+    return None
+
+
def _get_entry_author(entry: dict) -> str | None:
"""Extract author from a feedparser entry."""
if entry.get("author"):
@@ -157,6 +182,7 @@ def fetch_single_feed(feed_id: int):
content = _get_entry_content(fe)
pub_date = _parse_date(fe)
+ thumb = _extract_thumbnail(fe, content)
stmt = pg_insert(Entry).values(
feed_id=feed.id,
@@ -168,6 +194,7 @@ def fetch_single_feed(feed_id: int):
published_at=pub_date,
status="unread",
starred=False,
+ thumbnail=thumb,
reading_time=_calc_reading_time(content),
).on_conflict_do_nothing(
constraint="uq_reader_entries_feed_url"