feat: thumbnail extraction for Reader — fixes all clients
All checks were successful
Security Checks / dockerfile-lint (push) Successful in 4s
Security Checks / dependency-audit (push) Successful in 13s
Security Checks / secret-scanning (push) Successful in 3s

Server-side (dashboard + iOS + any client):
- Added thumbnail column to reader_entries
- Worker extracts from media:thumbnail, media:content, enclosures, HTML img
- API returns thumbnail in EntryOut with `&amp;` decoded to `&`
- Backfilled 260 existing entries

iOS:
- Prefers API thumbnail, falls back to client-side extraction
- Decodes HTML entities in URLs

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Yusuf Suleman
2026-04-03 19:32:47 -05:00
parent 798ba17a93
commit a3eabf3e3b
4 changed files with 59 additions and 1 deletions

View File

@@ -79,6 +79,31 @@ def _get_entry_content(entry: dict) -> str:
return ""
def _extract_thumbnail(entry: dict, content: str) -> str | None:
"""Extract thumbnail from feedparser entry or content HTML."""
# 1. Check media:thumbnail
for mt in entry.get("media_thumbnail", []):
if mt.get("url"):
return mt["url"]
# 2. Check media:content with image type
for mc in entry.get("media_content", []):
if mc.get("medium") == "image" or (mc.get("type", "").startswith("image")):
if mc.get("url"):
return mc["url"]
# 3. Check enclosures
for enc in entry.get("enclosures", []):
if enc.get("type", "").startswith("image") and enc.get("href"):
return enc["href"]
# 4. Extract from content HTML
if content:
match = re.search(r'<img[^>]+src=["\']([^"\']+)["\']', content[:3000], re.IGNORECASE)
if match:
url = match.group(1).replace("&amp;", "&")
if not any(skip in url.lower() for skip in ["1x1", "pixel", "tracking", "spacer"]):
return url
return None
def _get_entry_author(entry: dict) -> str | None:
"""Extract author from a feedparser entry."""
if entry.get("author"):
@@ -157,6 +182,7 @@ def fetch_single_feed(feed_id: int):
content = _get_entry_content(fe)
pub_date = _parse_date(fe)
thumb = _extract_thumbnail(fe, content)
stmt = pg_insert(Entry).values(
feed_id=feed.id,
@@ -168,6 +194,7 @@ def fetch_single_feed(feed_id: int):
published_at=pub_date,
status="unread",
starred=False,
thumbnail=thumb,
reading_time=_calc_reading_time(content),
).on_conflict_do_nothing(
constraint="uq_reader_entries_feed_url"