Server: add slim query param (default true) to entries list endpoint, returning EntrySlimOut without content/full_content HTML — cuts payload size dramatically for list views. Single entry endpoint still returns full content. iOS: ArticleView now fetches full entry content on demand when opened instead of relying on list data. Shows loading indicator while fetching. Mark-as-read is fire-and-forget to avoid blocking the view. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
319 lines
10 KiB
Python
"""Entry endpoints."""
|
|
|
|
import logging
from typing import Optional

import httpx
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, ConfigDict
from sqlalchemy import func, select, update
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload

from app.api.deps import get_db_session, get_user_id
from app.config import CRAWLER_URL
from app.models import Entry, Feed
|
|
|
|
log = logging.getLogger(__name__)
|
|
router = APIRouter(prefix="/api/entries", tags=["entries"])
|
|
|
|
|
|
# ── Schemas ──────────────────────────────────────────────────────────────
|
|
|
|
|
|
class FeedRef(BaseModel):
    """Minimal feed reference embedded in entry payloads."""

    # Pydantic v2 idiom — inner `class Config` is deprecated.
    model_config = ConfigDict(from_attributes=True)

    id: int
    title: str
|
|
|
|
|
|
class EntryOut(BaseModel):
    """Full entry payload including content fields — used for detail views."""

    # Pydantic v2 idiom — inner `class Config` is deprecated.
    model_config = ConfigDict(from_attributes=True)

    id: int
    title: str | None = None
    url: str | None = None
    # Best available HTML: crawled full content when present, else RSS content.
    content: str | None = None
    # Readability-extracted article HTML from the crawler, if fetched.
    full_content: str | None = None
    author: str | None = None
    # ISO-8601 string serialized from the ORM datetime column.
    published_at: str | None = None
    status: str = "unread"
    starred: bool = False
    # Estimated minutes to read; floor of 1.
    reading_time: int = 1
    thumbnail: str | None = None
    feed: FeedRef | None = None

    @classmethod
    def from_entry(cls, entry: Entry, slim: bool = False) -> "EntryOut | EntrySlimOut":
        """Build an API schema from an ORM ``Entry``.

        When ``slim`` is true, returns :class:`EntrySlimOut` without the
        content/full_content HTML — used by list views to cut payload size.
        """
        # Extract thumbnail from stored field, or fall back to scanning content
        thumb = entry.thumbnail
        if not thumb:
            thumb = cls._extract_thumbnail(entry.content or entry.full_content or "")

        # Shared between slim and full payloads.
        feed_ref = FeedRef(id=entry.feed.id, title=entry.feed.title) if entry.feed else None
        published = entry.published_at.isoformat() if entry.published_at else None

        if slim:
            return EntrySlimOut(
                id=entry.id,
                title=entry.title,
                url=entry.url,
                author=entry.author,
                published_at=published,
                status=entry.status,
                starred=entry.starred,
                reading_time=entry.reading_time,
                thumbnail=thumb,
                feed=feed_ref,
            )

        # Use full_content if available, otherwise RSS content
        best_content = entry.full_content if entry.full_content else entry.content
        return cls(
            id=entry.id,
            title=entry.title,
            url=entry.url,
            content=best_content,
            full_content=entry.full_content,
            author=entry.author,
            published_at=published,
            status=entry.status,
            starred=entry.starred,
            reading_time=entry.reading_time,
            thumbnail=thumb,
            feed=feed_ref,
        )

    @staticmethod
    def _extract_thumbnail(html: str) -> str | None:
        """Extract the first usable image URL from HTML content, or None."""
        if not html:
            return None
        import re
        # Only scan the head of the document to keep this cheap.
        match = re.search(r'<img[^>]+src=["\']([^"\']+)["\']', html[:3000], re.IGNORECASE)
        if match:
            # BUGFIX: was `.replace("&", "&")` (a no-op); unescape the HTML
            # entity so URLs with query strings survive.
            url = match.group(1).replace("&amp;", "&")
            # Skip tiny tracking pixels and icons
            if any(skip in url.lower() for skip in ["1x1", "pixel", "tracking", "spacer"]):
                return None
            return url
        return None
|
|
|
|
|
|
class EntrySlimOut(BaseModel):
    """Entry without content fields — used for list views."""

    # Pydantic v2 idiom — inner `class Config` is deprecated.
    model_config = ConfigDict(from_attributes=True)

    id: int
    title: str | None = None
    url: str | None = None
    author: str | None = None
    # ISO-8601 string serialized from the ORM datetime column.
    published_at: str | None = None
    status: str = "unread"
    starred: bool = False
    # Estimated minutes to read; floor of 1.
    reading_time: int = 1
    thumbnail: str | None = None
    feed: FeedRef | None = None
|
|
|
|
|
|
class EntryListOut(BaseModel):
    """Paginated listing response: total match count plus one page of entries."""

    # Total matching rows before pagination, so clients can page.
    total: int
    # EntrySlimOut when ?slim=true (the default), EntryOut otherwise.
    entries: list[EntryOut | EntrySlimOut]
|
|
|
|
|
|
class EntryBulkUpdate(BaseModel):
    """Request body for bulk read/unread status updates."""

    # IDs to update; rows not owned by the caller are filtered out by the route.
    entry_ids: list[int]
    # Target status; the route rejects anything other than "read"/"unread".
    status: str
|
|
|
|
|
|
# ── Routes ───────────────────────────────────────────────────────────────
|
|
|
|
|
|
@router.get("", response_model=EntryListOut)
async def list_entries(
    status: Optional[str] = Query(None),
    starred: Optional[bool] = Query(None),
    feed_id: Optional[int] = Query(None),
    category_id: Optional[int] = Query(None),
    slim: bool = Query(True),
    limit: int = Query(50, ge=1, le=500),
    offset: int = Query(0, ge=0),
    direction: str = Query("desc"),
    order: str = Query("published_at"),
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
):
    """Paginated entry listing with optional status/starred/feed/category filters.

    Returns slim payloads (no content HTML) by default; pass ?slim=false
    for full entries.
    """
    query = select(Entry).where(Entry.user_id == user_id)
    count_query = select(func.count(Entry.id)).where(Entry.user_id == user_id)

    # Collect simple column filters once, then apply to both queries.
    conditions = []
    if status:
        conditions.append(Entry.status == status)
    if starred is not None:
        conditions.append(Entry.starred == starred)
    if feed_id is not None:
        conditions.append(Entry.feed_id == feed_id)
    if conditions:
        query = query.where(*conditions)
        count_query = count_query.where(*conditions)

    if category_id is not None:
        # Category filter needs a join through the feed table.
        query = query.join(Feed, Entry.feed_id == Feed.id).where(Feed.category_id == category_id)
        count_query = count_query.join(Feed, Entry.feed_id == Feed.id).where(Feed.category_id == category_id)

    # Ordering: published_at (default) or created_at, NULLs pushed to the end
    # of whichever direction was requested.
    sort_col = Entry.published_at if order == "published_at" else Entry.created_at
    if direction == "asc":
        query = query.order_by(sort_col.asc().nullslast())
    else:
        query = query.order_by(sort_col.desc().nullsfirst())

    # Total count (pre-pagination).
    total = (await db.execute(count_query)).scalar() or 0

    # Fetch one page, eagerly loading each entry's feed for FeedRef.
    page_query = query.options(selectinload(Entry.feed)).offset(offset).limit(limit)
    rows = (await db.execute(page_query)).scalars().all()

    return EntryListOut(
        total=total,
        entries=[EntryOut.from_entry(row, slim=slim) for row in rows],
    )
|
|
|
|
|
|
@router.put("")
async def bulk_update_entries(
    body: EntryBulkUpdate,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
):
    """Set read/unread status on a batch of entries owned by the caller."""
    if body.status not in ("read", "unread"):
        raise HTTPException(status_code=400, detail="Status must be 'read' or 'unread'")

    stmt = (
        update(Entry)
        .where(Entry.user_id == user_id, Entry.id.in_(body.entry_ids))
        .values(status=body.status)
    )
    await db.execute(stmt)
    await db.commit()
    return {"ok": True}
|
|
|
|
|
|
class MarkAllReadBody(BaseModel):
    """Optional filters for the mark-all-read endpoint."""

    # Restrict to a single feed; takes precedence over category_id.
    feed_id: int | None = None
    # Restrict to all feeds in a category (ignored when feed_id is set).
    category_id: int | None = None
|
|
|
|
|
|
@router.put("/mark-all-read")
async def mark_all_read(
    body: MarkAllReadBody,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
):
    """Mark ALL unread entries as read, optionally filtered by feed or category.

    ``feed_id`` takes precedence over ``category_id`` when both are set.
    Returns the number of rows updated.
    """
    q = update(Entry).where(Entry.user_id == user_id, Entry.status == "unread")

    if body.feed_id:
        q = q.where(Entry.feed_id == body.feed_id)
    elif body.category_id:
        # Restrict to feeds in the category, scoped to this user so another
        # user's category id cannot widen the update.
        # (Removed a redundant function-scope `from app.models import Feed`;
        # Feed is already imported at module level.)
        feed_ids_q = select(Feed.id).where(Feed.category_id == body.category_id, Feed.user_id == user_id)
        q = q.where(Entry.feed_id.in_(feed_ids_q))

    result = await db.execute(q.values(status="read"))
    await db.commit()
    return {"ok": True, "marked": result.rowcount}
|
|
|
|
|
|
@router.get("/{entry_id}", response_model=EntryOut)
async def get_entry(
    entry_id: int,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
):
    """Return one entry with full content; 404 when absent or not owned."""
    stmt = (
        select(Entry)
        .options(selectinload(Entry.feed))
        .where(Entry.id == entry_id, Entry.user_id == user_id)
    )
    found = (await db.execute(stmt)).scalar_one_or_none()
    if not found:
        raise HTTPException(status_code=404, detail="Entry not found")
    return EntryOut.from_entry(found)
|
|
|
|
|
|
@router.put("/{entry_id}/bookmark")
async def toggle_bookmark(
    entry_id: int,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
):
    """Flip the starred flag on an entry and return the new value."""
    lookup = await db.execute(
        select(Entry).where(Entry.id == entry_id, Entry.user_id == user_id)
    )
    row = lookup.scalar_one_or_none()
    if not row:
        raise HTTPException(status_code=404, detail="Entry not found")

    row.starred = not row.starred
    await db.commit()
    return {"starred": row.starred}
|
|
|
|
|
|
def _text_to_html(text: str) -> str:
    """Wrap plain crawler text into minimal paragraph HTML.

    Splits on blank lines first; falls back to single newlines when the
    text has no blank-line paragraph breaks.
    """
    paragraphs = [p.strip() for p in text.split("\n\n") if p.strip()]
    if not paragraphs:
        paragraphs = [p.strip() for p in text.split("\n") if p.strip()]
    return "\n".join(f"<p>{p}</p>" for p in paragraphs)


@router.post("/{entry_id}/fetch-full-content", response_model=EntryOut)
async def fetch_full_content(
    entry_id: int,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
):
    """Crawl the entry's URL and persist the readable full content.

    Raises 404 if the entry is missing/not owned, 400 if it has no URL, and
    502 when the crawler fails or returns an unparseable response.
    """
    result = await db.execute(
        select(Entry)
        .options(selectinload(Entry.feed))
        .where(Entry.id == entry_id, Entry.user_id == user_id)
    )
    entry = result.scalar_one_or_none()
    if not entry:
        raise HTTPException(status_code=404, detail="Entry not found")

    if not entry.url:
        raise HTTPException(status_code=400, detail="Entry has no URL to crawl")

    try:
        async with httpx.AsyncClient(timeout=60) as client:
            resp = await client.post(
                f"{CRAWLER_URL}/crawl",
                json={"url": entry.url},
            )
            resp.raise_for_status()
            data = resp.json()
    # BUGFIX: resp.json() raises ValueError (json.JSONDecodeError) on a
    # non-JSON body, which previously escaped as a 500; treat it as a
    # crawler failure too.
    except (httpx.HTTPError, ValueError) as e:
        log.error("Crawler error for entry %d: %s", entry_id, e)
        raise HTTPException(status_code=502, detail="Failed to fetch full content") from e

    # Prefer readable_html (Readability-extracted clean article with images)
    readable = data.get("readable_html", "")
    full_text = data.get("text", "")
    if readable:
        entry.full_content = readable
    elif full_text:
        entry.full_content = _text_to_html(full_text)
    else:
        entry.full_content = ""

    # Recalculate reading time from plain text (~200 wpm, floor of 1 minute)
    if full_text:
        word_count = len(full_text.split())
        entry.reading_time = max(1, word_count // 200)

    await db.commit()
    await db.refresh(entry)
    return EntryOut.from_entry(entry)
|