Server: add slim query param (default true) to entries list endpoint, returning EntrySlimOut without content/full_content HTML — cuts payload size dramatically for list views. Single entry endpoint still returns full content. iOS: ArticleView now fetches full entry content on demand when opened instead of relying on list data. Shows loading indicator while fetching. Mark-as-read is fire-and-forget to avoid blocking the view. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
319 lines
10 KiB
Python
"""Entry endpoints."""
|
|
|
|
import logging
from typing import Optional

import httpx
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, ConfigDict
from sqlalchemy import func, select, update
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload

from app.api.deps import get_db_session, get_user_id
from app.config import CRAWLER_URL
from app.models import Entry, Feed
|
|
|
|
log = logging.getLogger(__name__)
|
|
router = APIRouter(prefix="/api/entries", tags=["entries"])
|
|
|
|
|
|
# ── Schemas ──────────────────────────────────────────────────────────────
|
|
|
|
|
|
class FeedRef(BaseModel):
    """Minimal feed reference embedded in entry payloads."""

    # Pydantic v2 idiom — inner `class Config` is deprecated.
    model_config = ConfigDict(from_attributes=True)

    id: int
    title: str
|
|
|
|
|
|
class EntryOut(BaseModel):
    """Full entry payload including content fields — used for detail views."""

    # Pydantic v2 idiom — inner `class Config` is deprecated.
    model_config = ConfigDict(from_attributes=True)

    id: int
    title: str | None = None
    url: str | None = None
    # Best available HTML: crawled full content when present, else RSS content.
    content: str | None = None
    # Readability-extracted article HTML from the crawler, if fetched.
    full_content: str | None = None
    author: str | None = None
    # ISO-8601 string serialized from the ORM datetime column.
    published_at: str | None = None
    status: str = "unread"
    starred: bool = False
    # Estimated minutes to read; floor of 1.
    reading_time: int = 1
    thumbnail: str | None = None
    feed: FeedRef | None = None

    @classmethod
    def from_entry(cls, entry: Entry, slim: bool = False) -> "EntryOut | EntrySlimOut":
        """Build an API schema from an ORM ``Entry``.

        When ``slim`` is true, returns :class:`EntrySlimOut` without the
        content/full_content HTML — used by list views to cut payload size.
        """
        # Extract thumbnail from stored field, or fall back to scanning content
        thumb = entry.thumbnail
        if not thumb:
            thumb = cls._extract_thumbnail(entry.content or entry.full_content or "")

        # Shared between slim and full payloads.
        feed_ref = FeedRef(id=entry.feed.id, title=entry.feed.title) if entry.feed else None
        published = entry.published_at.isoformat() if entry.published_at else None

        if slim:
            return EntrySlimOut(
                id=entry.id,
                title=entry.title,
                url=entry.url,
                author=entry.author,
                published_at=published,
                status=entry.status,
                starred=entry.starred,
                reading_time=entry.reading_time,
                thumbnail=thumb,
                feed=feed_ref,
            )

        # Use full_content if available, otherwise RSS content
        best_content = entry.full_content if entry.full_content else entry.content
        return cls(
            id=entry.id,
            title=entry.title,
            url=entry.url,
            content=best_content,
            full_content=entry.full_content,
            author=entry.author,
            published_at=published,
            status=entry.status,
            starred=entry.starred,
            reading_time=entry.reading_time,
            thumbnail=thumb,
            feed=feed_ref,
        )

    @staticmethod
    def _extract_thumbnail(html: str) -> str | None:
        """Extract the first usable image URL from HTML content, or None."""
        if not html:
            return None
        import re
        # Only scan the head of the document to keep this cheap.
        match = re.search(r'<img[^>]+src=["\']([^"\']+)["\']', html[:3000], re.IGNORECASE)
        if match:
            # BUGFIX: was `.replace("&", "&")` (a no-op); unescape the HTML
            # entity so URLs with query strings survive.
            url = match.group(1).replace("&amp;", "&")
            # Skip tiny tracking pixels and icons
            if any(skip in url.lower() for skip in ["1x1", "pixel", "tracking", "spacer"]):
                return None
            return url
        return None
|
|
|
|
|
|
class EntrySlimOut(BaseModel):
    """Entry without content fields — used for list views."""

    # Pydantic v2 idiom — inner `class Config` is deprecated.
    model_config = ConfigDict(from_attributes=True)

    id: int
    title: str | None = None
    url: str | None = None
    author: str | None = None
    # ISO-8601 string serialized from the ORM datetime column.
    published_at: str | None = None
    status: str = "unread"
    starred: bool = False
    # Estimated minutes to read; floor of 1.
    reading_time: int = 1
    thumbnail: str | None = None
    feed: FeedRef | None = None
|
|
|
|
|
|
class EntryListOut(BaseModel):
    """Paginated listing response: total match count plus one page of entries."""

    # Total matching rows before pagination, so clients can page.
    total: int
    # EntrySlimOut when ?slim=true (the default), EntryOut otherwise.
    entries: list[EntryOut | EntrySlimOut]
|
|
|
|
|
|
class EntryBulkUpdate(BaseModel):
    """Request body for bulk read/unread status updates."""

    # IDs to update; rows not owned by the caller are filtered out by the route.
    entry_ids: list[int]
    # Target status; the route rejects anything other than "read"/"unread".
    status: str
|
|
|
|
|
|
# ── Routes ───────────────────────────────────────────────────────────────
|
|
|
|
|
|
@router.get("", response_model=EntryListOut)
async def list_entries(
    status: Optional[str] = Query(None),
    starred: Optional[bool] = Query(None),
    feed_id: Optional[int] = Query(None),
    category_id: Optional[int] = Query(None),
    slim: bool = Query(True),
    limit: int = Query(50, ge=1, le=500),
    offset: int = Query(0, ge=0),
    direction: str = Query("desc"),
    order: str = Query("published_at"),
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
):
    """Paginated entry listing with optional status/starred/feed/category filters.

    Returns slim payloads (no content HTML) by default; pass ?slim=false
    for full entries.
    """
    query = select(Entry).where(Entry.user_id == user_id)
    count_query = select(func.count(Entry.id)).where(Entry.user_id == user_id)

    # Collect simple column filters once, then apply to both queries.
    conditions = []
    if status:
        conditions.append(Entry.status == status)
    if starred is not None:
        conditions.append(Entry.starred == starred)
    if feed_id is not None:
        conditions.append(Entry.feed_id == feed_id)
    if conditions:
        query = query.where(*conditions)
        count_query = count_query.where(*conditions)

    if category_id is not None:
        # Category filter needs a join through the feed table.
        query = query.join(Feed, Entry.feed_id == Feed.id).where(Feed.category_id == category_id)
        count_query = count_query.join(Feed, Entry.feed_id == Feed.id).where(Feed.category_id == category_id)

    # Ordering: published_at (default) or created_at, NULLs pushed to the end
    # of whichever direction was requested.
    sort_col = Entry.published_at if order == "published_at" else Entry.created_at
    if direction == "asc":
        query = query.order_by(sort_col.asc().nullslast())
    else:
        query = query.order_by(sort_col.desc().nullsfirst())

    # Total count (pre-pagination).
    total = (await db.execute(count_query)).scalar() or 0

    # Fetch one page, eagerly loading each entry's feed for FeedRef.
    page_query = query.options(selectinload(Entry.feed)).offset(offset).limit(limit)
    rows = (await db.execute(page_query)).scalars().all()

    return EntryListOut(
        total=total,
        entries=[EntryOut.from_entry(row, slim=slim) for row in rows],
    )
|
|
|
|
|
|
@router.put("")
async def bulk_update_entries(
    body: EntryBulkUpdate,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
):
    """Set read/unread status on a batch of entries owned by the caller."""
    if body.status not in ("read", "unread"):
        raise HTTPException(status_code=400, detail="Status must be 'read' or 'unread'")

    stmt = (
        update(Entry)
        .where(Entry.user_id == user_id, Entry.id.in_(body.entry_ids))
        .values(status=body.status)
    )
    await db.execute(stmt)
    await db.commit()
    return {"ok": True}
|
|
|
|
|
|
class MarkAllReadBody(BaseModel):
    """Optional filters for the mark-all-read endpoint."""

    # Restrict to a single feed; takes precedence over category_id.
    feed_id: int | None = None
    # Restrict to all feeds in a category (ignored when feed_id is set).
    category_id: int | None = None
|
|
|
|
|
|
@router.put("/mark-all-read")
async def mark_all_read(
    body: MarkAllReadBody,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
):
    """Mark ALL unread entries as read, optionally filtered by feed or category.

    ``feed_id`` takes precedence over ``category_id`` when both are set.
    Returns the number of rows updated.
    """
    q = update(Entry).where(Entry.user_id == user_id, Entry.status == "unread")

    if body.feed_id:
        q = q.where(Entry.feed_id == body.feed_id)
    elif body.category_id:
        # Restrict to feeds in the category, scoped to this user so another
        # user's category id cannot widen the update.
        # (Removed a redundant function-scope `from app.models import Feed`;
        # Feed is already imported at module level.)
        feed_ids_q = select(Feed.id).where(Feed.category_id == body.category_id, Feed.user_id == user_id)
        q = q.where(Entry.feed_id.in_(feed_ids_q))

    result = await db.execute(q.values(status="read"))
    await db.commit()
    return {"ok": True, "marked": result.rowcount}
|
|
|
|
|
|
@router.get("/{entry_id}", response_model=EntryOut)
async def get_entry(
    entry_id: int,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
):
    """Return one entry with full content; 404 when absent or not owned."""
    stmt = (
        select(Entry)
        .options(selectinload(Entry.feed))
        .where(Entry.id == entry_id, Entry.user_id == user_id)
    )
    found = (await db.execute(stmt)).scalar_one_or_none()
    if not found:
        raise HTTPException(status_code=404, detail="Entry not found")
    return EntryOut.from_entry(found)
|
|
|
|
|
|
@router.put("/{entry_id}/bookmark")
async def toggle_bookmark(
    entry_id: int,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
):
    """Flip the starred flag on an entry and return the new value."""
    lookup = await db.execute(
        select(Entry).where(Entry.id == entry_id, Entry.user_id == user_id)
    )
    row = lookup.scalar_one_or_none()
    if not row:
        raise HTTPException(status_code=404, detail="Entry not found")

    row.starred = not row.starred
    await db.commit()
    return {"starred": row.starred}
|
|
|
|
|
|
def _text_to_html(text: str) -> str:
    """Wrap plain crawler text into minimal paragraph HTML.

    Splits on blank lines first; falls back to single newlines when the
    text has no blank-line paragraph breaks.
    """
    paragraphs = [p.strip() for p in text.split("\n\n") if p.strip()]
    if not paragraphs:
        paragraphs = [p.strip() for p in text.split("\n") if p.strip()]
    return "\n".join(f"<p>{p}</p>" for p in paragraphs)


@router.post("/{entry_id}/fetch-full-content", response_model=EntryOut)
async def fetch_full_content(
    entry_id: int,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
):
    """Crawl the entry's URL and persist the readable full content.

    Raises 404 if the entry is missing/not owned, 400 if it has no URL, and
    502 when the crawler fails or returns an unparseable response.
    """
    result = await db.execute(
        select(Entry)
        .options(selectinload(Entry.feed))
        .where(Entry.id == entry_id, Entry.user_id == user_id)
    )
    entry = result.scalar_one_or_none()
    if not entry:
        raise HTTPException(status_code=404, detail="Entry not found")

    if not entry.url:
        raise HTTPException(status_code=400, detail="Entry has no URL to crawl")

    try:
        async with httpx.AsyncClient(timeout=60) as client:
            resp = await client.post(
                f"{CRAWLER_URL}/crawl",
                json={"url": entry.url},
            )
            resp.raise_for_status()
            data = resp.json()
    # BUGFIX: resp.json() raises ValueError (json.JSONDecodeError) on a
    # non-JSON body, which previously escaped as a 500; treat it as a
    # crawler failure too.
    except (httpx.HTTPError, ValueError) as e:
        log.error("Crawler error for entry %d: %s", entry_id, e)
        raise HTTPException(status_code=502, detail="Failed to fetch full content") from e

    # Prefer readable_html (Readability-extracted clean article with images)
    readable = data.get("readable_html", "")
    full_text = data.get("text", "")
    if readable:
        entry.full_content = readable
    elif full_text:
        entry.full_content = _text_to_html(full_text)
    else:
        entry.full_content = ""

    # Recalculate reading time from plain text (~200 wpm, floor of 1 minute)
    if full_text:
        word_count = len(full_text.split())
        entry.reading_time = max(1, word_count // 200)

    await db.commit()
    await db.refresh(entry)
    return EntryOut.from_entry(entry)
|