"""Feed endpoints.""" import logging import re import feedparser import httpx from fastapi import APIRouter, Depends, HTTPException from pydantic import BaseModel from sqlalchemy import func, select from sqlalchemy.ext.asyncio import AsyncSession from app.api.deps import get_db_session, get_user_id from app.models import Category, Entry, Feed log = logging.getLogger(__name__) router = APIRouter(prefix="/api/feeds", tags=["feeds"]) # ── Schemas ────────────────────────────────────────────────────────────── class CategoryRef(BaseModel): id: int title: str class Config: from_attributes = True class FeedOut(BaseModel): id: int title: str feed_url: str site_url: str | None = None category: CategoryRef | None = None class Config: from_attributes = True class FeedCreate(BaseModel): feed_url: str category_id: int | None = None class CountersOut(BaseModel): unreads: dict[str, int] # ── Helpers ────────────────────────────────────────────────────────────── def _discover_feed_url(html: str, base_url: str) -> str | None: """Try to find an RSS/Atom feed link in HTML.""" patterns = [ r']+type=["\']application/(?:rss|atom)\+xml["\'][^>]+href=["\']([^"\']+)["\']', r']+href=["\']([^"\']+)["\'][^>]+type=["\']application/(?:rss|atom)\+xml["\']', ] for pat in patterns: match = re.search(pat, html, re.IGNORECASE) if match: href = match.group(1) if href.startswith("/"): # Resolve relative URL from urllib.parse import urljoin href = urljoin(base_url, href) return href return None async def _fetch_and_parse_feed(feed_url: str) -> tuple[str, str, str | None]: """ Fetch a URL. If it's a valid feed, return (feed_url, title, site_url). If it's HTML, try to discover the feed link and follow it. """ async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client: resp = await client.get(feed_url, headers={"User-Agent": "Reader/1.0"}) resp.raise_for_status() body = resp.text parsed = feedparser.parse(body) # Check if it's a valid feed if parsed.feed.get("title") or parsed.entries: title = parsed.feed.get("title", feed_url) site_url = parsed.feed.get("link") return feed_url, title, site_url # Not a feed — try to discover from HTML discovered = _discover_feed_url(body, feed_url) if not discovered: raise HTTPException(status_code=400, detail="No RSS/Atom feed found at this URL") # Fetch the discovered feed async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client: resp2 = await client.get(discovered, headers={"User-Agent": "Reader/1.0"}) resp2.raise_for_status() parsed2 = feedparser.parse(resp2.text) title = parsed2.feed.get("title", discovered) site_url = parsed2.feed.get("link") or feed_url return discovered, title, site_url # ── Routes ─────────────────────────────────────────────────────────────── @router.get("/counters", response_model=CountersOut) async def feed_counters( user_id: str = Depends(get_user_id), db: AsyncSession = Depends(get_db_session), ): result = await db.execute( select(Entry.feed_id, func.count(Entry.id)) .where(Entry.user_id == user_id, Entry.status == "unread") .group_by(Entry.feed_id) ) unreads = {str(row[0]): row[1] for row in result.all()} return {"unreads": unreads} @router.get("", response_model=list[FeedOut]) async def list_feeds( user_id: str = Depends(get_user_id), db: AsyncSession = Depends(get_db_session), ): result = await db.execute( select(Feed) .where(Feed.user_id == user_id) .order_by(Feed.title) ) return result.scalars().all() @router.post("", response_model=FeedOut, status_code=201) async def create_feed( body: FeedCreate, user_id: str = Depends(get_user_id), db: AsyncSession = Depends(get_db_session), ): # Check for duplicate existing = await db.execute( select(Feed).where(Feed.feed_url == body.feed_url) ) if existing.scalar_one_or_none(): raise HTTPException(status_code=409, detail="Feed already exists") # Validate category belongs to user if body.category_id: cat = await db.execute( select(Category).where( Category.id == body.category_id, Category.user_id == user_id, ) ) if not cat.scalar_one_or_none(): raise HTTPException(status_code=404, detail="Category not found") # Fetch and discover feed try: actual_url, title, site_url = await _fetch_and_parse_feed(body.feed_url) except httpx.HTTPError as e: log.warning("Failed to fetch feed %s: %s", body.feed_url, e) raise HTTPException(status_code=400, detail=f"Could not fetch feed: {e}") # Check again with discovered URL if actual_url != body.feed_url: existing = await db.execute( select(Feed).where(Feed.feed_url == actual_url) ) if existing.scalar_one_or_none(): raise HTTPException(status_code=409, detail="Feed already exists") feed = Feed( user_id=user_id, category_id=body.category_id, title=title, feed_url=actual_url, site_url=site_url, ) db.add(feed) await db.commit() await db.refresh(feed) return feed @router.delete("/{feed_id}", status_code=204) async def delete_feed( feed_id: int, user_id: str = Depends(get_user_id), db: AsyncSession = Depends(get_db_session), ): result = await db.execute( select(Feed).where(Feed.id == feed_id, Feed.user_id == user_id) ) feed = result.scalar_one_or_none() if not feed: raise HTTPException(status_code=404, detail="Feed not found") await db.delete(feed) await db.commit() @router.post("/{feed_id}/refresh") async def refresh_feed( feed_id: int, user_id: str = Depends(get_user_id), db: AsyncSession = Depends(get_db_session), ): result = await db.execute( select(Feed).where(Feed.id == feed_id, Feed.user_id == user_id) ) feed = result.scalar_one_or_none() if not feed: raise HTTPException(status_code=404, detail="Feed not found") import asyncio from app.worker.tasks import fetch_single_feed await asyncio.to_thread(fetch_single_feed, feed_id) return {"ok": True, "message": f"Refreshed {feed.title}"} @router.post("/refresh-all") async def refresh_all_feeds( user_id: str = Depends(get_user_id), db: AsyncSession = Depends(get_db_session), ): result = await db.execute( select(Feed).where(Feed.user_id == user_id) ) feeds = result.scalars().all() import asyncio from app.worker.tasks import fetch_single_feed for feed in feeds: try: await asyncio.to_thread(fetch_single_feed, feed.id) except Exception: pass return {"ok": True, "message": f"Refreshed {len(feeds)} feeds"}