feat: brain service — self-contained second brain knowledge manager

Full backend service with:
- FastAPI REST API with CRUD, search, reprocess endpoints
- PostgreSQL + pgvector for items and semantic search
- Redis + RQ for background job processing
- Meilisearch for fast keyword/filter search
- Browserless/Chrome for JS rendering and screenshots
- OpenAI structured output for AI classification
- Local file storage with S3-ready abstraction
- Gateway auth via X-Gateway-User-Id header
- Own docker-compose stack (6 containers)

Classification: fixed folders (Home/Family/Work/Travel/Knowledge/Faith/Projects)
and fixed tags (28 predefined). AI assigns exactly 1 folder, 2-3 tags, title,
summary, and confidence score per item.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Yusuf Suleman
2026-04-01 11:48:29 -05:00
parent 51a8157fd4
commit 8275f3a71b
73 changed files with 24081 additions and 4209 deletions

View File

View File

@@ -0,0 +1,21 @@
"""API dependencies — auth, database session."""
from fastapi import Depends, Header, HTTPException
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
async def get_user_id(
    x_gateway_user_id: str = Header(None, alias="X-Gateway-User-Id"),
) -> str:
    """Return the caller's user ID taken from the ``X-Gateway-User-Id`` header.

    Raises:
        HTTPException: 401 when the header is missing or empty.
    """
    # Happy path first: a non-empty header value is returned unchanged.
    if x_gateway_user_id:
        return x_gateway_user_id
    raise HTTPException(status_code=401, detail="Not authenticated")
async def get_db_session() -> AsyncSession:
    """Yield every session produced by ``get_db``.

    This is an async generator that simply re-yields what ``get_db``
    yields, so session setup and teardown stay in ``get_db``.
    """
    sessions = get_db()
    async for db in sessions:
        yield db

View File

@@ -0,0 +1,319 @@
"""Brain API endpoints."""
from __future__ import annotations
import uuid
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, Query
from sqlalchemy import select, func, desc
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from app.api.deps import get_user_id, get_db_session
from app.config import FOLDERS, TAGS
from app.models.item import Item, ItemAsset
from app.models.schema import (
ItemCreate, ItemUpdate, ItemOut, ItemList, SearchQuery, SemanticSearchQuery,
HybridSearchQuery, SearchResult, ConfigOut,
)
from app.services.storage import storage
from app.worker.tasks import enqueue_process_item
router = APIRouter(prefix="/api", tags=["brain"])
# ── Health ──
@router.get("/health")
async def health():
    """Return a static payload reporting status ``ok`` for the brain service."""
    payload = {"status": "ok", "service": "brain"}
    return payload
# ── Config ──
@router.get("/config", response_model=ConfigOut)
async def get_config():
    """Return the folders and tags defined in ``app.config``."""
    config = ConfigOut(tags=TAGS, folders=FOLDERS)
    return config
# ── Create item ──
@router.post("/items", response_model=ItemOut, status_code=201)
async def create_item(
    body: ItemCreate,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
):
    """Persist a new item for the caller and queue it for processing.

    The row is stored with ``processing_status="pending"``; the background
    job is enqueued only after the commit has succeeded.

    Returns:
        The newly created item, with its ``assets`` relationship loaded.
    """
    fields = dict(
        id=str(uuid.uuid4()),
        user_id=user_id,
        type=body.type,
        url=body.url,
        raw_content=body.raw_content,
        title=body.title,
        folder=body.folder,
        tags=body.tags or [],  # store an empty list rather than None
        processing_status="pending",
    )
    record = Item(**fields)

    db.add(record)
    await db.commit()
    # Load the relationship explicitly so the response can include it.
    await db.refresh(record, ["assets"])

    enqueue_process_item(record.id)
    return record
# ── Upload file ──
@router.post("/items/upload", response_model=ItemOut, status_code=201)
async def upload_file(
    file: UploadFile = File(...),
    title: Optional[str] = Form(None),
    folder: Optional[str] = Form(None),
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
):
    """Store an uploaded file and create the item plus its original asset.

    The item type is derived from the upload's content type: ``image`` for
    ``image/*``, ``pdf`` for ``application/pdf``, otherwise ``file``.

    Args:
        file: The uploaded file.
        title: Optional title; falls back to the uploaded filename.
        folder: Optional folder to assign to the item.
        user_id: Owner of the new item.
        db: Async database session.

    Returns:
        The newly created item, with its ``assets`` relationship loaded.

    Raises:
        Exception: Any error from the database write is re-raised after the
            transaction is rolled back and the stored file is removed.
    """
    item_id = str(uuid.uuid4())
    content_type = file.content_type or "application/octet-stream"
    filename = file.filename or "upload"

    # Determine the item type from the content type.
    if content_type.startswith("image/"):
        item_type = "image"
    elif content_type == "application/pdf":
        item_type = "pdf"
    else:
        item_type = "file"

    # Store the uploaded file.
    data = await file.read()
    path = storage.save(
        item_id=item_id,
        asset_type="original_upload",
        filename=filename,
        data=data,
    )

    item = Item(
        id=item_id,
        user_id=user_id,
        type=item_type,
        title=title or file.filename,
        folder=folder,
        processing_status="pending",
    )
    asset = ItemAsset(
        id=str(uuid.uuid4()),
        item_id=item_id,
        asset_type="original_upload",
        filename=filename,
        content_type=content_type,
        size_bytes=len(data),
        storage_path=path,
    )

    try:
        db.add(item)
        db.add(asset)
        await db.commit()
    except Exception:
        # The file is already in storage; without this cleanup a failed
        # commit would leave an orphaned file that no row references.
        await db.rollback()
        storage.delete(path)
        raise

    await db.refresh(item, ["assets"])
    enqueue_process_item(item.id)
    return item
# ── Get item ──
@router.get("/items/{item_id}", response_model=ItemOut)
async def get_item(
    item_id: str,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
):
    """Fetch one of the caller's items, with its assets, by ID.

    Raises:
        HTTPException: 404 when no item with this ID belongs to the caller.
    """
    stmt = (
        select(Item)
        .options(selectinload(Item.assets))
        .where(Item.id == item_id, Item.user_id == user_id)
    )
    found = (await db.execute(stmt)).scalar_one_or_none()
    if found:
        return found
    raise HTTPException(status_code=404, detail="Item not found")
# ── List items ──
@router.get("/items", response_model=ItemList)
async def list_items(
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
    folder: Optional[str] = Query(None),
    tag: Optional[str] = Query(None),
    type: Optional[str] = Query(None),
    status: Optional[str] = Query(None),
    limit: int = Query(20, ge=0, le=100),
    offset: int = Query(0, ge=0),
):
    """List the caller's items, newest first, with optional filters.

    Args:
        user_id: Owner whose items are listed.
        db: Async database session.
        folder: Keep only items in this folder.
        tag: Keep only items whose tags contain this tag.
        type: Keep only items of this type.
        status: Keep only items with this processing status.
        limit: Page size, between 0 and 100.
        offset: Number of rows to skip; must not be negative.

    Returns:
        An ``ItemList`` holding the requested page and ``total``, the number
        of rows matching the filters before pagination.
    """
    # ``ge=0`` on limit/offset rejects negative values during request
    # validation instead of passing them into the SQL LIMIT/OFFSET clauses.
    q = select(Item).options(selectinload(Item.assets)).where(Item.user_id == user_id)
    if folder:
        q = q.where(Item.folder == folder)
    if tag:
        q = q.where(Item.tags.contains([tag]))
    if type:
        q = q.where(Item.type == type)
    if status:
        q = q.where(Item.processing_status == status)

    # Count all matching rows before ordering and pagination are applied.
    count_q = select(func.count()).select_from(q.subquery())
    total = (await db.execute(count_q)).scalar() or 0

    # Fetch the requested page.
    q = q.order_by(desc(Item.created_at)).offset(offset).limit(limit)
    result = await db.execute(q)
    items = result.scalars().all()
    return ItemList(items=items, total=total)
# ── Update item ──
@router.patch("/items/{item_id}", response_model=ItemOut)
async def update_item(
    item_id: str,
    body: ItemUpdate,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
):
    """Apply a partial update to one of the caller's items.

    Only fields that are not ``None`` in the request body are written;
    ``updated_at`` is always bumped.

    Raises:
        HTTPException: 404 when no item with this ID belongs to the caller.
    """
    stmt = (
        select(Item)
        .options(selectinload(Item.assets))
        .where(Item.id == item_id, Item.user_id == user_id)
    )
    item = (await db.execute(stmt)).scalar_one_or_none()
    if not item:
        raise HTTPException(status_code=404, detail="Item not found")

    # Copy over each editable field that the client actually supplied.
    for field in ("title", "folder", "tags", "raw_content"):
        new_value = getattr(body, field)
        if new_value is not None:
            setattr(item, field, new_value)

    item.updated_at = datetime.utcnow()
    await db.commit()
    await db.refresh(item)
    return item
# ── Delete item ──
@router.delete("/items/{item_id}")
async def delete_item(
    item_id: str,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
):
    """Delete one of the caller's items and the files stored for its assets.

    Args:
        item_id: ID of the item to delete.
        user_id: Owner of the item.
        db: Async database session.

    Returns:
        ``{"status": "deleted"}`` on success.

    Raises:
        HTTPException: 404 when no item with this ID belongs to the caller.
    """
    result = await db.execute(
        select(Item).where(Item.id == item_id, Item.user_id == user_id)
    )
    item = result.scalar_one_or_none()
    if not item:
        raise HTTPException(status_code=404, detail="Item not found")

    # Collect the storage paths as plain values before the rows are deleted.
    asset_paths = (
        await db.execute(
            select(ItemAsset.storage_path).where(ItemAsset.item_id == item_id)
        )
    ).scalars().all()

    await db.delete(item)
    await db.commit()

    # Remove the files only after the commit has succeeded. If the commit
    # fails, the rows still point at files that exist; deleting first could
    # leave rows that reference missing files.
    for path in asset_paths:
        storage.delete(path)
    return {"status": "deleted"}
# ── Reprocess item ──
@router.post("/items/{item_id}/reprocess", response_model=ItemOut)
async def reprocess_item(
    item_id: str,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
):
    """Reset one of the caller's items to ``pending`` and queue it again.

    Args:
        item_id: ID of the item to reprocess.
        user_id: Owner of the item.
        db: Async database session.

    Returns:
        The item after its processing status and error have been reset.

    Raises:
        HTTPException: 404 when no item with this ID belongs to the caller.
    """
    result = await db.execute(
        select(Item).options(selectinload(Item.assets))
        .where(Item.id == item_id, Item.user_id == user_id)
    )
    item = result.scalar_one_or_none()
    if not item:
        raise HTTPException(status_code=404, detail="Item not found")

    item.processing_status = "pending"
    item.processing_error = None
    item.updated_at = datetime.utcnow()
    await db.commit()
    # Reload state after the commit, as update_item does, so neither the
    # ``item.id`` read below nor response serialization touches attributes
    # the commit may have expired.
    await db.refresh(item)

    enqueue_process_item(item.id)
    return item
# ── Search (keyword via Meilisearch) ──
@router.post("/search", response_model=SearchResult)
async def search_items(
    body: SearchQuery,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
):
    """Run a keyword search and return the matching items in ranked order.

    ``keyword_search`` supplies the ordered item IDs and the total hit
    count; the full rows are then loaded from the database.

    Args:
        body: Query text plus optional folder, tag and type filters, and
            paging values.
        user_id: Owner whose items are searched.
        db: Async database session.

    Returns:
        A ``SearchResult`` with the items in the order ``keyword_search``
        returned them, its reported total, and the original query.
    """
    from app.search.engine import keyword_search

    item_ids, total = await keyword_search(
        user_id=user_id, q=body.q, folder=body.folder, tags=body.tags,
        item_type=body.type, limit=body.limit, offset=body.offset,
    )
    if not item_ids:
        return SearchResult(items=[], total=0, query=body.q)

    # Scope the lookup to the caller as well as to the returned IDs, so the
    # database enforces ownership even if the search index returns an ID
    # that belongs to another user.
    result = await db.execute(
        select(Item).options(selectinload(Item.assets))
        .where(Item.id.in_(item_ids), Item.user_id == user_id)
    )
    items_by_id = {row.id: row for row in result.scalars().all()}
    # Keep the search ranking and skip IDs that have no matching row.
    ordered = [items_by_id[hit_id] for hit_id in item_ids if hit_id in items_by_id]
    return SearchResult(items=ordered, total=total, query=body.q)
# ── Semantic search (pgvector) ──
@router.post("/search/semantic", response_model=SearchResult)
async def semantic_search(
    body: SemanticSearchQuery,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
):
    """Delegate to ``vector_search`` and wrap its items in a ``SearchResult``.

    ``total`` is the number of items returned, not a separate count.
    """
    from app.search.engine import vector_search

    hits = await vector_search(
        db=db,
        user_id=user_id,
        q=body.q,
        folder=body.folder,
        item_type=body.type,
        limit=body.limit,
    )
    hit_count = len(hits)
    return SearchResult(items=hits, total=hit_count, query=body.q)
# ── Hybrid search ──
@router.post("/search/hybrid", response_model=SearchResult)
async def hybrid_search(
    body: HybridSearchQuery,
    user_id: str = Depends(get_user_id),
    db: AsyncSession = Depends(get_db_session),
):
    """Delegate to the search engine's ``hybrid_search`` and wrap its items.

    ``total`` is the number of items returned, not a separate count.
    """
    # Imported under another name because this endpoint is also called
    # ``hybrid_search``.
    from app.search.engine import hybrid_search as run_hybrid

    hits = await run_hybrid(
        db=db,
        user_id=user_id,
        q=body.q,
        folder=body.folder,
        tags=body.tags,
        item_type=body.type,
        limit=body.limit,
    )
    hit_count = len(hits)
    return SearchResult(items=hits, total=hit_count, query=body.q)