feat: brain service — self-contained second brain knowledge manager

Full backend service with:
- FastAPI REST API with CRUD, search, reprocess endpoints
- PostgreSQL + pgvector for items and semantic search
- Redis + RQ for background job processing
- Meilisearch for fast keyword/filter search
- Browserless/Chrome for JS rendering and screenshots
- OpenAI structured output for AI classification
- Local file storage with S3-ready abstraction
- Gateway auth via X-Gateway-User-Id header
- Own docker-compose stack (6 containers)

Classification: fixed folders (Home/Family/Work/Travel/Knowledge/Faith/Projects)
and fixed tags (28 predefined). AI assigns exactly 1 folder, 2-3 tags, title,
summary, and confidence score per item.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Yusuf Suleman
2026-04-01 11:48:29 -05:00
parent 51a8157fd4
commit 8275f3a71b
73 changed files with 24081 additions and 4209 deletions

View File

@@ -0,0 +1,80 @@
"""SQLAlchemy models for the brain service."""
import uuid
from datetime import datetime, timezone

from pgvector.sqlalchemy import Vector
from sqlalchemy import (
    Column, String, Text, Integer, Float, DateTime, ForeignKey, Index, text
)
from sqlalchemy.dialects.postgresql import JSONB, UUID, ARRAY
from sqlalchemy.orm import relationship

from app.config import OPENAI_EMBED_DIM
from app.database import Base
def new_id() -> str:
    """Return a freshly generated random (version 4) UUID as a string.

    Used as the Python-side ``default`` for the primary-key columns of the
    models in this module, which declare ``UUID(as_uuid=False)`` and therefore
    exchange UUID values with the ORM as plain strings.
    """
    return str(uuid.uuid4())
class Item(Base):
    """A single saved item (link, note, PDF, image, document or file).

    Holds the user's original input, the text extracted from it, the
    classification fields (folder, tags, summary, confidence), the processing
    state and an optional pgvector embedding.

    Timestamps are stored as naive datetimes expressed in UTC.
    """

    __tablename__ = "items"

    # String-valued UUID primary key, generated in Python by new_id().
    id = Column(UUID(as_uuid=False), primary_key=True, default=new_id)
    # Owner of the item.
    # NOTE(review): presumably the gateway-supplied user id - confirm.
    # NOTE(review): user_id also leads both composite indexes declared in
    # __table_args__, so this single-column index may be redundant - confirm
    # before dropping it.
    user_id = Column(String(64), nullable=False, index=True)
    type = Column(String(32), nullable=False, default="link")  # link|note|pdf|image|document|file
    title = Column(Text, nullable=True)
    url = Column(Text, nullable=True)
    raw_content = Column(Text, nullable=True)  # original user input (note body, etc.)
    extracted_text = Column(Text, nullable=True)  # full extracted text from page/doc

    # Classification fields.
    folder = Column(String(64), nullable=True)
    # Callable default: every insert without tags gets its own empty list.
    tags = Column(ARRAY(String), nullable=True, default=list)
    summary = Column(Text, nullable=True)
    confidence = Column(Float, nullable=True)
    # Callable default: every insert without metadata gets its own empty dict.
    metadata_json = Column(JSONB, nullable=True, default=dict)

    processing_status = Column(String(32), nullable=False, default="pending")  # pending|processing|ready|failed
    processing_error = Column(Text, nullable=True)

    # Embedding (pgvector); dimension comes from app.config.OPENAI_EMBED_DIM.
    embedding = Column(Vector(OPENAI_EMBED_DIM), nullable=True)

    # datetime.utcnow is deprecated since Python 3.12. The callables below
    # produce the same naive-UTC value, so the column type and stored data
    # are unchanged.
    created_at = Column(
        DateTime,
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
        nullable=False,
    )
    updated_at = Column(
        DateTime,
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
        onupdate=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
        nullable=False,
    )

    # Relationships
    # ORM-level cascade: assets are deleted together with their item, and an
    # asset removed from this collection is deleted as an orphan.
    assets = relationship("ItemAsset", back_populates="item", cascade="all, delete-orphan")

    __table_args__ = (
        Index("ix_items_user_status", "user_id", "processing_status"),
        Index("ix_items_user_folder", "user_id", "folder"),
        Index("ix_items_created", "created_at"),
    )
class ItemAsset(Base):
    """A stored file that belongs to an Item.

    Each row records what kind of asset it is, its filename and content
    metadata, and where the bytes live in storage. Rows reference their parent
    item through a foreign key declared with ``ON DELETE CASCADE``.
    """

    __tablename__ = "item_assets"

    # String-valued UUID primary key, generated in Python by new_id().
    id = Column(UUID(as_uuid=False), primary_key=True, default=new_id)
    # Parent item; the database removes this row when the item row is deleted.
    item_id = Column(
        UUID(as_uuid=False),
        ForeignKey("items.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    asset_type = Column(String(32), nullable=False)  # screenshot|archived_html|original_upload|extracted_file
    filename = Column(String(512), nullable=False)
    content_type = Column(String(128), nullable=True)
    size_bytes = Column(Integer, nullable=True)
    storage_path = Column(String(1024), nullable=False)  # relative path in storage

    # datetime.utcnow is deprecated since Python 3.12. This callable produces
    # the same naive-UTC value, so the column type and stored data are unchanged.
    created_at = Column(
        DateTime,
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
        nullable=False,
    )

    # Relationships
    item = relationship("Item", back_populates="assets")
class AppLink(Base):
    """Placeholder for future cross-app linking (e.g. link a saved item to a trip or task).

    Each row ties one item to one entity in another app, identified by the app
    name and that app's own entity id. No ORM relationship is declared here;
    only the foreign key to ``items.id`` (``ON DELETE CASCADE``) exists.
    """

    __tablename__ = "app_links"

    # String-valued UUID primary key, generated in Python by new_id().
    id = Column(UUID(as_uuid=False), primary_key=True, default=new_id)
    # Linked item; the database removes this row when the item row is deleted.
    item_id = Column(
        UUID(as_uuid=False),
        ForeignKey("items.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    app = Column(String(64), nullable=False)  # trips|tasks|fitness|inventory
    # Identifier of the linked entity inside the target app, kept as an opaque string.
    app_entity_id = Column(String(128), nullable=False)

    # datetime.utcnow is deprecated since Python 3.12. This callable produces
    # the same naive-UTC value, so the column type and stored data are unchanged.
    created_at = Column(
        DateTime,
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
        nullable=False,
    )