Full backend service with:
- FastAPI REST API with CRUD, search, reprocess endpoints
- PostgreSQL + pgvector for items and semantic search
- Redis + RQ for background job processing
- Meilisearch for fast keyword/filter search
- Browserless/Chrome for JS rendering and screenshots
- OpenAI structured output for AI classification
- Local file storage with S3-ready abstraction
- Gateway auth via X-Gateway-User-Id header
- Own docker-compose stack (6 containers)

Classification: fixed folders (Home/Family/Work/Travel/Knowledge/Faith/Projects) and fixed tags (28 predefined). AI assigns exactly 1 folder, 2-3 tags, title, summary, and confidence score per item.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
81 lines
3.2 KiB
Python
81 lines
3.2 KiB
Python
"""SQLAlchemy models for the brain service."""
|
|
|
|
import uuid
|
|
from datetime import datetime
|
|
|
|
from pgvector.sqlalchemy import Vector
|
|
from sqlalchemy import (
|
|
Column, String, Text, Integer, Float, DateTime, ForeignKey, Index, text
|
|
)
|
|
from sqlalchemy.dialects.postgresql import JSONB, UUID, ARRAY
|
|
from sqlalchemy.orm import relationship
|
|
|
|
from app.config import OPENAI_EMBED_DIM
|
|
from app.database import Base
|
|
|
|
|
|
def new_id() -> str:
    """Return a freshly generated random (version 4) UUID as a string.

    Used as the ``default`` callable of the primary-key columns in this
    module, so each new row gets its id on the Python side.
    """
    return str(uuid.uuid4())
|
|
|
|
|
|
class Item(Base):
    """ORM model for the ``items`` table: one stored entry owned by a user.

    Holds the original input (``url`` / ``raw_content``), the text extracted
    from it, the classification fields (``folder``, ``tags``, ``summary``,
    ``confidence``), the processing state and an optional pgvector embedding.
    Related ``ItemAsset`` objects are deleted together with the item.
    """

    __tablename__ = "items"

    # UUID stored as a plain string (as_uuid=False), generated by new_id().
    id = Column(UUID(as_uuid=False), primary_key=True, default=new_id)
    user_id = Column(String(64), nullable=False, index=True)
    type = Column(String(32), nullable=False, default="link")  # link|note|pdf|image|document|file
    title = Column(Text, nullable=True)
    url = Column(Text, nullable=True)
    raw_content = Column(Text, nullable=True)  # original user input (note body, etc.)
    extracted_text = Column(Text, nullable=True)  # full extracted text from page/doc
    folder = Column(String(64), nullable=True)
    # `list` / `dict` are passed as callables, so every row gets a fresh object.
    tags = Column(ARRAY(String), nullable=True, default=list)
    summary = Column(Text, nullable=True)
    confidence = Column(Float, nullable=True)
    metadata_json = Column(JSONB, nullable=True, default=dict)
    processing_status = Column(
        String(32), nullable=False, default="pending"
    )  # pending|processing|ready|failed
    processing_error = Column(Text, nullable=True)

    # Embedding (pgvector); dimension comes from app.config.OPENAI_EMBED_DIM.
    embedding = Column(Vector(OPENAI_EMBED_DIM), nullable=True)

    # Naive UTC timestamps: datetime.utcnow on a DateTime column without timezone.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
        nullable=False,
    )

    # Relationships
    assets = relationship(
        "ItemAsset",
        back_populates="item",
        cascade="all, delete-orphan",
    )

    # Composite indexes for per-user filtering, plus one on creation time.
    __table_args__ = (
        Index("ix_items_user_status", "user_id", "processing_status"),
        Index("ix_items_user_folder", "user_id", "folder"),
        Index("ix_items_created", "created_at"),
    )
|
|
|
|
|
|
class ItemAsset(Base):
    """ORM model for the ``item_assets`` table: a stored file attached to an item.

    Each row references its parent through ``item_id``; the foreign key is
    declared with ``ondelete="CASCADE"``.
    """

    __tablename__ = "item_assets"

    # UUID stored as a plain string (as_uuid=False), generated by new_id().
    id = Column(UUID(as_uuid=False), primary_key=True, default=new_id)
    item_id = Column(
        UUID(as_uuid=False),
        ForeignKey("items.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    asset_type = Column(
        String(32), nullable=False
    )  # screenshot|archived_html|original_upload|extracted_file
    filename = Column(String(512), nullable=False)
    content_type = Column(String(128), nullable=True)
    size_bytes = Column(Integer, nullable=True)
    storage_path = Column(String(1024), nullable=False)  # relative path in storage

    # Naive UTC timestamp: datetime.utcnow on a DateTime column without timezone.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    # Relationships
    item = relationship("Item", back_populates="assets")
|
|
|
|
|
|
class AppLink(Base):
    """Placeholder for future cross-app linking (e.g. link a saved item to a trip or task)."""

    __tablename__ = "app_links"

    # UUID stored as a plain string (as_uuid=False), generated by new_id().
    id = Column(UUID(as_uuid=False), primary_key=True, default=new_id)
    item_id = Column(
        UUID(as_uuid=False),
        ForeignKey("items.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    app = Column(String(64), nullable=False)  # trips|tasks|fitness|inventory
    # Identifier of the linked entity inside `app`; stored as an opaque string.
    app_entity_id = Column(String(128), nullable=False)
    # Naive UTC timestamp: datetime.utcnow on a DateTime column without timezone.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|