feat: brain taxonomy — DB-backed folders/tags, sidebar, CRUD API

Backend:
- New Folder/Tag/ItemTag models with proper relational tables
- Taxonomy CRUD endpoints: list, create, rename, delete, merge tags
- Sidebar endpoint with folder/tag counts
- AI classification reads live folders/tags from DB, not hardcoded
- Default folders/tags seeded on first request per user
- folder_id FK on items for relational integrity

Frontend:
- Left sidebar with Folders/Tags tabs (like Karakeep)
- Click folder/tag to filter items
- "Manage" mode: add new folders/tags, delete existing
- Counts next to each folder/tag
- "All items" option to clear filter
- Replaces the old signal-strip cards

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-01 20:23:45 -05:00
parent 4805729f87
commit 68a8d4c228
7 changed files with 693 additions and 100 deletions
--- a/services/brain/app/services/classify.py
+++ b/services/brain/app/services/classify.py
@@ -5,15 +5,17 @@ import logging

 import httpx

-from app.config import OPENAI_API_KEY, OPENAI_MODEL, FOLDERS, TAGS
+from app.config import OPENAI_API_KEY, OPENAI_MODEL

 log = logging.getLogger(__name__)

-SYSTEM_PROMPT = f"""You are a classification engine for a personal "second brain" knowledge management system.
+
+def build_system_prompt(folders: list[str], tags: list[str]) -> str:
+    return f"""You are a classification engine for a personal "second brain" knowledge management system.

 Given an item (URL, note, document, or file), you must return structured JSON with:
- folder: exactly 1 from this list: {json.dumps(FOLDERS)}
- tags: exactly 2 or 3 from this list: {json.dumps(TAGS)}
+- folder: exactly 1 from this list: {json.dumps(folders)}
+- tags: exactly 2 or 3 from this list: {json.dumps(tags)}
 - title: a concise, normalized title (max 80 chars)
 - summary: a 1-2 sentence summary of the content (for links/documents only)
 - corrected_text: for NOTES ONLY — return the original note text with spelling/grammar fixed. Keep the original meaning, tone, and structure. Only fix typos and obvious errors. Return empty string for non-notes.
@@ -27,7 +29,9 @@ Rules:
 - For notes: the summary field should be a very short 5-10 word description, not a rewrite.
 - Always return valid JSON matching the schema exactly"""

-RESPONSE_SCHEMA = {
+
+def build_response_schema(folders: list[str], tags: list[str]) -> dict:
+    return {
    "type": "json_schema",
    "json_schema": {
        "name": "classification",
@@ -35,10 +39,10 @@ RESPONSE_SCHEMA = {
        "schema": {
            "type": "object",
            "properties": {
-                "folder": {"type": "string", "enum": FOLDERS},
+                "folder": {"type": "string", "enum": folders},
                "tags": {
                    "type": "array",
-                    "items": {"type": "string", "enum": TAGS},
+                    "items": {"type": "string", "enum": tags},
                    "minItems": 2,
                    "maxItems": 3,
                },
@@ -72,9 +76,15 @@ async def classify_item(
    url: str | None = None,
    title: str | None = None,
    text: str | None = None,
+    folders: list[str] | None = None,
+    tags: list[str] | None = None,
    retries: int = 2,
 ) -> dict:
    """Call OpenAI to classify an item. Returns dict with folder, tags, title, summary, confidence."""
+    from app.config import FOLDERS, TAGS
+    folders = folders or FOLDERS
+    tags = tags or TAGS
+
    if not OPENAI_API_KEY:
        log.warning("No OPENAI_API_KEY set, returning defaults")
        return {
@@ -86,6 +96,8 @@ async def classify_item(
        }

    user_msg = build_user_prompt(item_type, url, title, text)
+    system_prompt = build_system_prompt(folders, tags)
+    response_schema = build_response_schema(folders, tags)

    for attempt in range(retries + 1):
        try:
@@ -96,10 +108,10 @@ async def classify_item(
                    json={
                        "model": OPENAI_MODEL,
                        "messages": [
-                            {"role": "system", "content": SYSTEM_PROMPT},
+                            {"role": "system", "content": system_prompt},
                            {"role": "user", "content": user_msg},
                        ],
-                        "response_format": RESPONSE_SCHEMA,
+                        "response_format": response_schema,
                        "temperature": 0.2,
                    },
                )
@@ -109,9 +121,9 @@ async def classify_item(
                result = json.loads(content)

                # Validate folder and tags are in allowed sets
-                if result["folder"] not in FOLDERS:
-                    result["folder"] = "Knowledge"
-                result["tags"] = [t for t in result["tags"] if t in TAGS][:3]
+                if result["folder"] not in folders:
+                    result["folder"] = folders[0] if folders else "Knowledge"
+                result["tags"] = [t for t in result["tags"] if t in tags][:3]
                if len(result["tags"]) < 2:
                    result["tags"] = (result["tags"] + ["reference", "read-later"])[:3]