Backend: - New Folder/Tag/ItemTag models with proper relational tables - Taxonomy CRUD endpoints: list, create, rename, delete, merge tags - Sidebar endpoint with folder/tag counts - AI classification reads live folders/tags from DB, not hardcoded - Default folders/tags seeded on first request per user - folder_id FK on items for relational integrity Frontend: - Left sidebar with Folders/Tags tabs (like Karakeep) - Click folder/tag to filter items - "Manage" mode: add new folders/tags, delete existing - Counts next to each folder/tag - "All items" option to clear filter - Replaces the old signal-strip cards Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
142 lines
5.3 KiB
Python
142 lines
5.3 KiB
Python
"""OpenAI classification — structured output for folder/tags/title/summary."""
|
|
|
|
import json
|
|
import logging
|
|
|
|
import httpx
|
|
|
|
from app.config import OPENAI_API_KEY, OPENAI_MODEL
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
def build_system_prompt(folders: list[str], tags: list[str]) -> str:
    """Compose the system message that instructs the model how to classify.

    The allowed folder and tag names are embedded in the prompt as JSON
    arrays (via ``json.dumps``) so the model sees the exact spellings it
    may choose from.

    Args:
        folders: Folder names the model may pick from.
        tags: Tag names the model may pick from.

    Returns:
        The complete system prompt as a single newline-separated string.
    """
    folder_choices = json.dumps(folders)
    tag_choices = json.dumps(tags)

    # One entry per prompt line; empty strings become the blank separator lines.
    prompt_lines = (
        'You are a classification engine for a personal "second brain" knowledge management system.',
        "",
        "Given an item (URL, note, document, or file), you must return structured JSON with:",
        f"- folder: exactly 1 from this list: {folder_choices}",
        f"- tags: exactly 2 or 3 from this list: {tag_choices}",
        "- title: a concise, normalized title (max 80 chars)",
        "- summary: a 1-2 sentence summary of the content (for links/documents only)",
        "- corrected_text: for NOTES ONLY — return the original note text with spelling/grammar fixed. Keep the original meaning, tone, and structure. Only fix typos and obvious errors. Return empty string for non-notes.",
        "- confidence: a float 0.0-1.0 indicating how confident you are",
        "",
        "Rules:",
        "- NEVER invent folders or tags not in the lists above",
        "- NEVER skip classification",
        "- NEVER return freeform text outside the schema",
        "- For notes: do NOT summarize. Keep the original text. Only fix spelling.",
        "- For notes: the summary field should be a very short 5-10 word description, not a rewrite.",
        "- Always return valid JSON matching the schema exactly",
    )
    return "\n".join(prompt_lines)
|
|
|
|
|
|
def build_response_schema(folders: list[str], tags: list[str]) -> dict:
    """Assemble the ``response_format`` payload for a strict JSON-schema reply.

    The schema restricts ``folder`` to one of *folders* and ``tags`` to an
    array of two or three entries drawn from *tags*; ``title``, ``summary``
    and ``corrected_text`` are plain strings and ``confidence`` is a number.
    All six fields are required and no extra properties are allowed.

    Args:
        folders: Permitted folder names (used as the ``folder`` enum).
        tags: Permitted tag names (used as the enum for each ``tags`` item).

    Returns:
        A dict of the form ``{"type": "json_schema", "json_schema": {...}}``.
    """
    tags_schema = {
        "type": "array",
        "items": {"type": "string", "enum": tags},
        "minItems": 2,
        "maxItems": 3,
    }

    # Insertion order matters: it fixes both the serialized key order and
    # the order of the "required" list derived from it below.
    properties = {
        "folder": {"type": "string", "enum": folders},
        "tags": tags_schema,
    }
    for text_field in ("title", "summary", "corrected_text"):
        properties[text_field] = {"type": "string"}
    properties["confidence"] = {"type": "number"}

    object_schema = {
        "type": "object",
        "properties": properties,
        "required": list(properties),
        "additionalProperties": False,
    }
    named_schema = {
        "name": "classification",
        "strict": True,
        "schema": object_schema,
    }
    return {"type": "json_schema", "json_schema": named_schema}
|
|
|
|
|
|
def build_user_prompt(item_type: str, url: str | None, title: str | None, text: str | None) -> str:
|
|
parts = [f"Item type: {item_type}"]
|
|
if url:
|
|
parts.append(f"URL: {url}")
|
|
if title:
|
|
parts.append(f"Original title: {title}")
|
|
if text:
|
|
# Truncate to ~4000 chars for context window efficiency
|
|
truncated = text[:4000]
|
|
parts.append(f"Content:\n{truncated}")
|
|
return "\n\n".join(parts)
|
|
|
|
|
|
# Defaults used when the model's answer is unusable or no call can be made.
_FALLBACK_FOLDER = "Knowledge"
_FALLBACK_TAGS = ("reference", "read-later")
_MIN_TAGS = 2
_MAX_TAGS = 3


def _pick_fallback_folder(folders: list[str]) -> str:
    """Return the default folder, switching to ``folders[0]`` if it is not allowed."""
    if not folders or _FALLBACK_FOLDER in folders:
        return _FALLBACK_FOLDER
    return folders[0]


def _normalize_tags(candidate_tags: list[str], allowed_tags: list[str]) -> list[str]:
    """Keep allowed, de-duplicated tags (max 3) and pad up to 2 without inventing tags.

    Padding prefers the default tags when they are allowed, then any other
    allowed tag. The raw defaults are used only when *allowed_tags* is empty.
    The result can be shorter than 2 only if *allowed_tags* itself has fewer
    than 2 distinct entries.
    """
    picked: list[str] = []
    for tag in candidate_tags:
        if tag in allowed_tags and tag not in picked:
            picked.append(tag)
    picked = picked[:_MAX_TAGS]

    if len(picked) >= _MIN_TAGS:
        return picked

    if allowed_tags:
        preferred = [tag for tag in _FALLBACK_TAGS if tag in allowed_tags]
        padding_pool = preferred + [tag for tag in allowed_tags if tag not in preferred]
    else:
        padding_pool = list(_FALLBACK_TAGS)

    for tag in padding_pool:
        if len(picked) >= _MIN_TAGS:
            break
        if tag not in picked:
            picked.append(tag)
    return picked


def _fallback_classification(
    title: str | None, summary: str, folders: list[str], tags: list[str]
) -> dict:
    """Build the zero-confidence result returned when no model answer is available."""
    return {
        "folder": _pick_fallback_folder(folders),
        "tags": _normalize_tags([], tags),
        "title": title or "Untitled",
        "summary": summary,
        "confidence": 0.0,
    }


async def classify_item(
    item_type: str,
    url: str | None = None,
    title: str | None = None,
    text: str | None = None,
    folders: list[str] | None = None,
    tags: list[str] | None = None,
    retries: int = 2,
) -> dict:
    """Call OpenAI to classify an item.

    Args:
        item_type: Kind of item being classified (passed through to the prompt).
        url: Source URL, if any.
        title: Existing title, if any; also used as the title of fallback results.
        text: Item content, if any.
        folders: Allowed folder names. Falls back to ``app.config.FOLDERS``
            when ``None`` or empty.
        tags: Allowed tag names. Falls back to ``app.config.TAGS`` when
            ``None`` or empty.
        retries: Number of additional attempts after the first failure.
            Negative values are treated as 0, so at least one attempt is made.

    Returns:
        On success, the parsed model response (``folder``, ``tags``, ``title``,
        ``summary``, ``corrected_text``, ``confidence``) with ``folder`` and
        ``tags`` constrained to the allowed lists. If no API key is configured
        or every attempt fails, a fallback dict with ``confidence`` 0.0 is
        returned; fallback dicts do NOT contain ``corrected_text``, so callers
        should read that key with ``.get``.
    """
    # Kept as a function-local import, as in the original implementation.
    from app.config import FOLDERS, TAGS

    folders = folders or FOLDERS
    tags = tags or TAGS

    if not OPENAI_API_KEY:
        log.warning("No OPENAI_API_KEY set, returning defaults")
        return _fallback_classification(
            title, "No AI classification available", folders, tags
        )

    # The request is identical on every attempt, so build it once.
    request_headers = {"Authorization": f"Bearer {OPENAI_API_KEY}"}
    request_body = {
        "model": OPENAI_MODEL,
        "messages": [
            {"role": "system", "content": build_system_prompt(folders, tags)},
            {
                "role": "user",
                "content": build_user_prompt(item_type, url, title, text),
            },
        ],
        "response_format": build_response_schema(folders, tags),
        "temperature": 0.2,
    }

    last_error: Exception | None = None
    # max(..., 0) guarantees at least one attempt; a negative `retries`
    # previously skipped the loop entirely and returned None.
    for attempt in range(max(retries, 0) + 1):
        try:
            async with httpx.AsyncClient(timeout=30) as client:
                resp = await client.post(
                    "https://api.openai.com/v1/chat/completions",
                    headers=request_headers,
                    json=request_body,
                )
                resp.raise_for_status()
                data = resp.json()
                content = data["choices"][0]["message"]["content"]
                result = json.loads(content)

                # Validate folder and tags are in allowed sets
                if result["folder"] not in folders:
                    result["folder"] = folders[0] if folders else _FALLBACK_FOLDER
                result["tags"] = _normalize_tags(result["tags"], tags)

                return result
        except Exception as exc:
            # Deliberately broad: classification is best-effort, so any
            # transport, HTTP, or parsing failure leads to a retry and
            # ultimately to the fallback result rather than an exception.
            log.error("Classification attempt %d failed: %s", attempt + 1, exc)
            last_error = exc

    return _fallback_classification(
        title, f"Classification failed: {last_error}", folders, tags
    )
|