- Link detail: shows screenshot image (clickable to open URL), URL, summary, tags
- Note detail: click note text to edit, save/cancel buttons
- Notes: AI now fixes spelling/grammar instead of rewriting
- AI returns corrected_text field for notes, worker replaces raw_content
- Removed verbose meta grid (folder/confidence/status/saved)
- Folder shown as a pill badge in meta line

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
130 lines
4.9 KiB
Python
130 lines
4.9 KiB
Python
"""OpenAI classification — structured output for folder/tags/title/summary."""
|
|
|
|
import json
|
|
import logging
|
|
|
|
import httpx
|
|
|
|
from app.config import OPENAI_API_KEY, OPENAI_MODEL, FOLDERS, TAGS
|
|
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# System message sent as the first chat message of each classification
# request (see classify_item). It is an f-string, so the allowed FOLDERS and
# TAGS from app.config are JSON-serialized into the text once, when this
# module is imported.
#
# NOTE(review): the "summary" bullet says summaries are for links/documents
# only, while the rules below ask for a 5-10 word summary for notes; likewise
# "corrected_text" asks for spelling/grammar fixes while a rule says "Only fix
# spelling". Confirm which wording is intended before editing the prompt.
SYSTEM_PROMPT = f"""You are a classification engine for a personal "second brain" knowledge management system.

Given an item (URL, note, document, or file), you must return structured JSON with:
- folder: exactly 1 from this list: {json.dumps(FOLDERS)}
- tags: exactly 2 or 3 from this list: {json.dumps(TAGS)}
- title: a concise, normalized title (max 80 chars)
- summary: a 1-2 sentence summary of the content (for links/documents only)
- corrected_text: for NOTES ONLY — return the original note text with spelling/grammar fixed. Keep the original meaning, tone, and structure. Only fix typos and obvious errors. Return empty string for non-notes.
- confidence: a float 0.0-1.0 indicating how confident you are

Rules:
- NEVER invent folders or tags not in the lists above
- NEVER skip classification
- NEVER return freeform text outside the schema
- For notes: do NOT summarize. Keep the original text. Only fix spelling.
- For notes: the summary field should be a very short 5-10 word description, not a rewrite.
- Always return valid JSON matching the schema exactly"""
|
|
|
|
# Value passed as "response_format" in the chat-completions request body
# (see classify_item). It describes the JSON object the model must return:
# the same six fields that SYSTEM_PROMPT lists.
RESPONSE_SCHEMA = {
    "type": "json_schema",
    "json_schema": {
        "name": "classification",
        # Presumably asks the API to enforce the schema exactly - confirm
        # against the provider's structured-output documentation.
        "strict": True,
        "schema": {
            "type": "object",
            "properties": {
                # folder and tags are restricted to the values configured in
                # app.config (FOLDERS / TAGS).
                "folder": {"type": "string", "enum": FOLDERS},
                "tags": {
                    "type": "array",
                    "items": {"type": "string", "enum": TAGS},
                    # Matches the "exactly 2 or 3" instruction in SYSTEM_PROMPT.
                    "minItems": 2,
                    "maxItems": 3,
                },
                "title": {"type": "string"},
                "summary": {"type": "string"},
                "corrected_text": {"type": "string"},
                "confidence": {"type": "number"},
            },
            # Every property is mandatory and no extra keys are allowed.
            "required": ["folder", "tags", "title", "summary", "corrected_text", "confidence"],
            "additionalProperties": False,
        },
    },
}
|
|
|
|
|
|
def build_user_prompt(item_type: str, url: str | None, title: str | None, text: str | None) -> str:
|
|
parts = [f"Item type: {item_type}"]
|
|
if url:
|
|
parts.append(f"URL: {url}")
|
|
if title:
|
|
parts.append(f"Original title: {title}")
|
|
if text:
|
|
# Truncate to ~4000 chars for context window efficiency
|
|
truncated = text[:4000]
|
|
parts.append(f"Content:\n{truncated}")
|
|
return "\n\n".join(parts)
|
|
|
|
|
|
# Values used whenever the model's answer is missing or unusable.
_DEFAULT_FOLDER = "Knowledge"
_DEFAULT_TAGS = ("reference", "read-later")


def _fallback_result(title: str | None, summary: str) -> dict:
    """Build the default classification, with the same keys as a model result."""
    return {
        "folder": _DEFAULT_FOLDER,
        "tags": list(_DEFAULT_TAGS),
        "title": title or "Untitled",
        "summary": summary,
        "corrected_text": "",
        "confidence": 0.0,
    }


def _normalize_tags(tags: list) -> list:
    """Keep allowed tags without duplicates, padded to at least two entries."""
    cleaned = []
    for tag in tags:
        if tag in TAGS and tag not in cleaned:
            cleaned.append(tag)
    cleaned = cleaned[:3]
    if len(cleaned) < 2:
        # Pad with the defaults, skipping any the model already returned so
        # the same tag never appears twice.
        cleaned.extend(tag for tag in _DEFAULT_TAGS if tag not in cleaned)
    return cleaned[:3]


async def classify_item(
    item_type: str,
    url: str | None = None,
    title: str | None = None,
    text: str | None = None,
    retries: int = 2,
) -> dict:
    """Call OpenAI to classify an item.

    Args:
        item_type: Kind of item being classified.
        url: Source URL, if any.
        title: Title supplied with the item, if any; also used as the
            fallback title.
        text: Body text of the item, if any.
        retries: Number of additional attempts after the first failure.
            Negative values are treated as 0.

    Returns:
        A dict with the keys ``folder``, ``tags``, ``title``, ``summary``,
        ``corrected_text`` and ``confidence``. When no API key is configured,
        or every attempt fails, a default classification with
        ``confidence`` 0.0 is returned instead. Failures inside an attempt
        (request, HTTP status, response parsing) are logged, not raised.
    """
    if not OPENAI_API_KEY:
        log.warning("No OPENAI_API_KEY set, returning defaults")
        return _fallback_result(title, "No AI classification available")

    user_msg = build_user_prompt(item_type, url, title, text)
    payload = {
        "model": OPENAI_MODEL,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_msg},
        ],
        "response_format": RESPONSE_SCHEMA,
        "temperature": 0.2,
    }

    last_error: Exception | None = None
    # Always make at least one attempt, so a negative retry count cannot skip
    # the loop and leave the function without a result.
    for attempt in range(max(retries, 0) + 1):
        try:
            async with httpx.AsyncClient(timeout=30) as client:
                resp = await client.post(
                    "https://api.openai.com/v1/chat/completions",
                    headers={"Authorization": f"Bearer {OPENAI_API_KEY}"},
                    json=payload,
                )
                resp.raise_for_status()
                data = resp.json()
                content = data["choices"][0]["message"]["content"]
                result = json.loads(content)

                # Validate folder and tags are in allowed sets
                if result["folder"] not in FOLDERS:
                    result["folder"] = _DEFAULT_FOLDER
                result["tags"] = _normalize_tags(result["tags"])
                # Keep the return shape identical to the fallback result.
                result.setdefault("corrected_text", "")

                return result

        except Exception as e:
            # Any failure in the attempt (request, status, parsing, missing
            # keys) is logged and retried until the attempts run out.
            log.error("Classification attempt %d failed: %s", attempt + 1, e)
            last_error = e

    return _fallback_result(title, f"Classification failed: {last_error}")
|