feat: major platform expansion — Brain service, RSS reader, iOS app, AI assistants, Firefox extension
Brain Service: - Playwright stealth crawler replacing browserless (og:image, Readability, Reddit JSON API) - AI classification with tag definitions and folder assignment - YouTube video download via yt-dlp - Karakeep migration complete (96 items) - Taxonomy management (folders with icons/colors, tags) - Discovery shuffle, sort options, search (Meilisearch + pgvector) - Item tag/folder editing, card color accents RSS Reader Service: - Custom FastAPI reader replacing Miniflux - Feed management (add/delete/refresh), category support - Full article extraction via Readability - Background content fetching for new entries - Mark all read with confirmation - Infinite scroll, retention cleanup (30/60 day) - 17 feeds migrated from Miniflux iOS App (SwiftUI): - Native iOS 17+ app with @Observable architecture - Cookie-based auth, configurable gateway URL - Dashboard with custom background photo + frosted glass widgets - Full fitness module (today/templates/goals/food library) - AI assistant chat (fitness + brain, raw JSON state management) - 120fps ProMotion support AI Assistants (Gateway): - Unified dispatcher with fitness/brain domain detection - Fitness: natural language food logging, photo analysis, multi-item splitting - Brain: save/append/update/delete notes, search & answer, undo support - Madiha user gets fitness-only (brain disabled) Firefox Extension: - One-click save to Brain from any page - Login with platform credentials - Right-click context menu (save page/link/image) - Notes field for URL saves - Signed and published on AMO Other: - Reader bookmark button routes to Brain (was Karakeep) - Fitness food library with "Add" button + add-to-meal popup - Kindle send file size check (25MB SMTP2GO limit) - Atelier UI as default (useAtelierShell=true) - Mobile upload box in nav drawer Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
254
services/brain/migrate_karakeep.py
Normal file
254
services/brain/migrate_karakeep.py
Normal file
@@ -0,0 +1,254 @@
|
||||
"""Migrate all bookmarks from Karakeep into Brain service via API."""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import tempfile
|
||||
|
||||
# --- Connection settings -----------------------------------------------------
# Karakeep (migration source); overridable via environment.
KARAKEEP_URL = os.environ.get("KARAKEEP_URL", "http://192.168.1.42:3005")
# SECURITY: the API key must come from the environment. A live key was
# previously hard-coded here as the fallback and committed to the repo —
# it should be rotated on the Karakeep side. No default is provided.
KARAKEEP_API_KEY = os.environ.get("KARAKEEP_API_KEY", "")
# Brain (migration destination) service and the user to impersonate via the
# X-Gateway-User-Id header.
BRAIN_URL = "http://localhost:8200"
BRAIN_USER = "admin"
|
||||
|
||||
|
||||
def karakeep_get(path):
    """GET a JSON resource from the Karakeep API and return the parsed body.

    `path` is an absolute API path (e.g. "/api/v1/bookmarks?limit=100").
    Raises urllib.error.HTTPError/URLError on HTTP or network failure,
    and json.JSONDecodeError on a non-JSON body.
    """
    req = urllib.request.Request(
        f"{KARAKEEP_URL}{path}",
        headers={"Authorization": f"Bearer {KARAKEEP_API_KEY}"},
    )
    # Context manager closes the response promptly instead of leaking the
    # socket until garbage collection (the original never closed it).
    with urllib.request.urlopen(req, timeout=30) as resp:
        return json.loads(resp.read())
|
||||
|
||||
|
||||
def karakeep_download(asset_id):
    """Download a binary asset (PDF/image) from Karakeep.

    Returns a (bytes, content_type) tuple; content_type falls back to
    "application/octet-stream" when the server omits the header.
    """
    req = urllib.request.Request(
        f"{KARAKEEP_URL}/api/v1/assets/{asset_id}",
        headers={"Authorization": f"Bearer {KARAKEEP_API_KEY}"},
    )
    # Generous timeout: assets can be multi-MB PDFs. The context manager
    # closes the connection (original leaked the response object).
    with urllib.request.urlopen(req, timeout=120) as resp:
        data = resp.read()
        content_type = resp.headers.get("Content-Type", "application/octet-stream")
    return data, content_type
|
||||
|
||||
|
||||
def brain_post_json(path, data):
    """POST a JSON payload to the Brain API (path is relative to /api).

    Authenticates as BRAIN_USER via the X-Gateway-User-Id header and
    returns the parsed JSON response body.
    """
    body = json.dumps(data).encode()
    req = urllib.request.Request(
        f"{BRAIN_URL}/api{path}",
        data=body,
        headers={"X-Gateway-User-Id": BRAIN_USER, "Content-Type": "application/json"},
        method="POST",
    )
    # Close the response deterministically (original leaked the socket).
    with urllib.request.urlopen(req, timeout=30) as resp:
        return json.loads(resp.read())
|
||||
|
||||
|
||||
def brain_upload(file_data, filename, content_type, title=None):
    """Multipart upload to /api/items/upload.

    Args:
        file_data: raw file bytes.
        filename: file name to report in the multipart part.
        content_type: MIME type for the file part.
        title: optional item title sent as a second form field.

    Returns the parsed JSON response from the Brain service.

    BUG FIX: the `filename` argument was previously ignored — the
    Content-Disposition header hard-coded a placeholder name. It is now
    sent (sanitized) so Brain stores a meaningful file name.
    """
    boundary = "----MigrationBoundary12345"
    # Strip quotes and CR/LF so the header value cannot break the
    # multipart framing or be used for header injection.
    safe_name = (
        filename.replace('"', "'").replace("\r", " ").replace("\n", " ")
    )
    parts = []

    # File part
    parts.append(f"--{boundary}\r\n".encode())
    parts.append(
        f'Content-Disposition: form-data; name="file"; filename="{safe_name}"\r\n'.encode()
    )
    parts.append(f"Content-Type: {content_type}\r\n\r\n".encode())
    parts.append(file_data)
    parts.append(b"\r\n")

    # Optional title part
    if title:
        parts.append(f"--{boundary}\r\n".encode())
        parts.append(b'Content-Disposition: form-data; name="title"\r\n\r\n')
        parts.append(title.encode())
        parts.append(b"\r\n")

    parts.append(f"--{boundary}--\r\n".encode())
    body = b"".join(parts)

    req = urllib.request.Request(
        f"{BRAIN_URL}/api/items/upload",
        data=body,
        headers={
            "X-Gateway-User-Id": BRAIN_USER,
            "Content-Type": f"multipart/form-data; boundary={boundary}",
        },
        method="POST",
    )
    # Close the response deterministically (original leaked the socket).
    with urllib.request.urlopen(req, timeout=60) as resp:
        return json.loads(resp.read())
|
||||
|
||||
|
||||
def brain_get_item(item_id):
    """Fetch a single Brain item as a dict (used to poll processing status)."""
    req = urllib.request.Request(
        f"{BRAIN_URL}/api/items/{item_id}",
        headers={"X-Gateway-User-Id": BRAIN_USER},
    )
    # Close the response deterministically (original leaked the socket).
    with urllib.request.urlopen(req, timeout=15) as resp:
        return json.loads(resp.read())
|
||||
|
||||
|
||||
def fetch_all_bookmarks():
    """Return every bookmark from Karakeep, following cursor pagination.

    Requests pages of 100 and keeps going while the API returns both a
    `nextCursor` token and a non-empty page.
    """
    collected = []
    cursor = None
    has_more = True
    while has_more:
        path = "/api/v1/bookmarks?limit=100"
        if cursor is not None:
            path = f"{path}&cursor={cursor}"
        page = karakeep_get(path)
        batch = page.get("bookmarks", [])
        collected.extend(batch)
        cursor = page.get("nextCursor")
        # Stop on a missing cursor OR an empty page — either means the end.
        has_more = bool(cursor) and bool(batch)
    return collected
|
||||
|
||||
|
||||
def wait_for_processing(item_id, timeout=120):
    """Poll the Brain item every 3s until processing reaches a terminal state.

    Returns the item dict as soon as `processing_status` is "ready" or
    "error"; once `timeout` seconds elapse, returns the latest snapshot
    regardless of status.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        snapshot = brain_get_item(item_id)
        if snapshot.get("processing_status", "pending") in ("ready", "error"):
            return snapshot
        time.sleep(3)
    # Timed out — hand back whatever state the item is in now.
    return brain_get_item(item_id)
|
||||
|
||||
|
||||
def main():
    """Migrate every Karakeep bookmark into Brain and report the outcome.

    For each bookmark: creates a Brain item (link/note/file upload),
    waits for Brain's AI classification to finish, then compares the AI's
    title/folder/tags against Karakeep's. Prints a running log, a summary,
    and writes the full comparison to /tmp/migration_comparison.json.
    """
    print("Fetching all Karakeep bookmarks...")
    bookmarks = fetch_all_bookmarks()
    print(f"Found {len(bookmarks)} bookmarks\n")

    # Sort: notes first, then links, then assets (PDFs take longer)
    def sort_key(b):
        # Unknown content types sort last (bucket 3).
        t = b.get("content", {}).get("type", "")
        return {"text": 0, "link": 1, "asset": 2}.get(t, 3)
    bookmarks.sort(key=sort_key)

    # Running tallies and per-item Karakeep-vs-AI comparison records.
    results = {"success": 0, "error": 0, "skipped": 0}
    comparison = []

    for i, bk in enumerate(bookmarks):
        content = bk.get("content", {})
        bk_type = content.get("type", "unknown")
        bk_title = bk.get("title") or "Untitled"
        bk_tags = [t["name"] for t in bk.get("tags", [])]
        # NOTE(review): assumes a bookmark belongs to at most one list —
        # confirm against the Karakeep API schema.
        bk_list = bk.get("list", {})
        bk_folder = bk_list.get("name") if bk_list else None

        print(f"[{i+1}/{len(bookmarks)}] {bk_type}: {bk_title[:60]}")

        # One try per bookmark: a single failure is logged and counted
        # without aborting the rest of the migration.
        try:
            if bk_type == "link":
                url = content.get("url", "")
                if not url:
                    print(" SKIP: no URL")
                    results["skipped"] += 1
                    continue
                # "Untitled" is this script's placeholder, not a real title —
                # send None so Brain's AI generates one.
                resp = brain_post_json("/items", {
                    "type": "link",
                    "url": url,
                    "title": bk_title if bk_title != "Untitled" else None,
                })

            elif bk_type == "text":
                text = content.get("text", "")
                if not text:
                    print(" SKIP: no text")
                    results["skipped"] += 1
                    continue
                resp = brain_post_json("/items", {
                    "type": "note",
                    "raw_content": text,
                    "title": bk_title if bk_title != "Untitled" else None,
                })

            elif bk_type == "asset":
                asset_id = content.get("assetId")
                asset_type = content.get("assetType", "unknown")
                if not asset_id:
                    print(" SKIP: no assetId")
                    results["skipped"] += 1
                    continue

                print(f" Downloading {asset_type} ({asset_id[:8]})...")
                file_data, ct = karakeep_download(asset_id)
                # Derive a filename from the (truncated) title and asset type.
                ext = {"pdf": ".pdf", "image": ".png"}.get(asset_type, ".bin")
                filename = f"{bk_title[:50]}{ext}" if bk_title != "Untitled" else f"upload{ext}"
                # Clean filename
                filename = filename.replace("/", "-").replace("\\", "-")
                # Karakeep's Content-Type for PDFs is unreliable; force it.
                if asset_type == "pdf":
                    ct = "application/pdf"
                resp = brain_upload(file_data, filename, ct, title=bk_title if bk_title != "Untitled" else None)
            else:
                print(f" SKIP: unknown type '{bk_type}'")
                results["skipped"] += 1
                continue

            item_id = resp.get("id")
            print(f" Created: {item_id} — waiting for AI classification...")

            # Wait for processing
            final = wait_for_processing(item_id, timeout=90)
            status = final.get("processing_status", "?")
            ai_folder = final.get("folder", "?")
            ai_tags = final.get("tags", [])
            ai_title = final.get("title", "?")

            # Compare
            entry = {
                "karakeep_title": bk_title,
                "karakeep_tags": bk_tags,
                "karakeep_folder": bk_folder,
                "ai_title": ai_title,
                "ai_folder": ai_folder,
                "ai_tags": ai_tags,
                "status": status,
            }
            comparison.append(entry)

            # "OK" when at least one tag overlaps, or both sides are untagged.
            tag_match = "OK" if set(bk_tags) & set(ai_tags) or (not bk_tags and not ai_tags) else "DIFF"

            print(f" Status: {status}")
            print(f" AI Folder: {ai_folder} (Karakeep: {bk_folder or 'none'})")
            print(f" AI Tags: {ai_tags} vs Karakeep: {bk_tags} [{tag_match}]")
            print(f" AI Title: {ai_title}")

            results["success"] += 1

        except Exception as e:
            # Best-effort migration: log, count, and move on to the next item.
            print(f" ERROR: {e}")
            results["error"] += 1

        print()

    # Summary
    print("=" * 60)
    print(f"MIGRATION COMPLETE")
    print(f" Success: {results['success']}")
    print(f" Errors: {results['error']}")
    print(f" Skipped: {results['skipped']}")
    print()

    # Tag comparison summary
    matches = 0
    diffs = 0
    for c in comparison:
        kk = set(c["karakeep_tags"])
        ai = set(c["ai_tags"])
        # Same overlap rule as the per-item tag_match above.
        if kk & ai or (not kk and not ai):
            matches += 1
        else:
            diffs += 1
    print(f"Tag overlap: {matches}/{len(comparison)} items had at least one matching tag")
    print(f"Tag differences: {diffs}/{len(comparison)} items had zero overlap")

    # Save comparison
    with open("/tmp/migration_comparison.json", "w") as f:
        json.dump(comparison, f, indent=2)
    print("\nFull comparison saved to /tmp/migration_comparison.json")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user