Files
platform/services/brain/migrate_karakeep.py
Yusuf Suleman 4592e35732
All checks were successful
Security Checks / dependency-audit (push) Successful in 1m13s
Security Checks / secret-scanning (push) Successful in 3s
Security Checks / dockerfile-lint (push) Successful in 3s
feat: major platform expansion — Brain service, RSS reader, iOS app, AI assistants, Firefox extension
Brain Service:
- Playwright stealth crawler replacing browserless (og:image, Readability, Reddit JSON API)
- AI classification with tag definitions and folder assignment
- YouTube video download via yt-dlp
- Karakeep migration complete (96 items)
- Taxonomy management (folders with icons/colors, tags)
- Discovery shuffle, sort options, search (Meilisearch + pgvector)
- Item tag/folder editing, card color accents

RSS Reader Service:
- Custom FastAPI reader replacing Miniflux
- Feed management (add/delete/refresh), category support
- Full article extraction via Readability
- Background content fetching for new entries
- Mark all read with confirmation
- Infinite scroll, retention cleanup (30/60 day)
- 17 feeds migrated from Miniflux

iOS App (SwiftUI):
- Native iOS 17+ app with @Observable architecture
- Cookie-based auth, configurable gateway URL
- Dashboard with custom background photo + frosted glass widgets
- Full fitness module (today/templates/goals/food library)
- AI assistant chat (fitness + brain, raw JSON state management)
- 120fps ProMotion support

AI Assistants (Gateway):
- Unified dispatcher with fitness/brain domain detection
- Fitness: natural language food logging, photo analysis, multi-item splitting
- Brain: save/append/update/delete notes, search & answer, undo support
- Madiha user gets fitness-only (brain disabled)

Firefox Extension:
- One-click save to Brain from any page
- Login with platform credentials
- Right-click context menu (save page/link/image)
- Notes field for URL saves
- Signed and published on AMO

Other:
- Reader bookmark button routes to Brain (was Karakeep)
- Fitness food library with "Add" button + add-to-meal popup
- Kindle send file size check (25MB SMTP2GO limit)
- Atelier UI as default (useAtelierShell=true)
- Mobile upload box in nav drawer

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-03 00:56:29 -05:00

255 lines
8.3 KiB
Python

"""Migrate all bookmarks from Karakeep into Brain service via API."""
import json
import os
import sys
import time
import urllib.request
import urllib.error
import tempfile
# Source (Karakeep) connection settings; overridable via environment variables.
KARAKEEP_URL = os.environ.get("KARAKEEP_URL", "http://192.168.1.42:3005")
# SECURITY NOTE(review): a live-looking API key is committed as the fallback
# default. Rotate this key and make the env var mandatory instead.
KARAKEEP_API_KEY = os.environ.get("KARAKEEP_API_KEY", "ak2_f4141e5fe7265e23bd6f_4549c932c262010eafd08acb2139f1ac")
# Destination (Brain) service: local API endpoint and the gateway user that
# all migrated items are attributed to.
BRAIN_URL = "http://localhost:8200"
BRAIN_USER = "admin"
def karakeep_get(path):
    """GET *path* from the Karakeep API and return the decoded JSON body.

    Raises urllib.error.HTTPError/URLError on transport failure and
    json.JSONDecodeError if the reply is not valid JSON.
    """
    req = urllib.request.Request(
        f"{KARAKEEP_URL}{path}",
        headers={"Authorization": f"Bearer {KARAKEEP_API_KEY}"},
    )
    # Close the response explicitly; the original leaked the socket.
    with urllib.request.urlopen(req, timeout=30) as resp:
        return json.loads(resp.read())
def karakeep_download(asset_id):
    """Download a Karakeep asset by id.

    Returns a tuple of (raw bytes, Content-Type header value); the content
    type falls back to "application/octet-stream" when the server omits it.
    """
    req = urllib.request.Request(
        f"{KARAKEEP_URL}/api/v1/assets/{asset_id}",
        headers={"Authorization": f"Bearer {KARAKEEP_API_KEY}"},
    )
    # Generous timeout: assets (PDFs, images) can be large.
    # Use a context manager so the connection is closed (original leaked it).
    with urllib.request.urlopen(req, timeout=120) as resp:
        data = resp.read()
        content_type = resp.headers.get("Content-Type", "application/octet-stream")
    return data, content_type
def brain_post_json(path, data):
    """POST *data* as JSON to the Brain API under /api and return the decoded reply.

    Authenticates by impersonating BRAIN_USER via the X-Gateway-User-Id header.
    """
    body = json.dumps(data).encode()
    req = urllib.request.Request(
        f"{BRAIN_URL}/api{path}",
        data=body,
        headers={"X-Gateway-User-Id": BRAIN_USER, "Content-Type": "application/json"},
        method="POST",
    )
    # Close the response explicitly; the original leaked the socket.
    with urllib.request.urlopen(req, timeout=30) as resp:
        return json.loads(resp.read())
def brain_upload(file_data, filename, content_type, title=None):
    """Multipart upload to /api/items/upload.

    Builds the multipart/form-data body by hand (stdlib only): a required
    "file" part carrying *file_data*, plus an optional "title" part.
    Returns the Brain API's decoded JSON response.
    """
    boundary = "----MigrationBoundary12345"
    parts = []
    # File part.
    # BUG FIX: the *filename* parameter was previously ignored and a literal
    # placeholder string was sent in the Content-Disposition header; use the
    # caller-supplied (already sanitized) name instead.
    parts.append(f"--{boundary}\r\n".encode())
    parts.append(f'Content-Disposition: form-data; name="file"; filename="{filename}"\r\n'.encode())
    parts.append(f"Content-Type: {content_type}\r\n\r\n".encode())
    parts.append(file_data)
    parts.append(b"\r\n")
    # Optional title part.
    if title:
        parts.append(f"--{boundary}\r\n".encode())
        parts.append(b'Content-Disposition: form-data; name="title"\r\n\r\n')
        parts.append(title.encode())
        parts.append(b"\r\n")
    parts.append(f"--{boundary}--\r\n".encode())
    body = b"".join(parts)
    req = urllib.request.Request(
        f"{BRAIN_URL}/api/items/upload",
        data=body,
        headers={
            "X-Gateway-User-Id": BRAIN_USER,
            "Content-Type": f"multipart/form-data; boundary={boundary}",
        },
        method="POST",
    )
    # Close the response explicitly; the original leaked the socket.
    with urllib.request.urlopen(req, timeout=60) as resp:
        return json.loads(resp.read())
def brain_get_item(item_id):
    """Fetch a single Brain item as a dict, authenticated as BRAIN_USER."""
    req = urllib.request.Request(
        f"{BRAIN_URL}/api/items/{item_id}",
        headers={"X-Gateway-User-Id": BRAIN_USER},
    )
    # Close the response explicitly; the original leaked the socket.
    with urllib.request.urlopen(req, timeout=15) as resp:
        return json.loads(resp.read())
def fetch_all_bookmarks():
    """Page through Karakeep's bookmark listing and return every bookmark.

    Follows the API's cursor-based pagination (100 items per page) until the
    server stops returning a nextCursor or sends an empty page.
    """
    collected = []
    cursor = None
    while True:
        endpoint = "/api/v1/bookmarks?limit=100"
        if cursor:
            endpoint += f"&cursor={cursor}"
        page = karakeep_get(endpoint)
        batch = page.get("bookmarks", [])
        collected.extend(batch)
        cursor = page.get("nextCursor")
        # Stop when pagination is exhausted (no cursor) or the page was empty.
        if not cursor or not batch:
            return collected
def wait_for_processing(item_id, timeout=120):
    """Poll the Brain item every 3s until processing completes or *timeout* elapses.

    Returns the item dict; if the deadline passes, returns one final snapshot
    regardless of its processing_status.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        snapshot = brain_get_item(item_id)
        # "ready" and "error" are both terminal states.
        if snapshot.get("processing_status", "pending") in ("ready", "error"):
            return snapshot
        time.sleep(3)
    return brain_get_item(item_id)
def main():
    """Migrate every Karakeep bookmark into Brain and report tag/folder parity.

    For each bookmark: creates the matching Brain item (link/note/upload),
    waits for Brain's AI classification, and records a Karakeep-vs-AI
    comparison entry. Prints a summary and dumps the full comparison to
    /tmp/migration_comparison.json.
    """
    print("Fetching all Karakeep bookmarks...")
    bookmarks = fetch_all_bookmarks()
    print(f"Found {len(bookmarks)} bookmarks\n")
    # Sort: notes first, then links, then assets (PDFs take longer).
    def sort_key(b):
        t = b.get("content", {}).get("type", "")
        return {"text": 0, "link": 1, "asset": 2}.get(t, 3)
    bookmarks.sort(key=sort_key)
    results = {"success": 0, "error": 0, "skipped": 0}
    comparison = []  # one dict per migrated item: Karakeep vs AI metadata
    for i, bk in enumerate(bookmarks):
        content = bk.get("content", {})
        bk_type = content.get("type", "unknown")
        # "Untitled" doubles as a sentinel: such titles are NOT forwarded so
        # Brain's AI can generate a real title instead.
        bk_title = bk.get("title") or "Untitled"
        bk_tags = [t["name"] for t in bk.get("tags", [])]
        bk_list = bk.get("list", {})
        bk_folder = bk_list.get("name") if bk_list else None
        print(f"[{i+1}/{len(bookmarks)}] {bk_type}: {bk_title[:60]}")
        try:
            if bk_type == "link":
                url = content.get("url", "")
                if not url:
                    print("  SKIP: no URL")
                    results["skipped"] += 1
                    continue
                resp = brain_post_json("/items", {
                    "type": "link",
                    "url": url,
                    "title": bk_title if bk_title != "Untitled" else None,
                })
            elif bk_type == "text":
                # Karakeep "text" bookmarks map to Brain "note" items.
                text = content.get("text", "")
                if not text:
                    print("  SKIP: no text")
                    results["skipped"] += 1
                    continue
                resp = brain_post_json("/items", {
                    "type": "note",
                    "raw_content": text,
                    "title": bk_title if bk_title != "Untitled" else None,
                })
            elif bk_type == "asset":
                # Binary assets (PDFs/images) are downloaded from Karakeep
                # and re-uploaded to Brain via multipart.
                asset_id = content.get("assetId")
                asset_type = content.get("assetType", "unknown")
                if not asset_id:
                    print("  SKIP: no assetId")
                    results["skipped"] += 1
                    continue
                print(f"  Downloading {asset_type} ({asset_id[:8]})...")
                file_data, ct = karakeep_download(asset_id)
                ext = {"pdf": ".pdf", "image": ".png"}.get(asset_type, ".bin")
                filename = f"{bk_title[:50]}{ext}" if bk_title != "Untitled" else f"upload{ext}"
                # Clean filename: strip path separators so it is a plain name.
                filename = filename.replace("/", "-").replace("\\", "-")
                if asset_type == "pdf":
                    # Trust the declared asset type over Karakeep's header.
                    ct = "application/pdf"
                resp = brain_upload(file_data, filename, ct, title=bk_title if bk_title != "Untitled" else None)
            else:
                print(f"  SKIP: unknown type '{bk_type}'")
                results["skipped"] += 1
                continue
            item_id = resp.get("id")
            print(f"  Created: {item_id} — waiting for AI classification...")
            # Wait for Brain's background processing to classify the item.
            final = wait_for_processing(item_id, timeout=90)
            status = final.get("processing_status", "?")
            ai_folder = final.get("folder", "?")
            ai_tags = final.get("tags", [])
            ai_title = final.get("title", "?")
            # Compare Karakeep's manual metadata against the AI's output.
            entry = {
                "karakeep_title": bk_title,
                "karakeep_tags": bk_tags,
                "karakeep_folder": bk_folder,
                "ai_title": ai_title,
                "ai_folder": ai_folder,
                "ai_tags": ai_tags,
                "status": status,
            }
            comparison.append(entry)
            # "OK" when at least one tag overlaps, or both sides are empty.
            tag_match = "OK" if set(bk_tags) & set(ai_tags) or (not bk_tags and not ai_tags) else "DIFF"
            print(f"  Status: {status}")
            print(f"  AI Folder: {ai_folder} (Karakeep: {bk_folder or 'none'})")
            print(f"  AI Tags: {ai_tags} vs Karakeep: {bk_tags} [{tag_match}]")
            print(f"  AI Title: {ai_title}")
            results["success"] += 1
        except Exception as e:
            # Best-effort migration: log the failure and move to the next item.
            print(f"  ERROR: {e}")
            results["error"] += 1
        print()
    # Summary
    print("=" * 60)
    print(f"MIGRATION COMPLETE")
    print(f"  Success: {results['success']}")
    print(f"  Errors:  {results['error']}")
    print(f"  Skipped: {results['skipped']}")
    print()
    # Tag comparison summary: count items with any tag overlap vs none.
    matches = 0
    diffs = 0
    for c in comparison:
        kk = set(c["karakeep_tags"])
        ai = set(c["ai_tags"])
        if kk & ai or (not kk and not ai):
            matches += 1
        else:
            diffs += 1
    print(f"Tag overlap: {matches}/{len(comparison)} items had at least one matching tag")
    print(f"Tag differences: {diffs}/{len(comparison)} items had zero overlap")
    # Save the full comparison for offline review.
    with open("/tmp/migration_comparison.json", "w") as f:
        json.dump(comparison, f, indent=2)
    print("\nFull comparison saved to /tmp/migration_comparison.json")
# Script entry point: run the migration when executed directly.
if __name__ == "__main__":
    main()