feat: major platform expansion — Brain service, RSS reader, iOS app, AI assistants, Firefox extension
Brain Service: - Playwright stealth crawler replacing browserless (og:image, Readability, Reddit JSON API) - AI classification with tag definitions and folder assignment - YouTube video download via yt-dlp - Karakeep migration complete (96 items) - Taxonomy management (folders with icons/colors, tags) - Discovery shuffle, sort options, search (Meilisearch + pgvector) - Item tag/folder editing, card color accents RSS Reader Service: - Custom FastAPI reader replacing Miniflux - Feed management (add/delete/refresh), category support - Full article extraction via Readability - Background content fetching for new entries - Mark all read with confirmation - Infinite scroll, retention cleanup (30/60 day) - 17 feeds migrated from Miniflux iOS App (SwiftUI): - Native iOS 17+ app with @Observable architecture - Cookie-based auth, configurable gateway URL - Dashboard with custom background photo + frosted glass widgets - Full fitness module (today/templates/goals/food library) - AI assistant chat (fitness + brain, raw JSON state management) - 120fps ProMotion support AI Assistants (Gateway): - Unified dispatcher with fitness/brain domain detection - Fitness: natural language food logging, photo analysis, multi-item splitting - Brain: save/append/update/delete notes, search & answer, undo support - Madiha user gets fitness-only (brain disabled) Firefox Extension: - One-click save to Brain from any page - Login with platform credentials - Right-click context menu (save page/link/image) - Notes field for URL saves - Signed and published on AMO Other: - Reader bookmark button routes to Brain (was Karakeep) - Fitness food library with "Add" button + add-to-meal popup - Kindle send file size check (25MB SMTP2GO limit) - Atelier UI as default (useAtelierShell=true) - Mobile upload box in nav drawer Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
254
services/brain/migrate_karakeep.py
Normal file
254
services/brain/migrate_karakeep.py
Normal file
@@ -0,0 +1,254 @@
|
||||
"""Migrate all bookmarks from Karakeep into Brain service via API."""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import tempfile
|
||||
|
||||
# --- Connection settings -----------------------------------------------------
# Karakeep (migration source); overridable via environment.
KARAKEEP_URL = os.environ.get("KARAKEEP_URL", "http://192.168.1.42:3005")
# SECURITY: the API key must come from the environment. A live key was
# previously hard-coded here as the fallback and committed to the repo —
# it should be rotated on the Karakeep side. No default is provided.
KARAKEEP_API_KEY = os.environ.get("KARAKEEP_API_KEY", "")
# Brain (migration destination) service and the user to impersonate via the
# X-Gateway-User-Id header.
BRAIN_URL = "http://localhost:8200"
BRAIN_USER = "admin"
|
||||
|
||||
|
||||
def karakeep_get(path):
    """GET a JSON resource from the Karakeep API and return the parsed body.

    `path` is an absolute API path (e.g. "/api/v1/bookmarks?limit=100").
    Raises urllib.error.HTTPError/URLError on HTTP or network failure,
    and json.JSONDecodeError on a non-JSON body.
    """
    req = urllib.request.Request(
        f"{KARAKEEP_URL}{path}",
        headers={"Authorization": f"Bearer {KARAKEEP_API_KEY}"},
    )
    # Context manager closes the response promptly instead of leaking the
    # socket until garbage collection (the original never closed it).
    with urllib.request.urlopen(req, timeout=30) as resp:
        return json.loads(resp.read())
|
||||
|
||||
|
||||
def karakeep_download(asset_id):
    """Download a binary asset (PDF/image) from Karakeep.

    Returns a (bytes, content_type) tuple; content_type falls back to
    "application/octet-stream" when the server omits the header.
    """
    req = urllib.request.Request(
        f"{KARAKEEP_URL}/api/v1/assets/{asset_id}",
        headers={"Authorization": f"Bearer {KARAKEEP_API_KEY}"},
    )
    # Generous timeout: assets can be multi-MB PDFs. The context manager
    # closes the connection (original leaked the response object).
    with urllib.request.urlopen(req, timeout=120) as resp:
        data = resp.read()
        content_type = resp.headers.get("Content-Type", "application/octet-stream")
    return data, content_type
|
||||
|
||||
|
||||
def brain_post_json(path, data):
    """POST a JSON payload to the Brain API (path is relative to /api).

    Authenticates as BRAIN_USER via the X-Gateway-User-Id header and
    returns the parsed JSON response body.
    """
    body = json.dumps(data).encode()
    req = urllib.request.Request(
        f"{BRAIN_URL}/api{path}",
        data=body,
        headers={"X-Gateway-User-Id": BRAIN_USER, "Content-Type": "application/json"},
        method="POST",
    )
    # Close the response deterministically (original leaked the socket).
    with urllib.request.urlopen(req, timeout=30) as resp:
        return json.loads(resp.read())
|
||||
|
||||
|
||||
def brain_upload(file_data, filename, content_type, title=None):
    """Multipart upload to /api/items/upload.

    Args:
        file_data: raw file bytes.
        filename: file name to report in the multipart part.
        content_type: MIME type for the file part.
        title: optional item title sent as a second form field.

    Returns the parsed JSON response from the Brain service.

    BUG FIX: the `filename` argument was previously ignored — the
    Content-Disposition header hard-coded a placeholder name. It is now
    sent (sanitized) so Brain stores a meaningful file name.
    """
    boundary = "----MigrationBoundary12345"
    # Strip quotes and CR/LF so the header value cannot break the
    # multipart framing or be used for header injection.
    safe_name = (
        filename.replace('"', "'").replace("\r", " ").replace("\n", " ")
    )
    parts = []

    # File part
    parts.append(f"--{boundary}\r\n".encode())
    parts.append(
        f'Content-Disposition: form-data; name="file"; filename="{safe_name}"\r\n'.encode()
    )
    parts.append(f"Content-Type: {content_type}\r\n\r\n".encode())
    parts.append(file_data)
    parts.append(b"\r\n")

    # Optional title part
    if title:
        parts.append(f"--{boundary}\r\n".encode())
        parts.append(b'Content-Disposition: form-data; name="title"\r\n\r\n')
        parts.append(title.encode())
        parts.append(b"\r\n")

    parts.append(f"--{boundary}--\r\n".encode())
    body = b"".join(parts)

    req = urllib.request.Request(
        f"{BRAIN_URL}/api/items/upload",
        data=body,
        headers={
            "X-Gateway-User-Id": BRAIN_USER,
            "Content-Type": f"multipart/form-data; boundary={boundary}",
        },
        method="POST",
    )
    # Close the response deterministically (original leaked the socket).
    with urllib.request.urlopen(req, timeout=60) as resp:
        return json.loads(resp.read())
|
||||
|
||||
|
||||
def brain_get_item(item_id):
    """Fetch a single Brain item as a dict (used to poll processing status)."""
    req = urllib.request.Request(
        f"{BRAIN_URL}/api/items/{item_id}",
        headers={"X-Gateway-User-Id": BRAIN_USER},
    )
    # Close the response deterministically (original leaked the socket).
    with urllib.request.urlopen(req, timeout=15) as resp:
        return json.loads(resp.read())
|
||||
|
||||
|
||||
def fetch_all_bookmarks():
    """Return every bookmark from Karakeep, following cursor pagination.

    Requests pages of 100 and keeps going while the API returns both a
    `nextCursor` token and a non-empty page.
    """
    collected = []
    cursor = None
    has_more = True
    while has_more:
        path = "/api/v1/bookmarks?limit=100"
        if cursor is not None:
            path = f"{path}&cursor={cursor}"
        page = karakeep_get(path)
        batch = page.get("bookmarks", [])
        collected.extend(batch)
        cursor = page.get("nextCursor")
        # Stop on a missing cursor OR an empty page — either means the end.
        has_more = bool(cursor) and bool(batch)
    return collected
|
||||
|
||||
|
||||
def wait_for_processing(item_id, timeout=120):
    """Poll the Brain item every 3s until processing reaches a terminal state.

    Returns the item dict as soon as `processing_status` is "ready" or
    "error"; once `timeout` seconds elapse, returns the latest snapshot
    regardless of status.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        snapshot = brain_get_item(item_id)
        if snapshot.get("processing_status", "pending") in ("ready", "error"):
            return snapshot
        time.sleep(3)
    # Timed out — hand back whatever state the item is in now.
    return brain_get_item(item_id)
|
||||
|
||||
|
||||
def main():
    """Migrate every Karakeep bookmark into Brain and report the outcome.

    For each bookmark: creates a Brain item (link/note/file upload),
    waits for Brain's AI classification to finish, then compares the AI's
    title/folder/tags against Karakeep's. Prints a running log, a summary,
    and writes the full comparison to /tmp/migration_comparison.json.
    """
    print("Fetching all Karakeep bookmarks...")
    bookmarks = fetch_all_bookmarks()
    print(f"Found {len(bookmarks)} bookmarks\n")

    # Sort: notes first, then links, then assets (PDFs take longer)
    def sort_key(b):
        # Unknown content types sort last (bucket 3).
        t = b.get("content", {}).get("type", "")
        return {"text": 0, "link": 1, "asset": 2}.get(t, 3)
    bookmarks.sort(key=sort_key)

    # Running tallies and per-item Karakeep-vs-AI comparison records.
    results = {"success": 0, "error": 0, "skipped": 0}
    comparison = []

    for i, bk in enumerate(bookmarks):
        content = bk.get("content", {})
        bk_type = content.get("type", "unknown")
        bk_title = bk.get("title") or "Untitled"
        bk_tags = [t["name"] for t in bk.get("tags", [])]
        # NOTE(review): assumes a bookmark belongs to at most one list —
        # confirm against the Karakeep API schema.
        bk_list = bk.get("list", {})
        bk_folder = bk_list.get("name") if bk_list else None

        print(f"[{i+1}/{len(bookmarks)}] {bk_type}: {bk_title[:60]}")

        # One try per bookmark: a single failure is logged and counted
        # without aborting the rest of the migration.
        try:
            if bk_type == "link":
                url = content.get("url", "")
                if not url:
                    print(" SKIP: no URL")
                    results["skipped"] += 1
                    continue
                # "Untitled" is this script's placeholder, not a real title —
                # send None so Brain's AI generates one.
                resp = brain_post_json("/items", {
                    "type": "link",
                    "url": url,
                    "title": bk_title if bk_title != "Untitled" else None,
                })

            elif bk_type == "text":
                text = content.get("text", "")
                if not text:
                    print(" SKIP: no text")
                    results["skipped"] += 1
                    continue
                resp = brain_post_json("/items", {
                    "type": "note",
                    "raw_content": text,
                    "title": bk_title if bk_title != "Untitled" else None,
                })

            elif bk_type == "asset":
                asset_id = content.get("assetId")
                asset_type = content.get("assetType", "unknown")
                if not asset_id:
                    print(" SKIP: no assetId")
                    results["skipped"] += 1
                    continue

                print(f" Downloading {asset_type} ({asset_id[:8]})...")
                file_data, ct = karakeep_download(asset_id)
                # Derive a filename from the (truncated) title and asset type.
                ext = {"pdf": ".pdf", "image": ".png"}.get(asset_type, ".bin")
                filename = f"{bk_title[:50]}{ext}" if bk_title != "Untitled" else f"upload{ext}"
                # Clean filename
                filename = filename.replace("/", "-").replace("\\", "-")
                # Karakeep's Content-Type for PDFs is unreliable; force it.
                if asset_type == "pdf":
                    ct = "application/pdf"
                resp = brain_upload(file_data, filename, ct, title=bk_title if bk_title != "Untitled" else None)
            else:
                print(f" SKIP: unknown type '{bk_type}'")
                results["skipped"] += 1
                continue

            item_id = resp.get("id")
            print(f" Created: {item_id} — waiting for AI classification...")

            # Wait for processing
            final = wait_for_processing(item_id, timeout=90)
            status = final.get("processing_status", "?")
            ai_folder = final.get("folder", "?")
            ai_tags = final.get("tags", [])
            ai_title = final.get("title", "?")

            # Compare
            entry = {
                "karakeep_title": bk_title,
                "karakeep_tags": bk_tags,
                "karakeep_folder": bk_folder,
                "ai_title": ai_title,
                "ai_folder": ai_folder,
                "ai_tags": ai_tags,
                "status": status,
            }
            comparison.append(entry)

            # "OK" when at least one tag overlaps, or both sides are untagged.
            tag_match = "OK" if set(bk_tags) & set(ai_tags) or (not bk_tags and not ai_tags) else "DIFF"

            print(f" Status: {status}")
            print(f" AI Folder: {ai_folder} (Karakeep: {bk_folder or 'none'})")
            print(f" AI Tags: {ai_tags} vs Karakeep: {bk_tags} [{tag_match}]")
            print(f" AI Title: {ai_title}")

            results["success"] += 1

        except Exception as e:
            # Best-effort migration: log, count, and move on to the next item.
            print(f" ERROR: {e}")
            results["error"] += 1

        print()

    # Summary
    print("=" * 60)
    print(f"MIGRATION COMPLETE")
    print(f" Success: {results['success']}")
    print(f" Errors: {results['error']}")
    print(f" Skipped: {results['skipped']}")
    print()

    # Tag comparison summary
    matches = 0
    diffs = 0
    for c in comparison:
        kk = set(c["karakeep_tags"])
        ai = set(c["ai_tags"])
        # Same overlap rule as the per-item tag_match above.
        if kk & ai or (not kk and not ai):
            matches += 1
        else:
            diffs += 1
    print(f"Tag overlap: {matches}/{len(comparison)} items had at least one matching tag")
    print(f"Tag differences: {diffs}/{len(comparison)} items had zero overlap")

    # Save comparison
    with open("/tmp/migration_comparison.json", "w") as f:
        json.dump(comparison, f, indent=2)
    print("\nFull comparison saved to /tmp/migration_comparison.json")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user