Brain Service:
- Playwright stealth crawler replacing browserless (og:image, Readability, Reddit JSON API)
- AI classification with tag definitions and folder assignment
- YouTube video download via yt-dlp
- Karakeep migration complete (96 items)
- Taxonomy management (folders with icons/colors, tags)
- Discovery shuffle, sort options, search (Meilisearch + pgvector)
- Item tag/folder editing, card color accents

RSS Reader Service:
- Custom FastAPI reader replacing Miniflux
- Feed management (add/delete/refresh), category support
- Full article extraction via Readability
- Background content fetching for new entries
- Mark all read with confirmation
- Infinite scroll, retention cleanup (30/60 day)
- 17 feeds migrated from Miniflux

iOS App (SwiftUI):
- Native iOS 17+ app with @Observable architecture
- Cookie-based auth, configurable gateway URL
- Dashboard with custom background photo + frosted glass widgets
- Full fitness module (today/templates/goals/food library)
- AI assistant chat (fitness + brain, raw JSON state management)
- 120fps ProMotion support

AI Assistants (Gateway):
- Unified dispatcher with fitness/brain domain detection
- Fitness: natural language food logging, photo analysis, multi-item splitting
- Brain: save/append/update/delete notes, search & answer, undo support
- Madiha user gets fitness-only (brain disabled)

Firefox Extension:
- One-click save to Brain from any page
- Login with platform credentials
- Right-click context menu (save page/link/image)
- Notes field for URL saves
- Signed and published on AMO

Other:
- Reader bookmark button routes to Brain (was Karakeep)
- Fitness food library with "Add" button + add-to-meal popup
- Kindle send file size check (25MB SMTP2GO limit)
- Atelier UI as default (useAtelierShell=true)
- Mobile upload box in nav drawer

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
255 lines
8.3 KiB
Python
255 lines
8.3 KiB
Python
"""Migrate all bookmarks from Karakeep into Brain service via API."""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
import time
|
|
import urllib.request
|
|
import urllib.error
|
|
import tempfile
|
|
|
|
# Karakeep is the migration source. The base URL is read from the environment
# (falling back to the original LAN address); a trailing slash is stripped
# because request paths are appended verbatim and already start with "/".
KARAKEEP_URL = os.environ.get("KARAKEEP_URL", "http://192.168.1.42:3005").rstrip("/")

# SECURITY(review): an API key is committed here as the fallback value. It is
# kept only so existing invocations keep working -- rotate the key and supply
# KARAKEEP_API_KEY through the environment instead.
KARAKEEP_API_KEY = os.environ.get("KARAKEEP_API_KEY", "ak2_f4141e5fe7265e23bd6f_4549c932c262010eafd08acb2139f1ac")

# Brain is the migration target. Both values keep their original defaults but
# can now be overridden from the environment, like the Karakeep settings.
BRAIN_URL = os.environ.get("BRAIN_URL", "http://localhost:8200").rstrip("/")

# Sent as the X-Gateway-User-Id header on every Brain request.
BRAIN_USER = os.environ.get("BRAIN_USER", "admin")
|
|
|
|
|
|
def karakeep_get(path):
    """Send an authenticated GET to Karakeep and return the decoded JSON body.

    Args:
        path: Request path, including any query string. It is appended
            verbatim to ``KARAKEEP_URL``.

    Returns:
        The JSON-decoded response body.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for HTTP
            error statuses).
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    req = urllib.request.Request(
        f"{KARAKEEP_URL}{path}",
        headers={"Authorization": f"Bearer {KARAKEEP_API_KEY}"},
    )
    # Use the response as a context manager so the connection is closed
    # deterministically instead of lingering until garbage collection.
    with urllib.request.urlopen(req, timeout=30) as resp:
        return json.loads(resp.read())
|
|
|
|
|
|
def karakeep_download(asset_id):
    """Download one asset from Karakeep.

    Args:
        asset_id: Identifier placed in the ``/api/v1/assets/{asset_id}`` path.

    Returns:
        A ``(data, content_type)`` tuple: the raw response bytes and the
        response's ``Content-Type`` header, or ``"application/octet-stream"``
        when the header is absent.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for HTTP
            error statuses).
    """
    req = urllib.request.Request(
        f"{KARAKEEP_URL}/api/v1/assets/{asset_id}",
        headers={"Authorization": f"Bearer {KARAKEEP_API_KEY}"},
    )
    # The whole asset is read into memory; close the connection as soon as
    # the body and header have been captured.
    with urllib.request.urlopen(req, timeout=120) as resp:
        data = resp.read()
        content_type = resp.headers.get("Content-Type", "application/octet-stream")
    return data, content_type
|
|
|
|
|
|
def brain_post_json(path, data):
    """POST a JSON payload to the Brain API and return the decoded JSON reply.

    Args:
        path: Path below ``{BRAIN_URL}/api`` (for example ``"/items"``).
        data: JSON-serialisable object sent as the request body.

    Returns:
        The JSON-decoded response body.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for HTTP
            error statuses).
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    body = json.dumps(data).encode()
    req = urllib.request.Request(
        f"{BRAIN_URL}/api{path}",
        data=body,
        headers={"X-Gateway-User-Id": BRAIN_USER, "Content-Type": "application/json"},
        method="POST",
    )
    # Context manager closes the connection even if JSON decoding fails.
    with urllib.request.urlopen(req, timeout=30) as resp:
        return json.loads(resp.read())
|
|
|
|
|
|
def _multipart_filename(filename):
    """Escape a filename for use inside a quoted Content-Disposition parameter."""
    # A raw double quote would end the quoted value and CR/LF would end the
    # header line, corrupting the part; percent-encode them the way browsers
    # do when submitting multipart/form-data.
    return (
        filename.replace('"', "%22")
        .replace("\r", "%0D")
        .replace("\n", "%0A")
    )


def brain_upload(file_data, filename, content_type, title=None):
    """Upload a file to Brain as a multipart/form-data POST to /api/items/upload.

    Args:
        file_data: Raw bytes of the file.
        filename: Name reported for the ``file`` form field.
        content_type: MIME type declared for the file part.
        title: Optional title; when truthy it is sent as a separate ``title``
            form field.

    Returns:
        The JSON-decoded response body.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for HTTP
            error statuses).
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    import uuid  # local import so this block does not depend on the file header

    # A random boundary per request: a fixed string could appear inside the
    # uploaded bytes and split the payload at the wrong place.
    boundary = f"----MigrationBoundary{uuid.uuid4().hex}"
    delimiter = f"--{boundary}\r\n".encode()
    safe_name = _multipart_filename(filename)

    # File part
    parts = [
        delimiter,
        f'Content-Disposition: form-data; name="file"; filename="{safe_name}"\r\n'.encode(),
        f"Content-Type: {content_type}\r\n\r\n".encode(),
        file_data,
        b"\r\n",
    ]

    # Title part
    if title:
        parts += [
            delimiter,
            b'Content-Disposition: form-data; name="title"\r\n\r\n',
            title.encode(),
            b"\r\n",
        ]

    # Closing delimiter
    parts.append(f"--{boundary}--\r\n".encode())
    body = b"".join(parts)

    req = urllib.request.Request(
        f"{BRAIN_URL}/api/items/upload",
        data=body,
        headers={
            "X-Gateway-User-Id": BRAIN_USER,
            "Content-Type": f"multipart/form-data; boundary={boundary}",
        },
        method="POST",
    )
    # Context manager closes the connection even if JSON decoding fails.
    with urllib.request.urlopen(req, timeout=60) as resp:
        return json.loads(resp.read())
|
|
|
|
|
|
def brain_get_item(item_id):
    """Fetch a single item from the Brain API.

    Args:
        item_id: Identifier placed in the ``/api/items/{item_id}`` path.

    Returns:
        The JSON-decoded response body.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for HTTP
            error statuses).
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    req = urllib.request.Request(
        f"{BRAIN_URL}/api/items/{item_id}",
        headers={"X-Gateway-User-Id": BRAIN_USER},
    )
    # This is called repeatedly while polling, so release each connection
    # promptly rather than leaving it for the garbage collector.
    with urllib.request.urlopen(req, timeout=15) as resp:
        return json.loads(resp.read())
|
|
|
|
|
|
def fetch_all_bookmarks():
    """Return every Karakeep bookmark by following cursor pagination.

    Requests ``/api/v1/bookmarks`` in pages of 100 and follows the
    ``nextCursor`` value of each response until it is missing, a page comes
    back empty, or a cursor repeats.

    Returns:
        A list with the ``bookmarks`` entries of all pages, in response order.
    """
    from urllib.parse import urlencode  # local import; only needed here

    all_bk = []
    cursor = None
    seen_cursors = set()
    while True:
        params = {"limit": 100}
        if cursor:
            # urlencode escapes characters such as "+", "/" and "=" that
            # would otherwise be misread when the cursor is put in the query.
            params["cursor"] = cursor
        data = karakeep_get(f"/api/v1/bookmarks?{urlencode(params)}")

        # Treat an explicit null the same as a missing key.
        bks = data.get("bookmarks") or []
        all_bk.extend(bks)

        cursor = data.get("nextCursor")
        if not cursor or not bks:
            break
        # Guard against an endless loop if the server keeps returning the
        # same cursor.
        cursor_key = str(cursor)
        if cursor_key in seen_cursors:
            break
        seen_cursors.add(cursor_key)
    return all_bk
|
|
|
|
|
|
def wait_for_processing(item_id, timeout=120, poll_interval=3):
    """Poll a Brain item until its processing finishes or the timeout elapses.

    Args:
        item_id: Identifier of the item to poll.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to wait between polls.

    Returns:
        The item as soon as its ``processing_status`` is ``"ready"`` or
        ``"error"``; otherwise the item as fetched once more after the
        timeout, whatever its status is at that point.
    """
    # A monotonic clock keeps the timeout correct even if the system clock is
    # adjusted while waiting.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        item = brain_get_item(item_id)
        # A missing status is treated as still pending.
        if item.get("processing_status", "pending") in ("ready", "error"):
            return item
        # Never sleep past the deadline.
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(max(0, min(poll_interval, remaining)))
    # Timed out: hand back the latest state so the caller can report it.
    return brain_get_item(item_id)
|
|
|
|
|
|
# Processing order of Karakeep content types: notes first, then links, then
# assets (PDFs take longer). Unknown types sort last.
_TYPE_ORDER = {"text": 0, "link": 1, "asset": 2}


class _SkipBookmark(Exception):
    """Raised when a bookmark cannot be migrated; the message is the reason."""


def _tag_names(raw_tags):
    """Return tag names from a list of tag dicts (``{"name": ...}``) or plain values."""
    names = []
    for tag in raw_tags or []:
        name = tag.get("name") if isinstance(tag, dict) else tag
        if name is not None:
            names.append(name)
    return names


def _tags_overlap(karakeep_tags, ai_tags):
    """Return True when both tag lists share a tag or are both empty."""
    kk = set(karakeep_tags)
    ai = set(ai_tags)
    return bool(kk & ai) or (not kk and not ai)


def _create_brain_item(bk_type, content, bk_title):
    """Create the Brain item for one bookmark and return the API response.

    Raises ``_SkipBookmark`` when the bookmark has no usable payload or an
    unknown content type.
    """
    # "Untitled" is the placeholder for a missing title; send no title then.
    title = None if bk_title == "Untitled" else bk_title

    if bk_type == "link":
        url = content.get("url", "")
        if not url:
            raise _SkipBookmark("no URL")
        return brain_post_json("/items", {
            "type": "link",
            "url": url,
            "title": title,
        })

    if bk_type == "text":
        text = content.get("text", "")
        if not text:
            raise _SkipBookmark("no text")
        return brain_post_json("/items", {
            "type": "note",
            "raw_content": text,
            "title": title,
        })

    if bk_type == "asset":
        asset_id = content.get("assetId")
        asset_type = content.get("assetType", "unknown")
        if not asset_id:
            raise _SkipBookmark("no assetId")

        print(f" Downloading {asset_type} ({asset_id[:8]})...")
        file_data, ct = karakeep_download(asset_id)
        # NOTE(review): every image is named ".png" regardless of its real
        # format -- confirm whether Brain relies on the extension.
        ext = {"pdf": ".pdf", "image": ".png"}.get(asset_type, ".bin")
        stem = "upload" if title is None else bk_title[:50]
        # Path separators are not valid inside a file name.
        filename = f"{stem}{ext}".replace("/", "-").replace("\\", "-")
        if asset_type == "pdf":
            ct = "application/pdf"
        return brain_upload(file_data, filename, ct, title=title)

    raise _SkipBookmark(f"unknown type '{bk_type}'")


def main():
    """Migrate every Karakeep bookmark into Brain and report the outcome.

    For each bookmark the matching Brain item is created (link, note or file
    upload), the script waits for Brain to finish processing it, and the
    resulting title, folder and tags are compared with Karakeep's. A summary
    is printed and the full comparison is written to
    ``/tmp/migration_comparison.json``.
    """
    print("Fetching all Karakeep bookmarks...")
    bookmarks = fetch_all_bookmarks()
    print(f"Found {len(bookmarks)} bookmarks\n")

    # Sort: notes first, then links, then assets (PDFs take longer)
    bookmarks.sort(key=lambda b: _TYPE_ORDER.get((b.get("content") or {}).get("type", ""), 3))

    results = {"success": 0, "error": 0, "skipped": 0}
    comparison = []
    total = len(bookmarks)

    for position, bk in enumerate(bookmarks, start=1):
        # Parsing happens inside the try block so that one malformed bookmark
        # is counted as an error instead of aborting the whole migration.
        try:
            content = bk.get("content") or {}
            bk_type = content.get("type", "unknown")
            bk_title = bk.get("title") or "Untitled"
            bk_tags = _tag_names(bk.get("tags"))
            bk_folder = (bk.get("list") or {}).get("name")

            print(f"[{position}/{total}] {bk_type}: {bk_title[:60]}")

            resp = _create_brain_item(bk_type, content, bk_title)

            item_id = resp.get("id")
            if not item_id:
                # Without an id the polling below would query a bogus item.
                raise RuntimeError(f"Brain response has no item id: {resp!r}")
            print(f" Created: {item_id} — waiting for AI classification...")

            # Wait for processing
            final = wait_for_processing(item_id, timeout=90)
            status = final.get("processing_status", "?")
            ai_folder = final.get("folder", "?")
            # NOTE(review): the shape of Brain's "tags" field is not visible
            # here; plain names and {"name": ...} dicts are both accepted.
            ai_tags = _tag_names(final.get("tags"))
            ai_title = final.get("title", "?")

            # Compare
            comparison.append({
                "karakeep_title": bk_title,
                "karakeep_tags": bk_tags,
                "karakeep_folder": bk_folder,
                "ai_title": ai_title,
                "ai_folder": ai_folder,
                "ai_tags": ai_tags,
                "status": status,
            })

            tag_match = "OK" if _tags_overlap(bk_tags, ai_tags) else "DIFF"

            print(f" Status: {status}")
            print(f" AI Folder: {ai_folder} (Karakeep: {bk_folder or 'none'})")
            print(f" AI Tags: {ai_tags} vs Karakeep: {bk_tags} [{tag_match}]")
            print(f" AI Title: {ai_title}")

            # "Success" means the item was created; its processing status
            # (which may be "error") is reported separately above.
            results["success"] += 1

        except _SkipBookmark as skip:
            print(f" SKIP: {skip}")
            results["skipped"] += 1
            continue
        except Exception as e:
            # Per-item boundary: report the failure and move on to the next
            # bookmark rather than losing the rest of the run.
            print(f" ERROR: {e}")
            results["error"] += 1

        print()

    # Summary
    print("=" * 60)
    print("MIGRATION COMPLETE")
    print(f" Success: {results['success']}")
    print(f" Errors: {results['error']}")
    print(f" Skipped: {results['skipped']}")
    print()

    # Tag comparison summary
    matches = sum(
        1 for c in comparison if _tags_overlap(c["karakeep_tags"], c["ai_tags"])
    )
    diffs = len(comparison) - matches
    print(f"Tag overlap: {matches}/{len(comparison)} items had at least one matching tag")
    print(f"Tag differences: {diffs}/{len(comparison)} items had zero overlap")

    # Save comparison
    with open("/tmp/migration_comparison.json", "w", encoding="utf-8") as f:
        json.dump(comparison, f, indent=2)
    print("\nFull comparison saved to /tmp/migration_comparison.json")
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the migration only when this file is executed as a script, not
    # when it is imported as a module.
    main()
|