"""Migrate all bookmarks from Karakeep into Brain service via API.""" import json import os import sys import time import urllib.request import urllib.error import tempfile KARAKEEP_URL = os.environ.get("KARAKEEP_URL", "http://192.168.1.42:3005") KARAKEEP_API_KEY = os.environ.get("KARAKEEP_API_KEY", "ak2_f4141e5fe7265e23bd6f_4549c932c262010eafd08acb2139f1ac") BRAIN_URL = "http://localhost:8200" BRAIN_USER = "admin" def karakeep_get(path): req = urllib.request.Request( f"{KARAKEEP_URL}{path}", headers={"Authorization": f"Bearer {KARAKEEP_API_KEY}"}, ) return json.loads(urllib.request.urlopen(req, timeout=30).read()) def karakeep_download(asset_id): req = urllib.request.Request( f"{KARAKEEP_URL}/api/v1/assets/{asset_id}", headers={"Authorization": f"Bearer {KARAKEEP_API_KEY}"}, ) resp = urllib.request.urlopen(req, timeout=120) return resp.read(), resp.headers.get("Content-Type", "application/octet-stream") def brain_post_json(path, data): body = json.dumps(data).encode() req = urllib.request.Request( f"{BRAIN_URL}/api{path}", data=body, headers={"X-Gateway-User-Id": BRAIN_USER, "Content-Type": "application/json"}, method="POST", ) resp = urllib.request.urlopen(req, timeout=30) return json.loads(resp.read()) def brain_upload(file_data, filename, content_type, title=None): """Multipart upload to /api/items/upload.""" boundary = "----MigrationBoundary12345" parts = [] # File part parts.append(f"--{boundary}\r\n".encode()) parts.append(f'Content-Disposition: form-data; name="file"; filename="{filename}"\r\n'.encode()) parts.append(f"Content-Type: {content_type}\r\n\r\n".encode()) parts.append(file_data) parts.append(b"\r\n") # Title part if title: parts.append(f"--{boundary}\r\n".encode()) parts.append(b'Content-Disposition: form-data; name="title"\r\n\r\n') parts.append(title.encode()) parts.append(b"\r\n") parts.append(f"--{boundary}--\r\n".encode()) body = b"".join(parts) req = urllib.request.Request( f"{BRAIN_URL}/api/items/upload", data=body, headers={ "X-Gateway-User-Id": BRAIN_USER, "Content-Type": f"multipart/form-data; boundary={boundary}", }, method="POST", ) resp = urllib.request.urlopen(req, timeout=60) return json.loads(resp.read()) def brain_get_item(item_id): req = urllib.request.Request( f"{BRAIN_URL}/api/items/{item_id}", headers={"X-Gateway-User-Id": BRAIN_USER}, ) resp = urllib.request.urlopen(req, timeout=15) return json.loads(resp.read()) def fetch_all_bookmarks(): all_bk = [] cursor = None while True: url = "/api/v1/bookmarks?limit=100" if cursor: url += f"&cursor={cursor}" data = karakeep_get(url) bks = data.get("bookmarks", []) all_bk.extend(bks) cursor = data.get("nextCursor") if not cursor or not bks: break return all_bk def wait_for_processing(item_id, timeout=120): """Poll until item is done processing.""" start = time.time() while time.time() - start < timeout: item = brain_get_item(item_id) status = item.get("processing_status", "pending") if status in ("ready", "error"): return item time.sleep(3) return brain_get_item(item_id) def main(): print("Fetching all Karakeep bookmarks...") bookmarks = fetch_all_bookmarks() print(f"Found {len(bookmarks)} bookmarks\n") # Sort: notes first, then links, then assets (PDFs take longer) def sort_key(b): t = b.get("content", {}).get("type", "") return {"text": 0, "link": 1, "asset": 2}.get(t, 3) bookmarks.sort(key=sort_key) results = {"success": 0, "error": 0, "skipped": 0} comparison = [] for i, bk in enumerate(bookmarks): content = bk.get("content", {}) bk_type = content.get("type", "unknown") bk_title = bk.get("title") or "Untitled" bk_tags = [t["name"] for t in bk.get("tags", [])] bk_list = bk.get("list", {}) bk_folder = bk_list.get("name") if bk_list else None print(f"[{i+1}/{len(bookmarks)}] {bk_type}: {bk_title[:60]}") try: if bk_type == "link": url = content.get("url", "") if not url: print(" SKIP: no URL") results["skipped"] += 1 continue resp = brain_post_json("/items", { "type": "link", "url": url, "title": bk_title if bk_title != "Untitled" else None, }) elif bk_type == "text": text = content.get("text", "") if not text: print(" SKIP: no text") results["skipped"] += 1 continue resp = brain_post_json("/items", { "type": "note", "raw_content": text, "title": bk_title if bk_title != "Untitled" else None, }) elif bk_type == "asset": asset_id = content.get("assetId") asset_type = content.get("assetType", "unknown") if not asset_id: print(" SKIP: no assetId") results["skipped"] += 1 continue print(f" Downloading {asset_type} ({asset_id[:8]})...") file_data, ct = karakeep_download(asset_id) ext = {"pdf": ".pdf", "image": ".png"}.get(asset_type, ".bin") filename = f"{bk_title[:50]}{ext}" if bk_title != "Untitled" else f"upload{ext}" # Clean filename filename = filename.replace("/", "-").replace("\\", "-") if asset_type == "pdf": ct = "application/pdf" resp = brain_upload(file_data, filename, ct, title=bk_title if bk_title != "Untitled" else None) else: print(f" SKIP: unknown type '{bk_type}'") results["skipped"] += 1 continue item_id = resp.get("id") print(f" Created: {item_id} — waiting for AI classification...") # Wait for processing final = wait_for_processing(item_id, timeout=90) status = final.get("processing_status", "?") ai_folder = final.get("folder", "?") ai_tags = final.get("tags", []) ai_title = final.get("title", "?") # Compare entry = { "karakeep_title": bk_title, "karakeep_tags": bk_tags, "karakeep_folder": bk_folder, "ai_title": ai_title, "ai_folder": ai_folder, "ai_tags": ai_tags, "status": status, } comparison.append(entry) tag_match = "OK" if set(bk_tags) & set(ai_tags) or (not bk_tags and not ai_tags) else "DIFF" print(f" Status: {status}") print(f" AI Folder: {ai_folder} (Karakeep: {bk_folder or 'none'})") print(f" AI Tags: {ai_tags} vs Karakeep: {bk_tags} [{tag_match}]") print(f" AI Title: {ai_title}") results["success"] += 1 except Exception as e: print(f" ERROR: {e}") results["error"] += 1 print() # Summary print("=" * 60) print(f"MIGRATION COMPLETE") print(f" Success: {results['success']}") print(f" Errors: {results['error']}") print(f" Skipped: {results['skipped']}") print() # Tag comparison summary matches = 0 diffs = 0 for c in comparison: kk = set(c["karakeep_tags"]) ai = set(c["ai_tags"]) if kk & ai or (not kk and not ai): matches += 1 else: diffs += 1 print(f"Tag overlap: {matches}/{len(comparison)} items had at least one matching tag") print(f"Tag differences: {diffs}/{len(comparison)} items had zero overlap") # Save comparison with open("/tmp/migration_comparison.json", "w") as f: json.dump(comparison, f, indent=2) print("\nFull comparison saved to /tmp/migration_comparison.json") if __name__ == "__main__": main()