# Origin: git commit 202e70b4a8c858c8f0abc0698fcba87d2d5f06f0 by Motoko
# (Mon, 30 Mar 2026 23:28:01 +0000), "Add migration script to convert markdown
# content to TipTap JSON"; added as migrations/migrate_existing_content.py.
"""
Migration: Convert existing markdown content to TipTap JSON

This migration converts documents that have `content` (markdown) populated
but `tiptap_content` is NULL to the new TipTap JSON format.

Idempotent: Only processes documents where tiptap_content is NULL and content
is not NULL/empty, and the UPDATE itself refuses to overwrite a non-NULL
tiptap_content. Can be run multiple times safely.

When run as a script it prints a per-document log, a summary of processed /
skipped / failed documents, and a post-migration validation report.
"""
import asyncio
import json
import sys
from pathlib import Path

# Add backend to path for imports when run standalone
sys.path.insert(0, str(Path(__file__).parent.parent))

from sqlalchemy import text
from app.database import async_engine

# Single definition of "this document still needs migrating", shared by the
# selection query and the post-migration check so the two cannot drift apart.
_NEEDS_MIGRATION_WHERE = """
    content IS NOT NULL
    AND content != ''
    AND tiptap_content IS NULL
    AND is_deleted = 0
"""


def markdown_to_tiptap(markdown_content: str) -> dict:
    """
    Convert markdown content to TipTap JSON structure.

    Basic conversion: wraps each line in a paragraph; no markdown syntax is
    interpreted. Empty lines result in empty paragraphs (an empty ``content``
    list rather than an empty text node).

    Args:
        markdown_content: The raw markdown text.

    Returns:
        A dict of the form ``{"type": "doc", "content": [<paragraph>, ...]}``.
    """
    # Normalise CRLF / lone CR first; otherwise Windows-authored content would
    # leave a stray "\r" at the end of every paragraph's text.
    normalized = markdown_content.replace("\r\n", "\n").replace("\r", "\n")
    paragraphs = [
        {
            "type": "paragraph",
            "content": [{"type": "text", "text": line}] if line else [],
        }
        for line in normalized.split("\n")
    ]
    return {"type": "doc", "content": paragraphs}


def _short_id(doc_id) -> str:
    """Return the first 8 characters of the id for log output (any id type)."""
    return f"{str(doc_id)[:8]}..."


def _display_title(title) -> str:
    """Return a title truncated to 50 characters for log output; tolerates NULL."""
    shown = str(title) if title else "(untitled)"
    return shown[:50] + "..." if len(shown) > 50 else shown


def _is_valid_tiptap(raw) -> bool:
    """Return True if *raw* is JSON text for an object with type "doc" and a content key."""
    try:
        parsed = json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        # TypeError: the stored value is not str/bytes (e.g. NULL or a number).
        return False
    # json.loads may legitimately return a list or scalar, which has no .get().
    return (
        isinstance(parsed, dict)
        and parsed.get("type") == "doc"
        and "content" in parsed
    )


async def get_documents_needing_migration():
    """
    Get documents that have content but no tiptap_content.

    Returns:
        All matching rows as ``(id, title, content)``; soft-deleted documents
        (``is_deleted`` != 0) and empty content are excluded.
    """
    async with async_engine.begin() as conn:
        result = await conn.execute(
            text(
                "SELECT id, title, content FROM documents "
                f"WHERE {_NEEDS_MIGRATION_WHERE}"
            )
        )
        return result.fetchall()


async def update_document(doc_id: str, tiptap_content: str) -> bool:
    """
    Set a document's tiptap_content field, in its own transaction.

    The UPDATE only applies while ``tiptap_content`` is still NULL, so a value
    written by someone else after the document was selected is never
    overwritten.

    Args:
        doc_id: Primary key of the document row.
        tiptap_content: Serialized TipTap JSON to store.

    Returns:
        True if a row was updated, False if no row matched (the document
        already has tiptap_content or no longer exists).
    """
    async with async_engine.begin() as conn:
        result = await conn.execute(
            # NOTE: datetime('now') is SQLite SQL syntax.
            text("""
                UPDATE documents
                SET tiptap_content = :tiptap_content,
                    updated_at = datetime('now')
                WHERE id = :id
                  AND tiptap_content IS NULL
            """),
            {"id": doc_id, "tiptap_content": tiptap_content},
        )
        # Treat an "unknown" rowcount (-1) as updated rather than as a skip.
        return result.rowcount != 0


async def migrate():
    """
    Run the migration.

    Converts and stores every pending document, one transaction per document
    so that a single failure does not roll back the others, and prints a
    per-document log plus a summary.

    Raises:
        RuntimeError: If at least one document failed to migrate.
    """
    print("=" * 60)
    print("Migration: Convert markdown to TipTap JSON")
    print("=" * 60)

    # Get documents needing migration
    documents = await get_documents_needing_migration()
    total = len(documents)

    print(f"\nFound {total} document(s) needing migration.")

    if not documents:
        print("Nothing to do. Exiting.")
        return

    print(f"\nProcessing {total} document(s)...\n")

    processed = 0
    skipped = 0
    errors = 0

    for doc_id, title, content in documents:
        # Only the conversion and the write are guarded; log formatting stays
        # outside so it can never turn a committed update into a reported error.
        try:
            tiptap_json = markdown_to_tiptap(content)
            tiptap_str = json.dumps(tiptap_json, ensure_ascii=False)
            updated = await update_document(doc_id, tiptap_str)
        except Exception as e:
            # Deliberately broad: keep going and report all failures at the end.
            errors += 1
            print(f" [ERR] Document {_short_id(doc_id)} failed: {e}")
            continue

        if updated:
            processed += 1
            print(f" [OK] {_display_title(title)} (id: {_short_id(doc_id)})")
        else:
            skipped += 1
            print(
                f" [SKIP] {_display_title(title)} (id: {_short_id(doc_id)}) "
                "no longer needs migration"
            )

    print("\n" + "=" * 60)
    print("Migration complete:")
    print(f" - Processed: {processed}")
    print(f" - Skipped: {skipped}")
    print(f" - Errors: {errors}")
    print("=" * 60)

    if errors > 0:
        print(f"\nWARNING: {errors} document(s) failed. Check logs above.")
        raise RuntimeError(f"Migration failed with {errors} error(s)")


async def validate():
    """
    Validate migration results.

    Prints how many documents still match the "needs migration" filter, then
    spot-checks up to 5 rows with non-NULL tiptap_content for being JSON
    objects with ``type == "doc"`` and a ``content`` key.
    """
    print("\n--- Post-migration validation ---")

    async with async_engine.begin() as conn:
        # Count documents with content but no tiptap_content
        result = await conn.execute(
            text(f"SELECT COUNT(*) FROM documents WHERE {_NEEDS_MIGRATION_WHERE}")
        )
        remaining = result.scalar()

        if remaining == 0:
            print(" [OK] All documents with content now have tiptap_content")
        else:
            print(f" [WARN] {remaining} document(s) still need migration")

        # Verify tiptap_content is valid JSON
        result = await conn.execute(
            text("""
                SELECT id, tiptap_content
                FROM documents
                WHERE tiptap_content IS NOT NULL
                LIMIT 5
            """)
        )
        samples = result.fetchall()

    valid_count = sum(1 for _doc_id, tc in samples if _is_valid_tiptap(tc))
    # Only claim OK when every sampled record actually passed.
    tag = "OK" if valid_count == len(samples) else "WARN"
    print(
        f" [{tag}] {valid_count}/{len(samples)} sampled tiptap_content "
        "records are valid TipTap JSON"
    )


async def main():
    """
    Run the migration followed by validation on a single event loop.

    Validation runs even when the migration reports failures, so the operator
    sees how many documents remain. The engine is disposed at the end so
    pooled connections are closed before the loop shuts down.
    """
    try:
        try:
            await migrate()
        finally:
            await validate()
    finally:
        await async_engine.dispose()


if __name__ == "__main__":
    # One asyncio.run() call: a pooled AsyncEngine must not be shared across
    # separate event loops.
    asyncio.run(main())