""" Migration: Convert existing markdown content to TipTap JSON This migration converts documents that have `content` (markdown) populated but `tiptap_content` is NULL to the new TipTap JSON format. Idempotent: Only processes documents where tiptap_content is NULL and content is not NULL. Can be run multiple times safely. """ import asyncio import json import sys from pathlib import Path # Add backend to path for imports when run standalone sys.path.insert(0, str(Path(__file__).parent.parent)) from sqlalchemy import text from app.database import async_engine def markdown_to_tiptap(markdown_content: str) -> dict: """ Convert markdown content to TipTap JSON structure. Basic conversion: wraps each line in a paragraph. Empty lines result in empty paragraphs. """ lines = markdown_content.split('\n') content = [] for line in lines: content.append({ "type": "paragraph", "content": [{"type": "text", "text": line}] if line else [] }) return {"type": "doc", "content": content} async def get_documents_needing_migration(): """Get documents that have content but no tiptap_content.""" async with async_engine.begin() as conn: result = await conn.execute( text(""" SELECT id, title, content FROM documents WHERE content IS NOT NULL AND content != '' AND tiptap_content IS NULL AND is_deleted = 0 """) ) return result.fetchall() async def update_document(doc_id: str, tiptap_content: str): """Update a document's tiptap_content field.""" async with async_engine.begin() as conn: await conn.execute( text(""" UPDATE documents SET tiptap_content = :tiptap_content, updated_at = datetime('now') WHERE id = :id """), {"id": doc_id, "tiptap_content": tiptap_content} ) async def migrate(): """Run the migration.""" print("=" * 60) print("Migration: Convert markdown to TipTap JSON") print("=" * 60) # Get documents needing migration documents = await get_documents_needing_migration() total = len(documents) print(f"\nFound {total} document(s) needing migration.") if total == 0: print("Nothing to do. Exiting.") return print(f"\nProcessing {total} document(s)...\n") processed = 0 errors = 0 for doc_id, title, content in documents: try: # Convert markdown to TipTap JSON tiptap_json = markdown_to_tiptap(content) tiptap_str = json.dumps(tiptap_json, ensure_ascii=False) # Update database await update_document(doc_id, tiptap_str) processed += 1 # Truncate title for display display_title = title[:50] + "..." if len(title) > 50 else title print(f" [OK] {display_title} (id: {doc_id[:8]}...)") except Exception as e: errors += 1 print(f" [ERR] Document {doc_id[:8]}... failed: {e}") print("\n" + "=" * 60) print(f"Migration complete:") print(f" - Processed: {processed}") print(f" - Errors: {errors}") print("=" * 60) if errors > 0: print(f"\nWARNING: {errors} document(s) failed. Check logs above.") raise RuntimeError(f"Migration failed with {errors} error(s)") async def validate(): """Validate migration results.""" print("\n--- Post-migration validation ---") async with async_engine.begin() as conn: # Count documents with content but no tiptap_content result = await conn.execute( text(""" SELECT COUNT(*) FROM documents WHERE content IS NOT NULL AND content != '' AND tiptap_content IS NULL AND is_deleted = 0 """) ) remaining = result.scalar() if remaining == 0: print(" [OK] All documents with content now have tiptap_content") else: print(f" [WARN] {remaining} document(s) still need migration") # Verify tiptap_content is valid JSON result = await conn.execute( text(""" SELECT id, tiptap_content FROM documents WHERE tiptap_content IS NOT NULL LIMIT 5 """) ) samples = result.fetchall() valid_count = 0 for doc_id, tc in samples: try: parsed = json.loads(tc) if parsed.get("type") == "doc" and "content" in parsed: valid_count += 1 except json.JSONDecodeError: pass print(f" [OK] {valid_count}/{len(samples)} sampled tiptap_content records are valid TipTap JSON") if __name__ == "__main__": asyncio.run(migrate()) asyncio.run(validate())