Files
claudia-docs-api/migrations/migrate_existing_content.py
Motoko 202e70b4a8 Add migration script to convert markdown content to TipTap JSON
- Script migrates documents where content (markdown) exists but tiptap_content is NULL
- Idempotent: only processes documents needing migration
- Reports count of processed documents and any errors
- Includes post-migration validation
2026-03-30 23:28:01 +00:00

168 lines
5.1 KiB
Python

"""
Migration: Convert existing markdown content to TipTap JSON
This migration converts documents whose `content` (markdown) is populated
but whose `tiptap_content` is NULL to the new TipTap JSON format.
Idempotent: only processes non-deleted documents where tiptap_content is NULL and content is neither NULL nor empty.
Can be run multiple times safely.
"""
import asyncio
import json
import sys
from pathlib import Path
# Add backend to path for imports when run standalone
sys.path.insert(0, str(Path(__file__).parent.parent))
from sqlalchemy import text
from app.database import async_engine
def markdown_to_tiptap(markdown_content: str) -> dict:
    """Convert markdown content to a TipTap JSON document.

    Basic conversion: the text is not parsed as markdown. Each line becomes
    one paragraph node holding a single text node, and each empty line
    becomes a paragraph with an empty ``content`` list.

    Args:
        markdown_content: Source text. Windows (CRLF) and bare CR line
            endings are treated the same as LF so that no carriage return
            ends up inside a text node.

    Returns:
        A dict of the form ``{"type": "doc", "content": [...]}`` with one
        paragraph entry per input line.
    """
    # Normalize line endings first; splitting on "\n" alone would leave a
    # trailing "\r" on every line of CRLF-terminated content.
    normalized = markdown_content.replace("\r\n", "\n").replace("\r", "\n")
    return {
        "type": "doc",
        "content": [
            {
                "type": "paragraph",
                "content": [{"type": "text", "text": line}] if line else [],
            }
            for line in normalized.split("\n")
        ],
    }
async def get_documents_needing_migration():
    """Fetch the documents that still have to be migrated.

    Returns:
        All ``(id, title, content)`` rows from ``documents`` whose
        ``content`` is neither NULL nor empty, whose ``tiptap_content`` is
        NULL and whose ``is_deleted`` equals 0.
    """
    pending_query = text("""
SELECT id, title, content
FROM documents
WHERE content IS NOT NULL
AND content != ''
AND tiptap_content IS NULL
AND is_deleted = 0
""")
    async with async_engine.begin() as connection:
        rows = await connection.execute(pending_query)
        return rows.fetchall()
async def update_document(doc_id: str, tiptap_content: str):
    """Write serialized TipTap JSON onto one document row.

    Args:
        doc_id: Value matched against the ``id`` column of ``documents``.
        tiptap_content: String stored in the ``tiptap_content`` column.

    The same statement also sets ``updated_at`` to ``datetime('now')``.
    """
    statement = text("""
UPDATE documents
SET tiptap_content = :tiptap_content,
updated_at = datetime('now')
WHERE id = :id
""")
    bind_values = {"id": doc_id, "tiptap_content": tiptap_content}
    async with async_engine.begin() as connection:
        await connection.execute(statement, bind_values)
async def migrate():
    """Run the migration.

    Loads the rows returned by ``get_documents_needing_migration``, converts
    each document's markdown with ``markdown_to_tiptap``, serializes the
    result to JSON and stores it through ``update_document``. A status line
    is printed per document, followed by a summary.

    A failure while converting or updating one document is reported and
    counted, and the remaining documents are still processed.

    Raises:
        RuntimeError: If at least one document failed to convert or update.
    """
    print("=" * 60)
    print("Migration: Convert markdown to TipTap JSON")
    print("=" * 60)

    # Get documents needing migration
    documents = await get_documents_needing_migration()
    total = len(documents)
    print(f"\nFound {total} document(s) needing migration.")
    if total == 0:
        print("Nothing to do. Exiting.")
        return

    print(f"\nProcessing {total} document(s)...\n")
    processed = 0
    errors = 0
    for doc_id, title, content in documents:
        # Computed up front and via str() so that neither status line can
        # itself raise when the id is not a string.
        short_id = str(doc_id)[:8]
        try:
            # Only the conversion and the write are guarded: a problem while
            # formatting the log line must not mark a stored document as failed.
            tiptap_json = markdown_to_tiptap(content)
            tiptap_str = json.dumps(tiptap_json, ensure_ascii=False)
            await update_document(doc_id, tiptap_str)
        except Exception as e:
            errors += 1
            print(f" [ERR] Document {short_id}... failed: {e}")
            continue

        processed += 1
        # A NULL title is displayed with a placeholder instead of raising.
        title_text = "(untitled)" if title is None else str(title)
        # Truncate title for display
        display_title = title_text[:50] + "..." if len(title_text) > 50 else title_text
        print(f" [OK] {display_title} (id: {short_id}...)")

    print("\n" + "=" * 60)
    print("Migration complete:")
    print(f" - Processed: {processed}")
    print(f" - Errors: {errors}")
    print("=" * 60)
    if errors > 0:
        print(f"\nWARNING: {errors} document(s) failed. Check logs above.")
        raise RuntimeError(f"Migration failed with {errors} error(s)")
async def validate():
    """Validate migration results.

    Prints two checks:

    1. How many non-deleted documents with non-empty ``content`` still have
       a NULL ``tiptap_content``.
    2. How many of up to five sampled ``tiptap_content`` values decode to a
       JSON object whose ``"type"`` is ``"doc"`` and which has a
       ``"content"`` key.

    Nothing is raised for failed checks; the outcome is only printed.
    """
    print("\n--- Post-migration validation ---")
    async with async_engine.begin() as conn:
        # Count documents with content but no tiptap_content
        result = await conn.execute(
            text("""
SELECT COUNT(*)
FROM documents
WHERE content IS NOT NULL
AND content != ''
AND tiptap_content IS NULL
AND is_deleted = 0
""")
        )
        remaining = result.scalar()
        if remaining == 0:
            print(" [OK] All documents with content now have tiptap_content")
        else:
            print(f" [WARN] {remaining} document(s) still need migration")

        # Verify tiptap_content is valid JSON
        result = await conn.execute(
            text("""
SELECT id, tiptap_content
FROM documents
WHERE tiptap_content IS NOT NULL
LIMIT 5
""")
        )
        samples = result.fetchall()
        valid_count = 0
        for _doc_id, tc in samples:
            try:
                parsed = json.loads(tc)
            except (TypeError, ValueError):
                # ValueError covers json.JSONDecodeError; TypeError is raised
                # when the stored value is not str/bytes. Either way the
                # sample counts as invalid instead of aborting validation.
                continue
            # Valid JSON that is not an object (list, string, number) has no
            # .get(), so check the type before inspecting keys.
            if (
                isinstance(parsed, dict)
                and parsed.get("type") == "doc"
                and "content" in parsed
            ):
                valid_count += 1

        # Report OK only when every sampled record passed.
        status = "OK" if valid_count == len(samples) else "WARN"
        print(f" [{status}] {valid_count}/{len(samples)} sampled tiptap_content records are valid TipTap JSON")
if __name__ == "__main__":

    async def _main() -> None:
        """Run the migration, then the validation, on one event loop."""
        await migrate()
        await validate()

    # One asyncio.run() call instead of two: each call creates a fresh event
    # loop, and SQLAlchemy advises against reusing one AsyncEngine across
    # different event loops with the default pool.
    # As before, validate() is skipped when migrate() raises.
    asyncio.run(_main())