Add migration script to convert markdown content to TipTap JSON
- Script migrates documents where content (markdown) exists but tiptap_content is NULL
- Idempotent: only processes documents needing migration
- Reports count of processed documents and any errors
- Includes post-migration validation
This commit is contained in:
167
migrations/migrate_existing_content.py
Normal file
167
migrations/migrate_existing_content.py
Normal file
@@ -0,0 +1,167 @@
|
||||
"""
|
||||
Migration: Convert existing markdown content to TipTap JSON
|
||||
|
||||
This migration converts documents that have `content` (markdown) populated
|
||||
but `tiptap_content` is NULL to the new TipTap JSON format.
|
||||
|
||||
Idempotent: Only processes non-deleted documents where tiptap_content is NULL and content is non-empty.
|
||||
Can be run multiple times safely.
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add backend to path for imports when run standalone
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from sqlalchemy import text
|
||||
from app.database import async_engine
|
||||
|
||||
|
||||
def markdown_to_tiptap(markdown_content: str) -> dict:
    """
    Convert markdown content to a TipTap JSON document structure.

    Basic conversion: markdown syntax is not interpreted. Each line of the
    input becomes one paragraph node holding the raw line text, and an empty
    line becomes a paragraph with an empty ``content`` list.

    Args:
        markdown_content: Source text. CRLF and bare CR line endings are
            treated the same as LF.

    Returns:
        A dict of the form ``{"type": "doc", "content": [<paragraph>, ...]}``.
    """
    # Normalize line endings first: splitting on "\n" alone would leave a
    # stray carriage return at the end of every text node for CRLF content,
    # and blank CRLF lines would become paragraphs containing only "\r".
    normalized = markdown_content.replace("\r\n", "\n").replace("\r", "\n")
    paragraphs = [
        {
            "type": "paragraph",
            # An empty line gets no text node rather than one with empty text.
            "content": [{"type": "text", "text": line}] if line else [],
        }
        for line in normalized.split("\n")
    ]
    return {"type": "doc", "content": paragraphs}
|
||||
|
||||
|
||||
async def get_documents_needing_migration():
    """
    Fetch the documents that still need a TipTap conversion.

    Selects ``(id, title, content)`` rows from ``documents`` where ``content``
    is neither NULL nor empty, ``tiptap_content`` is NULL and ``is_deleted``
    is 0.

    Returns:
        The full list of matching rows.
    """
    pending_query = text("""
        SELECT id, title, content
        FROM documents
        WHERE content IS NOT NULL
          AND content != ''
          AND tiptap_content IS NULL
          AND is_deleted = 0
    """)
    async with async_engine.begin() as connection:
        cursor = await connection.execute(pending_query)
        pending_rows = cursor.fetchall()
    return pending_rows
|
||||
|
||||
|
||||
async def update_document(doc_id: str, tiptap_content: str):
    """
    Store the converted TipTap JSON for a single document.

    Runs one UPDATE inside its own ``async_engine.begin()`` transaction,
    setting ``tiptap_content`` and refreshing ``updated_at``.

    Args:
        doc_id: Value matched against ``documents.id``.
        tiptap_content: Serialized TipTap JSON to write.
    """
    # NOTE(review): datetime('now') is SQLite syntax - presumably the target
    # database is SQLite; confirm before running against another backend.
    statement = text("""
        UPDATE documents
        SET tiptap_content = :tiptap_content,
            updated_at = datetime('now')
        WHERE id = :id
    """)
    bind_values = {"id": doc_id, "tiptap_content": tiptap_content}
    async with async_engine.begin() as connection:
        await connection.execute(statement, bind_values)
|
||||
|
||||
|
||||
def _display_title(title, limit: int = 50) -> str:
    """Return a printable title, cut to *limit* characters plus "..." if longer."""
    if title is None:
        return "(untitled)"
    title = str(title)
    return title[:limit] + "..." if len(title) > limit else title


async def migrate():
    """
    Run the migration.

    Fetches every document returned by ``get_documents_needing_migration()``,
    converts its markdown ``content`` with ``markdown_to_tiptap()``, and
    writes the serialized JSON back through ``update_document()``. Progress
    and a final summary are printed to stdout.

    A failure on one document is reported and counted, and the loop carries
    on with the remaining documents.

    Raises:
        RuntimeError: If at least one document failed to convert or update.
    """
    print("=" * 60)
    print("Migration: Convert markdown to TipTap JSON")
    print("=" * 60)

    # Get documents needing migration
    documents = await get_documents_needing_migration()
    total = len(documents)

    print(f"\nFound {total} document(s) needing migration.")

    if total == 0:
        print("Nothing to do. Exiting.")
        return

    print(f"\nProcessing {total} document(s)...\n")

    processed = 0
    errors = 0

    for doc_id, title, content in documents:
        # str() keeps the log lines working for non-string ids, so neither
        # the success message nor the error handler can raise on slicing.
        short_id = str(doc_id)[:8]

        # Only the conversion and the DB write are guarded. Formatting the
        # success message used to live inside the try block, so a NULL title
        # made an already-updated document count as both processed and failed.
        try:
            tiptap_json = markdown_to_tiptap(content)
            tiptap_str = json.dumps(tiptap_json, ensure_ascii=False)
            await update_document(doc_id, tiptap_str)
        except Exception as e:
            # Per-document boundary: report the failure and keep going.
            errors += 1
            print(f" [ERR] Document {short_id}... failed: {e}")
            continue

        processed += 1
        print(f" [OK] {_display_title(title)} (id: {short_id}...)")

    print("\n" + "=" * 60)
    print("Migration complete:")
    print(f" - Processed: {processed}")
    print(f" - Errors: {errors}")
    print("=" * 60)

    if errors > 0:
        print(f"\nWARNING: {errors} document(s) failed. Check logs above.")
        raise RuntimeError(f"Migration failed with {errors} error(s)")
|
||||
|
||||
|
||||
async def validate():
    """
    Validate migration results.

    Performs two read-only checks and prints the outcome of each:

    1. Counts non-deleted documents that have non-empty ``content`` but still
       no ``tiptap_content``.
    2. Loads up to five rows with ``tiptap_content`` set and checks that each
       value parses as a JSON object with ``"type": "doc"`` and a
       ``"content"`` key.

    Problems are reported as ``[WARN]`` lines; this function does not raise
    on validation failures.
    """
    print("\n--- Post-migration validation ---")

    async with async_engine.begin() as conn:
        # Count documents with content but no tiptap_content
        result = await conn.execute(
            text("""
                SELECT COUNT(*)
                FROM documents
                WHERE content IS NOT NULL
                  AND content != ''
                  AND tiptap_content IS NULL
                  AND is_deleted = 0
            """)
        )
        remaining = result.scalar()

        if remaining == 0:
            print(" [OK] All documents with content now have tiptap_content")
        else:
            print(f" [WARN] {remaining} document(s) still need migration")

        # Verify tiptap_content is valid JSON
        result = await conn.execute(
            text("""
                SELECT id, tiptap_content
                FROM documents
                WHERE tiptap_content IS NOT NULL
                LIMIT 5
            """)
        )
        samples = result.fetchall()

        valid_count = 0
        for doc_id, tc in samples:
            short_id = str(doc_id)[:8]
            try:
                parsed = json.loads(tc)
            except (TypeError, ValueError) as exc:
                # ValueError covers json.JSONDecodeError; TypeError covers a
                # value that is not str/bytes. Report instead of skipping.
                print(f" [WARN] Document {short_id}... has unparseable tiptap_content: {exc}")
                continue

            # Valid JSON can still be a list, string or number; guard before
            # calling .get() so a malformed row cannot crash validation.
            if isinstance(parsed, dict) and parsed.get("type") == "doc" and "content" in parsed:
                valid_count += 1
            else:
                print(f" [WARN] Document {short_id}... tiptap_content is not a TipTap doc object")

        # Only claim success when every sampled record passed.
        status = "OK" if valid_count == len(samples) else "WARN"
        print(f" [{status}] {valid_count}/{len(samples)} sampled tiptap_content records are valid TipTap JSON")
|
||||
|
||||
|
||||
if __name__ == "__main__":

    async def _main() -> None:
        """Run the migration, then the post-migration validation."""
        await migrate()
        await validate()

    # Use a single asyncio.run() call. Each call creates and closes its own
    # event loop, and the module-level async_engine would otherwise be shared
    # between two loops, which SQLAlchemy's asyncio docs warn against for
    # pooled connections. As before, an exception raised by migrate()
    # propagates and validation is skipped.
    asyncio.run(_main())
|
||||
Reference in New Issue
Block a user