Script migrates documents where content (markdown) exists but tiptap_content is NULL. It is idempotent (only processes documents needing migration), reports the count of processed documents and any errors, and includes post-migration validation.
168 lines
5.1 KiB
Python
168 lines
5.1 KiB
Python
"""
|
|
Migration: Convert existing markdown content to TipTap JSON

This migration converts documents that have `content` (markdown) populated
but whose `tiptap_content` is NULL to the new TipTap JSON format.

Idempotent: Only processes non-deleted documents where tiptap_content is NULL
and content is neither NULL nor empty.
Can be run multiple times safely.
|
|
"""
|
|
import asyncio
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Add backend to path for imports when run standalone
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
from sqlalchemy import text
|
|
from app.database import async_engine
|
|
|
|
|
|
def markdown_to_tiptap(markdown_content: str) -> dict:
    """
    Convert markdown content to a TipTap JSON document structure.

    Basic conversion: no markdown syntax is interpreted. Each line of the
    input becomes one ``paragraph`` node holding the raw line as a single
    ``text`` node. Empty lines result in paragraphs with an empty
    ``content`` list.

    Line endings are normalized before splitting, so Windows (CRLF) and
    bare carriage-return line breaks do not leave a stray carriage return
    at the end of a paragraph's text.

    Args:
        markdown_content: The markdown source text.

    Returns:
        A dict of the form ``{"type": "doc", "content": [...]}`` with one
        paragraph entry per input line.
    """
    # Collapse CRLF first so it is not turned into two line breaks.
    normalized = markdown_content.replace("\r\n", "\n").replace("\r", "\n")
    paragraphs = [
        {
            "type": "paragraph",
            "content": [{"type": "text", "text": line}] if line else [],
        }
        for line in normalized.split("\n")
    ]
    return {"type": "doc", "content": paragraphs}
|
|
|
|
|
|
async def get_documents_needing_migration():
    """Fetch every ``documents`` row that still needs a TipTap conversion.

    A row qualifies when ``content`` is neither NULL nor the empty string,
    ``tiptap_content`` is NULL, and ``is_deleted`` is 0.

    Returns:
        All matching rows, each carrying ``id``, ``title`` and ``content``
        in that order.
    """
    pending_query = text("""
            SELECT id, title, content
            FROM documents
            WHERE content IS NOT NULL
              AND content != ''
              AND tiptap_content IS NULL
              AND is_deleted = 0
        """)
    async with async_engine.begin() as connection:
        pending_rows = await connection.execute(pending_query)
        return pending_rows.fetchall()
|
|
|
|
|
|
async def update_document(doc_id: str, tiptap_content: str):
    """Store serialized TipTap JSON on a single document row.

    Sets ``tiptap_content`` to the given value and sets ``updated_at`` to the
    SQL expression ``datetime('now')`` for the row whose ``id`` equals
    ``doc_id``. The statement runs inside an ``async_engine.begin()`` block.

    Args:
        doc_id: Value matched against the ``id`` column.
        tiptap_content: The value written to ``tiptap_content`` (the caller
            in this file passes a JSON string).
    """
    update_statement = text("""
            UPDATE documents
            SET tiptap_content = :tiptap_content,
                updated_at = datetime('now')
            WHERE id = :id
        """)
    bind_values = {"id": doc_id, "tiptap_content": tiptap_content}
    async with async_engine.begin() as connection:
        await connection.execute(update_statement, bind_values)
|
|
|
|
|
|
async def migrate():
    """Run the migration.

    Loads all documents needing migration, converts each one's markdown
    ``content`` with ``markdown_to_tiptap``, serializes the result to a JSON
    string and stores it via ``update_document``. Progress lines and a final
    summary are printed to stdout.

    A failure on one document is reported and counted, but the remaining
    documents are still processed.

    Raises:
        RuntimeError: If at least one document failed to migrate.
    """
    separator = "=" * 60
    print(separator)
    print("Migration: Convert markdown to TipTap JSON")
    print(separator)

    # Get documents needing migration
    documents = await get_documents_needing_migration()
    total = len(documents)

    print(f"\nFound {total} document(s) needing migration.")

    if total == 0:
        print("Nothing to do. Exiting.")
        return

    print(f"\nProcessing {total} document(s)...\n")

    processed = 0
    errors = 0

    for doc_id, title, content in documents:
        # str() keeps the log lines working even if the id is not a string;
        # a failure while formatting the error message would abort the loop.
        short_id = str(doc_id)[:8]

        try:
            # Convert markdown to TipTap JSON
            tiptap_json = markdown_to_tiptap(content)
            tiptap_str = json.dumps(tiptap_json, ensure_ascii=False)

            # Update database
            await update_document(doc_id, tiptap_str)
        except Exception as e:
            # Per-document boundary: report, count, and keep going. The
            # overall failure is raised after the summary below.
            errors += 1
            print(f" [ERR] Document {short_id}... failed: {e}")
            continue

        # Reporting happens outside the try block: the row is already
        # written at this point, so a display problem (e.g. a NULL title)
        # must not be counted as a migration error.
        processed += 1
        title_text = "(untitled)" if title is None else str(title)
        # Truncate title for display
        display_title = title_text[:50] + "..." if len(title_text) > 50 else title_text
        print(f" [OK] {display_title} (id: {short_id}...)")

    print("\n" + separator)
    print("Migration complete:")
    print(f" - Processed: {processed}")
    print(f" - Errors: {errors}")
    print(separator)

    if errors > 0:
        print(f"\nWARNING: {errors} document(s) failed. Check logs above.")
        raise RuntimeError(f"Migration failed with {errors} error(s)")
|
|
|
|
|
|
async def validate():
    """Validate migration results.

    Performs two checks and prints the outcome of each to stdout:

    1. Counts non-deleted documents that have non-empty ``content`` but a
       NULL ``tiptap_content`` (i.e. rows the migration would still pick up).
    2. Reads up to 5 rows with a non-NULL ``tiptap_content`` and checks that
       each value parses as a JSON object with ``"type": "doc"`` and a
       ``"content"`` key.

    The function only reports; it does not raise on validation failures.
    """
    print("\n--- Post-migration validation ---")

    async with async_engine.begin() as conn:
        # Count documents with content but no tiptap_content
        result = await conn.execute(
            text("""
                SELECT COUNT(*)
                FROM documents
                WHERE content IS NOT NULL
                  AND content != ''
                  AND tiptap_content IS NULL
                  AND is_deleted = 0
            """)
        )
        remaining = result.scalar()

        if remaining == 0:
            print(" [OK] All documents with content now have tiptap_content")
        else:
            print(f" [WARN] {remaining} document(s) still need migration")

        # Verify tiptap_content is valid JSON
        result = await conn.execute(
            text("""
                SELECT id, tiptap_content
                FROM documents
                WHERE tiptap_content IS NOT NULL
                LIMIT 5
            """)
        )
        samples = result.fetchall()

        valid_count = 0
        for _doc_id, tc in samples:
            try:
                parsed = json.loads(tc)
            except (json.JSONDecodeError, TypeError):
                # Not parseable as JSON text (TypeError: value is not
                # str/bytes) -> counts as invalid.
                continue
            # Valid JSON that is not an object (list, string, number) has no
            # .get(); treat it as invalid instead of crashing.
            if isinstance(parsed, dict) and parsed.get("type") == "doc" and "content" in parsed:
                valid_count += 1

        # Only claim OK when every sampled record passed the check.
        status = "OK" if valid_count == len(samples) else "WARN"
        print(f" [{status}] {valid_count}/{len(samples)} sampled tiptap_content records are valid TipTap JSON")
|
|
|
|
|
|
if __name__ == "__main__":
    async def _main():
        """Run the migration, then the post-migration validation."""
        await migrate()
        await validate()

    # Use a single event loop for both steps: the module-level async_engine
    # keeps pooled connections, and sharing those across the separate loops
    # created by repeated asyncio.run() calls is not supported by default.
    # As before, validation is skipped if migrate() raises.
    asyncio.run(_main())
|