Files
claudia-docs-api/app/routers/documents.py
Motoko bbbe42358d Phase 2: Add reasoning and TipTap content endpoints
- Extend Document model with reasoning_type, confidence, reasoning_steps, model_source, tiptap_content fields
- Add new endpoints:
  - GET /documents/{id}/reasoning - Get reasoning metadata
  - PATCH /documents/{id}/reasoning - Update reasoning metadata
  - GET /documents/{id}/reasoning-panel - Get reasoning panel data for UI
  - POST /documents/{id}/reasoning-steps - Add reasoning step
  - DELETE /documents/{id}/reasoning-steps/{step} - Delete reasoning step
  - GET /documents/{id}/content?format=tiptap|markdown - Get content in TipTap or Markdown
  - PUT /documents/{id}/content - Update content (supports both TipTap JSON and Markdown)
- Add TipTap to Markdown conversion
- Update database schema with new columns
- Add comprehensive tests for all new endpoints
- All 37 tests passing
2026-03-30 23:11:44 +00:00

843 lines
26 KiB
Python

import json
import uuid
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, Query, Request
from sqlalchemy import delete, select, text
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.models.document import Document, ReasoningType
from app.models.folder import Folder
from app.models.project import Project
from app.models.tag import DocumentTag, Tag
from app.routers.auth import get_current_agent
from app.schemas.document import (
DocumentBriefResponse,
DocumentContentUpdate,
DocumentCreate,
DocumentListResponse,
DocumentResponse,
DocumentUpdate,
ReasoningMetadata,
ReasoningPanel,
ReasoningStep,
ReasoningStepAdd,
ReasoningUpdate,
TagInfo,
TipTapContentResponse,
TipTapContentUpdate,
)
from app.schemas.tag import DocumentTagsAssign
# Router that collects the document endpoints declared in this module.
# The route decorators below spell out full "/api/v1/..." paths.
router = APIRouter(tags=["documents"])
def build_doc_path(project_id: str, doc_id: str, folder_id: str | None, folder_path: str | None) -> str:
if folder_id and folder_path:
return f"{folder_path}/{doc_id}"
return f"/{project_id}/{doc_id}"
def tiptap_to_markdown(tiptap: dict) -> str:
    """Convert a TipTap (ProseMirror) JSON document to a Markdown string.

    Supported nodes: doc, paragraph, heading, text (bold / italic / code /
    strike / link marks), bulletList, orderedList, listItem, blockquote,
    codeBlock, hardBreak, horizontalRule and image. Unknown node types are
    rendered as an empty string.

    Args:
        tiptap: The root node of the TipTap JSON tree.

    Returns:
        The Markdown text with leading/trailing whitespace stripped, or ""
        when ``tiptap`` is empty or not a dict.
    """
    if not tiptap or not isinstance(tiptap, dict):
        return ""

    # Surround-style marks and the Markdown delimiter each one maps to.
    mark_wrappers = {"bold": "**", "italic": "*", "code": "`", "strike": "~~"}

    def attrs_of(node: dict) -> dict:
        # TipTap may serialize "attrs" as null; treat that like "no attrs".
        attrs = node.get("attrs")
        return attrs if isinstance(attrs, dict) else {}

    def children_of(node: dict) -> list:
        # A missing or null "content" means the node has no children.
        content = node.get("content")
        return content if isinstance(content, list) else []

    def render_children(node: dict) -> str:
        return "".join(process_node(child) for child in children_of(node))

    def apply_marks(text_val: str, marks: list) -> str:
        # Marks are applied in document order, innermost first.
        for mark in marks:
            if not isinstance(mark, dict):
                continue
            mark_type = mark.get("type")
            if mark_type in mark_wrappers:
                wrapper = mark_wrappers[mark_type]
                text_val = f"{wrapper}{text_val}{wrapper}"
            elif mark_type == "link":
                href = attrs_of(mark).get("href") or ""
                if href:
                    text_val = f"[{text_val}]({href})"
        return text_val

    def render_list_item(node: dict, marker: str) -> str:
        return f"{marker} {render_children(node).strip()}\n"

    def render_list(node: dict, ordered: bool) -> str:
        # Ordered lists honour the optional "start" attribute (default 1).
        start = attrs_of(node).get("start")
        number = start if isinstance(start, int) and not isinstance(start, bool) else 1
        rendered = []
        for item in children_of(node):
            if isinstance(item, dict) and item.get("type") == "listItem":
                marker = f"{number}." if ordered else "-"
                rendered.append(render_list_item(item, marker))
                number += 1
            else:
                rendered.append(process_node(item))
        return "\n".join(rendered) + "\n"

    def process_node(node: dict) -> str:
        if not isinstance(node, dict):
            return ""
        node_type = node.get("type", "")
        attrs = attrs_of(node)

        if node_type == "doc":
            return "\n".join(process_node(child) for child in children_of(node))
        if node_type == "paragraph":
            return f"{render_children(node)}\n"
        if node_type == "heading":
            level = attrs.get("level", 1)
            if not isinstance(level, int) or isinstance(level, bool):
                level = 1
            # Markdown only defines heading levels 1 through 6.
            level = min(max(level, 1), 6)
            return f"{'#' * level} {render_children(node)}\n"
        if node_type == "text":
            raw_text = node.get("text")
            text_val = "" if raw_text is None else str(raw_text)
            marks = node.get("marks")
            return apply_marks(text_val, marks if isinstance(marks, list) else [])
        if node_type == "bulletList":
            return render_list(node, ordered=False)
        if node_type == "orderedList":
            return render_list(node, ordered=True)
        if node_type == "listItem":
            # A list item met outside of a list is rendered as a bullet.
            return render_list_item(node, "-")
        if node_type == "blockquote":
            # Prefix every line so multi-paragraph quotes stay inside the quote.
            quoted_lines = render_children(node).strip().split("\n")
            return "\n".join(f"> {line}" for line in quoted_lines) + "\n"
        if node_type == "codeBlock":
            # "language" is null for untyped code blocks; never print "None".
            lang = attrs.get("language") or ""
            return f"```{lang}\n{render_children(node)}\n```\n"
        if node_type == "hardBreak":
            return "\n"
        if node_type == "horizontalRule":
            return "---\n"
        if node_type == "image":
            src = attrs.get("src") or ""
            alt = attrs.get("alt") or ""
            return f"![{alt}]({src})"
        return ""

    return process_node(tiptap).strip()
async def get_document_tags(db: AsyncSession, doc_id: str) -> list[TagInfo]:
    """Fetch the tags linked to a document.

    Joins ``active_tags`` against ``document_tags`` for the given document
    id and maps each returned row (id, name, color) to a ``TagInfo``.
    """
    tags_query = text("""
        SELECT t.id, t.name, t.color
        FROM active_tags t
        JOIN document_tags dt ON t.id = dt.tag_id
        WHERE dt.document_id = :doc_id
    """)
    cursor = await db.execute(tags_query, {"doc_id": doc_id})

    tag_infos: list[TagInfo] = []
    for row in cursor.fetchall():
        tag_infos.append(TagInfo(id=row.id, name=row.name, color=row.color))
    return tag_infos
async def document_to_response(db: AsyncSession, doc: Document) -> DocumentResponse:
    """Build the full API representation of a document.

    Loads the document's tags and decodes the columns that are stored as
    text: ``reasoning_steps`` and ``tiptap_content`` (JSON) and
    ``confidence`` (a float written as a string). Values that are missing
    or cannot be decoded fall back to ``[]``, ``None`` and ``None``.
    """
    tag_infos = await get_document_tags(db, doc.id)

    def decode_json(raw, fallback):
        # Empty / NULL columns and malformed JSON both yield the fallback.
        if not raw:
            return fallback
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            return fallback

    parsed_steps = decode_json(doc.reasoning_steps, [])
    parsed_tiptap = decode_json(doc.tiptap_content, None)

    try:
        parsed_confidence = float(doc.confidence) if doc.confidence else None
    except (ValueError, TypeError):
        parsed_confidence = None

    return DocumentResponse(
        id=doc.id,
        title=doc.title,
        content=doc.content,
        project_id=doc.project_id,
        folder_id=doc.folder_id,
        path=doc.path,
        tags=tag_infos,
        created_at=doc.created_at,
        updated_at=doc.updated_at,
        reasoning_type=doc.reasoning_type,
        confidence=parsed_confidence,
        reasoning_steps=parsed_steps,
        model_source=doc.model_source,
        tiptap_content=parsed_tiptap,
    )
@router.get("/api/v1/projects/{project_id}/documents", response_model=DocumentListResponse)
async def list_documents(
    request: Request,
    project_id: str,
    db: AsyncSession = Depends(get_db),
):
    """List the non-deleted documents of a project, newest first.

    Raises:
        HTTPException: 404 when no non-deleted project with this id belongs
            to the current agent.
    """
    agent = await get_current_agent(request, db)

    owned_project_stmt = select(Project).where(
        Project.id == project_id,
        Project.agent_id == agent.id,
        Project.is_deleted == False,  # noqa: E712 - SQL expression
    )
    project = (await db.execute(owned_project_stmt)).scalar_one_or_none()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    documents_stmt = (
        select(Document)
        .where(
            Document.project_id == project_id,
            Document.is_deleted == False,  # noqa: E712 - SQL expression
        )
        .order_by(Document.created_at.desc())
    )
    documents = (await db.execute(documents_stmt)).scalars().all()

    # Tags are looked up once per document, in listing order.
    briefs = [
        DocumentBriefResponse(
            id=document.id,
            title=document.title,
            project_id=document.project_id,
            folder_id=document.folder_id,
            path=document.path,
            tags=await get_document_tags(db, document.id),
            created_at=document.created_at,
            updated_at=document.updated_at,
        )
        for document in documents
    ]
    return DocumentListResponse(documents=briefs)
@router.post("/api/v1/projects/{project_id}/documents", response_model=DocumentResponse, status_code=201)
async def create_document(
    request: Request,
    project_id: str,
    payload: DocumentCreate,
    db: AsyncSession = Depends(get_db),
):
    """Create a document in a project, optionally inside a folder.

    Raises:
        HTTPException: 404 when no non-deleted project with this id belongs
            to the current agent; 400 when ``payload.folder_id`` does not
            name a non-deleted folder of that project.
    """
    agent = await get_current_agent(request, db)
    proj_result = await db.execute(
        select(Project).where(
            Project.id == project_id,
            Project.agent_id == agent.id,
            Project.is_deleted == False,  # noqa: E712 - SQL expression
        )
    )
    if not proj_result.scalar_one_or_none():
        raise HTTPException(status_code=404, detail="Project not found")

    # An empty-string folder id means "no folder". Store it as None so the
    # folder_id column agrees with the root path computed below.
    folder_id = payload.folder_id or None
    folder_path = None
    if folder_id:
        folder_result = await db.execute(
            select(Folder).where(
                Folder.id == folder_id,
                Folder.project_id == project_id,
                Folder.is_deleted == False,  # noqa: E712 - SQL expression
            )
        )
        folder = folder_result.scalar_one_or_none()
        if not folder:
            raise HTTPException(status_code=400, detail="Folder not found")
        folder_path = folder.path

    doc_id = str(uuid.uuid4())
    doc = Document(
        id=doc_id,
        title=payload.title,
        content=payload.content,
        project_id=project_id,
        folder_id=folder_id,
        path=build_doc_path(project_id, doc_id, folder_id, folder_path),
    )
    db.add(doc)
    await db.flush()
    return await document_to_response(db, doc)
@router.get("/api/v1/documents/{document_id}", response_model=DocumentResponse)
async def get_document(
    request: Request,
    document_id: str,
    db: AsyncSession = Depends(get_db),
):
    """Return a single non-deleted document.

    Raises:
        HTTPException: 404 both when the document is missing or deleted and
            when its project is not a non-deleted project of the current
            agent.
    """
    agent = await get_current_agent(request, db)

    document_stmt = select(Document).where(
        Document.id == document_id,
        Document.is_deleted == False,  # noqa: E712 - SQL expression
    )
    doc = (await db.execute(document_stmt)).scalar_one_or_none()
    if not doc:
        raise HTTPException(status_code=404, detail="Document not found")

    owned_project_stmt = select(Project).where(
        Project.id == doc.project_id,
        Project.agent_id == agent.id,
        Project.is_deleted == False,  # noqa: E712 - SQL expression
    )
    owned_project = (await db.execute(owned_project_stmt)).scalar_one_or_none()
    if not owned_project:
        raise HTTPException(status_code=404, detail="Document not found")

    return await document_to_response(db, doc)
@router.put("/api/v1/documents/{document_id}", response_model=DocumentResponse)
async def update_document(
    request: Request,
    document_id: str,
    payload: DocumentUpdate,
    db: AsyncSession = Depends(get_db),
):
    """Update a document's title and/or move it to another folder.

    ``payload.folder_id`` semantics: ``None`` leaves the folder untouched,
    an empty string moves the document to the project root, and any other
    value must name a non-deleted folder of the same project.

    Raises:
        HTTPException: 404 when the document is missing or deleted; 403
            when its project is not a non-deleted project of the current
            agent; 400 when the target folder is not found.
    """
    agent = await get_current_agent(request, db)
    result = await db.execute(
        select(Document).where(
            Document.id == document_id,
            Document.is_deleted == False,  # noqa: E712 - SQL expression
        )
    )
    doc = result.scalar_one_or_none()
    if not doc:
        raise HTTPException(status_code=404, detail="Document not found")

    proj_result = await db.execute(
        select(Project).where(
            Project.id == doc.project_id,
            Project.agent_id == agent.id,
            Project.is_deleted == False,  # noqa: E712 - SQL expression
        )
    )
    if not proj_result.scalar_one_or_none():
        raise HTTPException(status_code=403, detail="Forbidden")

    if payload.title is not None:
        doc.title = payload.title

    if payload.folder_id is not None:
        # "" requests a move to the project root. Persist that as None so
        # the folder_id column never holds an empty string.
        target_folder_id = payload.folder_id or None
        folder_path = None
        if target_folder_id:
            folder_result = await db.execute(
                select(Folder).where(
                    Folder.id == target_folder_id,
                    Folder.project_id == doc.project_id,
                    Folder.is_deleted == False,  # noqa: E712 - SQL expression
                )
            )
            folder = folder_result.scalar_one_or_none()
            if not folder:
                raise HTTPException(status_code=400, detail="Folder not found")
            folder_path = folder.path
        # Same path rule as document creation.
        doc.path = build_doc_path(doc.project_id, doc.id, target_folder_id, folder_path)
        doc.folder_id = target_folder_id

    doc.updated_at = datetime.utcnow()
    await db.flush()
    return await document_to_response(db, doc)
@router.delete("/api/v1/documents/{document_id}", status_code=204)
async def delete_document(
    request: Request,
    document_id: str,
    db: AsyncSession = Depends(get_db),
):
    """Soft-delete a document.

    The row is kept; it is flagged as deleted and stamped with the deletion
    time and the id of the deleting agent.

    Raises:
        HTTPException: 404 when the document is missing or already deleted;
            403 when its project is not a non-deleted project of the
            current agent.
    """
    agent = await get_current_agent(request, db)

    document_stmt = select(Document).where(
        Document.id == document_id,
        Document.is_deleted == False,  # noqa: E712 - SQL expression
    )
    doc = (await db.execute(document_stmt)).scalar_one_or_none()
    if not doc:
        raise HTTPException(status_code=404, detail="Document not found")

    owned_project_stmt = select(Project).where(
        Project.id == doc.project_id,
        Project.agent_id == agent.id,
        Project.is_deleted == False,  # noqa: E712 - SQL expression
    )
    owned_project = (await db.execute(owned_project_stmt)).scalar_one_or_none()
    if not owned_project:
        raise HTTPException(status_code=403, detail="Forbidden")

    doc.is_deleted = True
    doc.deleted_at = datetime.utcnow()
    doc.deleted_by = agent.id
    await db.flush()
    return None
@router.put("/api/v1/documents/{document_id}/content", response_model=DocumentResponse)
async def update_document_content(
    request: Request,
    document_id: str,
    payload: TipTapContentUpdate,
    db: AsyncSession = Depends(get_db),
):
    """Update document content (TipTap JSON or Markdown).

    Phase 2: Now supports both TipTap JSON and Markdown formats.
    The storage format is chosen from the type of ``payload.content`` rather
    than from the payload's 'format' field: a string is stored as Markdown
    (backward-compatible with legacy string content), a dict as TipTap JSON.
    Both ``content`` and ``tiptap_content`` are kept populated in either case.

    Raises:
        HTTPException: 404 when the document is missing or deleted; 403
            when its project is not a non-deleted project of the current
            agent; 413 when the JSON-encoded content exceeds 1,000,000
            characters; 400 when the content is neither a string nor a dict.
    """
    agent = await get_current_agent(request, db)
    result = await db.execute(
        select(Document).where(
            Document.id == document_id,
            Document.is_deleted == False,  # noqa: E712 - SQL expression
        )
    )
    doc = result.scalar_one_or_none()
    if not doc:
        raise HTTPException(status_code=404, detail="Document not found")

    proj_result = await db.execute(
        select(Project).where(
            Project.id == doc.project_id,
            Project.agent_id == agent.id,
            Project.is_deleted == False,  # noqa: E712 - SQL expression
        )
    )
    if not proj_result.scalar_one_or_none():
        raise HTTPException(status_code=403, detail="Forbidden")

    # Determine actual format based on content type (backward compatibility).
    is_string_content = isinstance(payload.content, str)

    # Validate content size (1MB limit), measured on the JSON encoding.
    content_json = json.dumps(payload.content)
    if len(content_json) > 1_000_000:
        raise HTTPException(status_code=413, detail="Content too large (max 1MB)")

    if is_string_content:
        # Legacy string content or markdown - store as markdown.
        doc.content = payload.content
        # Wrap the text in a single paragraph for the editor. ProseMirror
        # rejects empty text nodes, so an empty string becomes an empty
        # paragraph instead of a text node with "".
        paragraph_children = (
            [{"type": "text", "text": payload.content}] if payload.content else []
        )
        doc.tiptap_content = json.dumps({
            "type": "doc",
            "content": [
                {
                    "type": "paragraph",
                    "content": paragraph_children,
                }
            ],
        })
    else:
        # TipTap JSON content.
        if not isinstance(payload.content, dict):
            raise HTTPException(status_code=400, detail="content must be a string or dict")
        doc.tiptap_content = content_json
        # Also update the plain content by converting tiptap -> markdown.
        doc.content = tiptap_to_markdown(payload.content)

    doc.updated_at = datetime.utcnow()
    await db.flush()
    return await document_to_response(db, doc)
@router.post("/api/v1/documents/{document_id}/restore", response_model=DocumentResponse)
async def restore_document(
    request: Request,
    document_id: str,
    db: AsyncSession = Depends(get_db),
):
    """Undo a soft delete and return the restored document.

    Only documents currently flagged as deleted are considered.

    Raises:
        HTTPException: 404 when no deleted document has this id; 403 when
            its project is not a non-deleted project of the current agent.
    """
    agent = await get_current_agent(request, db)

    deleted_doc_stmt = select(Document).where(
        Document.id == document_id,
        Document.is_deleted == True,  # noqa: E712 - SQL expression
    )
    doc = (await db.execute(deleted_doc_stmt)).scalar_one_or_none()
    if not doc:
        raise HTTPException(status_code=404, detail="Document not found")

    owned_project_stmt = select(Project).where(
        Project.id == doc.project_id,
        Project.agent_id == agent.id,
        Project.is_deleted == False,  # noqa: E712 - SQL expression
    )
    owned_project = (await db.execute(owned_project_stmt)).scalar_one_or_none()
    if not owned_project:
        raise HTTPException(status_code=403, detail="Forbidden")

    # Clear every soft-delete marker.
    doc.is_deleted = False
    doc.deleted_at = None
    doc.deleted_by = None
    await db.flush()
    return await document_to_response(db, doc)
@router.post("/api/v1/documents/{document_id}/tags", status_code=204)
async def assign_tags(
    request: Request,
    document_id: str,
    payload: DocumentTagsAssign,
    db: AsyncSession = Depends(get_db),
):
    """Attach the tags listed in ``payload.tag_ids`` to a document.

    Tags already linked to the document are skipped, so repeating a request
    does not create duplicate links.

    Raises:
        HTTPException: 404 when the document is missing or deleted; 403
            when its project is not a non-deleted project of the current
            agent; 400 as soon as one tag id does not name a non-deleted
            tag.
    """
    agent = await get_current_agent(request, db)

    document_stmt = select(Document).where(
        Document.id == document_id,
        Document.is_deleted == False,  # noqa: E712 - SQL expression
    )
    doc = (await db.execute(document_stmt)).scalar_one_or_none()
    if not doc:
        raise HTTPException(status_code=404, detail="Document not found")

    owned_project_stmt = select(Project).where(
        Project.id == doc.project_id,
        Project.agent_id == agent.id,
        Project.is_deleted == False,  # noqa: E712 - SQL expression
    )
    owned_project = (await db.execute(owned_project_stmt)).scalar_one_or_none()
    if not owned_project:
        raise HTTPException(status_code=403, detail="Forbidden")

    for tag_id in payload.tag_ids:
        tag_stmt = select(Tag).where(
            Tag.id == tag_id,
            Tag.is_deleted == False,  # noqa: E712 - SQL expression
        )
        tag = (await db.execute(tag_stmt)).scalar_one_or_none()
        if not tag:
            raise HTTPException(status_code=400, detail=f"Tag {tag_id} not found")

        link_stmt = select(DocumentTag).where(
            DocumentTag.document_id == document_id,
            DocumentTag.tag_id == tag_id,
        )
        already_linked = (await db.execute(link_stmt)).scalar_one_or_none()
        if already_linked:
            continue
        db.add(DocumentTag(document_id=document_id, tag_id=tag_id))

    await db.flush()
    return None
@router.delete("/api/v1/documents/{document_id}/tags/{tag_id}", status_code=204)
async def remove_tag(
    request: Request,
    document_id: str,
    tag_id: str,
    db: AsyncSession = Depends(get_db),
):
    """Detach a tag from a document.

    The link row is deleted if present; a tag that was never attached is
    not reported as an error.

    Raises:
        HTTPException: 404 when the document is missing or deleted; 403
            when its project is not a non-deleted project of the current
            agent.
    """
    agent = await get_current_agent(request, db)

    document_stmt = select(Document).where(
        Document.id == document_id,
        Document.is_deleted == False,  # noqa: E712 - SQL expression
    )
    doc = (await db.execute(document_stmt)).scalar_one_or_none()
    if not doc:
        raise HTTPException(status_code=404, detail="Document not found")

    owned_project_stmt = select(Project).where(
        Project.id == doc.project_id,
        Project.agent_id == agent.id,
        Project.is_deleted == False,  # noqa: E712 - SQL expression
    )
    owned_project = (await db.execute(owned_project_stmt)).scalar_one_or_none()
    if not owned_project:
        raise HTTPException(status_code=403, detail="Forbidden")

    unlink_stmt = delete(DocumentTag).where(
        DocumentTag.document_id == document_id,
        DocumentTag.tag_id == tag_id,
    )
    await db.execute(unlink_stmt)
    await db.flush()
    return None
# =============================================================================
# Phase 2: New Endpoints
# =============================================================================
async def _get_doc_with_access(
    request: Request,
    document_id: str,
    db: AsyncSession,
    require_write: bool = False,
) -> tuple[Document, bool]:
    """Load a non-deleted document and verify the current agent may use it.

    Returns ``(doc, True)``; the second element is always ``True`` because
    every failure path raises instead of returning. ``require_write`` is
    accepted for call-site readability but is not consulted here.

    Raises:
        HTTPException: 404 when the document is missing or deleted; 403
            when its project is not a non-deleted project of the current
            agent.
    """
    agent = await get_current_agent(request, db)

    document_stmt = select(Document).where(
        Document.id == document_id,
        Document.is_deleted == False,  # noqa: E712 - SQL expression
    )
    doc = (await db.execute(document_stmt)).scalar_one_or_none()
    if not doc:
        raise HTTPException(status_code=404, detail="Document not found")

    owned_project_stmt = select(Project).where(
        Project.id == doc.project_id,
        Project.agent_id == agent.id,
        Project.is_deleted == False,  # noqa: E712 - SQL expression
    )
    owned_project = (await db.execute(owned_project_stmt)).scalar_one_or_none()
    if not owned_project:
        raise HTTPException(status_code=403, detail="Forbidden")

    return doc, True
@router.get("/api/v1/documents/{document_id}/reasoning")
async def get_document_reasoning(
    request: Request,
    document_id: str,
    db: AsyncSession = Depends(get_db),
):
    """Get reasoning metadata for a document.

    Returns a dict with ``reasoning_type``, ``confidence``,
    ``reasoning_steps`` and ``model_source``. Steps that are absent or not
    valid JSON come back as ``[]``; a confidence that is absent or not
    parseable as a float comes back as ``None``.
    """
    doc, _ = await _get_doc_with_access(request, document_id, db)

    raw_steps = doc.reasoning_steps
    try:
        parsed_steps = json.loads(raw_steps) if raw_steps else []
    except json.JSONDecodeError:
        parsed_steps = []

    raw_confidence = doc.confidence
    try:
        parsed_confidence = float(raw_confidence) if raw_confidence else None
    except (ValueError, TypeError):
        parsed_confidence = None

    return {
        "reasoning_type": doc.reasoning_type,
        "confidence": parsed_confidence,
        "reasoning_steps": parsed_steps,
        "model_source": doc.model_source,
    }
@router.patch("/api/v1/documents/{document_id}/reasoning")
async def update_document_reasoning(
    request: Request,
    document_id: str,
    payload: ReasoningUpdate,
    db: AsyncSession = Depends(get_db),
):
    """Update reasoning metadata for a document.

    Only the payload fields that are not ``None`` are applied. The response
    is the same dict produced by ``get_document_reasoning``.

    Raises:
        HTTPException: 400 when ``confidence`` is outside 0.0-1.0, when a
            step's ``step`` is not an int, or when a step's ``thought`` is
            empty or longer than 2000 characters.
    """
    doc, _ = await _get_doc_with_access(request, document_id, db, require_write=True)

    new_type = payload.reasoning_type
    if new_type is not None:
        # Enum members are stored by their value; plain values are stored as-is.
        doc.reasoning_type = getattr(new_type, "value", new_type)

    new_confidence = payload.confidence
    if new_confidence is not None:
        if new_confidence < 0.0 or new_confidence > 1.0:
            raise HTTPException(status_code=400, detail="confidence must be between 0.0 and 1.0")
        # Confidence is persisted as text.
        doc.confidence = str(new_confidence)

    new_steps = payload.reasoning_steps
    if new_steps is not None:
        for entry in new_steps:
            if not isinstance(entry.step, int):
                raise HTTPException(status_code=400, detail="Each step must have an integer 'step' field")
            if not entry.thought or len(entry.thought) > 2000:
                raise HTTPException(status_code=400, detail="thought must be non-empty and max 2000 chars")
        doc.reasoning_steps = json.dumps([entry.model_dump() for entry in new_steps])

    new_source = payload.model_source
    if new_source is not None:
        doc.model_source = new_source

    doc.updated_at = datetime.utcnow()
    await db.flush()
    return await get_document_reasoning(request, document_id, db)
@router.get("/api/v1/documents/{document_id}/reasoning-panel")
async def get_reasoning_panel(
    request: Request,
    document_id: str,
    db: AsyncSession = Depends(get_db),
):
    """Get reasoning panel data for UI.

    ``has_reasoning`` is true when the document has a reasoning type, a
    confidence, a model source, or at least one decoded reasoning step.
    When it is false the panel's ``reasoning`` field is ``None``.
    """
    doc, _ = await _get_doc_with_access(request, document_id, db)

    # Decode the steps before deciding whether reasoning exists. The column
    # holds JSON text, so a stored "[]" (e.g. after the last step was
    # deleted) or undecodable text is truthy as a string but has no steps.
    reasoning_steps = []
    if doc.reasoning_steps:
        try:
            reasoning_steps = json.loads(doc.reasoning_steps)
        except json.JSONDecodeError:
            reasoning_steps = []

    has_reasoning = any([
        doc.reasoning_type is not None,
        doc.confidence is not None,
        bool(reasoning_steps),
        doc.model_source is not None,
    ])

    reasoning_metadata = None
    if has_reasoning:
        confidence = None
        if doc.confidence:
            try:
                confidence = float(doc.confidence)
            except (ValueError, TypeError):
                confidence = None
        reasoning_metadata = ReasoningMetadata(
            reasoning_type=doc.reasoning_type,
            confidence=confidence,
            reasoning_steps=reasoning_steps,
            model_source=doc.model_source,
        )

    return ReasoningPanel(
        document_id=document_id,
        has_reasoning=has_reasoning,
        reasoning=reasoning_metadata,
        editable=True,  # Agent has write access since they passed _get_doc_with_access
    )
@router.post("/api/v1/documents/{document_id}/reasoning-steps", status_code=201)
async def add_reasoning_step(
    request: Request,
    document_id: str,
    payload: ReasoningStepAdd,
    db: AsyncSession = Depends(get_db),
):
    """Add a new reasoning step to a document.

    The new step is numbered one above the highest existing ``step`` value
    (starting at 1) and is returned as a dict with ``step``, ``thought``
    and ``conclusion``.
    """
    doc, _ = await _get_doc_with_access(request, document_id, db, require_write=True)

    # Stored steps are JSON text; absent or undecodable data starts a new list.
    raw_steps = doc.reasoning_steps
    try:
        existing_steps = json.loads(raw_steps) if raw_steps else []
    except json.JSONDecodeError:
        existing_steps = []

    highest = max((entry.get("step", 0) for entry in existing_steps), default=0)
    created = {
        "step": highest + 1,
        "thought": payload.thought,
        "conclusion": payload.conclusion,
    }
    existing_steps.append(created)

    doc.reasoning_steps = json.dumps(existing_steps)
    doc.updated_at = datetime.utcnow()
    await db.flush()
    return created
@router.delete("/api/v1/documents/{document_id}/reasoning-steps/{step}", status_code=204)
async def delete_reasoning_step(
    request: Request,
    document_id: str,
    step: int,
    db: AsyncSession = Depends(get_db),
):
    """Delete a specific reasoning step from a document.

    Every stored entry whose ``step`` equals the path parameter is removed.

    Raises:
        HTTPException: 404 when no stored entry carries that step number.
    """
    doc, _ = await _get_doc_with_access(request, document_id, db, require_write=True)

    # Stored steps are JSON text; absent or undecodable data means no steps.
    raw_steps = doc.reasoning_steps
    try:
        existing_steps = json.loads(raw_steps) if raw_steps else []
    except json.JSONDecodeError:
        existing_steps = []

    remaining_steps = [entry for entry in existing_steps if entry.get("step") != step]
    if len(remaining_steps) == len(existing_steps):
        raise HTTPException(status_code=404, detail="Step not found")

    doc.reasoning_steps = json.dumps(remaining_steps)
    doc.updated_at = datetime.utcnow()
    await db.flush()
    return None
@router.get("/api/v1/documents/{document_id}/content")
async def get_document_content(
    request: Request,
    document_id: str,
    format: str = Query("tiptap", description="Output format: tiptap or markdown"),
    db: AsyncSession = Depends(get_db),
):
    """Get document content in TipTap JSON or Markdown format.

    ``format == "markdown"`` returns Markdown: converted from the stored
    TipTap JSON when that decodes, otherwise the plain ``content`` column.
    Any other ``format`` value returns TipTap JSON: the stored TipTap
    document when that decodes, otherwise the plain ``content`` wrapped in
    a single paragraph, otherwise an empty document.
    """
    doc, _ = await _get_doc_with_access(request, document_id, db)

    if format == "markdown":
        # Try to get tiptap_content and convert.
        if doc.tiptap_content:
            try:
                tiptap = json.loads(doc.tiptap_content)
                content = tiptap_to_markdown(tiptap)
                return TipTapContentResponse(content=content, format="markdown")
            except json.JSONDecodeError:
                pass
        # Fallback to plain content.
        return TipTapContentResponse(content=doc.content, format="markdown")

    # Return tiptap format.
    if doc.tiptap_content:
        try:
            tiptap = json.loads(doc.tiptap_content)
            return TipTapContentResponse(content=tiptap, format="tiptap")
        except json.JSONDecodeError:
            pass

    if doc.content:
        # Documents that only have plain content (no stored TipTap JSON)
        # would otherwise come back empty. Wrap the text in one paragraph,
        # the same shape used when string content is saved.
        wrapped_tiptap = {
            "type": "doc",
            "content": [
                {
                    "type": "paragraph",
                    "content": [{"type": "text", "text": doc.content}],
                }
            ],
        }
        return TipTapContentResponse(content=wrapped_tiptap, format="tiptap")

    # Return default tiptap structure.
    default_tiptap = {"type": "doc", "content": [{"type": "paragraph", "content": []}]}
    return TipTapContentResponse(content=default_tiptap, format="tiptap")