Files
claudia-docs-api/app/routers/links.py
Motoko 07f9ac91fc Phase 3: Graph view, backlinks, quick switcher, export
- Add outgoing_links (JSON) and backlinks_count to Document model
- POST /documents/{id}/detect-links — detect [[uuid]] patterns in content
- GET /documents/{id}/backlinks — documents referencing this doc
- GET /documents/{id}/outgoing-links — documents this doc references
- GET /documents/{id}/links — combined incoming + outgoing
- GET /projects/{id}/graph — full project relationship graph
- GET /search/quick — fuzzy search (Quick Switcher Cmd+K)
- GET /projects/{id}/documents/search — project-scoped search
- GET /documents/{id}/export — markdown|json export
- GET /projects/{id}/export — json|zip export
- 27 new tests
2026-03-30 23:46:45 +00:00

491 lines
15 KiB
Python

import json
import re
import uuid
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, Query, Request
from sqlalchemy import select, text
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.models.document import Document
from app.models.project import Project
from app.routers.auth import get_current_agent
from app.schemas.document import (
BacklinkItem,
BacklinksResponse,
BrokenLink,
DetectLinksRequest,
DetectLinksResponse,
GraphEdge,
GraphNode,
GraphResponse,
GraphStats,
LinkItem,
LinksResponse,
OutgoingLinkItem,
OutgoingLinksResponse,
)
router = APIRouter(tags=["links"])
# =============================================================================
# Link Detection
# =============================================================================
def detect_links_in_content(content: str) -> tuple[list[str], list[BrokenLink]]:
    """
    Scan *content* for [[uuid]] and [[uuid|display text]] references.

    Returns a pair (valid_ids, broken_links): captured IDs that parse as
    UUIDs (in order of appearance, duplicates kept), and BrokenLink
    entries for candidates that matched the pattern but failed UUID
    validation.
    """
    # Candidate: 36 hex/dash chars, optionally followed by |anchor text.
    link_pattern = r'\[\[([0-9a-f-]{36})(?:\|[^\]]+)?\]\]'
    valid_ids: list[str] = []
    broken_links: list[BrokenLink] = []
    for candidate in re.findall(link_pattern, content, re.IGNORECASE):
        try:
            uuid.UUID(candidate)
        except ValueError:
            # Matched the shape but is not a real UUID (e.g. misplaced dashes).
            broken_links.append(BrokenLink(reference=candidate, reason="invalid_format"))
        else:
            valid_ids.append(candidate)
    return valid_ids, broken_links
async def _get_doc_with_access(request: Request, document_id: str, db: AsyncSession) -> Document:
    """Load a live document and verify the caller's agent owns its project.

    Raises HTTP 404 when the document is missing or soft-deleted, and
    HTTP 403 when the owning project does not belong to the agent.
    """
    agent = await get_current_agent(request, db)
    doc_row = await db.execute(
        select(Document).where(
            Document.id == document_id,
            Document.is_deleted == False,
        )
    )
    document = doc_row.scalar_one_or_none()
    if document is None:
        raise HTTPException(status_code=404, detail="Document not found")
    # Ownership check goes through the (non-deleted) parent project.
    owner_row = await db.execute(
        select(Project).where(
            Project.id == document.project_id,
            Project.agent_id == agent.id,
            Project.is_deleted == False,
        )
    )
    if owner_row.scalar_one_or_none() is None:
        raise HTTPException(status_code=403, detail="Forbidden")
    return document
async def _get_project_with_access(request: Request, project_id: str, db: AsyncSession) -> Project:
    """Load a live project owned by the authenticated agent.

    Raises HTTP 404 when no matching, non-deleted project exists for
    this agent (ownership failures are indistinguishable from absence).
    """
    agent = await get_current_agent(request, db)
    row = await db.execute(
        select(Project).where(
            Project.id == project_id,
            Project.agent_id == agent.id,
            Project.is_deleted == False,
        )
    )
    found = row.scalar_one_or_none()
    if found is None:
        raise HTTPException(status_code=404, detail="Project not found")
    return found
@router.post("/api/v1/documents/{document_id}/detect-links", response_model=DetectLinksResponse)
async def detect_links(
    request: Request,
    document_id: str,
    payload: DetectLinksRequest,
    db: AsyncSession = Depends(get_db),
):
    """
    Detect and save [[uuid]] references in content.

    Updates the document's outgoing_links field and keeps the
    backlinks_count of referenced documents in sync with the diff
    between the old and new link sets.
    """
    doc = await _get_doc_with_access(request, document_id, db)
    # Guard against pathological payloads before regex scanning.
    if len(payload.content) > 5_000_000:
        raise HTTPException(status_code=413, detail="Content too large (max 5MB)")
    link_ids, broken_links = detect_links_in_content(payload.content)
    # Keep only references whose target document still exists.
    valid_ids = []
    for lid in link_ids:
        ref_result = await db.execute(
            select(Document.id).where(
                Document.id == lid,
                Document.is_deleted == False,
            )
        )
        if ref_result.scalar_one_or_none():
            valid_ids.append(lid)
        else:
            broken_links.append(BrokenLink(reference=lid, reason="document_not_found"))
    # De-duplicate while preserving first-seen order.
    seen = set()
    unique_valid_ids = []
    for vid in valid_ids:
        if vid not in seen:
            seen.add(vid)
            unique_valid_ids.append(vid)
    # BUG FIX: snapshot the OLD link set BEFORE overwriting outgoing_links.
    # Previously the field was assigned first, so old_links always equalled
    # the new set and backlinks_count was never incremented or decremented.
    old_links = []
    if doc.outgoing_links:
        try:
            old_links = json.loads(doc.outgoing_links)
        except json.JSONDecodeError:
            old_links = []
    doc.outgoing_links = json.dumps(unique_valid_ids)
    doc.updated_at = datetime.utcnow()
    # Decrement counts on targets that are no longer referenced...
    for target_id in old_links:
        if target_id not in unique_valid_ids:
            await db.execute(
                text("""
                UPDATE documents
                SET backlinks_count = MAX(0, backlinks_count - 1)
                WHERE id = :target_id AND backlinks_count > 0
                """),
                {"target_id": target_id}
            )
    # ...and increment counts on newly referenced targets.
    for target_id in unique_valid_ids:
        if target_id not in old_links:
            await db.execute(
                text("""
                UPDATE documents
                SET backlinks_count = backlinks_count + 1
                WHERE id = :target_id
                """),
                {"target_id": target_id}
            )
    await db.flush()
    return DetectLinksResponse(
        document_id=document_id,
        outgoing_links=unique_valid_ids,
        links_detected=len(unique_valid_ids),
        links_broken=len(broken_links),
        broken_links=broken_links,
    )
# =============================================================================
# Backlinks & Outgoing Links
# =============================================================================
@router.get("/api/v1/documents/{document_id}/backlinks", response_model=BacklinksResponse)
async def get_backlinks(
    request: Request,
    document_id: str,
    db: AsyncSession = Depends(get_db),
):
    """
    Get documents that reference this document (incoming links).

    Candidates are found with a cheap LIKE prefilter on outgoing_links,
    then confirmed by parsing the JSON array, so substring collisions
    can never produce a false backlink. (Replaces the previous dead
    double-parse of row.content and the per-row LIKE re-query, which
    was neither exact nor necessary.)
    """
    await _get_doc_with_access(request, document_id, db)
    result = await db.execute(
        text("""
        SELECT d.id, d.title, d.project_id, d.content, d.outgoing_links,
               d.updated_at, p.name as project_name
        FROM active_documents d
        JOIN active_projects p ON d.project_id = p.id
        WHERE d.outgoing_links LIKE :pattern
        AND d.is_deleted = 0
        ORDER BY d.updated_at DESC
        """),
        {"pattern": f"%{document_id}%"}
    )
    backlinks = []
    for row in result.fetchall():
        # Exact membership check: the LIKE above is only a prefilter.
        try:
            outgoing = json.loads(row.outgoing_links) if row.outgoing_links else []
        except json.JSONDecodeError:
            continue
        if document_id not in outgoing:
            continue
        # Show the referencing text around the [[uuid]] occurrence.
        excerpt = _build_backlink_excerpt(row.content or "", document_id)
        backlinks.append(BacklinkItem(
            document_id=row.id,
            title=row.title,
            project_id=row.project_id,
            project_name=row.project_name,
            excerpt=excerpt,
            updated_at=row.updated_at,
        ))
    return BacklinksResponse(
        document_id=document_id,
        backlinks_count=len(backlinks),
        backlinks=backlinks,
    )
@router.get("/api/v1/documents/{document_id}/outgoing-links", response_model=OutgoingLinksResponse)
async def get_outgoing_links(
    request: Request,
    document_id: str,
    db: AsyncSession = Depends(get_db),
):
    """
    Get documents that this document references (outgoing links).

    References whose target no longer appears in active_documents are
    reported with exists=False and a placeholder title.
    """
    doc = await _get_doc_with_access(request, document_id, db)
    # Stored as a JSON array of document ids; treat malformed data as empty.
    try:
        referenced_ids = json.loads(doc.outgoing_links) if doc.outgoing_links else []
    except json.JSONDecodeError:
        referenced_ids = []
    items = []
    for ref_id in referenced_ids:
        lookup = await db.execute(
            text("""
            SELECT d.id, d.title, d.project_id, d.updated_at,
                   p.name as project_name
            FROM active_documents d
            JOIN active_projects p ON d.project_id = p.id
            WHERE d.id = :target_id
            """),
            {"target_id": ref_id}
        )
        hit = lookup.fetchone()
        if hit is None:
            # Dangling reference: target was deleted after being linked.
            items.append(OutgoingLinkItem(
                document_id=ref_id,
                title="[Deleted Document]",
                project_id="",
                project_name="",
                exists=False,
                updated_at=None,
            ))
        else:
            items.append(OutgoingLinkItem(
                document_id=hit.id,
                title=hit.title,
                project_id=hit.project_id,
                project_name=hit.project_name,
                exists=True,
                updated_at=hit.updated_at,
            ))
    return OutgoingLinksResponse(
        document_id=document_id,
        outgoing_links_count=len(items),
        outgoing_links=items,
    )
@router.get("/api/v1/documents/{document_id}/links", response_model=LinksResponse)
async def get_links(
    request: Request,
    document_id: str,
    db: AsyncSession = Depends(get_db),
):
    """
    Get all incoming and outgoing links for a document.
    """
    doc = await _get_doc_with_access(request, document_id, db)
    # Outgoing: resolve each stored id to a live document; skip the rest.
    try:
        stored_ids = json.loads(doc.outgoing_links) if doc.outgoing_links else []
    except json.JSONDecodeError:
        stored_ids = []
    outgoing = []
    for ref_id in stored_ids:
        found = await db.execute(
            select(Document).where(Document.id == ref_id, Document.is_deleted == False)
        )
        target = found.scalar_one_or_none()
        if target is not None:
            outgoing.append(LinkItem(
                document_id=target.id,
                title=target.title,
                anchor_text=None,
            ))
    # Incoming: LIKE prefilter, then exact JSON membership check per row.
    candidates = await db.execute(
        text("""
        SELECT d.id, d.title, d.outgoing_links
        FROM active_documents d
        WHERE d.outgoing_links LIKE :pattern
        AND d.is_deleted = 0
        """),
        {"pattern": f"%{document_id}%"}
    )
    backlinks = []
    for row in candidates.fetchall():
        try:
            row_links = json.loads(row.outgoing_links) if row.outgoing_links else []
        except json.JSONDecodeError:
            continue
        if document_id in row_links:
            backlinks.append(LinkItem(
                document_id=row.id,
                title=row.title,
                anchor_text=None,
            ))
    return LinksResponse(
        document_id=document_id,
        outgoing_links=outgoing,
        backlinks=backlinks,
    )
def _build_backlink_excerpt(content: str, target_id: str, context_chars: int = 150) -> str:
"""Build an excerpt around the [[target_id]] reference in content."""
# Find the [[uuid]] pattern in content
pattern = r'\[\[' + re.escape(target_id) + r'(?:\|[^\]]+)?\]\]'
match = re.search(pattern, content, re.IGNORECASE)
if not match:
return content[:context_chars * 2] or ""
start = max(0, match.start() - context_chars)
end = min(len(content), match.end() + context_chars)
excerpt = content[start:end]
if start > 0:
excerpt = "..." + excerpt
if end < len(content):
excerpt = excerpt + "..."
return excerpt
# =============================================================================
# Project Graph
# =============================================================================
@router.get("/api/v1/projects/{project_id}/graph", response_model=GraphResponse)
async def get_project_graph(
    request: Request,
    project_id: str,
    depth: int = Query(2, ge=1, le=3),
    db: AsyncSession = Depends(get_db),
):
    """
    Get the full graph of document relationships within a project.

    NOTE(review): `depth` is validated (1..3) but currently unused —
    the whole project graph is always returned. The parameter is kept
    for API compatibility until hop-limited traversal is implemented.
    (Also removed a `reachable` set that was built but never read.)
    """
    await _get_project_with_access(request, project_id, db)
    docs_result = await db.execute(
        select(Document).where(
            Document.project_id == project_id,
            Document.is_deleted == False,
        )
    )
    all_docs = docs_result.scalars().all()
    # Adjacency over live project docs only; edges keep per-occurrence
    # multiplicity while the adjacency sets de-duplicate.
    doc_map = {doc.id: doc for doc in all_docs}
    adjacency: dict[str, set[str]] = {doc.id: set() for doc in all_docs}
    edges = []
    total_references = 0
    for doc in all_docs:
        outgoing_ids = []
        if doc.outgoing_links:
            try:
                outgoing_ids = json.loads(doc.outgoing_links)
            except json.JSONDecodeError:
                pass
        for target_id in outgoing_ids:
            # Skip references that leave the project or point at deleted docs.
            if target_id in doc_map:
                adjacency[doc.id].add(target_id)
                edges.append(GraphEdge(source=doc.id, target=target_id, type="reference"))
                total_references += 1
    nodes = [GraphNode(id=doc.id, title=doc.title, type="document") for doc in all_docs]
    # Orphaned = documents with no incoming and no outgoing edges.
    incoming_count: dict[str, int] = {doc.id: 0 for doc in all_docs}
    for targets in adjacency.values():
        for target_id in targets:
            # Targets always come from doc_map, so the key exists.
            incoming_count[target_id] += 1
    orphaned = sum(
        1 for doc in all_docs
        if incoming_count[doc.id] == 0 and not adjacency[doc.id]
    )
    return GraphResponse(
        project_id=project_id,
        nodes=nodes,
        edges=edges,
        stats=GraphStats(
            total_documents=len(all_docs),
            total_references=total_references,
            orphaned_documents=orphaned,
        ),
    )