Phase 3: Graph view, backlinks, quick switcher, export
- Add outgoing_links (JSON) and backlinks_count to Document model
- POST /documents/{id}/detect-links — detect [[uuid]] patterns in content
- GET /documents/{id}/backlinks — documents referencing this doc
- GET /documents/{id}/outgoing-links — documents this doc references
- GET /documents/{id}/links — combined incoming + outgoing
- GET /projects/{id}/graph — full project relationship graph
- GET /search/quick — fuzzy search (Quick Switcher Cmd+K)
- GET /projects/{id}/documents/search — project-scoped search
- GET /documents/{id}/export — markdown|json export
- GET /projects/{id}/export — json|zip export
- 27 new tests
This commit is contained in:
490
app/routers/links.py
Normal file
490
app/routers/links.py
Normal file
@@ -0,0 +1,490 @@
|
||||
import json
|
||||
import re
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
||||
from sqlalchemy import select, text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.database import get_db
|
||||
from app.models.document import Document
|
||||
from app.models.project import Project
|
||||
from app.routers.auth import get_current_agent
|
||||
from app.schemas.document import (
|
||||
BacklinkItem,
|
||||
BacklinksResponse,
|
||||
BrokenLink,
|
||||
DetectLinksRequest,
|
||||
DetectLinksResponse,
|
||||
GraphEdge,
|
||||
GraphNode,
|
||||
GraphResponse,
|
||||
GraphStats,
|
||||
LinkItem,
|
||||
LinksResponse,
|
||||
OutgoingLinkItem,
|
||||
OutgoingLinksResponse,
|
||||
)
|
||||
|
||||
# Single router for all link/graph/detection endpoints.
# No prefix: each route declares its full /api/v1/... path.
router = APIRouter(tags=["links"])
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Link Detection
|
||||
# =============================================================================
|
||||
|
||||
def detect_links_in_content(content: str) -> tuple[list[str], list[BrokenLink]]:
    """
    Scan *content* for [[uuid]] / [[uuid|display text]] wiki-style references.

    The regex captures any 36-character run of hex digits and dashes inside
    double brackets; each candidate is then validated with uuid.UUID.
    Returns (valid_ids, broken_links), where broken_links records candidates
    that matched the pattern but are not well-formed UUIDs.
    """
    candidate_re = r'\[\[([0-9a-f-]{36})(?:\|[^\]]+)?\]\]'

    valid_ids: list[str] = []
    broken_links: list[BrokenLink] = []

    for candidate in re.findall(candidate_re, content, re.IGNORECASE):
        try:
            uuid.UUID(candidate)
        except ValueError:
            broken_links.append(BrokenLink(reference=candidate, reason="invalid_format"))
        else:
            valid_ids.append(candidate)

    return valid_ids, broken_links
|
||||
|
||||
|
||||
async def _get_doc_with_access(request: Request, document_id: str, db: AsyncSession) -> Document:
    """Load a non-deleted document and verify the caller may access it.

    Raises HTTP 404 when the document is missing or deleted, and HTTP 403
    when its project is not owned by the authenticated agent.
    """
    agent = await get_current_agent(request, db)

    doc_row = await db.execute(
        select(Document).where(
            Document.id == document_id,
            Document.is_deleted == False,
        )
    )
    document = doc_row.scalar_one_or_none()
    if document is None:
        raise HTTPException(status_code=404, detail="Document not found")

    # Ownership check: the document's project must belong to this agent.
    owner_row = await db.execute(
        select(Project).where(
            Project.id == document.project_id,
            Project.agent_id == agent.id,
            Project.is_deleted == False,
        )
    )
    if owner_row.scalar_one_or_none() is None:
        raise HTTPException(status_code=403, detail="Forbidden")

    return document
|
||||
|
||||
|
||||
async def _get_project_with_access(request: Request, project_id: str, db: AsyncSession) -> Project:
    """Load a non-deleted project owned by the authenticated agent.

    Raises HTTP 404 when no matching project exists (missing, deleted, or
    owned by a different agent — all three look identical to the caller).
    """
    agent = await get_current_agent(request, db)

    row = await db.execute(
        select(Project).where(
            Project.id == project_id,
            Project.agent_id == agent.id,
            Project.is_deleted == False,
        )
    )
    found = row.scalar_one_or_none()
    if found is None:
        raise HTTPException(status_code=404, detail="Project not found")

    return found
|
||||
|
||||
|
||||
@router.post("/api/v1/documents/{document_id}/detect-links", response_model=DetectLinksResponse)
|
||||
async def detect_links(
|
||||
request: Request,
|
||||
document_id: str,
|
||||
payload: DetectLinksRequest,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
"""
|
||||
Detect and save [[uuid]] references in content.
|
||||
Updates the document's outgoing_links field.
|
||||
"""
|
||||
doc = await _get_doc_with_access(request, document_id, db)
|
||||
|
||||
# Validate content size
|
||||
if len(payload.content) > 5_000_000:
|
||||
raise HTTPException(status_code=413, detail="Content too large (max 5MB)")
|
||||
|
||||
# Detect links
|
||||
link_ids, broken_links = detect_links_in_content(payload.content)
|
||||
|
||||
# Validate that referenced documents exist
|
||||
valid_ids = []
|
||||
for lid in link_ids:
|
||||
ref_result = await db.execute(
|
||||
select(Document.id).where(
|
||||
Document.id == lid,
|
||||
Document.is_deleted == False,
|
||||
)
|
||||
)
|
||||
if ref_result.scalar_one_or_none():
|
||||
valid_ids.append(lid)
|
||||
else:
|
||||
broken_links.append(BrokenLink(reference=lid, reason="document_not_found"))
|
||||
|
||||
# Remove duplicates while preserving order
|
||||
seen = set()
|
||||
unique_valid_ids = []
|
||||
for vid in valid_ids:
|
||||
if vid not in seen:
|
||||
seen.add(vid)
|
||||
unique_valid_ids.append(vid)
|
||||
|
||||
# Update document's outgoing_links
|
||||
doc.outgoing_links = json.dumps(unique_valid_ids)
|
||||
doc.updated_at = datetime.utcnow()
|
||||
|
||||
# Update backlinks_count on target documents
|
||||
# First, decrement old links
|
||||
old_links = []
|
||||
if doc.outgoing_links:
|
||||
try:
|
||||
old_links = json.loads(doc.outgoing_links) if doc.outgoing_links != "[]" else []
|
||||
except json.JSONDecodeError:
|
||||
old_links = []
|
||||
|
||||
for target_id in old_links:
|
||||
if target_id not in unique_valid_ids:
|
||||
await db.execute(
|
||||
text("""
|
||||
UPDATE documents
|
||||
SET backlinks_count = MAX(0, backlinks_count - 1)
|
||||
WHERE id = :target_id AND backlinks_count > 0
|
||||
"""),
|
||||
{"target_id": target_id}
|
||||
)
|
||||
|
||||
# Then, increment new links
|
||||
for target_id in unique_valid_ids:
|
||||
if target_id not in old_links:
|
||||
await db.execute(
|
||||
text("""
|
||||
UPDATE documents
|
||||
SET backlinks_count = backlinks_count + 1
|
||||
WHERE id = :target_id
|
||||
"""),
|
||||
{"target_id": target_id}
|
||||
)
|
||||
|
||||
await db.flush()
|
||||
|
||||
return DetectLinksResponse(
|
||||
document_id=document_id,
|
||||
outgoing_links=unique_valid_ids,
|
||||
links_detected=len(unique_valid_ids),
|
||||
links_broken=len(broken_links),
|
||||
broken_links=broken_links,
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Backlinks & Outgoing Links
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/api/v1/documents/{document_id}/backlinks", response_model=BacklinksResponse)
|
||||
async def get_backlinks(
|
||||
request: Request,
|
||||
document_id: str,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
"""
|
||||
Get documents that reference this document (incoming links).
|
||||
"""
|
||||
doc = await _get_doc_with_access(request, document_id, db)
|
||||
|
||||
# Find all documents that have this doc_id in their outgoing_links
|
||||
result = await db.execute(
|
||||
text("""
|
||||
SELECT d.id, d.title, d.project_id, d.content, d.updated_at,
|
||||
p.name as project_name
|
||||
FROM active_documents d
|
||||
JOIN active_projects p ON d.project_id = p.id
|
||||
WHERE d.outgoing_links LIKE :pattern
|
||||
AND d.is_deleted = 0
|
||||
ORDER BY d.updated_at DESC
|
||||
"""),
|
||||
{"pattern": f"%{document_id}%"}
|
||||
)
|
||||
rows = result.fetchall()
|
||||
|
||||
backlinks = []
|
||||
for row in rows:
|
||||
# Parse outgoing_links JSON to verify this doc actually references target
|
||||
outgoing = []
|
||||
try:
|
||||
outgoing = json.loads(row.content) if row.content else []
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Check if this document's outgoing_links contains document_id
|
||||
try:
|
||||
outgoing_list = json.loads(row.content) if row.content else []
|
||||
# Actually we need to check outgoing_links field directly
|
||||
except:
|
||||
pass
|
||||
|
||||
# Use a more precise check
|
||||
check_result = await db.execute(
|
||||
select(Document).where(
|
||||
Document.id == row.id,
|
||||
Document.outgoing_links.like(f"%{document_id}%")
|
||||
)
|
||||
)
|
||||
if not check_result.scalar_one_or_none():
|
||||
continue
|
||||
|
||||
# Build excerpt around the reference
|
||||
excerpt = _build_backlink_excerpt(row.content or "", document_id)
|
||||
|
||||
backlinks.append(BacklinkItem(
|
||||
document_id=row.id,
|
||||
title=row.title,
|
||||
project_id=row.project_id,
|
||||
project_name=row.project_name,
|
||||
excerpt=excerpt,
|
||||
updated_at=row.updated_at,
|
||||
))
|
||||
|
||||
return BacklinksResponse(
|
||||
document_id=document_id,
|
||||
backlinks_count=len(backlinks),
|
||||
backlinks=backlinks,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/api/v1/documents/{document_id}/outgoing-links", response_model=OutgoingLinksResponse)
|
||||
async def get_outgoing_links(
|
||||
request: Request,
|
||||
document_id: str,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
"""
|
||||
Get documents that this document references (outgoing links).
|
||||
"""
|
||||
doc = await _get_doc_with_access(request, document_id, db)
|
||||
|
||||
# Parse outgoing_links
|
||||
outgoing_ids = []
|
||||
if doc.outgoing_links:
|
||||
try:
|
||||
outgoing_ids = json.loads(doc.outgoing_links)
|
||||
except json.JSONDecodeError:
|
||||
outgoing_ids = []
|
||||
|
||||
outgoing_links = []
|
||||
for target_id in outgoing_ids:
|
||||
# Check if target document exists
|
||||
target_result = await db.execute(
|
||||
text("""
|
||||
SELECT d.id, d.title, d.project_id, d.updated_at,
|
||||
p.name as project_name
|
||||
FROM active_documents d
|
||||
JOIN active_projects p ON d.project_id = p.id
|
||||
WHERE d.id = :target_id
|
||||
"""),
|
||||
{"target_id": target_id}
|
||||
)
|
||||
row = target_result.fetchone()
|
||||
|
||||
if row:
|
||||
outgoing_links.append(OutgoingLinkItem(
|
||||
document_id=row.id,
|
||||
title=row.title,
|
||||
project_id=row.project_id,
|
||||
project_name=row.project_name,
|
||||
exists=True,
|
||||
updated_at=row.updated_at,
|
||||
))
|
||||
else:
|
||||
# Document was deleted but was referenced
|
||||
outgoing_links.append(OutgoingLinkItem(
|
||||
document_id=target_id,
|
||||
title="[Deleted Document]",
|
||||
project_id="",
|
||||
project_name="",
|
||||
exists=False,
|
||||
updated_at=None,
|
||||
))
|
||||
|
||||
return OutgoingLinksResponse(
|
||||
document_id=document_id,
|
||||
outgoing_links_count=len(outgoing_links),
|
||||
outgoing_links=outgoing_links,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/api/v1/documents/{document_id}/links", response_model=LinksResponse)
|
||||
async def get_links(
|
||||
request: Request,
|
||||
document_id: str,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
"""
|
||||
Get all incoming and outgoing links for a document.
|
||||
"""
|
||||
doc = await _get_doc_with_access(request, document_id, db)
|
||||
|
||||
# Get outgoing links
|
||||
outgoing_ids = []
|
||||
if doc.outgoing_links:
|
||||
try:
|
||||
outgoing_ids = json.loads(doc.outgoing_links)
|
||||
except json.JSONDecodeError:
|
||||
outgoing_ids = []
|
||||
|
||||
outgoing = []
|
||||
for target_id in outgoing_ids:
|
||||
target_result = await db.execute(
|
||||
select(Document).where(Document.id == target_id, Document.is_deleted == False)
|
||||
)
|
||||
target = target_result.scalar_one_or_none()
|
||||
if target:
|
||||
outgoing.append(LinkItem(
|
||||
document_id=target.id,
|
||||
title=target.title,
|
||||
anchor_text=None,
|
||||
))
|
||||
|
||||
# Get incoming links (backlinks)
|
||||
backlinks_result = await db.execute(
|
||||
text("""
|
||||
SELECT d.id, d.title, d.outgoing_links
|
||||
FROM active_documents d
|
||||
WHERE d.outgoing_links LIKE :pattern
|
||||
AND d.is_deleted = 0
|
||||
"""),
|
||||
{"pattern": f"%{document_id}%"}
|
||||
)
|
||||
backlink_rows = backlinks_result.fetchall()
|
||||
|
||||
backlinks = []
|
||||
for row in backlink_rows:
|
||||
# Verify this link actually points to our document
|
||||
try:
|
||||
outgoing_list = json.loads(row.outgoing_links) if row.outgoing_links else []
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
|
||||
if document_id in outgoing_list:
|
||||
backlinks.append(LinkItem(
|
||||
document_id=row.id,
|
||||
title=row.title,
|
||||
anchor_text=None,
|
||||
))
|
||||
|
||||
return LinksResponse(
|
||||
document_id=document_id,
|
||||
outgoing_links=outgoing,
|
||||
backlinks=backlinks,
|
||||
)
|
||||
|
||||
|
||||
def _build_backlink_excerpt(content: str, target_id: str, context_chars: int = 150) -> str:
|
||||
"""Build an excerpt around the [[target_id]] reference in content."""
|
||||
# Find the [[uuid]] pattern in content
|
||||
pattern = r'\[\[' + re.escape(target_id) + r'(?:\|[^\]]+)?\]\]'
|
||||
match = re.search(pattern, content, re.IGNORECASE)
|
||||
|
||||
if not match:
|
||||
return content[:context_chars * 2] or ""
|
||||
|
||||
start = max(0, match.start() - context_chars)
|
||||
end = min(len(content), match.end() + context_chars)
|
||||
excerpt = content[start:end]
|
||||
|
||||
if start > 0:
|
||||
excerpt = "..." + excerpt
|
||||
if end < len(content):
|
||||
excerpt = excerpt + "..."
|
||||
|
||||
return excerpt
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Project Graph
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/api/v1/projects/{project_id}/graph", response_model=GraphResponse)
|
||||
async def get_project_graph(
|
||||
request: Request,
|
||||
project_id: str,
|
||||
depth: int = Query(2, ge=1, le=3),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
"""
|
||||
Get the full graph of document relationships within a project.
|
||||
Depth controls how many hops of outgoing links to include.
|
||||
"""
|
||||
project = await _get_project_with_access(request, project_id, db)
|
||||
|
||||
# Get all documents in project
|
||||
docs_result = await db.execute(
|
||||
select(Document).where(
|
||||
Document.project_id == project_id,
|
||||
Document.is_deleted == False,
|
||||
)
|
||||
)
|
||||
all_docs = docs_result.scalars().all()
|
||||
|
||||
# Build adjacency: doc_id -> set of outgoing_ids
|
||||
doc_map = {doc.id: doc for doc in all_docs}
|
||||
adjacency: dict[str, set[str]] = {doc.id: set() for doc in all_docs}
|
||||
|
||||
edges = []
|
||||
total_references = 0
|
||||
reachable: set[str] = set()
|
||||
|
||||
for doc in all_docs:
|
||||
outgoing_ids = []
|
||||
if doc.outgoing_links:
|
||||
try:
|
||||
outgoing_ids = json.loads(doc.outgoing_links)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
for target_id in outgoing_ids:
|
||||
if target_id in doc_map:
|
||||
adjacency[doc.id].add(target_id)
|
||||
edges.append(GraphEdge(source=doc.id, target=target_id, type="reference"))
|
||||
total_references += 1
|
||||
reachable.add(doc.id)
|
||||
reachable.add(target_id)
|
||||
|
||||
# Build nodes
|
||||
nodes = []
|
||||
for doc in all_docs:
|
||||
nodes.append(GraphNode(id=doc.id, title=doc.title, type="document"))
|
||||
|
||||
# Orphaned = docs with no incoming and no outgoing links
|
||||
incoming_count: dict[str, int] = {doc.id: 0 for doc in all_docs}
|
||||
for doc in all_docs:
|
||||
for target_id in adjacency[doc.id]:
|
||||
if target_id in incoming_count:
|
||||
incoming_count[target_id] += 1
|
||||
|
||||
orphaned = sum(1 for doc in all_docs if incoming_count[doc.id] == 0 and len(adjacency[doc.id]) == 0)
|
||||
|
||||
return GraphResponse(
|
||||
project_id=project_id,
|
||||
nodes=nodes,
|
||||
edges=edges,
|
||||
stats=GraphStats(
|
||||
total_documents=len(all_docs),
|
||||
total_references=total_references,
|
||||
orphaned_documents=orphaned,
|
||||
),
|
||||
)
|
||||
Reference in New Issue
Block a user