- Add outgoing_links (JSON) and backlinks_count to Document model
- POST /documents/{id}/detect-links — detect [[uuid]] patterns in content
- GET /documents/{id}/backlinks — documents referencing this doc
- GET /documents/{id}/outgoing-links — documents this doc references
- GET /documents/{id}/links — combined incoming + outgoing
- GET /projects/{id}/graph — full project relationship graph
- GET /search/quick — fuzzy search (Quick Switcher Cmd+K)
- GET /projects/{id}/documents/search — project-scoped search
- GET /documents/{id}/export — markdown|json export
- GET /projects/{id}/export — json|zip export
- 27 new tests
491 lines
15 KiB
Python
491 lines
15 KiB
Python
import json
|
|
import re
|
|
import uuid
|
|
from datetime import datetime
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
|
from sqlalchemy import select, text
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from app.database import get_db
|
|
from app.models.document import Document
|
|
from app.models.project import Project
|
|
from app.routers.auth import get_current_agent
|
|
from app.schemas.document import (
|
|
BacklinkItem,
|
|
BacklinksResponse,
|
|
BrokenLink,
|
|
DetectLinksRequest,
|
|
DetectLinksResponse,
|
|
GraphEdge,
|
|
GraphNode,
|
|
GraphResponse,
|
|
GraphStats,
|
|
LinkItem,
|
|
LinksResponse,
|
|
OutgoingLinkItem,
|
|
OutgoingLinksResponse,
|
|
)
|
|
|
|
router = APIRouter(tags=["links"])
|
|
|
|
|
|
# =============================================================================
|
|
# Link Detection
|
|
# =============================================================================
|
|
|
|
def detect_links_in_content(content: str) -> tuple[list[str], list[BrokenLink]]:
    """
    Detect [[uuid]] and [[uuid|text]] patterns in content.

    Returns (valid_ids, broken_links). Duplicates are preserved in order of
    appearance; candidates that match the pattern shape but do not parse as
    UUIDs are reported as broken with reason "invalid_format".
    """
    # 36 chars of hex digits / dashes, optionally followed by |display-text.
    candidates = re.findall(
        r'\[\[([0-9a-f-]{36})(?:\|[^\]]+)?\]\]',
        content,
        re.IGNORECASE,
    )

    valid_ids: list[str] = []
    broken_links: list[BrokenLink] = []

    for candidate in candidates:
        try:
            # Strict UUID validation beyond what the regex guarantees.
            uuid.UUID(candidate)
        except ValueError:
            broken_links.append(BrokenLink(reference=candidate, reason="invalid_format"))
        else:
            valid_ids.append(candidate)

    return valid_ids, broken_links
|
|
|
|
|
|
async def _get_doc_with_access(request: Request, document_id: str, db: AsyncSession) -> Document:
    """Load a non-deleted document and verify the caller owns its project.

    Raises 404 when the document does not exist (or is soft-deleted) and
    403 when its project does not belong to the authenticated agent.
    """
    agent = await get_current_agent(request, db)

    doc_row = await db.execute(
        select(Document).where(
            Document.id == document_id,
            Document.is_deleted == False,
        )
    )
    document = doc_row.scalar_one_or_none()
    if document is None:
        raise HTTPException(status_code=404, detail="Document not found")

    # Ownership check: the doc's project must belong to this agent.
    owner_row = await db.execute(
        select(Project).where(
            Project.id == document.project_id,
            Project.agent_id == agent.id,
            Project.is_deleted == False,
        )
    )
    if owner_row.scalar_one_or_none() is None:
        raise HTTPException(status_code=403, detail="Forbidden")

    return document
|
|
|
|
|
|
async def _get_project_with_access(request: Request, project_id: str, db: AsyncSession) -> Project:
    """Load a non-deleted project owned by the authenticated agent, or raise 404."""
    agent = await get_current_agent(request, db)

    row = await db.execute(
        select(Project).where(
            Project.id == project_id,
            Project.agent_id == agent.id,
            Project.is_deleted == False,
        )
    )
    found = row.scalar_one_or_none()
    if found is None:
        raise HTTPException(status_code=404, detail="Project not found")

    return found
|
|
|
|
|
|
@router.post("/api/v1/documents/{document_id}/detect-links", response_model=DetectLinksResponse)
async def detect_links(
    request: Request,
    document_id: str,
    payload: DetectLinksRequest,
    db: AsyncSession = Depends(get_db),
):
    """
    Detect and save [[uuid]] references in content.

    Updates the document's outgoing_links field and keeps backlinks_count on
    referenced documents in sync: targets removed since the previous save are
    decremented, newly added targets incremented.
    """
    doc = await _get_doc_with_access(request, document_id, db)

    # Reject pathological payloads before running the regex over them.
    if len(payload.content) > 5_000_000:
        raise HTTPException(status_code=413, detail="Content too large (max 5MB)")

    link_ids, broken_links = detect_links_in_content(payload.content)

    # Keep only references that point at an existing (non-deleted) document.
    valid_ids = []
    for lid in link_ids:
        ref_result = await db.execute(
            select(Document.id).where(
                Document.id == lid,
                Document.is_deleted == False,
            )
        )
        if ref_result.scalar_one_or_none():
            valid_ids.append(lid)
        else:
            broken_links.append(BrokenLink(reference=lid, reason="document_not_found"))

    # Remove duplicates while preserving first-seen order.
    seen = set()
    unique_valid_ids = []
    for vid in valid_ids:
        if vid not in seen:
            seen.add(vid)
            unique_valid_ids.append(vid)

    # BUG FIX: snapshot the previous link list BEFORE overwriting
    # doc.outgoing_links. The original read it back *after* the assignment,
    # so old_links always equalled the new list and neither the decrement
    # nor the increment of backlinks_count below ever executed.
    old_links = []
    if doc.outgoing_links:
        try:
            old_links = json.loads(doc.outgoing_links)
        except json.JSONDecodeError:
            old_links = []

    doc.outgoing_links = json.dumps(unique_valid_ids)
    doc.updated_at = datetime.utcnow()

    # Decrement counts for targets that were dropped since the last save.
    # MAX(0, ...) plus the backlinks_count > 0 guard keeps the counter
    # from going negative even if it has drifted.
    for target_id in old_links:
        if target_id not in unique_valid_ids:
            await db.execute(
                text("""
                    UPDATE documents
                    SET backlinks_count = MAX(0, backlinks_count - 1)
                    WHERE id = :target_id AND backlinks_count > 0
                """),
                {"target_id": target_id}
            )

    # Increment counts for targets that are new in this save.
    for target_id in unique_valid_ids:
        if target_id not in old_links:
            await db.execute(
                text("""
                    UPDATE documents
                    SET backlinks_count = backlinks_count + 1
                    WHERE id = :target_id
                """),
                {"target_id": target_id}
            )

    await db.flush()

    return DetectLinksResponse(
        document_id=document_id,
        outgoing_links=unique_valid_ids,
        links_detected=len(unique_valid_ids),
        links_broken=len(broken_links),
        broken_links=broken_links,
    )
|
|
|
|
|
|
# =============================================================================
|
|
# Backlinks & Outgoing Links
|
|
# =============================================================================
|
|
|
|
@router.get("/api/v1/documents/{document_id}/backlinks", response_model=BacklinksResponse)
async def get_backlinks(
    request: Request,
    document_id: str,
    db: AsyncSession = Depends(get_db),
):
    """
    Get documents that reference this document (incoming links).

    The LIKE clause is only a coarse substring prefilter over the JSON
    outgoing_links column; each candidate's list is parsed to confirm it
    actually contains document_id.
    """
    await _get_doc_with_access(request, document_id, db)

    # Select outgoing_links directly so the reference check can be done
    # in Python. The original issued one extra SELECT per candidate row
    # (N+1 queries) and dead-parsed row.content as JSON (it is document
    # text, not JSON) inside bare/broad except blocks.
    result = await db.execute(
        text("""
            SELECT d.id, d.title, d.project_id, d.content, d.outgoing_links,
                   d.updated_at, p.name as project_name
            FROM active_documents d
            JOIN active_projects p ON d.project_id = p.id
            WHERE d.outgoing_links LIKE :pattern
              AND d.is_deleted = 0
            ORDER BY d.updated_at DESC
        """),
        {"pattern": f"%{document_id}%"}
    )

    backlinks = []
    for row in result.fetchall():
        # Precise check: the candidate's outgoing_links JSON must actually
        # list this document's id.
        try:
            outgoing = json.loads(row.outgoing_links) if row.outgoing_links else []
        except json.JSONDecodeError:
            continue
        if document_id not in outgoing:
            continue

        backlinks.append(BacklinkItem(
            document_id=row.id,
            title=row.title,
            project_id=row.project_id,
            project_name=row.project_name,
            excerpt=_build_backlink_excerpt(row.content or "", document_id),
            updated_at=row.updated_at,
        ))

    return BacklinksResponse(
        document_id=document_id,
        backlinks_count=len(backlinks),
        backlinks=backlinks,
    )
|
|
|
|
|
|
@router.get("/api/v1/documents/{document_id}/outgoing-links", response_model=OutgoingLinksResponse)
async def get_outgoing_links(
    request: Request,
    document_id: str,
    db: AsyncSession = Depends(get_db),
):
    """
    Get documents that this document references (outgoing links).

    Targets that no longer resolve are still reported, flagged with
    exists=False and a placeholder title.
    """
    doc = await _get_doc_with_access(request, document_id, db)

    # outgoing_links is stored as a JSON array of document ids.
    try:
        target_ids = json.loads(doc.outgoing_links) if doc.outgoing_links else []
    except json.JSONDecodeError:
        target_ids = []

    resolved = []
    for tid in target_ids:
        lookup = await db.execute(
            text("""
                SELECT d.id, d.title, d.project_id, d.updated_at,
                       p.name as project_name
                FROM active_documents d
                JOIN active_projects p ON d.project_id = p.id
                WHERE d.id = :target_id
            """),
            {"target_id": tid}
        )
        hit = lookup.fetchone()

        if hit is None:
            # Referenced document no longer resolves; keep a stub entry.
            resolved.append(OutgoingLinkItem(
                document_id=tid,
                title="[Deleted Document]",
                project_id="",
                project_name="",
                exists=False,
                updated_at=None,
            ))
        else:
            resolved.append(OutgoingLinkItem(
                document_id=hit.id,
                title=hit.title,
                project_id=hit.project_id,
                project_name=hit.project_name,
                exists=True,
                updated_at=hit.updated_at,
            ))

    return OutgoingLinksResponse(
        document_id=document_id,
        outgoing_links_count=len(resolved),
        outgoing_links=resolved,
    )
|
|
|
|
|
|
@router.get("/api/v1/documents/{document_id}/links", response_model=LinksResponse)
async def get_links(
    request: Request,
    document_id: str,
    db: AsyncSession = Depends(get_db),
):
    """
    Get all incoming and outgoing links for a document.
    """
    doc = await _get_doc_with_access(request, document_id, db)

    # --- Outgoing: resolve each id stored in this doc's outgoing_links ---
    try:
        stored_ids = json.loads(doc.outgoing_links) if doc.outgoing_links else []
    except json.JSONDecodeError:
        stored_ids = []

    outgoing = []
    for tid in stored_ids:
        lookup = await db.execute(
            select(Document).where(Document.id == tid, Document.is_deleted == False)
        )
        target_doc = lookup.scalar_one_or_none()
        if target_doc is not None:
            outgoing.append(LinkItem(
                document_id=target_doc.id,
                title=target_doc.title,
                anchor_text=None,
            ))

    # --- Incoming: candidates whose outgoing_links mention this doc ---
    candidates = await db.execute(
        text("""
            SELECT d.id, d.title, d.outgoing_links
            FROM active_documents d
            WHERE d.outgoing_links LIKE :pattern
            AND d.is_deleted = 0
        """),
        {"pattern": f"%{document_id}%"}
    )

    backlinks = []
    for row in candidates.fetchall():
        # LIKE is only a prefilter; confirm via the parsed JSON list.
        try:
            referenced = json.loads(row.outgoing_links) if row.outgoing_links else []
        except json.JSONDecodeError:
            continue

        if document_id in referenced:
            backlinks.append(LinkItem(
                document_id=row.id,
                title=row.title,
                anchor_text=None,
            ))

    return LinksResponse(
        document_id=document_id,
        outgoing_links=outgoing,
        backlinks=backlinks,
    )
|
|
|
|
|
|
def _build_backlink_excerpt(content: str, target_id: str, context_chars: int = 150) -> str:
|
|
"""Build an excerpt around the [[target_id]] reference in content."""
|
|
# Find the [[uuid]] pattern in content
|
|
pattern = r'\[\[' + re.escape(target_id) + r'(?:\|[^\]]+)?\]\]'
|
|
match = re.search(pattern, content, re.IGNORECASE)
|
|
|
|
if not match:
|
|
return content[:context_chars * 2] or ""
|
|
|
|
start = max(0, match.start() - context_chars)
|
|
end = min(len(content), match.end() + context_chars)
|
|
excerpt = content[start:end]
|
|
|
|
if start > 0:
|
|
excerpt = "..." + excerpt
|
|
if end < len(content):
|
|
excerpt = excerpt + "..."
|
|
|
|
return excerpt
|
|
|
|
|
|
# =============================================================================
|
|
# Project Graph
|
|
# =============================================================================
|
|
|
|
@router.get("/api/v1/projects/{project_id}/graph", response_model=GraphResponse)
async def get_project_graph(
    request: Request,
    project_id: str,
    depth: int = Query(2, ge=1, le=3),
    db: AsyncSession = Depends(get_db),
):
    """
    Get the full graph of document relationships within a project.

    NOTE(review): `depth` is accepted and validated but currently unused —
    the whole project graph is always returned. It is kept for interface
    compatibility; either wire it up or retire it deliberately.
    """
    await _get_project_with_access(request, project_id, db)

    # All live documents in the project become graph nodes.
    docs_result = await db.execute(
        select(Document).where(
            Document.project_id == project_id,
            Document.is_deleted == False,
        )
    )
    all_docs = docs_result.scalars().all()

    doc_ids = {doc.id for doc in all_docs}
    # adjacency: doc_id -> set of in-project targets it references.
    adjacency: dict[str, set[str]] = {doc_id: set() for doc_id in doc_ids}

    edges = []
    total_references = 0
    for doc in all_docs:
        outgoing_ids = []
        if doc.outgoing_links:
            try:
                outgoing_ids = json.loads(doc.outgoing_links)
            except json.JSONDecodeError:
                pass  # malformed JSON: treat as having no outgoing links

        for target_id in outgoing_ids:
            # Only edges whose target lives in this project are included;
            # cross-project references are silently dropped.
            if target_id in doc_ids:
                adjacency[doc.id].add(target_id)
                edges.append(GraphEdge(source=doc.id, target=target_id, type="reference"))
                total_references += 1
        # (The original also built a `reachable` set here that was never
        # read; it has been removed.)

    nodes = [GraphNode(id=doc.id, title=doc.title, type="document") for doc in all_docs]

    # Incoming counts are derived from the deduplicated adjacency sets.
    incoming_count: dict[str, int] = {doc_id: 0 for doc_id in doc_ids}
    for targets in adjacency.values():
        for target_id in targets:
            incoming_count[target_id] += 1

    # Orphaned = no incoming references and no outgoing references.
    orphaned = sum(
        1 for doc in all_docs
        if incoming_count[doc.id] == 0 and not adjacency[doc.id]
    )

    return GraphResponse(
        project_id=project_id,
        nodes=nodes,
        edges=edges,
        stats=GraphStats(
            total_documents=len(all_docs),
            total_references=total_references,
            orphaned_documents=orphaned,
        ),
    )
|