import json
import re
import uuid
from datetime import datetime

from fastapi import APIRouter, Depends, HTTPException, Query, Request
from sqlalchemy import select, text
from sqlalchemy.ext.asyncio import AsyncSession

from app.database import get_db
from app.models.document import Document
from app.models.project import Project
from app.routers.auth import get_current_agent
from app.schemas.document import (
    BacklinkItem,
    BacklinksResponse,
    BrokenLink,
    DetectLinksRequest,
    DetectLinksResponse,
    GraphEdge,
    GraphNode,
    GraphResponse,
    GraphStats,
    LinkItem,
    LinksResponse,
    OutgoingLinkItem,
    OutgoingLinksResponse,
)

router = APIRouter(tags=["links"])


# =============================================================================
# Link Detection
# =============================================================================

def detect_links_in_content(content: str) -> tuple[list[str], list[BrokenLink]]:
    """
    Detect [[uuid]] and [[uuid|text]] patterns in content.

    Returns (valid_ids, broken_links): 36-char candidates that parse as a
    UUID go into valid_ids (in order of appearance, duplicates kept);
    candidates that fail UUID parsing are reported with reason
    "invalid_format".
    """
    # Pattern: [[uuid]] or [[uuid|display text]]. IGNORECASE also accepts
    # uppercase hex digits; the capture group keeps only the id portion.
    pattern = r'\[\[([0-9a-f-]{36})(?:\|[^\]]+)?\]\]'
    matches = re.findall(pattern, content, re.IGNORECASE)

    valid_ids: list[str] = []
    broken_links: list[BrokenLink] = []
    for match in matches:
        try:
            # The character class alone does not guarantee a well-formed
            # UUID (e.g. misplaced dashes), so validate properly.
            uuid.UUID(match)
            valid_ids.append(match)
        except ValueError:
            broken_links.append(BrokenLink(reference=match, reason="invalid_format"))
    return valid_ids, broken_links


async def _get_doc_with_access(request: Request, document_id: str, db: AsyncSession) -> Document:
    """Get a non-deleted document and verify the caller may access it.

    Raises 404 if the document does not exist (or is soft-deleted), and 403
    if the document's project is not owned by the authenticated agent.
    """
    agent = await get_current_agent(request, db)
    result = await db.execute(
        select(Document).where(
            Document.id == document_id,
            Document.is_deleted == False,
        )
    )
    doc = result.scalar_one_or_none()
    if not doc:
        raise HTTPException(status_code=404, detail="Document not found")

    proj_result = await db.execute(
        select(Project).where(
            Project.id == doc.project_id,
            Project.agent_id == agent.id,
            Project.is_deleted == False,
        )
    )
    if not proj_result.scalar_one_or_none():
        raise HTTPException(status_code=403, detail="Forbidden")
    return doc


async def _get_project_with_access(request: Request, project_id: str, db: AsyncSession) -> Project:
    """Get a non-deleted project owned by the authenticated agent, or raise 404."""
    agent = await get_current_agent(request, db)
    result = await db.execute(
        select(Project).where(
            Project.id == project_id,
            Project.agent_id == agent.id,
            Project.is_deleted == False,
        )
    )
    project = result.scalar_one_or_none()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")
    return project


@router.post("/api/v1/documents/{document_id}/detect-links", response_model=DetectLinksResponse)
async def detect_links(
    request: Request,
    document_id: str,
    payload: DetectLinksRequest,
    db: AsyncSession = Depends(get_db),
):
    """
    Detect and save [[uuid]] references in content.

    Updates the document's outgoing_links field and adjusts backlinks_count
    on target documents that gained or lost a reference.
    """
    doc = await _get_doc_with_access(request, document_id, db)

    # Validate content size
    if len(payload.content) > 5_000_000:
        raise HTTPException(status_code=413, detail="Content too large (max 5MB)")

    # Detect links
    link_ids, broken_links = detect_links_in_content(payload.content)

    # Validate that referenced documents exist — one IN query instead of a
    # round-trip per link.
    existing_ids: set[str] = set()
    if link_ids:
        ref_result = await db.execute(
            select(Document.id).where(
                Document.id.in_(link_ids),
                Document.is_deleted == False,
            )
        )
        existing_ids = {row[0] for row in ref_result.all()}

    valid_ids = []
    for lid in link_ids:
        if lid in existing_ids:
            valid_ids.append(lid)
        else:
            broken_links.append(BrokenLink(reference=lid, reason="document_not_found"))

    # Remove duplicates while preserving first-seen order.
    unique_valid_ids = list(dict.fromkeys(valid_ids))

    # BUGFIX: snapshot the previous outgoing links BEFORE overwriting them.
    # The original assigned doc.outgoing_links first and then read it back,
    # so old always equaled new and backlinks_count was never adjusted.
    old_links: list[str] = []
    if doc.outgoing_links:
        try:
            old_links = json.loads(doc.outgoing_links)
        except json.JSONDecodeError:
            old_links = []

    doc.outgoing_links = json.dumps(unique_valid_ids)
    # NOTE(review): naive UTC timestamp; datetime.utcnow() is deprecated in
    # 3.12+ — migrate alongside the rest of the project's timestamps.
    doc.updated_at = datetime.utcnow()

    # Decrement counts on targets that lost this document's reference...
    for target_id in old_links:
        if target_id not in unique_valid_ids:
            await db.execute(
                text("""
                    UPDATE documents SET backlinks_count = MAX(0, backlinks_count - 1)
                    WHERE id = :target_id AND backlinks_count > 0
                """),
                {"target_id": target_id},
            )

    # ...and increment on targets that gained one.
    for target_id in unique_valid_ids:
        if target_id not in old_links:
            await db.execute(
                text("""
                    UPDATE documents SET backlinks_count = backlinks_count + 1
                    WHERE id = :target_id
                """),
                {"target_id": target_id},
            )

    await db.flush()

    return DetectLinksResponse(
        document_id=document_id,
        outgoing_links=unique_valid_ids,
        links_detected=len(unique_valid_ids),
        links_broken=len(broken_links),
        broken_links=broken_links,
    )


# =============================================================================
# Backlinks & Outgoing Links
# =============================================================================
@router.get("/api/v1/documents/{document_id}/backlinks", response_model=BacklinksResponse)
async def get_backlinks(
    request: Request,
    document_id: str,
    db: AsyncSession = Depends(get_db),
):
    """
    Get documents that reference this document (incoming links).
    """
    await _get_doc_with_access(request, document_id, db)

    # Coarse prefilter via LIKE on the serialized outgoing_links.
    # BUGFIX: select d.outgoing_links directly and verify membership in the
    # parsed JSON list, instead of the original's dead code that JSON-parsed
    # the document *content* and re-queried the DB (still imprecisely) per row.
    result = await db.execute(
        text("""
            SELECT d.id, d.title, d.project_id, d.content, d.outgoing_links,
                   d.updated_at, p.name as project_name
            FROM active_documents d
            JOIN active_projects p ON d.project_id = p.id
            WHERE d.outgoing_links LIKE :pattern
            AND d.is_deleted = 0
            ORDER BY d.updated_at DESC
        """),
        {"pattern": f"%{document_id}%"},
    )

    backlinks = []
    for row in result.fetchall():
        # LIKE may false-positive (id embedded in other text); do the exact check.
        try:
            outgoing = json.loads(row.outgoing_links) if row.outgoing_links else []
        except json.JSONDecodeError:
            continue
        if document_id not in outgoing:
            continue

        # Build excerpt around the reference
        excerpt = _build_backlink_excerpt(row.content or "", document_id)
        backlinks.append(BacklinkItem(
            document_id=row.id,
            title=row.title,
            project_id=row.project_id,
            project_name=row.project_name,
            excerpt=excerpt,
            updated_at=row.updated_at,
        ))

    return BacklinksResponse(
        document_id=document_id,
        backlinks_count=len(backlinks),
        backlinks=backlinks,
    )


@router.get("/api/v1/documents/{document_id}/outgoing-links", response_model=OutgoingLinksResponse)
async def get_outgoing_links(
    request: Request,
    document_id: str,
    db: AsyncSession = Depends(get_db),
):
    """
    Get documents that this document references (outgoing links).

    Targets that no longer exist are reported with exists=False so callers
    can surface dangling references.
    """
    doc = await _get_doc_with_access(request, document_id, db)

    # Parse the stored JSON list; treat corrupt data as "no links".
    outgoing_ids = []
    if doc.outgoing_links:
        try:
            outgoing_ids = json.loads(doc.outgoing_links)
        except json.JSONDecodeError:
            outgoing_ids = []

    outgoing_links = []
    for target_id in outgoing_ids:
        # Check whether the target document still exists (active view).
        target_result = await db.execute(
            text("""
                SELECT d.id, d.title, d.project_id, d.updated_at, p.name as project_name
                FROM active_documents d
                JOIN active_projects p ON d.project_id = p.id
                WHERE d.id = :target_id
            """),
            {"target_id": target_id},
        )
        row = target_result.fetchone()
        if row:
            outgoing_links.append(OutgoingLinkItem(
                document_id=row.id,
                title=row.title,
                project_id=row.project_id,
                project_name=row.project_name,
                exists=True,
                updated_at=row.updated_at,
            ))
        else:
            # Document was deleted but is still referenced; keep a placeholder.
            outgoing_links.append(OutgoingLinkItem(
                document_id=target_id,
                title="[Deleted Document]",
                project_id="",
                project_name="",
                exists=False,
                updated_at=None,
            ))

    return OutgoingLinksResponse(
        document_id=document_id,
        outgoing_links_count=len(outgoing_links),
        outgoing_links=outgoing_links,
    )


@router.get("/api/v1/documents/{document_id}/links", response_model=LinksResponse)
async def get_links(
    request: Request,
    document_id: str,
    db: AsyncSession = Depends(get_db),
):
    """
    Get all incoming and outgoing links for a document.
    """
    doc = await _get_doc_with_access(request, document_id, db)

    # Outgoing: resolve each stored id to a live document.
    outgoing_ids = []
    if doc.outgoing_links:
        try:
            outgoing_ids = json.loads(doc.outgoing_links)
        except json.JSONDecodeError:
            outgoing_ids = []

    outgoing = []
    for target_id in outgoing_ids:
        target_result = await db.execute(
            select(Document).where(Document.id == target_id, Document.is_deleted == False)
        )
        target = target_result.scalar_one_or_none()
        if target:
            outgoing.append(LinkItem(
                document_id=target.id,
                title=target.title,
                anchor_text=None,
            ))

    # Incoming (backlinks): LIKE prefilter, then exact JSON-list check.
    backlinks_result = await db.execute(
        text("""
            SELECT d.id, d.title, d.outgoing_links
            FROM active_documents d
            WHERE d.outgoing_links LIKE :pattern
            AND d.is_deleted = 0
        """),
        {"pattern": f"%{document_id}%"},
    )

    backlinks = []
    for row in backlinks_result.fetchall():
        # Verify this link actually points to our document.
        try:
            outgoing_list = json.loads(row.outgoing_links) if row.outgoing_links else []
        except json.JSONDecodeError:
            continue
        if document_id in outgoing_list:
            backlinks.append(LinkItem(
                document_id=row.id,
                title=row.title,
                anchor_text=None,
            ))

    return LinksResponse(
        document_id=document_id,
        outgoing_links=outgoing,
        backlinks=backlinks,
    )


def _build_backlink_excerpt(content: str, target_id: str, context_chars: int = 150) -> str:
    """Build an excerpt around the first [[target_id]] reference in content.

    Includes up to context_chars on each side of the match, with "..."
    markers where the excerpt is truncated. Falls back to the leading
    2 * context_chars of content when no reference is found.
    """
    pattern = r'\[\[' + re.escape(target_id) + r'(?:\|[^\]]+)?\]\]'
    match = re.search(pattern, content, re.IGNORECASE)
    if not match:
        return content[:context_chars * 2] or ""

    start = max(0, match.start() - context_chars)
    end = min(len(content), match.end() + context_chars)
    excerpt = content[start:end]
    if start > 0:
        excerpt = "..." + excerpt
    if end < len(content):
        excerpt = excerpt + "..."
    return excerpt


# =============================================================================
# Project Graph
# =============================================================================

@router.get("/api/v1/projects/{project_id}/graph", response_model=GraphResponse)
async def get_project_graph(
    request: Request,
    project_id: str,
    depth: int = Query(2, ge=1, le=3),
    db: AsyncSession = Depends(get_db),
):
    """
    Get the full graph of document relationships within a project.

    NOTE(review): `depth` is validated but not currently applied — the whole
    project graph is always returned. Parameter kept for interface
    compatibility; confirm intended semantics before implementing traversal.
    """
    await _get_project_with_access(request, project_id, db)

    # All live documents in the project.
    docs_result = await db.execute(
        select(Document).where(
            Document.project_id == project_id,
            Document.is_deleted == False,
        )
    )
    all_docs = docs_result.scalars().all()

    # Adjacency restricted to targets inside this project; cross-project or
    # dangling references are ignored.
    doc_map = {doc.id: doc for doc in all_docs}
    adjacency: dict[str, set[str]] = {doc.id: set() for doc in all_docs}
    edges = []
    total_references = 0

    for doc in all_docs:
        outgoing_ids = []
        if doc.outgoing_links:
            try:
                outgoing_ids = json.loads(doc.outgoing_links)
            except json.JSONDecodeError:
                pass
        for target_id in outgoing_ids:
            if target_id in doc_map:
                adjacency[doc.id].add(target_id)
                edges.append(GraphEdge(source=doc.id, target=target_id, type="reference"))
                total_references += 1

    nodes = [GraphNode(id=doc.id, title=doc.title, type="document") for doc in all_docs]

    # Orphaned = documents with neither incoming nor outgoing in-project links.
    incoming_count: dict[str, int] = {doc.id: 0 for doc in all_docs}
    for doc in all_docs:
        for target_id in adjacency[doc.id]:
            # Every target here came from doc_map, so the key always exists.
            incoming_count[target_id] += 1
    orphaned = sum(
        1 for doc in all_docs
        if incoming_count[doc.id] == 0 and not adjacency[doc.id]
    )

    return GraphResponse(
        project_id=project_id,
        nodes=nodes,
        edges=edges,
        stats=GraphStats(
            total_documents=len(all_docs),
            total_references=total_references,
            orphaned_documents=orphaned,
        ),
    )