Files
claudia-docs-api/app/routers/links.py
Motoko 07f9ac91fc Phase 3: Graph view, backlinks, quick switcher, export
- Add outgoing_links (JSON) and backlinks_count to Document model
- POST /documents/{id}/detect-links — detect [[uuid]] patterns in content
- GET /documents/{id}/backlinks — documents referencing this doc
- GET /documents/{id}/outgoing-links — documents this doc references
- GET /documents/{id}/links — combined incoming + outgoing
- GET /projects/{id}/graph — full project relationship graph
- GET /search/quick — fuzzy search (Quick Switcher Cmd+K)
- GET /projects/{id}/documents/search — project-scoped search
- GET /documents/{id}/export — markdown|json export
- GET /projects/{id}/export — json|zip export
- 27 new tests
2026-03-30 23:46:45 +00:00

491 lines
15 KiB
Python

import json
import re
import uuid
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, Query, Request
from sqlalchemy import select, text
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.models.document import Document
from app.models.project import Project
from app.routers.auth import get_current_agent
from app.schemas.document import (
BacklinkItem,
BacklinksResponse,
BrokenLink,
DetectLinksRequest,
DetectLinksResponse,
GraphEdge,
GraphNode,
GraphResponse,
GraphStats,
LinkItem,
LinksResponse,
OutgoingLinkItem,
OutgoingLinksResponse,
)
router = APIRouter(tags=["links"])
# =============================================================================
# Link Detection
# =============================================================================
def detect_links_in_content(content: str) -> tuple[list[str], list[BrokenLink]]:
    """
    Scan *content* for [[uuid]] and [[uuid|display text]] references.

    Returns a pair (valid_ids, broken_links): captured IDs that parse as
    UUIDs (in order of appearance, duplicates kept), and BrokenLink
    entries for candidates that matched the pattern but failed UUID
    validation.
    """
    # Candidate: 36 hex/dash chars, optionally followed by |anchor text.
    link_pattern = r'\[\[([0-9a-f-]{36})(?:\|[^\]]+)?\]\]'
    valid_ids: list[str] = []
    broken_links: list[BrokenLink] = []
    for candidate in re.findall(link_pattern, content, re.IGNORECASE):
        try:
            uuid.UUID(candidate)
        except ValueError:
            # Matched the shape but is not a real UUID (e.g. misplaced dashes).
            broken_links.append(BrokenLink(reference=candidate, reason="invalid_format"))
        else:
            valid_ids.append(candidate)
    return valid_ids, broken_links
async def _get_doc_with_access(request: Request, document_id: str, db: AsyncSession) -> Document:
    """Load a live document and verify the caller's agent owns its project.

    Raises HTTP 404 when the document is missing or soft-deleted, and
    HTTP 403 when the owning project does not belong to the agent.
    """
    agent = await get_current_agent(request, db)
    doc_row = await db.execute(
        select(Document).where(
            Document.id == document_id,
            Document.is_deleted == False,
        )
    )
    document = doc_row.scalar_one_or_none()
    if document is None:
        raise HTTPException(status_code=404, detail="Document not found")
    # Ownership check goes through the (non-deleted) parent project.
    owner_row = await db.execute(
        select(Project).where(
            Project.id == document.project_id,
            Project.agent_id == agent.id,
            Project.is_deleted == False,
        )
    )
    if owner_row.scalar_one_or_none() is None:
        raise HTTPException(status_code=403, detail="Forbidden")
    return document
async def _get_project_with_access(request: Request, project_id: str, db: AsyncSession) -> Project:
    """Load a live project owned by the authenticated agent.

    Raises HTTP 404 when no matching, non-deleted project exists for
    this agent (ownership failures are indistinguishable from absence).
    """
    agent = await get_current_agent(request, db)
    row = await db.execute(
        select(Project).where(
            Project.id == project_id,
            Project.agent_id == agent.id,
            Project.is_deleted == False,
        )
    )
    found = row.scalar_one_or_none()
    if found is None:
        raise HTTPException(status_code=404, detail="Project not found")
    return found
@router.post("/api/v1/documents/{document_id}/detect-links", response_model=DetectLinksResponse)
async def detect_links(
    request: Request,
    document_id: str,
    payload: DetectLinksRequest,
    db: AsyncSession = Depends(get_db),
):
    """
    Detect and save [[uuid]] references in content.

    Updates the document's outgoing_links field and keeps the
    backlinks_count of referenced documents in sync with the diff
    between the old and new link sets.
    """
    doc = await _get_doc_with_access(request, document_id, db)
    # Guard against pathological payloads before regex scanning.
    if len(payload.content) > 5_000_000:
        raise HTTPException(status_code=413, detail="Content too large (max 5MB)")
    link_ids, broken_links = detect_links_in_content(payload.content)
    # Keep only references whose target document still exists.
    valid_ids = []
    for lid in link_ids:
        ref_result = await db.execute(
            select(Document.id).where(
                Document.id == lid,
                Document.is_deleted == False,
            )
        )
        if ref_result.scalar_one_or_none():
            valid_ids.append(lid)
        else:
            broken_links.append(BrokenLink(reference=lid, reason="document_not_found"))
    # De-duplicate while preserving first-seen order.
    seen = set()
    unique_valid_ids = []
    for vid in valid_ids:
        if vid not in seen:
            seen.add(vid)
            unique_valid_ids.append(vid)
    # BUG FIX: snapshot the OLD link set BEFORE overwriting outgoing_links.
    # Previously the field was assigned first, so old_links always equalled
    # the new set and backlinks_count was never incremented or decremented.
    old_links = []
    if doc.outgoing_links:
        try:
            old_links = json.loads(doc.outgoing_links)
        except json.JSONDecodeError:
            old_links = []
    doc.outgoing_links = json.dumps(unique_valid_ids)
    doc.updated_at = datetime.utcnow()
    # Decrement counts on targets that are no longer referenced...
    for target_id in old_links:
        if target_id not in unique_valid_ids:
            await db.execute(
                text("""
                UPDATE documents
                SET backlinks_count = MAX(0, backlinks_count - 1)
                WHERE id = :target_id AND backlinks_count > 0
                """),
                {"target_id": target_id}
            )
    # ...and increment counts on newly referenced targets.
    for target_id in unique_valid_ids:
        if target_id not in old_links:
            await db.execute(
                text("""
                UPDATE documents
                SET backlinks_count = backlinks_count + 1
                WHERE id = :target_id
                """),
                {"target_id": target_id}
            )
    await db.flush()
    return DetectLinksResponse(
        document_id=document_id,
        outgoing_links=unique_valid_ids,
        links_detected=len(unique_valid_ids),
        links_broken=len(broken_links),
        broken_links=broken_links,
    )
# =============================================================================
# Backlinks & Outgoing Links
# =============================================================================
@router.get("/api/v1/documents/{document_id}/backlinks", response_model=BacklinksResponse)
async def get_backlinks(
    request: Request,
    document_id: str,
    db: AsyncSession = Depends(get_db),
):
    """
    Get documents that reference this document (incoming links).

    Candidates are found with a cheap LIKE prefilter on outgoing_links,
    then confirmed by parsing the JSON array, so substring collisions
    can never produce a false backlink. (Replaces the previous dead
    double-parse of row.content and the per-row LIKE re-query, which
    was neither exact nor necessary.)
    """
    await _get_doc_with_access(request, document_id, db)
    result = await db.execute(
        text("""
        SELECT d.id, d.title, d.project_id, d.content, d.outgoing_links,
               d.updated_at, p.name as project_name
        FROM active_documents d
        JOIN active_projects p ON d.project_id = p.id
        WHERE d.outgoing_links LIKE :pattern
        AND d.is_deleted = 0
        ORDER BY d.updated_at DESC
        """),
        {"pattern": f"%{document_id}%"}
    )
    backlinks = []
    for row in result.fetchall():
        # Exact membership check: the LIKE above is only a prefilter.
        try:
            outgoing = json.loads(row.outgoing_links) if row.outgoing_links else []
        except json.JSONDecodeError:
            continue
        if document_id not in outgoing:
            continue
        # Show the referencing text around the [[uuid]] occurrence.
        excerpt = _build_backlink_excerpt(row.content or "", document_id)
        backlinks.append(BacklinkItem(
            document_id=row.id,
            title=row.title,
            project_id=row.project_id,
            project_name=row.project_name,
            excerpt=excerpt,
            updated_at=row.updated_at,
        ))
    return BacklinksResponse(
        document_id=document_id,
        backlinks_count=len(backlinks),
        backlinks=backlinks,
    )
@router.get("/api/v1/documents/{document_id}/outgoing-links", response_model=OutgoingLinksResponse)
async def get_outgoing_links(
    request: Request,
    document_id: str,
    db: AsyncSession = Depends(get_db),
):
    """
    Get documents that this document references (outgoing links).

    References whose target no longer appears in active_documents are
    reported with exists=False and a placeholder title.
    """
    doc = await _get_doc_with_access(request, document_id, db)
    # Stored as a JSON array of document ids; treat malformed data as empty.
    try:
        referenced_ids = json.loads(doc.outgoing_links) if doc.outgoing_links else []
    except json.JSONDecodeError:
        referenced_ids = []
    items = []
    for ref_id in referenced_ids:
        lookup = await db.execute(
            text("""
            SELECT d.id, d.title, d.project_id, d.updated_at,
                   p.name as project_name
            FROM active_documents d
            JOIN active_projects p ON d.project_id = p.id
            WHERE d.id = :target_id
            """),
            {"target_id": ref_id}
        )
        hit = lookup.fetchone()
        if hit is None:
            # Dangling reference: target was deleted after being linked.
            items.append(OutgoingLinkItem(
                document_id=ref_id,
                title="[Deleted Document]",
                project_id="",
                project_name="",
                exists=False,
                updated_at=None,
            ))
        else:
            items.append(OutgoingLinkItem(
                document_id=hit.id,
                title=hit.title,
                project_id=hit.project_id,
                project_name=hit.project_name,
                exists=True,
                updated_at=hit.updated_at,
            ))
    return OutgoingLinksResponse(
        document_id=document_id,
        outgoing_links_count=len(items),
        outgoing_links=items,
    )
@router.get("/api/v1/documents/{document_id}/links", response_model=LinksResponse)
async def get_links(
    request: Request,
    document_id: str,
    db: AsyncSession = Depends(get_db),
):
    """
    Get all incoming and outgoing links for a document.
    """
    doc = await _get_doc_with_access(request, document_id, db)
    # Outgoing: resolve each stored id to a live document; skip the rest.
    try:
        stored_ids = json.loads(doc.outgoing_links) if doc.outgoing_links else []
    except json.JSONDecodeError:
        stored_ids = []
    outgoing = []
    for ref_id in stored_ids:
        found = await db.execute(
            select(Document).where(Document.id == ref_id, Document.is_deleted == False)
        )
        target = found.scalar_one_or_none()
        if target is not None:
            outgoing.append(LinkItem(
                document_id=target.id,
                title=target.title,
                anchor_text=None,
            ))
    # Incoming: LIKE prefilter, then exact JSON membership check per row.
    candidates = await db.execute(
        text("""
        SELECT d.id, d.title, d.outgoing_links
        FROM active_documents d
        WHERE d.outgoing_links LIKE :pattern
        AND d.is_deleted = 0
        """),
        {"pattern": f"%{document_id}%"}
    )
    backlinks = []
    for row in candidates.fetchall():
        try:
            row_links = json.loads(row.outgoing_links) if row.outgoing_links else []
        except json.JSONDecodeError:
            continue
        if document_id in row_links:
            backlinks.append(LinkItem(
                document_id=row.id,
                title=row.title,
                anchor_text=None,
            ))
    return LinksResponse(
        document_id=document_id,
        outgoing_links=outgoing,
        backlinks=backlinks,
    )
def _build_backlink_excerpt(content: str, target_id: str, context_chars: int = 150) -> str:
"""Build an excerpt around the [[target_id]] reference in content."""
# Find the [[uuid]] pattern in content
pattern = r'\[\[' + re.escape(target_id) + r'(?:\|[^\]]+)?\]\]'
match = re.search(pattern, content, re.IGNORECASE)
if not match:
return content[:context_chars * 2] or ""
start = max(0, match.start() - context_chars)
end = min(len(content), match.end() + context_chars)
excerpt = content[start:end]
if start > 0:
excerpt = "..." + excerpt
if end < len(content):
excerpt = excerpt + "..."
return excerpt
# =============================================================================
# Project Graph
# =============================================================================
@router.get("/api/v1/projects/{project_id}/graph", response_model=GraphResponse)
async def get_project_graph(
    request: Request,
    project_id: str,
    depth: int = Query(2, ge=1, le=3),
    db: AsyncSession = Depends(get_db),
):
    """
    Get the full graph of document relationships within a project.

    NOTE(review): `depth` is validated (1..3) but currently unused —
    the whole project graph is always returned. The parameter is kept
    for API compatibility until hop-limited traversal is implemented.
    (Also removed a `reachable` set that was built but never read.)
    """
    await _get_project_with_access(request, project_id, db)
    docs_result = await db.execute(
        select(Document).where(
            Document.project_id == project_id,
            Document.is_deleted == False,
        )
    )
    all_docs = docs_result.scalars().all()
    # Adjacency over live project docs only; edges keep per-occurrence
    # multiplicity while the adjacency sets de-duplicate.
    doc_map = {doc.id: doc for doc in all_docs}
    adjacency: dict[str, set[str]] = {doc.id: set() for doc in all_docs}
    edges = []
    total_references = 0
    for doc in all_docs:
        outgoing_ids = []
        if doc.outgoing_links:
            try:
                outgoing_ids = json.loads(doc.outgoing_links)
            except json.JSONDecodeError:
                pass
        for target_id in outgoing_ids:
            # Skip references that leave the project or point at deleted docs.
            if target_id in doc_map:
                adjacency[doc.id].add(target_id)
                edges.append(GraphEdge(source=doc.id, target=target_id, type="reference"))
                total_references += 1
    nodes = [GraphNode(id=doc.id, title=doc.title, type="document") for doc in all_docs]
    # Orphaned = documents with no incoming and no outgoing edges.
    incoming_count: dict[str, int] = {doc.id: 0 for doc in all_docs}
    for targets in adjacency.values():
        for target_id in targets:
            # Targets always come from doc_map, so the key exists.
            incoming_count[target_id] += 1
    orphaned = sum(
        1 for doc in all_docs
        if incoming_count[doc.id] == 0 and not adjacency[doc.id]
    )
    return GraphResponse(
        project_id=project_id,
        nodes=nodes,
        edges=edges,
        stats=GraphStats(
            total_documents=len(all_docs),
            total_references=total_references,
            orphaned_documents=orphaned,
        ),
    )