// recall/src/lib/related.ts

import { prisma } from '@/lib/prisma'
import { getUsageStats } from '@/lib/usage'

// Words that carry no signal for content matching. They are kept in three
// source lists (English, Spanish, generic technical vocabulary) and merged
// into a single Set for membership checks. A word that is valid in more than
// one list (e.g. 'a', 'he', 'no', 'example') is listed in each; the Set
// collapses the repeats.

/** English function words plus generic documentation filler. */
const ENGLISH_STOP_WORDS: readonly string[] = [
  'the', 'a', 'an', 'and', 'or', 'but',
  'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'from', 'as',
  'is', 'was', 'are', 'were', 'been', 'be', 'have', 'has', 'had',
  'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might',
  'must', 'shall', 'can', 'need', 'dare', 'ought', 'used',
  'it', 'its', 'this', 'that', 'these', 'those',
  'i', 'you', 'he', 'she', 'we', 'they',
  'what', 'which', 'who', 'whom', 'whose', 'where', 'when', 'why', 'how',
  'all', 'each', 'every', 'both', 'few', 'more', 'most', 'other', 'some',
  'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too',
  'very', 'just', 'also', 'now', 'here', 'there', 'then', 'once', 'if',
  'your', 'our', 'their', 'my', 'his', 'her',
  'into', 'over', 'under', 'after', 'before', 'between', 'through',
  'during', 'above', 'below', 'up', 'down', 'out', 'off', 'about', 'against',
  // Generic filler that shows up in almost any technical note
  'config', 'file', 'files', 'using', 'use', 'example', 'following', 'etc',
  'based', 'include', 'includes', 'included', 'add', 'added', 'adding',
  'see', 'want', 'make', 'made', 'creating', 'create', 'created',
]

/** Spanish function words and very common nouns/adverbs. */
const SPANISH_STOP_WORDS: readonly string[] = [
  'el', 'la', 'los', 'las', 'un', 'una', 'unos', 'unas',
  'y', 'o', 'pero',
  'en', 'de', 'a', 'con', 'por', 'para', 'sin', 'sobre', 'entre', 'del',
  'al', 'lo', 'se',
  'es', 'son', 'era', 'eran', 'fue', 'fueron', 'ser',
  'estar', 'está', 'están', 'estaba', 'estaban',
  'he', 'ha', 'han', 'hay', 'haber', 'había', 'habían',
  'tener', 'tiene', 'tienen', 'tenía',
  'hacer', 'hace', 'hacen', 'hizo', 'hicieron',
  'poder', 'puede', 'pueden', 'podía',
  'este', 'esta', 'estos', 'estas', 'ese', 'esa', 'esos', 'esas', 'esto',
  'eso',
  'cual', 'cuales', 'quien', 'quienes', 'cuyo', 'cuyos', 'donde',
  'cuando', 'como', 'porque', 'ya', 'aun', 'aunque', 'si', 'no', 'ni',
  'mi', 'tu', 'su', 'sus', 'nuestro', 'nuestra', 'nuestros', 'nuestras',
  'yo', 'tú', 'él', 'ella', 'ellos', 'ellas', 'nosotros', 'vosotros',
  'ustedes', 'mí', 'ti', 'sí',
  'qué', 'quién', 'cuál', 'cuáles',
  'cuánto', 'cuántos', 'cuánta', 'cuántas',
  // NOTE: 'de dónde' contains a space, so it only matches a lookup of that
  // exact two-word string.
  'dónde', 'adónde', 'de dónde',
  'nada', 'nadie', 'algo', 'alguien', 'todo', 'todos', 'toda', 'todas',
  'cada', 'otro', 'otra', 'otros', 'otras',
  'mismo', 'misma', 'mismos', 'mismas',
  'tanto', 'tanta', 'tantos', 'tantas', 'bastante', 'bastantes',
  'muy', 'más', 'menos', 'mejor', 'peor', 'mucho', 'poco', 'casi', 'solo',
  'solamente', 'también', 'además', 'entonces', 'ahora', 'hoy', 'aquí',
  'allí', 'así', 'tan', 'ver', 'vez',
  'parte', 'manera', 'forma', 'caso', 'casos', 'momento', 'lugar',
  'día', 'días', 'año', 'años', 'mes', 'meses', 'semana', 'semanas',
  'hora', 'horas', 'minuto', 'minutos', 'segundo', 'segundos',
]

/** Common tech/template words (both languages) that cause false positives. */
const GENERIC_TECH_STOP_WORDS: readonly string[] = [
  'command', 'comando', 'description', 'descripción', 'nota',
  'notes', 'notas', 'content', 'contenido', 'code', 'código',
  'ejemplo', 'example', 'steps', 'pasos',
  'item', 'items', 'quantity', 'cantidad',
  'añadir', 'agregar', 'nuevo', 'nueva', 'nuevos', 'nuevas', 'nueces',
]

/** Every word that content matching should ignore (English + Spanish). */
const STOP_WORDS = new Set<string>([
  ...ENGLISH_STOP_WORDS,
  ...SPANISH_STOP_WORDS,
  ...GENERIC_TECH_STOP_WORDS,
])

// Technical terms that count as relevant on their own, grouped by theme and
// merged into one Set for membership checks.

/** Languages, tools, data formats and platforms. */
const STACK_KEYWORDS: readonly string[] = [
  'git', 'docker', 'react', 'typescript', 'javascript', 'python',
  'sql', 'postgres', 'postgresql', 'mysql', 'redis', 'nginx',
  'kubernetes', 'k8s',
  'api', 'http', 'json', 'xml', 'html', 'css',
  'node', 'nodejs', 'npm',
  'bash', 'shell', 'linux', 'ubuntu',
  'aws', 'gcp', 'azure', 'vercel',
  'prisma', 'nextjs', 'next', 'tailwind', 'eslint', 'prettier', 'jest',
]

/** Architecture, protocols and authentication vocabulary. */
const ARCHITECTURE_KEYWORDS: readonly string[] = [
  'database', 'db', 'server', 'client', 'frontend', 'backend', 'fullstack',
  'crud', 'rest', 'graphql', 'websocket',
  'ssh', 'ssl', 'tls', 'jwt',
  'auth', 'authentication', 'authorization', 'cookie', 'session', 'cache',
]

/** Delivery, environments and configuration vocabulary. */
const DELIVERY_KEYWORDS: readonly string[] = [
  'deploy', 'deployment', 'ci', 'cd', 'pipeline', 'docker-compose',
  'container', 'image', 'build', 'test',
  'production', 'staging', 'dev', 'development', 'environment',
  'config', 'configuration', 'variable', 'env',
  'secret', 'key', 'password', 'token',
]

/** Keywords that indicate actual relevance (technical terms). */
const KEYWORDS = new Set<string>([
  ...STACK_KEYWORDS,
  ...ARCHITECTURE_KEYWORDS,
  ...DELIVERY_KEYWORDS,
])

/**
 * Tokenizes free text into lowercase candidate words for similarity matching.
 *
 * The text is lowercased and split on runs of whitespace and the punctuation
 * characters `- _ . , ; : ! ? ( ) [ ] { } ' "`. A token is kept when it is not
 * a stop word and is either longer than two characters or a whitelisted
 * technical keyword. Without the whitelist exception, the two-letter entries
 * in KEYWORDS ('db', 'ci', 'cd') could never survive tokenization.
 *
 * @param text Raw note text (title or content).
 * @returns The surviving tokens in order of appearance; repeats are preserved.
 */
function extractKeywords(text: string): string[] {
  return text
    .toLowerCase()
    .split(/[\s\-_.,;:!?()\[\]{}'"]+/)
    .filter(w => (w.length > 2 || KEYWORDS.has(w)) && !STOP_WORDS.has(w))
}

/**
 * Keeps only the words considered meaningful for matching: whitelisted
 * technical keywords of any length, plus any word of five or more characters.
 *
 * @param words Candidate words to screen.
 * @returns The significant words, in their original order (repeats preserved).
 */
function getSignificantWords(words: string[]): string[] {
  const significant: string[] = []
  for (const word of words) {
    const isTechnicalTerm = KEYWORDS.has(word)
    if (isTechnicalTerm || word.length >= 5) {
      significant.push(word)
    }
  }
  return significant
}

/** A candidate related note together with its relevance score. */
interface ScoredNote {
  // --- Fields copied from the candidate note ---

  /** Identifier of the candidate note. */
  id: string
  /** Title of the candidate note. */
  title: string
  /** Type of the candidate note. */
  type: string
  /** Names of the tags attached to the candidate note. */
  tags: string[]

  // --- Ranking information ---

  /** Total relevance points; higher means more closely related. */
  score: number
  /** Human-readable explanation of the match, with parts joined by ' | '. */
  reason: string
}

/** Second argument passed to getUsageStats (presumably a look-back window in days — confirm against its definition). */
const USAGE_WINDOW_DAYS = 7

/** Number of usage lookups awaited concurrently, to avoid flooding the backing store. */
const USAGE_LOOKUP_BATCH_SIZE = 10

/** Returns the distinct significant words of `text`, in first-seen order. */
function uniqueSignificantWords(text: string): string[] {
  return [...new Set(getSignificantWords(extractKeywords(text)))]
}

/**
 * Computes the usage-based boost for one note: +1 per 5 views (capped at +3),
 * plus +2 when the stats report at least one view or one related-click.
 */
async function getUsageBoost(noteId: string): Promise<number> {
  const usage = await getUsageStats(noteId, USAGE_WINDOW_DAYS)
  const viewBoost = Math.min(Math.floor(usage.views / 5), 3)
  const recencyBoost = usage.views >= 1 || usage.relatedClicks >= 1 ? 2 : 0
  return viewBoost + recencyBoost
}

/**
 * Finds the notes most closely related to the given note.
 *
 * Every other note is scored against the source note:
 *  - +3 when both notes have the same type
 *  - +3 per shared tag
 *  - +1 per distinct shared title word, capped at +3
 *  - +1 per distinct shared content word, capped at +2
 *  - a usage boost (see getUsageBoost), applied only to notes that already
 *    matched on at least one of the criteria above
 *
 * A note that matches on none of the content criteria is never returned, no
 * matter how heavily it is used.
 *
 * @param noteId Identifier of the source note.
 * @param limit Maximum number of results; values that are not greater than
 *   zero yield an empty list.
 * @returns Up to `limit` scored notes, highest score first. Returns an empty
 *   list when the source note does not exist.
 */
export async function getRelatedNotes(noteId: string, limit = 5): Promise<ScoredNote[]> {
  // Guard against 0, negatives and NaN: a negative limit would otherwise reach
  // slice(0, -n) and silently drop results from the end of the ranking.
  if (!(limit > 0)) return []

  const note = await prisma.note.findUnique({
    where: { id: noteId },
    include: { tags: { include: { tag: true } } },
  })

  if (!note) return []

  const noteTagNames: string[] = note.tags.map(t => t.tag.name)
  // De-duplicated so that a word repeated in the source note counts once.
  const noteTitleWords = uniqueSignificantWords(note.title)
  const noteContentWords = uniqueSignificantWords(note.content)

  const allNotes = await prisma.note.findMany({
    where: { id: { not: noteId } },
    include: { tags: { include: { tag: true } } },
  })

  // Pass 1: content-based scoring (synchronous). Only notes with at least one
  // match reason become candidates.
  const candidates: ScoredNote[] = []

  for (const other of allNotes) {
    let score = 0
    const reasons: string[] = []

    // +3 when the notes share a type
    if (other.type === note.type) {
      score += 3
      reasons.push(`Same type (${note.type})`)
    }

    // +3 per shared tag
    const otherTagNames: string[] = other.tags.map(t => t.tag.name)
    const otherTagSet = new Set(otherTagNames)
    const sharedTags = noteTagNames.filter(name => otherTagSet.has(name))
    score += sharedTags.length * 3
    if (sharedTags.length > 0) {
      reasons.push(`Tags: ${sharedTags.join(', ')}`)
    }

    // +1 per distinct shared title word, capped at +3. The source-side words
    // are already filtered for significance, so the other side needs no filter.
    const otherTitleWords = new Set(extractKeywords(other.title))
    const sharedTitleWords = noteTitleWords.filter(w => otherTitleWords.has(w))
    score += Math.min(sharedTitleWords.length, 3)
    if (sharedTitleWords.length > 0) {
      reasons.push(`Title: ${sharedTitleWords.slice(0, 2).join(', ')}`)
    }

    // +1 per distinct shared content word, capped at +2
    const otherContentWords = new Set(extractKeywords(other.content))
    const sharedContentWords = noteContentWords.filter(w => otherContentWords.has(w))
    score += Math.min(sharedContentWords.length, 2)
    if (sharedContentWords.length > 0) {
      reasons.push(`Content: ${sharedContentWords.slice(0, 2).join(', ')}`)
    }

    // Every reason contributes at least one point, so "has a reason" implies
    // a positive score.
    if (reasons.length === 0) continue

    candidates.push({
      id: other.id,
      title: other.title,
      type: other.type,
      tags: otherTagNames,
      score,
      reason: reasons.join(' | '),
    })
  }

  // Pass 2: usage boost (small, does not eclipse content matching). Lookups are
  // issued only for candidates and awaited in bounded parallel batches rather
  // than one at a time for every note.
  // NOTE(review): assumes getUsageStats is a read-only lookup — confirm.
  for (let start = 0; start < candidates.length; start += USAGE_LOOKUP_BATCH_SIZE) {
    const batch = candidates.slice(start, start + USAGE_LOOKUP_BATCH_SIZE)
    const boosts = await Promise.all(batch.map(candidate => getUsageBoost(candidate.id)))
    batch.forEach((candidate, i) => {
      candidate.score += boosts[i] ?? 0
    })
  }

  // `candidates` is local, so sorting in place is safe.
  return candidates
    .sort((a, b) => b.score - a.score)
    .slice(0, limit)
}