# tracker-cli/tracker/storage/markdown_reader.py

"""Markdown reader utility."""

import re
from datetime import datetime
from typing import Optional


class MarkdownReader:
    """Parse the project's Markdown documents (log entries, AUTOGEN blocks, tasks).

    Every method receives the Markdown text as a string and returns plain
    Python data; no method reads instance state.
    """

    # Result key -> bold heading that introduces that section in a log entry.
    # The heading texts are part of the file format and must stay as written.
    _LOG_HEADINGS = {
        "objective": "Objetivo",
        "work_done": "Trabajo realizado",
        "changes": "Cambios relevantes",
        "blockers": "Bloqueos",
        "decisions": "Decisiones",
        "next_steps": "Próximos pasos",
        "summary": "Resumen",
    }

    # Sections returned as free text; every other section is a bullet list.
    _LOG_TEXT_KEYS = frozenset({"objective", "summary"})

    def parse_log_entry(self, content: str) -> dict:
        """Parse one LOG.md entry into a dict.

        Expected format:
            ## 2026-03-23 10:00–11:20
            **Objetivo**
            ...
            **Trabajo realizado**
            - ...
            **Cambios relevantes**
            - ...
            **Bloqueos**
            - ...
            **Decisiones**
            - ...
            **Próximos pasos**
            - ...
            **Resumen**
            ...

        A section body runs from the line after its heading up to the next
        line that starts with ``**`` or ``##`` (or the end of the text). The
        summary section is only ended by a line starting with ``##``, so it
        may contain lines that begin with bold text.

        Args:
            content: Markdown text of the entry.

        Returns:
            A dict with these keys; missing or empty sections keep the
            default (empty string or empty list):
            - date_range: str, e.g. "2026-03-23 10:00–11:20"
            - objective: str
            - work_done: list[str]
            - changes: list[str]
            - blockers: list[str]
            - decisions: list[str]
            - next_steps: list[str]
            - summary: str
        """
        headings = MarkdownReader._LOG_HEADINGS
        text_keys = MarkdownReader._LOG_TEXT_KEYS

        result = {"date_range": ""}
        for key in headings:
            result[key] = "" if key in text_keys else []

        # Normalize Windows line endings so bullet items do not keep a "\r".
        content = content.replace("\r\n", "\n")

        # Entry header: date plus a start/end time joined by "–" or "-".
        date_match = re.search(
            r"##\s+(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}[–-]\d{2}:\d{2})", content
        )
        if date_match:
            result["date_range"] = date_match.group(1)

        for key, heading in headings.items():
            stop = r"^##" if key == "summary" else r"^\*\*|^##"
            # The stop markers are anchored at line start (MULTILINE) instead
            # of at a preceding "\n": the heading's own newline is already
            # consumed, so a "\n**" lookahead would miss a heading that
            # directly follows an empty section and swallow the next section.
            pattern = rf"\*\*{re.escape(heading)}\*\*[ \t]*\n(.*?)(?={stop}|\Z)"
            match = re.search(pattern, content, re.DOTALL | re.MULTILINE)
            if match is None:
                continue

            text = match.group(1).strip()
            if key in text_keys:
                result[key] = text
            else:
                # Keep only "- item" bullet lines; other lines are ignored.
                bullets = re.findall(r"^\s*-\s+(.+)$", text, re.MULTILINE)
                result[key] = [item.strip() for item in bullets]

        return result

    def extract_autogen_section(self, content: str, section: str) -> str:
        """Return the text enclosed by a pair of AUTOGEN markers.

        Looks for ``<!-- AUTOGEN:{section}_START -->`` followed by
        ``<!-- AUTOGEN:{section}_END -->`` and returns what lies between the
        first such pair, stripped of surrounding whitespace.

        Args:
            content: Markdown text to search.
            section: Section name, matched literally.

        Returns:
            The enclosed text, or an empty string when the markers are absent.
        """
        # Escape the name so regex metacharacters in it are matched literally
        # instead of altering (or breaking) the pattern.
        name = re.escape(section)
        pattern = rf"<!--\s*AUTOGEN:{name}_START\s*-->(.*?)<!--\s*AUTOGEN:{name}_END\s*-->"
        match = re.search(pattern, content, re.DOTALL)
        if match:
            return match.group(1).strip()
        return ""

    def parse_tasks(self, content: str) -> dict:
        """Parse TASKS.md into a mapping of section name -> list of tasks.

        Expected sections (any ``## `` header is accepted):
        - Inbox
        - Próximo
        - En curso
        - Bloqueado
        - En espera
        - Hecho

        Tasks are checkbox bullets (``- [ ] text`` / ``- [x] text``; an
        uppercase ``X`` also counts as checked). Any other non-blank line is
        appended to the text of the preceding task in the same section; lines
        before the first section header, or before the first task of a
        section, are ignored. If a section name appears twice, the later
        occurrence replaces the earlier one.

        Args:
            content: Markdown text of the tasks file.

        Returns:
            Dict mapping each section name to a list of
            ``{"text": str, "done": bool}`` dicts.
        """
        result = {}
        current_section = None
        current_tasks = []

        for line in content.split("\n"):
            section_match = re.match(r"^##\s+(.+)$", line)
            if section_match:
                # A new header closes the section collected so far.
                if current_section is not None:
                    result[current_section] = current_tasks
                current_section = section_match.group(1).strip()
                current_tasks = []
                continue

            if current_section is None:
                continue

            task_match = re.match(r"^\s*-\s+\[([ xX])\]\s*(.+)$", line)
            if task_match:
                current_tasks.append(
                    {
                        "text": task_match.group(2).strip(),
                        "done": task_match.group(1).lower() == "x",
                    }
                )
            elif line.strip() and current_tasks:
                # Continuation line: fold it into the previous task's text.
                current_tasks[-1]["text"] += " " + line.strip()

        # Store the section that was still open at end of input.
        if current_section is not None:
            result[current_section] = current_tasks

        return result