Files
tracker-cli/tracker/storage/markdown_reader.py
Daniel Arroyo 4547c492da Implement storage layer for MVP-1 Personal Tracker CLI
Add storage layer with FileStorage, MarkdownReader, and MarkdownWriter classes.
Add data models (Project, Session, Note, Change).
2026-03-23 08:54:00 -03:00

139 lines
4.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Markdown reader utility."""
import re
from datetime import datetime
from typing import Optional
class MarkdownReader:
    """Parse the Markdown files used by the tracker.

    Covers LOG.md entries, AUTOGEN-delimited blocks, and TASKS.md.
    All methods are pure functions of their ``content`` argument; no file
    I/O happens here.
    """

    # Result key -> bold section title exactly as written in LOG.md.
    # The titles are runtime text matched against the file; keep them as-is.
    _SECTION_TITLES = {
        "objective": "Objetivo",
        "work_done": "Trabajo realizado",
        "changes": "Cambios relevantes",
        "blockers": "Bloqueos",
        "decisions": "Decisiones",
        "next_steps": "Próximos pasos",
        "summary": "Resumen",
    }

    # Keys whose section body is returned as a list of bullet items.
    _LIST_SECTIONS = ("work_done", "changes", "blockers", "decisions", "next_steps")

    # "## YYYY-MM-DD HH:MM-HH:MM". The time separator may be a hyphen, an
    # en dash (U+2013) or an em dash (U+2014); escapes are used so the
    # source file carries no ambiguous Unicode characters.
    _DATE_RANGE_RE = re.compile(
        r"##\s+(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}[ \t]*[-\u2013\u2014][ \t]*\d{2}:\d{2})"
    )

    # One "- item" bullet per line. Horizontal whitespace only, so an empty
    # bullet can never swallow the following line.
    _BULLET_RE = re.compile(r"^[ \t]*-[ \t]+(.+)$", re.MULTILINE)

    # "## Section name" header line in TASKS.md.
    _SECTION_HEADER_RE = re.compile(r"^##\s+(.+)$")

    # "- [ ] text" / "- [x] text" / "- [X] text" checkbox line in TASKS.md.
    _TASK_RE = re.compile(r"^\s*-\s+\[([ xX])\]\s*(.+)$")

    @staticmethod
    def _section_body(content: str, title: str, stop_at_bold: bool = True) -> str:
        """Return the stripped text under the bold ``**title**`` header.

        The body ends at the next line starting with ``##``, at the end of
        ``content``, or (when ``stop_at_bold`` is true) at the next line
        starting with ``**``. Returns "" when the header is missing or the
        section is empty.
        """
        stop = r"\n\*\*|\n##|\Z" if stop_at_bold else r"\n##|\Z"
        # The newline after the header is only looked at, not consumed, so
        # the stop lookahead can fire immediately for an empty section
        # instead of running on into the next section's header.
        pattern = (
            rf"\*\*{re.escape(title)}\*\*[^\S\n]*(?=\n|\Z)(.*?)(?={stop})"
        )
        match = re.search(pattern, content, re.DOTALL)
        return match.group(1).strip() if match else ""

    def parse_log_entry(self, content: str) -> dict:
        """Parse one LOG.md entry.

        Expected format::

            ## 2026-03-23 10:00-11:20
            **Objetivo**
            ...
            **Trabajo realizado**
            - ...
            **Cambios relevantes**
            - ...
            **Bloqueos**
            - ...
            **Decisiones**
            - ...
            **Próximos pasos**
            - ...
            **Resumen**
            ...

        The time range separator may be a hyphen, an en dash or an em dash.
        Only the first occurrence of each header in ``content`` is used.

        Returns:
            A dict with the keys:

            - date_range: str, the text after "## " ("" if not found)
            - objective: str
            - work_done: list of str
            - changes: list of str
            - blockers: list of str
            - decisions: list of str
            - next_steps: list of str
            - summary: str

            Missing or empty sections yield "" (text) or [] (lists).
        """
        result = {
            "date_range": "",
            "objective": "",
            "work_done": [],
            "changes": [],
            "blockers": [],
            "decisions": [],
            "next_steps": [],
            "summary": "",
        }

        date_match = self._DATE_RANGE_RE.search(content)
        if date_match:
            result["date_range"] = date_match.group(1)

        for key, title in self._SECTION_TITLES.items():
            # The summary is the last section and may itself contain lines
            # starting with bold text, so it only stops at "##" or the end.
            body = self._section_body(content, title, stop_at_bold=(key != "summary"))
            if key in self._LIST_SECTIONS:
                stripped = (item.strip() for item in self._BULLET_RE.findall(body))
                result[key] = [item for item in stripped if item]
            else:
                result[key] = body

        return result

    def extract_autogen_section(self, content: str, section: str) -> str:
        """Extract the content of an AUTOGEN section.

        Looks for ``<!-- AUTOGEN:{section}_START -->`` ...
        ``<!-- AUTOGEN:{section}_END -->`` and returns the stripped text
        between the two markers, or "" if the markers are not present.

        ``section`` is matched literally: regex metacharacters in the name
        are escaped.
        """
        name = re.escape(section)
        pattern = (
            rf"<!--\s*AUTOGEN:{name}_START\s*-->(.*?)<!--\s*AUTOGEN:{name}_END\s*-->"
        )
        match = re.search(pattern, content, re.DOTALL)
        return match.group(1).strip() if match else ""

    def parse_tasks(self, content: str) -> dict:
        """Parse TASKS.md into its sections.

        Expected sections ("## " headers):
            - Inbox
            - Próximo
            - En curso
            - Bloqueado
            - En espera
            - Hecho

        Any "## " header is accepted, not only the names above. Lines before
        the first header are ignored. Within a section, "- [ ] text" and
        "- [x] text" (or "[X]") lines become tasks; any other non-blank line
        is appended to the text of the preceding task in that section.
        If a header appears more than once, its tasks are merged under one
        key instead of the later occurrence replacing the earlier one.

        Returns:
            A dict mapping section name -> list of
            ``{"text": str, "done": bool}`` in file order.
        """
        result = {}
        current_tasks = None  # task list of the section being read
        last_task = None  # most recent task in the current section occurrence

        for line in content.splitlines():
            header = self._SECTION_HEADER_RE.match(line)
            if header:
                current_tasks = result.setdefault(header.group(1).strip(), [])
                # A continuation line must never attach to a task that was
                # read before this header.
                last_task = None
                continue

            if current_tasks is None:
                continue

            task = self._TASK_RE.match(line)
            if task:
                last_task = {
                    "text": task.group(2).strip(),
                    "done": task.group(1).lower() == "x",
                }
                current_tasks.append(last_task)
            elif last_task is not None and line.strip():
                last_task["text"] += " " + line.strip()

        return result