# r360mx-docs-converter/apply_template.py

#!/usr/bin/env python3
"""
apply_template.py - Conversión de ENERGY REPORT a formato corporativo R360MX.

Aplica la plantilla oficial (portada + disclaimer + índice + contraportada)
a uno o varios documentos ENERGY REPORT de RatedPower.

Uso:
  # Simple
  python3 apply_template.py informe.docx plantilla.docx

  # Con opciones
  python3 apply_template.py informe.docx plantilla.docx -o salida.docx -v

  # Modo batch (procesa todo un directorio)
  python3 apply_template.py --batch ./informes/ plantilla.docx -v

  # Dry-run (solo muestra lo que haría)
  python3 apply_template.py informe.docx plantilla.docx --dry-run -v
"""

import sys
import os
import re
import copy
import logging
import argparse
import zipfile
import json
from pathlib import Path
from datetime import datetime
from lxml import etree

# OOXML namespaces, keyed by the short prefix accepted by q() and nsmap().
NS = {
    'w':  'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
    'r':  'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
    'a':  'http://schemas.openxmlformats.org/drawingml/2006/main',
    'wp': 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing',
    'mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
    'ct': 'http://schemas.openxmlformats.org/package/2006/content-types',
    'rel': 'http://schemas.openxmlformats.org/package/2006/relationships',
}

# Module-wide logger; handlers and level are configured by setup_logging().
log = logging.getLogger('r360mx')

# ======================================================================
# STYLE MAPPING: source -> template
# ======================================================================
# Paragraph style ids found in the source report (keys) and the style id
# written in their place (values). Used by replace_content() when the caller
# passes no explicit style_map.
DEFAULT_STYLE_MAP = {
    'Title1': 'Título 1',
    'Title2': 'Título 2',
    'Title3': 'Título 3',
    'Title2Index': 'Title2Index',
    'TableContentEnd': 'TableContentEnd',
}


# ======================================================================
# UTILIDADES XML
# ======================================================================

def parse_xml(content: bytes) -> etree._Element:
    """Parse raw XML bytes and return the root element of the document."""
    root = etree.fromstring(content)
    return root


def q(tag: str) -> str:
    """Expand a prefixed tag such as 'w:body' into Clark notation ('{uri}body').

    The prefix must be a key of the module-level NS table.
    """
    prefix, local = tag.split(':')
    uri = NS[prefix]
    return '{' + uri + '}' + local


def nsmap(*prefixes: str) -> dict:
    """Return a prefix -> namespace URI mapping for the requested prefixes."""
    mapping = {}
    for prefix in prefixes:
        mapping[prefix] = NS[prefix]
    return mapping


def get_style_id(p_element: etree._Element) -> str | None:
    """Return the paragraph's style id (w:pPr/w:pStyle/@w:val), or None if absent."""
    props = p_element.find(q('w:pPr'))
    style_node = props.find(q('w:pStyle')) if props is not None else None
    return style_node.get(q('w:val')) if style_node is not None else None


def get_para_text(p_element: etree._Element) -> str:
    """Concatenate the text of every descendant w:t element, in document order."""
    pieces = []
    for node in p_element.findall('.//' + q('w:t')):
        pieces.append(node.text or '')
    return ''.join(pieces)


def get_paras(body: etree._Element) -> list:
    """Return the direct children of body that are paragraphs or tables, in order."""
    wanted = (q('w:p'), q('w:tbl'))
    selected = []
    for node in body:
        if node.tag in wanted:
            selected.append(node)
    return selected


def collect_image_refs(xml_root: etree._Element) -> list[tuple]:
    """Return (blip_element, rId) pairs for every a:blip carrying a non-empty r:embed."""
    blip_tag = f'{{{NS["a"]}}}blip'
    embed_attr = f'{{{NS["r"]}}}embed'
    return [
        (node, node.get(embed_attr))
        for node in xml_root.iter(blip_tag)
        if node.get(embed_attr)
    ]


class DocxError(Exception):
    """Raised when a DOCX file cannot be validated or processed."""


# ======================================================================
# DETECCIÓN INTELIGENTE DE SECCIONES
# ======================================================================

class SectionDetector:
    """
    Locate the structural boundaries of the template and of the source report.

    All methods work on the direct children of a ``w:body`` element and return
    an index into ``list(body)``. Detection relies on paragraph style ids,
    textual markers and paragraph-level section breaks.
    """

    # Style ids used as markers. Kept for backward compatibility; the methods
    # below spell the style ids out literally.
    MARKER_STYLES = {
        'indice_fin': 'TableContentEnd',
        'titulo_contenido': 'Título 1',
    }

    # Index returned by find_content_start() when no marker, numbered heading
    # or section break is found.
    FALLBACK_CONTENT_START = 69

    # A numbered heading at or below this index is assumed to belong to the
    # table of contents rather than to the real content.
    MIN_HEADING_INDEX = 20

    @staticmethod
    def _has_section_break(child: etree._Element) -> bool:
        """Return True if child is a paragraph whose w:pPr contains a w:sectPr."""
        if child.tag != q('w:p'):
            return False
        pPr = child.find(q('w:pPr'))
        return pPr is not None and pPr.find(q('w:sectPr')) is not None

    @staticmethod
    def find_end_of_preface(body: etree._Element) -> int:
        """
        Return the index of the last child belonging to the template preface
        (cover + disclaimer + table of contents).

        Search order:
          1. The first paragraph styled ``TableContentEnd`` or ``ContentStart``,
             or whose text is ``<<CONTENT_START>>`` (case-insensitive).
          2. Otherwise the first non-empty ``Título 1`` paragraph: a section
             break found in the three children before it wins; if there is
             none, the child immediately before the heading is returned.
          3. Otherwise 0.
        """
        children = list(body)
        para_tag = q('w:p')

        for i, child in enumerate(children):
            if child.tag != para_tag:
                continue
            style_id = get_style_id(child)
            if style_id == 'TableContentEnd':
                log.debug("  Marker 'TableContentEnd' encontrado en hijo %d", i)
                return i
            if style_id == 'ContentStart':
                log.debug("  Marker 'ContentStart' encontrado en hijo %d", i)
                return i
            if get_para_text(child).strip().upper() == '<<CONTENT_START>>':
                log.debug("  Marker textual '<<CONTENT_START>>' en hijo %d", i)
                return i

        # Fallback: first "Título 1" heading that carries text.
        for i, child in enumerate(children):
            if child.tag != para_tag:
                continue
            if get_style_id(child) != 'Título 1' or not get_para_text(child).strip():
                continue
            # A section break just before the heading is the real boundary.
            for j in range(max(0, i - 3), i):
                if SectionDetector._has_section_break(children[j]):
                    log.debug("  Salto de sección antes de Título 1 en hijo %d", j)
                    return j
            return i - 1 if i > 0 else 0
        return 0

    @staticmethod
    def find_back_cover_start(body: etree._Element) -> int:
        """
        Return the index where the template back cover starts.

        Children are scanned from last to first; the first paragraph styled
        ``BackCover`` or containing 'RENOVABLES 360' / 'RENEWABLE 360'
        (case-insensitive) wins. When nothing matches, the index of the last
        child is returned.
        """
        children = list(body)
        para_tag = q('w:p')
        for i in range(len(children) - 1, -1, -1):
            child = children[i]
            if child.tag != para_tag:
                continue
            if get_style_id(child) == 'BackCover':
                log.debug("  Marker 'BackCover' en hijo %d (desde el final)", i)
                return i
            text = get_para_text(child).strip().upper()
            if 'RENOVABLES 360' in text or 'RENEWABLE 360' in text:
                log.debug("  Texto 'RENOVABLES 360' en hijo %d (desde el final)", i)
                return i
        return len(children) - 1

    @staticmethod
    def find_content_start(body: etree._Element) -> int:
        """
        Return the index of the first child of real content in the source.

        Search order:
          1. The child right after the first ``TableContentEnd`` paragraph.
             The marker is authoritative, so the later heuristics are skipped
             (previously the section-break heuristic ran anyway and silently
             overrode it).
          2. The first numbered ``Title1`` / ``Título 1`` heading located
             after index MIN_HEADING_INDEX.
          3. The child after the first paragraph-level section break.
          4. FALLBACK_CONTENT_START.
        """
        children = list(body)
        para_tag = q('w:p')

        # 1. Explicit end-of-TOC marker.
        for i, child in enumerate(children):
            if child.tag == para_tag and get_style_id(child) == 'TableContentEnd':
                log.debug("  Marker TableContentEnd en source, hijo %d", i)
                return i + 1

        # 2. First numbered heading far enough from the top to be content.
        for i, child in enumerate(children):
            if child.tag != para_tag:
                continue
            if get_style_id(child) not in ('Title1', 'Título 1'):
                continue
            text = get_para_text(child).strip()
            if not text:
                continue
            numbered = re.match(r'^\d+\.?\s', text) or re.match(r'^[IVXLCDM]+\.\s', text)
            if numbered and i > SectionDetector.MIN_HEADING_INDEX:
                log.debug("  Primer título numerado en source hijo %d: '%s'", i, text[:50])
                return i

        # 3. First section break: content is taken to start right after it.
        for i, child in enumerate(children):
            if SectionDetector._has_section_break(child):
                log.debug("  Salto de sección en source hijo %d", i)
                return i + 1 if i + 1 < len(children) else i

        return SectionDetector.FALLBACK_CONTENT_START


# ======================================================================
# REMAPEO DE ESTILOS
# ======================================================================

def remap_styles(xml_root: etree._Element, style_map: dict) -> int:
    """Rewrite paragraph style ids in place according to style_map.

    Every w:p under xml_root whose w:pStyle value is a key of style_map with a
    truthy replacement gets that replacement. Returns the number of
    paragraphs rewritten.
    """
    val_attr = q('w:val')
    remapped = 0
    for paragraph in xml_root.iter(q('w:p')):
        props = paragraph.find(q('w:pPr'))
        style_node = props.find(q('w:pStyle')) if props is not None else None
        if style_node is None:
            continue
        replacement = style_map.get(style_node.get(val_attr))
        if replacement:
            style_node.set(val_attr, replacement)
            remapped += 1
    return remapped


# ======================================================================
# MANEJO DE IMÁGENES
# ======================================================================

def get_image_number(filename: str) -> int:
    """Return N from the first 'imageN.' occurrence in filename, or 0 if there is none."""
    match = re.search(r'image(\d+)\.', filename)
    if match is None:
        return 0
    return int(match.group(1))


def find_all_rids_in_template(z_tmpl: zipfile.ZipFile) -> set:
    """Return the set of relationship ids declared in the template's document rels.

    A template without word/_rels/document.xml.rels yields an empty set and a
    warning instead of an error.
    """
    try:
        rels_root = parse_xml(z_tmpl.read('word/_rels/document.xml.rels'))
    except KeyError:
        log.warning("  No se encontró word/_rels/document.xml.rels en el template")
        return set()
    return {rel.get('Id') for rel in rels_root if rel.get('Id')}


def find_next_available_rid(existing_rids: set, start: int = 40) -> int:
    """Return the lowest number N >= start such that 'rId<N>' is not taken.

    Args:
        existing_rids: relationship ids already in use (e.g. {'rId1', 'rId7'}).
            Entries that do not look like 'rId<number>' are ignored.
        start: first number to consider. The default of 40 keeps new ids clear
            of the low range used by the template.

    Returns:
        The numeric part of the first free relationship id.
    """
    prefix = 'rId'
    taken = set()
    for rid in existing_rids:
        if not rid.startswith(prefix):
            continue
        try:
            # Slice by the prefix length: the previous rid[4:] dropped the
            # first digit, so e.g. 'rId40' was recorded as 0.
            taken.add(int(rid[len(prefix):]))
        except ValueError:
            # Non-numeric suffix (e.g. 'rIdFoo') cannot collide with 'rId<N>'.
            pass

    candidate = start
    while candidate in taken:
        candidate += 1
    return candidate


def collect_src_relations(z_src: zipfile.ZipFile) -> tuple[dict, etree._Element]:
    """Read the source document relationships.

    Returns a pair (image_targets, rels_root): image_targets maps the rId of
    every relationship whose Type contains 'image' to its Target (with
    backslashes normalised to '/'); rels_root is the parsed rels element.
    """
    rels_root = parse_xml(z_src.read('word/_rels/document.xml.rels'))
    image_targets = {
        rel.get('Id'): rel.get('Target', '').replace('\\', '/')
        for rel in rels_root
        if 'image' in rel.get('Type', '')
    }
    return image_targets, rels_root


def rename_source_images(
    z_tmpl: zipfile.ZipFile,
    z_src: zipfile.ZipFile,
    src_rids: dict,
    src_start: int,
    body_src: etree._Element,
) -> tuple[dict, dict]:
    """
    Plan new media names and relationship ids for the source images so that
    they cannot collide with anything already in the template.

    Only images referenced (via a:blip/@r:embed) by body children at index
    >= src_start are considered; images used solely by the discarded front
    matter are left out.

    Args:
        z_tmpl: open template package; its media names and rIds are reserved.
        z_src: open source package (unused here, kept for interface stability).
        src_rids: rId -> Target of the source image relationships.
        src_start: index of the first source body child that is kept.
        body_src: the source w:body element.

    Returns:
        (image_rename_map, rid_rename_map): old package path -> new package
        path, and old rId -> new rId.
    """
    existing_tmpl_media = {
        name for name in z_tmpl.namelist()
        if name.startswith('word/media/')
    }
    # Every rId we must not reuse: the template's own plus the ones we hand out.
    used_rids = set(find_all_rids_in_template(z_tmpl))

    # rIds referenced by the part of the source that survives the merge.
    kept_rids = set()
    for child in list(body_src)[src_start:]:
        kept_rids.update(rid for _, rid in collect_image_refs(child))

    src_items = []
    for old_rid, rel_target in src_rids.items():
        if old_rid not in kept_rids:
            continue
        rel_path = rel_target.replace('../', '')
        old_abs = rel_path if rel_path.startswith('word/') else f'word/{rel_path}'
        src_items.append((get_image_number(old_abs), old_rid, old_abs))

    # Deterministic numbering: follow the original image order.
    src_items.sort()

    image_rename_map = {}
    rid_rename_map = {}
    generated = set()
    candidate = find_next_available_rid(used_rids)

    for _, old_rid, old_abs in src_items:
        # Path.suffix is '' for extension-less names, where rsplit('.')[1]
        # used to raise IndexError.
        suffix = Path(old_abs).suffix
        while True:
            new_abs = f'word/media/image{candidate}{suffix}'
            new_rid = f'rId{candidate}'
            # The same number is used for the file name and the rId, so it
            # must be free in BOTH namespaces. Previously only the first
            # candidate was checked against the template rIds.
            if (new_abs not in existing_tmpl_media
                    and new_abs not in generated
                    and new_rid not in used_rids):
                break
            candidate += 1

        image_rename_map[old_abs] = new_abs
        rid_rename_map[old_rid] = new_rid
        generated.add(new_abs)
        used_rids.add(new_rid)
        log.debug("    %s -> %s (rId: %s -> %s)", old_abs, new_abs, old_rid, new_rid)
        candidate += 1

    return image_rename_map, rid_rename_map


def update_document_title(xml_root: etree._Element, source_title: str, source_subtitle: str = ""):
    """Fill the cover placeholders of the template with the real title.

    The first body paragraph whose text contains both "Cliente" and
    "Project Title" is treated as the cover title. Inside it, every w:t has
    "Project Title" replaced by source_title and "Subtitle" replaced by
    source_subtitle (or removed when the subtitle is empty). Only that first
    matching paragraph is touched.

    The two placeholders are handled independently, so "Subtitle" is also
    replaced when it lives in a different run than "Project Title", and in a
    single pass, so placeholder words occurring inside the inserted title or
    subtitle are left alone.
    """
    body = xml_root.find(q('w:body'))
    if body is None:
        return

    replacements = {"Project Title": source_title, "Subtitle": source_subtitle or ""}
    pattern = re.compile("|".join(re.escape(key) for key in replacements))

    for child in body:
        if child.tag != q('w:p'):
            continue
        text = get_para_text(child)
        if "Cliente" not in text or "Project Title" not in text:
            continue

        title_replaced = False
        for t_elem in child.findall(f'.//{q("w:t")}'):
            t_text = t_elem.text or ""
            if not t_text:
                continue
            if "Project Title" in t_text:
                title_replaced = True
            new_text = pattern.sub(lambda m: replacements[m.group(0)], t_text)
            if new_text != t_text:
                t_elem.text = new_text

        if not title_replaced:
            # The paragraph matched on its concatenated text, but no single
            # w:t holds the whole placeholder (it is split across runs).
            log.warning("  Marcador 'Project Title' repartido en varios runs; título no actualizado")
        break


def extract_source_title(source_xml: etree._Element) -> tuple[str, str]:
    """Return (title, subtitle) taken from the first main heading of the source.

    The first body paragraph styled 'Title1' or 'Título 1' that contains text
    is used. Its first non-empty line is the title and its second non-empty
    line, if any, the subtitle. Lines are delimited by literal newlines in the
    text and by w:br (text-wrapping) / w:cr elements, which is how Word stores
    manual line breaks; splitting the concatenated w:t text alone never found
    them. Falls back to ("Documento sin título", "") when no heading is found.
    """
    body = source_xml.find(q('w:body'))
    if body is None:
        return "Documento sin título", ""

    para_tag = q('w:p')
    t_tag, br_tag, cr_tag = q('w:t'), q('w:br'), q('w:cr')
    type_attr = q('w:type')

    for child in body:
        if child.tag != para_tag:
            continue
        if get_style_id(child) not in ('Title1', 'Título 1'):
            continue

        pieces = []
        for node in child.iter(t_tag, br_tag, cr_tag):
            if node.tag == t_tag:
                pieces.append(node.text or '')
            elif node.tag == cr_tag or node.get(type_attr) in (None, 'textWrapping'):
                # Page and column breaks (w:type="page"/"column") are not
                # line separators within the heading.
                pieces.append('\n')

        lines = [line.strip() for line in ''.join(pieces).split('\n') if line.strip()]
        if not lines:
            continue
        subtitle = lines[1] if len(lines) > 1 else ""
        return lines[0], subtitle

    return "Documento sin título", ""


# ======================================================================
# FUSIÓN PRINCIPAL
# ======================================================================

def _declare_image_content_types(
    tmpl_content_types: bytes,
    z_src: zipfile.ZipFile,
    extensions: set,
) -> bytes:
    """Return the template [Content_Types].xml with Default entries added for
    every extension in `extensions` (lower-case, no dot) it does not declare.

    The content type of each missing extension is copied from the source
    package's own [Content_Types].xml; extensions the source does not declare
    either are reported with a warning and left out.
    """
    default_tag = f'{{{NS["ct"]}}}Default'
    ct_root = parse_xml(tmpl_content_types)
    declared = {(d.get('Extension') or '').lower() for d in ct_root.iter(default_tag)}
    missing = extensions - declared
    if not missing:
        return tmpl_content_types

    try:
        src_ct_root = parse_xml(z_src.read('[Content_Types].xml'))
    except KeyError:
        src_defaults = {}
    else:
        src_defaults = {
            (d.get('Extension') or '').lower(): d.get('ContentType')
            for d in src_ct_root.iter(default_tag)
        }

    for ext in sorted(missing):
        content_type = src_defaults.get(ext)
        if not content_type:
            log.warning("  Sin content-type declarado para la extensión .%s", ext)
            continue
        entry = etree.Element(default_tag, nsmap={None: NS['ct']})
        entry.set('Extension', ext)
        entry.set('ContentType', content_type)
        ct_root.insert(0, entry)

    return etree.tostring(ct_root, xml_declaration=True, encoding='UTF-8', standalone=True)


def replace_content(
    template_path: str | Path,
    source_docx_path: str | Path,
    output_path: str | Path,
    style_map: dict | None = None,
) -> Path:
    """
    Core of the conversion: merge template + source into a single document.

    The output body is: template preface (up to and including the index
    returned by SectionDetector.find_end_of_preface), then the source content
    from SectionDetector.find_content_start onwards (without the body-level
    w:sectPr), then the template from SectionDetector.find_back_cover_start
    to the end. All other package parts come from the template; images
    referenced by the kept source content are copied under fresh names/rIds.

    Args:
        template_path: corporate template .docx.
        source_docx_path: report .docx to convert.
        output_path: destination .docx; must differ from both inputs.
        style_map: source style id -> template style id; defaults to
            DEFAULT_STYLE_MAP.

    Returns:
        output_path as a Path.

    Raises:
        DocxError: an input is missing, is not a zip, lacks a required part
            or a w:body, or the output would overwrite an input.

    NOTE(review): only image relationships (a:blip/@r:embed) are carried over
    from the source. Other relationship references inside the copied content
    (hyperlinks, charts, header/footer references of paragraph-level section
    breaks) are not remapped — confirm the reports do not rely on them.
    """
    style_map = style_map or DEFAULT_STYLE_MAP
    template_path = Path(template_path)
    source_docx_path = Path(source_docx_path)
    output_path = Path(output_path)

    # ---- Validation ----
    if not template_path.exists():
        raise DocxError(f"Template no encontrado: {template_path}")
    if not source_docx_path.exists():
        raise DocxError(f"Documento no encontrado: {source_docx_path}")
    if not zipfile.is_zipfile(template_path):
        raise DocxError(f"El template no es un DOCX válido: {template_path}")
    if not zipfile.is_zipfile(source_docx_path):
        raise DocxError(f"El documento fuente no es un DOCX válido: {source_docx_path}")
    if output_path.resolve() in (template_path.resolve(), source_docx_path.resolve()):
        # The output is written while both inputs are still open for reading.
        raise DocxError(f"La salida sobrescribiría un archivo de entrada: {output_path}")

    rels_name = 'word/_rels/document.xml.rels'
    embed_attr = f'{{{NS["r"]}}}embed'

    # Context managers close both packages even if opening the second fails.
    with zipfile.ZipFile(str(template_path), 'r') as z_tmpl, \
            zipfile.ZipFile(str(source_docx_path), 'r') as z_src:
        # ---- Read XML ----
        try:
            tmpl_xml = parse_xml(z_tmpl.read('word/document.xml'))
            src_xml = parse_xml(z_src.read('word/document.xml'))
            tmpl_rels_bytes = z_tmpl.read(rels_name)
            src_rids, src_rel = collect_src_relations(z_src)
        except KeyError as exc:
            raise DocxError(f"Falta una parte obligatoria del DOCX: {exc}") from exc

        body_tmpl = tmpl_xml.find(q('w:body'))
        body_src = src_xml.find(q('w:body'))

        if body_tmpl is None:
            raise DocxError("El template no tiene body")
        if body_src is None:
            raise DocxError("El documento fuente no tiene body")

        children_tmpl = list(body_tmpl)
        children_src = list(body_src)

        # ---- Remap styles in the source ----
        changes = remap_styles(src_xml, style_map)
        log.info("  Estilos reasignados: %d", changes)

        # ---- Detect boundaries ----
        tmpl_idx_end = SectionDetector.find_end_of_preface(body_tmpl)
        tmpl_back = SectionDetector.find_back_cover_start(body_tmpl)
        src_start = SectionDetector.find_content_start(body_src)

        log.info("  Template: prefacio h. hijo %d, contraportada h. hijo %d", tmpl_idx_end, tmpl_back)
        log.info("  Source: contenido real empieza en hijo %d", src_start)

        # ---- Extract the source title and write it on the template cover ----
        source_title, source_subtitle = extract_source_title(src_xml)
        log.info("  Título del source: %s", source_title)
        if source_subtitle:
            log.info("  Subtítulo del source: %s", source_subtitle)
        update_document_title(tmpl_xml, source_title, source_subtitle)

        # ---- Plan new names / rIds for the source images ----
        image_rename_map, rid_rename_map = rename_source_images(
            z_tmpl, z_src, src_rids, src_start, body_src
        )
        log.info("  Imágenes renombradas: %d", len(image_rename_map))

        # ---- Align the template TOC fields with the source captions ----
        # The template TOC fields use the English caption labels
        # (\c "Figure", \c "Table") while the source captions use "Figura"
        # and "Tabla". Every applicable fix is applied to each field.
        toc_fixes = {
            '\\c "Figure"': '\\c "Figura"',
            '\\c "Table"': '\\c "Tabla"',
        }
        for instr in tmpl_xml.iter(q('w:instrText')):
            if not instr.text:
                continue
            fixed = instr.text
            for old, new in toc_fixes.items():
                if old in fixed:
                    fixed = fixed.replace(old, new)
                    log.debug("  Campo TOC corregido: %s -> %s", old, new)
            if fixed != instr.text:
                instr.text = fixed

        # ---- Merge bodies ----
        for child in list(body_tmpl):
            body_tmpl.remove(child)

        # Template preface.
        for child in children_tmpl[:tmpl_idx_end + 1]:
            body_tmpl.append(copy.deepcopy(child))

        # Source content (from src_start, without the body-level sectPr).
        # rIds are rewritten on the source clones ONLY: rewriting the merged
        # tree would also repoint template images that happen to share an
        # rId with a source image.
        for child in children_src[src_start:]:
            if child.tag == q('w:sectPr'):
                continue
            clone = copy.deepcopy(child)
            for blip, old_rid in collect_image_refs(clone):
                if old_rid in rid_rename_map:
                    blip.set(embed_attr, rid_rename_map[old_rid])
            body_tmpl.append(clone)

        # Template back cover (includes the template's final sectPr).
        for child in children_tmpl[tmpl_back:]:
            body_tmpl.append(copy.deepcopy(child))

        # ---- Build the output package ----
        # 1. Start from the template (its own parts are never modified here).
        out_data = {item.filename: z_tmpl.read(item.filename) for item in z_tmpl.infolist()}

        # 2. Add the renamed source images.
        copied_extensions = set()
        for old_abs, new_abs in image_rename_map.items():
            try:
                out_data[new_abs] = z_src.read(old_abs)
            except KeyError:
                log.warning("  Imagen no encontrada en source: %s (ignorada)", old_abs)
                continue
            ext = Path(new_abs).suffix.lstrip('.').lower()
            if ext:
                copied_extensions.add(ext)

        # 3. Add the image relationships under their new ids.
        rel_root = parse_xml(tmpl_rels_bytes)
        existing_rids = {rel.get('Id') for rel in rel_root if rel.get('Id')}
        src_rel_by_id = {}
        for rel in src_rel:
            src_rel_by_id.setdefault(rel.get('Id'), rel)

        for old_rid, new_rid in rid_rename_map.items():
            if new_rid in existing_rids:
                log.debug("  rId %s ya existe en template, se omite", new_rid)
                continue
            rel = src_rel_by_id.get(old_rid)
            if rel is None:
                continue
            # Same normalisation as collect_src_relations(), so the lookup
            # key matches the one used to build image_rename_map.
            target = rel.get('Target', '').replace('\\', '/')
            old_target_abs = target.replace('../', '')
            if not old_target_abs.startswith('word/'):
                old_target_abs = f'word/{old_target_abs}'
            new_target_abs = image_rename_map.get(old_target_abs, old_target_abs)
            # Targets in document.xml.rels are relative to word/.
            new_rel = copy.deepcopy(rel)
            new_rel.set('Id', new_rid)
            new_rel.set('Target', new_target_abs.removeprefix('word/'))
            rel_root.append(new_rel)
            existing_rids.add(new_rid)

        # 4. Declare the content type of image extensions new to the template.
        if copied_extensions and '[Content_Types].xml' in out_data:
            out_data['[Content_Types].xml'] = _declare_image_content_types(
                out_data['[Content_Types].xml'], z_src, copied_extensions
            )

        out_data[rels_name] = etree.tostring(
            rel_root, xml_declaration=True, encoding='UTF-8', standalone=True)
        out_data['word/document.xml'] = etree.tostring(
            tmpl_xml, xml_declaration=True, encoding='UTF-8', standalone=True)

        # ---- Write ----
        with zipfile.ZipFile(str(output_path), 'w', zipfile.ZIP_DEFLATED) as zout:
            for fname, content in out_data.items():
                zout.writestr(fname, content)

    log.info("  ✅ Convertido: %s", output_path)
    return output_path


# ======================================================================
# CLI
# ======================================================================

def setup_logging(verbose: bool = False):
    """Configure the module logger to print bare messages on stderr.

    The level is DEBUG when verbose is true, INFO otherwise. The call is
    idempotent: the stderr handler is attached once and recognised by name on
    later calls, so calling main() repeatedly does not duplicate every line.
    (The previous guard counted a freshly created handler, which is always
    present exactly once, so it never prevented duplicates.)
    """
    handler_name = 'r360mx-cli'
    log.setLevel(logging.DEBUG if verbose else logging.INFO)

    if any(existing.get_name() == handler_name for existing in log.handlers):
        return

    handler = logging.StreamHandler(sys.stderr)
    handler.set_name(handler_name)
    handler.setFormatter(logging.Formatter('%(message)s'))
    log.addHandler(handler)


def validate_docx(path: Path, label: str) -> None:
    """Check that path is a readable DOCX package.

    Args:
        path: file to check.
        label: human-readable name used as the prefix of error messages.

    Raises:
        DocxError: the file does not exist, is not a zip archive, cannot be
            opened, or has no 'word/document.xml' part.
    """
    if not path.exists():
        raise DocxError(f"{label} no encontrado: {path}")
    if not zipfile.is_zipfile(path):
        raise DocxError(f"{label} no es un DOCX válido: {path}")
    try:
        with zipfile.ZipFile(str(path), 'r') as z:
            names = z.namelist()
    except (zipfile.BadZipFile, zipfile.LargeZipFile, OSError) as e:
        raise DocxError(f"{label} corrupto: {e}") from e
    # Checked outside the try block so this error is not re-labelled as
    # "corrupto" by the handler above.
    if 'word/document.xml' not in names:
        raise DocxError(f"{label} no contiene 'word/document.xml'")


def find_docx_files(directory: Path) -> list[Path]:
    """Return the .docx files directly inside directory, sorted (not recursive)."""
    matches = list(directory.glob('*.docx'))
    matches.sort()
    return matches


def build_output_path(source: Path, output: str | None, suffix: str = '_r360mx') -> Path:
    """Construye la ruta de salida."""
    if output:
        return Path(output)
    return source.parent / f"{source.stem}{suffix}.docx"


def run_single(args) -> int:
    """Convert a single document; returns the process exit code (0 on success).

    Both inputs are validated first. With --dry-run nothing is written.
    Validation and conversion errors propagate as DocxError.
    """
    source_path = Path(args.documento)
    template_path = Path(args.plantilla)
    output_path = build_output_path(source_path, args.output)

    for message, value in (
        ("📄 Template: %s", template_path),
        ("📄 Documento: %s", source_path),
        ("📄 Salida:    %s", output_path),
    ):
        log.info(message, value)

    validate_docx(source_path, "Documento")
    validate_docx(template_path, "Plantilla")

    if not args.dry_run:
        replace_content(template_path, source_path, output_path)
    else:
        log.info("  🏁 Dry-run: todo correcto, no se genera nada.")
    return 0


def run_batch(args) -> int:
    """Convert every .docx of a directory; returns 1 if any file failed, else 0.

    The template is taken from args.plantilla, falling back to args.documento:
    with the documented call `--batch DIR plantilla.docx` argparse binds the
    single positional to `documento`, leaving `plantilla` as None.

    Files that are not real inputs are skipped: Word lock files ('~$...'),
    the template itself, and outputs of a previous run (stem ending in the
    output suffix). With --dry-run each input is only validated.
    """
    suffix = '_r360mx'
    input_dir = Path(args.batch_dir)

    template_arg = args.plantilla or args.documento
    if not template_arg:
        log.error("Falta la plantilla para el modo batch")
        return 1
    template = Path(template_arg)

    if not input_dir.is_dir():
        log.error("El directorio no existe: %s", input_dir)
        return 1

    validate_docx(template, "Plantilla")

    template_resolved = template.resolve()
    docx_files = []
    for candidate in find_docx_files(input_dir):
        if (candidate.name.startswith('~$')
                or candidate.stem.endswith(suffix)
                or candidate.resolve() == template_resolved):
            log.debug("  Omitido: %s", candidate.name)
            continue
        docx_files.append(candidate)

    if not docx_files:
        log.warning("  No se encontraron archivos .docx en %s", input_dir)
        return 0

    total = len(docx_files)
    ok = 0
    failed = 0

    log.info("📦 Procesando %d documento(s) en lote...", total)
    log.info("📄 Template: %s", template)

    for idx, source in enumerate(docx_files, 1):
        output = build_output_path(source, None, suffix)
        log.info("[%d/%d] %s -> %s", idx, total, source.name, output.name)

        try:
            if args.dry_run:
                validate_docx(source, "Documento")
            else:
                replace_content(template, source, output)
        except DocxError as e:
            log.error("  ❌ Error: %s", e)
            failed += 1
        except Exception as e:
            # Batch boundary: one broken file must not abort the whole run.
            log.error("  ❌ Error inesperado: %s", e)
            failed += 1
        else:
            ok += 1

    log.info("")
    log.info("═══════════════════════════════════")
    log.info("  Resumen: %d OK, %d fallos de %d", ok, failed, total)
    log.info("═══════════════════════════════════")
    return 1 if failed > 0 else 0


def run_dump_styles(args) -> int:
    """Log the styles and the body structure of a DOCX, for debugging.

    The document is the value given to --dump-styles (args.dump_styles),
    falling back to the positional args.documento. Reading only the
    positional made the documented call `--dump-styles informe.docx` fail,
    because that call leaves the positional unset.

    Returns 0. Raises DocxError if no document was given or it is invalid.
    """
    target = getattr(args, 'dump_styles', None) or getattr(args, 'documento', None)
    if not target:
        raise DocxError("No se indicó ningún documento para --dump-styles")
    path = Path(target)
    validate_docx(path, "Documento")

    with zipfile.ZipFile(str(path), 'r') as z:
        if 'word/styles.xml' in z.namelist():
            styles_xml = parse_xml(z.read('word/styles.xml'))
            styles = styles_xml.findall(f'.//{q("w:style")}')
            log.info("Estilos en %s:", path)
            for style in styles:
                style_id = style.get(q('w:styleId'))
                style_type = style.get(q('w:type'))
                name_elem = style.find(q('w:name'))
                name = name_elem.get(q('w:val')) if name_elem is not None else ''
                log.info("  %-20s type=%-10s name=%s", style_id or '', style_type or '', name)
        else:
            log.warning("  No se encontró word/styles.xml")

        # Body structure: style id and leading text of the first 100
        # paragraphs/tables.
        doc_xml = parse_xml(z.read('word/document.xml'))
        body = doc_xml.find(q('w:body'))
        if body is not None:
            paras = get_paras(body)
            log.info("\nEstructura del body (%d elementos):", len(paras))
            for i, child in enumerate(paras[:100]):
                is_para = child.tag == q('w:p')
                style_id = get_style_id(child) if is_para else '[TABLE]'
                text = get_para_text(child)[:80] if is_para else ''
                log.info("  [%4d] %-20s %s", i, style_id or '', text)
    return 0


def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser.

    Options: -v/--verbose, --dry-run, --dump-styles DOCUMENTO,
    --batch DIRECTORIO (stored as batch_dir) and -o/--output. Positionals:
    `documento` and `plantilla`, both optional (nargs='?').
    """
    usage_examples = """
Ejemplos:
  %(prog)s informe.docx plantilla.docx
  %(prog)s informe.docx plantilla.docx -o salida.docx -v
  %(prog)s --batch ./informes/ plantilla.docx -v
  %(prog)s --dump-styles informe.docx
  %(prog)s informe.docx plantilla.docx --dry-run -v
        """

    # (flags, keyword arguments), in registration order. The order matters:
    # it fixes both the help layout and the order of the positionals.
    argument_specs = [
        (('-v', '--verbose'), dict(action='store_true', help='Modo verbose (debug)')),
        (('--dry-run',), dict(action='store_true', help='Valida sin generar archivos')),
        (('--dump-styles',), dict(
            metavar='DOCUMENTO',
            help='Inspecciona los estilos y estructura de un DOCX',
        )),
        (('--batch',), dict(
            metavar='DIRECTORIO',
            dest='batch_dir',
            help='Modo batch: procesa todos los .docx del directorio',
        )),
        (('documento',), dict(nargs='?', help='Documento ENERGY REPORT .docx')),
        (('plantilla',), dict(nargs='?', help='Plantilla R360MX .docx')),
        (('-o', '--output'), dict(help='Archivo de salida (solo modo single)')),
    ]

    parser = argparse.ArgumentParser(
        description='Convierte documentos ENERGY REPORT al formato corporativo R360MX.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    for flags, options in argument_specs:
        parser.add_argument(*flags, **options)
    return parser


def main(argv: list[str] | None = None) -> int:
    """CLI entry point; returns the process exit code.

    Mode precedence: --dump-styles, then --batch, then single-document mode,
    which needs both positionals (otherwise the help is printed and 1 is
    returned). DocxError and unexpected exceptions are logged and mapped to
    1, Ctrl-C to 130.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    setup_logging(args.verbose)

    try:
        if args.dump_styles:
            exit_code = run_dump_styles(args)
        elif args.batch_dir:
            exit_code = run_batch(args)
        elif args.documento and args.plantilla:
            exit_code = run_single(args)
        else:
            parser.print_help()
            exit_code = 1
    except DocxError as e:
        log.error("❌ %s", e)
        return 1
    except KeyboardInterrupt:
        log.info("\nInterrumpido por el usuario.")
        return 130
    except Exception as e:
        log.exception("❌ Error inesperado: %s", e)
        return 1
    return exit_code


# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == '__main__':
    sys.exit(main())