# r360mx-docs-converter/apply_template.py

#!/usr/bin/env python3
"""
apply_template.py - Conversión de ENERGY REPORT a formato corporativo R360MX.

Aplica la plantilla oficial (portada + disclaimer + índice + contraportada)
a uno o varios documentos ENERGY REPORT de RatedPower.

Uso:
  # Simple
  python3 apply_template.py informe.docx plantilla.docx

  # Con opciones
  python3 apply_template.py informe.docx plantilla.docx -o salida.docx -v

  # Modo batch (procesa todo un directorio)
  python3 apply_template.py --batch ./informes/ plantilla.docx -v

  # Dry-run (solo muestra lo que haría)
  python3 apply_template.py informe.docx plantilla.docx --dry-run -v
"""

import sys
import os
import re
import copy
import logging
import argparse
import zipfile
import json
from pathlib import Path
from datetime import datetime
from lxml import etree

# OOXML namespace URIs keyed by the short prefixes used throughout this module.
# q() and nsmap() resolve prefixes through this table.
NS = {
    'w':  'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
    'r':  'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
    'a':  'http://schemas.openxmlformats.org/drawingml/2006/main',
    'wp': 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing',
    'mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
    'ct': 'http://schemas.openxmlformats.org/package/2006/content-types',
    'rel': 'http://schemas.openxmlformats.org/package/2006/relationships',
}

# Module-wide logger; its level and handler are configured by setup_logging().
log = logging.getLogger('r360mx')

# ======================================================================
# MAPEO DE ESTILOS: source -> template
# ======================================================================
# Default paragraph-style mapping: source style id -> template style id.
# Used by replace_content() when the caller passes no style_map.
DEFAULT_STYLE_MAP = {
    # Main heading styles (RatedPower -> R360MX TEMPLATE)
    # IMPORTANT: the template uses 'Ttulo1/2' WITHOUT the accented letter
    'Title1': 'Ttulo1',
    'Title2': 'Ttulo2',
    'Title3': 'Ttulo3',
    'Title1nfs': 'Title1nfs',

    # Table of contents
    'CustomStyleLevelOne': 'TDC1',
    'CustomStyleLevelTwo': 'TDC2',
    'Title2Index': 'Title2Index',
    'TableContentEnd': 'TableContentEnd',

    # Cover page
    'CoverSubtitle20': 'CoverSubtitle20',

    # Figure/table captions in the body
    'NameTableImg': 'Descripcin',
}


# ======================================================================
# UTILIDADES XML
# ======================================================================

def parse_xml(content: bytes) -> etree._Element:
    """Parse raw XML bytes and return the root element."""
    root = etree.fromstring(content)
    return root


def q(tag: str) -> str:
    """Expand a prefixed tag such as 'w:body' into '{namespace-uri}body'.

    The prefix is looked up in the module-level NS table; the tag must
    contain exactly one ':'.
    """
    prefix, local = tag.split(':')
    namespace = NS[prefix]
    return '{' + namespace + '}' + local


def nsmap(*prefixes: str) -> dict:
    """Return a prefix -> namespace URI mapping limited to the given prefixes."""
    mapping = {}
    for prefix in prefixes:
        mapping[prefix] = NS[prefix]
    return mapping


def get_style_id(p_element: etree._Element) -> str | None:
    """Return the paragraph's style id (w:pPr/w:pStyle/@w:val), or None if absent."""
    props = p_element.find(q('w:pPr'))
    style = props.find(q('w:pStyle')) if props is not None else None
    return style.get(q('w:val')) if style is not None else None


def get_para_text(p_element: etree._Element) -> str:
    """Return the concatenated text of every w:t descendant of the element."""
    text_nodes = p_element.findall('.//' + q('w:t'))
    pieces = [node.text if node.text else '' for node in text_nodes]
    return ''.join(pieces)


def get_paras(body: etree._Element) -> list:
    """Return the body's direct w:p and w:tbl children, in document order."""
    wanted_tags = (q('w:p'), q('w:tbl'))
    selected = []
    for node in body:
        if node.tag in wanted_tags:
            selected.append(node)
    return selected


def collect_image_refs(xml_root: etree._Element) -> list[tuple]:
    """Return (blip_element, rId) pairs for every a:blip with a non-empty r:embed."""
    blip_tag = f'{{{NS["a"]}}}blip'
    embed_attr = f'{{{NS["r"]}}}embed'
    return [
        (node, node.get(embed_attr))
        for node in xml_root.iter(blip_tag)
        if node.get(embed_attr)
    ]


def _find_section_boundaries(body: etree._Element) -> list[int]:
    """
    Return the indices of the body children that carry a w:sectPr.

    A child counts when it is itself a w:sectPr or when it is a w:p whose
    w:pPr contains a w:sectPr. Each such child marks the END of a section.
    """
    sect_tag = q('w:sectPr')
    para_tag = q('w:p')
    props_tag = q('w:pPr')

    def closes_section(node) -> bool:
        if node.tag == sect_tag:
            return True
        if node.tag != para_tag:
            return False
        props = node.find(props_tag)
        return props is not None and props.find(sect_tag) is not None

    return [index for index, node in enumerate(body) if closes_section(node)]


class DocxError(Exception):
    """Raised when a DOCX file is missing, invalid, or lacks the expected parts."""


# ======================================================================
# DETECCIÓN INTELIGENTE DE SECCIONES
# ======================================================================

class SectionDetector:
    """
    Locate key section boundaries in the template and in the source document.

    Detection relies on marker styles, marker text and section breaks. Every
    method is static and returns an index into ``list(body)``.
    """

    # Style ids used as structural markers.
    MARKER_STYLES = {
        'indice_fin': 'TableContentEnd',
        'titulo_contenido': 'Ttulo1',  # the template uses Ttulo1 (no accent)
    }

    # Heading-1 style ids accepted in either document.
    _HEADING1_STYLES = ('Ttulo1', 'Title1', 'Título 1')

    @staticmethod
    def _has_section_break(node: etree._Element) -> bool:
        """Return True if node is a w:p whose w:pPr contains a w:sectPr."""
        if node.tag != q('w:p'):
            return False
        props = node.find(q('w:pPr'))
        return props is not None and props.find(q('w:sectPr')) is not None

    @staticmethod
    def find_end_of_preface(body: etree._Element) -> int:
        """
        Return the index of the last preface child of the template.

        Search order:
          1. The first paragraph styled 'TableContentEnd' or 'ContentStart',
             or whose text is '<<CONTENT_START>>' (case-insensitive).
          2. Otherwise the first non-empty heading-1 paragraph: a section
             break in the three children before it wins, else the child just
             before the heading.
        Returns 0 when nothing matches.
        """
        children = list(body)
        para_tag = q('w:p')

        for i, child in enumerate(children):
            if child.tag != para_tag:
                continue
            style_id = get_style_id(child)
            if style_id == 'TableContentEnd':
                log.debug("  Marker 'TableContentEnd' encontrado en hijo %d", i)
                return i
            if style_id == 'ContentStart':
                log.debug("  Marker 'ContentStart' encontrado en hijo %d", i)
                return i
            text = get_para_text(child).strip()
            if text.upper() == '<<CONTENT_START>>':
                log.debug("  Marker textual '<<CONTENT_START>>' en hijo %d", i)
                return i

        # Fallback: first heading 1 that looks like real content.
        for i, child in enumerate(children):
            if child.tag != para_tag:
                continue
            if get_style_id(child) not in SectionDetector._HEADING1_STYLES:
                continue
            if not get_para_text(child).strip():
                continue
            # A section break right before the heading is the boundary.
            for j in range(max(0, i - 3), i):
                if SectionDetector._has_section_break(children[j]):
                    log.debug("  Salto de sección antes de Título 1 en hijo %d", j)
                    return j
            # Otherwise the paragraph just before the first heading 1.
            return i - 1 if i > 0 else 0
        return 0

    @staticmethod
    def find_back_cover_start(body: etree._Element) -> int:
        """
        Return the index where the template's back cover starts.

        Scans paragraphs from the end towards the start and returns the first
        one styled 'BackCover' or containing 'RENOVABLES 360' / 'RENEWABLE 360'
        (case-insensitive). Falls back to the index of the last child.
        """
        children = list(body)
        para_tag = q('w:p')
        for i in range(len(children) - 1, -1, -1):
            child = children[i]
            if child.tag != para_tag:
                continue
            if get_style_id(child) == 'BackCover':
                log.debug("  Marker 'BackCover' en hijo %d (desde el final)", i)
                return i
            upper_text = get_para_text(child).strip().upper()
            if 'RENOVABLES 360' in upper_text or 'RENEWABLE 360' in upper_text:
                log.debug("  Texto 'RENOVABLES 360' en hijo %d (desde el final)", i)
                return i
        return len(children) - 1  # last page

    @staticmethod
    def find_content_start(body: etree._Element) -> int:
        """
        Return the index of the first real content child of the source.

        Search order (the first hit wins):
          1. The child right after the first 'TableContentEnd' paragraph.
          2. The first non-empty, numbered heading-1 paragraph located after
             child 20 (earlier ones are assumed to belong to the TOC).
          3. The child right after the first paragraph-level section break.
          4. ``max(len(children) // 2, 10)`` as a last resort.

        Previously the marker found in step 1 was discarded whenever the
        document contained any paragraph-level section break, because step 3
        ran unconditionally and returned first. The marker now takes priority.
        """
        children = list(body)
        para_tag = q('w:p')

        # 1. Explicit end-of-TOC marker: content starts right after it.
        for i, child in enumerate(children):
            if child.tag == para_tag and get_style_id(child) == 'TableContentEnd':
                log.debug("  Marker TableContentEnd en source, hijo %d", i)
                return i + 1

        # 2. No marker: the TOC ends just before the first numbered heading.
        for i, child in enumerate(children):
            if child.tag != para_tag:
                continue
            style_id = get_style_id(child)
            text = get_para_text(child).strip()
            if style_id not in SectionDetector._HEADING1_STYLES or not text:
                continue
            looks_numbered = (
                re.match(r'^\d+\.?\s', text) or re.match(r'^[IVXLCDM]+\.\s', text)
            )
            # Headings near the top are ignored: they are assumed to be TOC entries.
            if looks_numbered and i > 20:
                log.debug("  Primer título numerado en source hijo %d: '%s'", i, text[:50])
                return i

        # 3. First paragraph-level section break as delimiter.
        for i, child in enumerate(children):
            if SectionDetector._has_section_break(child):
                log.debug("  Salto de sección en source hijo %d", i)
                candidate = i + 1
                if candidate < len(children):
                    return candidate
                break

        # 4. Nothing found: assume the TOC takes roughly the first half.
        best = max(len(children) // 2, 10)
        log.debug("  Fallback: contenido empieza en hijo %d (mitad del doc)", best)
        return best


# ======================================================================
# REMAPEO DE ESTILOS
# ======================================================================

def remap_styles(xml_root: etree._Element, style_map: dict) -> int:
    """
    Rewrite paragraph style ids in place according to style_map.

    Only paragraphs whose current w:pStyle value is a key of style_map with a
    truthy replacement are changed. Returns the number of paragraphs rewritten.
    """
    val_attr = q('w:val')
    remapped = 0
    for para in xml_root.iter(q('w:p')):
        props = para.find(q('w:pPr'))
        style = props.find(q('w:pStyle')) if props is not None else None
        if style is None:
            continue
        current = style.get(val_attr)
        if current not in style_map:
            continue
        replacement = style_map[current]
        if not replacement:
            continue
        style.set(val_attr, replacement)
        remapped += 1
    return remapped


# ======================================================================
# MANEJO DE IMÁGENES
# ======================================================================

def get_image_number(filename: str) -> int:
    """Return N from the first 'imageN.' occurrence in filename, or 0 if none."""
    match = re.search(r'image(\d+)\.', filename)
    if match is None:
        return 0
    return int(match.group(1))


def find_all_rids_in_template(z_tmpl: zipfile.ZipFile) -> set:
    """
    Return the set of relationship ids declared in word/_rels/document.xml.rels.

    If the archive has no such part, a warning is logged and an empty set is
    returned.
    """
    try:
        rels_root = parse_xml(z_tmpl.read('word/_rels/document.xml.rels'))
        return {rel.get('Id') for rel in rels_root if rel.get('Id')}
    except KeyError:
        log.warning("  No se encontró word/_rels/document.xml.rels en el template")
        return set()


def find_next_available_rid(existing_rids: set, *, start: int = 40) -> int:
    """
    Return the lowest number N >= start for which 'rId<N>' is not in use.

    Args:
        existing_rids: relationship ids already taken (e.g. {'rId1', 'rId7'}).
            Ids that do not look like 'rId<digits>' are ignored.
        start: first number to consider. The default of 40 keeps the original
            author's assumption that the template uses rId1-rId39.

    Returns:
        The numeric part of the first free relationship id.
    """
    prefix = 'rId'
    used_numbers = set()
    for rid in existing_rids:
        if not rid.startswith(prefix):
            continue
        try:
            # Slice by the prefix length: slicing at 4 dropped the first digit
            # ('rId40' was read as 0), so ids in use looked free.
            used_numbers.add(int(rid[len(prefix):]))
        except ValueError:
            continue

    candidate = start
    while candidate in used_numbers:
        candidate += 1
    return candidate


def collect_src_relations(z_src: zipfile.ZipFile) -> tuple[dict, etree._Element]:
    """
    Read the source's word/_rels/document.xml.rels.

    Returns (image_targets, rels_root): image_targets maps the rId of every
    relationship whose Type contains 'image' to its Target (backslashes
    normalised to '/'); rels_root is the parsed relationships root.
    """
    rels_root = parse_xml(z_src.read('word/_rels/document.xml.rels'))
    image_targets = {}
    for rel in rels_root:
        target = rel.get('Target', '').replace('\\', '/')
        if 'image' not in rel.get('Type', ''):
            continue
        image_targets[rel.get('Id')] = target
    return image_targets, rels_root


def rename_source_images(
    z_tmpl: zipfile.ZipFile,
    z_src: zipfile.ZipFile,
    src_rids: dict,
    src_start: int,
    body_src: etree._Element,
) -> tuple[dict, dict]:
    """
    Plan new media names and relationship ids for the imported source images.

    Only images referenced (via a:blip r:embed) by body children at index
    >= src_start are considered. Every new name 'word/media/image<N>.<ext>'
    and id 'rId<N>' share the same number N, chosen so that neither collides
    with the template's media files, the template's relationship ids, or a
    name generated earlier in this call.

    Args:
        z_tmpl: open template archive (media names and rIds are read from it).
        z_src: open source archive (currently unused; kept for the interface).
        src_rids: source image relationships, rId -> Target.
        src_start: index of the first source body child being imported.
        body_src: the source w:body element.

    Returns:
        (image_rename_map, rid_rename_map): old archive path -> new archive
        path, and old rId -> new rId.
    """
    existing_tmpl_media = {
        name for name in z_tmpl.namelist()
        if name.startswith('word/media/')
    }
    existing_rids = find_all_rids_in_template(z_tmpl)

    # rIds referenced by the children that will actually be imported.
    wanted_rids = set()
    for index, child in enumerate(body_src):
        if index >= src_start:
            wanted_rids.update(rid for _, rid in collect_image_refs(child))

    # Sort by original image number so the new numbering follows the old order.
    src_items = []
    for old_rid, rel_target in src_rids.items():
        if old_rid not in wanted_rids:
            continue
        rel_path = rel_target.replace('../', '')
        old_abs = rel_path if rel_path.startswith('word/') else f'word/{rel_path}'
        src_items.append((get_image_number(old_abs), old_rid, old_abs))
    src_items.sort()

    image_rename_map = {}
    rid_rename_map = {}
    generated = set()
    candidate = find_next_available_rid(existing_rids)

    for _old_num, old_rid, old_abs in src_items:
        # Take the extension from the file name only; a target without one
        # keeps no suffix instead of raising IndexError.
        basename = old_abs.rsplit('/', 1)[-1]
        _stem, dot, ext = basename.rpartition('.')
        suffix = f'.{ext}' if dot else ''

        # Advance until BOTH the media name and the rId are free. Checking
        # only the media name let later rIds collide with template rIds.
        while True:
            new_abs = f'word/media/image{candidate}{suffix}'
            new_rid = f'rId{candidate}'
            name_taken = new_abs in existing_tmpl_media or new_abs in generated
            if not name_taken and new_rid not in existing_rids:
                break
            candidate += 1

        image_rename_map[old_abs] = new_abs
        generated.add(new_abs)
        rid_rename_map[old_rid] = new_rid
        log.debug("    %s -> %s (rId: %s -> %s)", old_abs, new_abs, old_rid, new_rid)
        candidate += 1

    return image_rename_map, rid_rename_map


def update_document_title(xml_root: etree._Element, source_title: str, source_subtitle: str = ""):
    """
    Fill the title placeholder of the first matching body paragraph, in place.

    The target is the first direct w:p child of the body whose text contains
    both "Cliente" and "Project Title". Inside it, every w:t containing
    "Project Title" gets that placeholder replaced by source_title and any
    "Subtitle" replaced by source_subtitle (or removed when it is empty).
    """
    body = xml_root.find(q('w:body'))
    if body is None:
        return
    for para in body:
        if para.tag != q('w:p'):
            continue
        para_text = get_para_text(para)
        if "Cliente" not in para_text or "Project Title" not in para_text:
            continue
        for text_node in para.findall(f'.//{q("w:t")}'):
            current = text_node.text or ""
            if "Project Title" not in current:
                continue
            updated = current.replace("Project Title", source_title)
            subtitle_value = source_subtitle if source_subtitle else ""
            text_node.text = updated.replace("Subtitle", subtitle_value)
        # Only the first matching paragraph is processed.
        break


def extract_source_title(source_xml: etree._Element) -> tuple[str, str]:
    """
    Return (title, subtitle) taken from the first usable heading-1 paragraph.

    Headings that are purely digits or that equal a known table-of-contents
    caption are skipped. The heading text is split on newlines: the first
    line is the title and the second, if any, the subtitle. When nothing
    matches, ("Documento sin título", "") is returned.
    """
    fallback = ("Documento sin título", "")
    body = source_xml.find(q('w:body'))
    if body is None:
        return fallback

    toc_captions = ('Índice', 'Index', 'Contents', 'Tabla de contenido')
    for node in body:
        if node.tag != q('w:p'):
            continue
        if get_style_id(node) not in ('Title1', 'Ttulo1', 'Título 1'):
            continue
        text = get_para_text(node).strip()
        if not text:
            continue
        if re.match(r'^\d+$', text) or text in toc_captions:
            continue
        parts = text.split('\n')
        subtitle = parts[1].strip() if len(parts) > 1 else ""
        return parts[0].strip(), subtitle
    return fallback


# ======================================================================
# FUSIÓN PRINCIPAL
# ======================================================================

def replace_content(
    template_path: str | Path,
    source_docx_path: str | Path,
    output_path: str | Path,
    style_map: dict | None = None,
) -> Path:
    """
    Merge the template and the source report into a single output DOCX.

    The output starts from every part of the template archive. Its body keeps
    the template's front matter and back cover and takes the source body from
    the detected content start onwards. Source paragraph styles are remapped
    through ``style_map``, images used by the imported content are copied under
    new media names and relationship ids, and the cover title placeholder is
    filled from the source's first heading.

    Args:
        template_path: path to the template DOCX.
        source_docx_path: path to the source report DOCX.
        output_path: path of the DOCX to write.
        style_map: source style id -> template style id; defaults to
            DEFAULT_STYLE_MAP.

    Returns:
        output_path as a Path.

    Raises:
        DocxError: a file is missing, is not a zip archive, or has no w:body.
        KeyError: an expected archive part (document.xml or its .rels) is missing.
    """
    style_map = style_map or DEFAULT_STYLE_MAP
    template_path = Path(template_path)
    source_docx_path = Path(source_docx_path)
    output_path = Path(output_path)

    # ---- Validation ----
    if not template_path.exists():
        raise DocxError(f"Template no encontrado: {template_path}")
    if not source_docx_path.exists():
        raise DocxError(f"Documento no encontrado: {source_docx_path}")
    if not zipfile.is_zipfile(template_path):
        raise DocxError(f"El template no es un DOCX válido: {template_path}")
    if not zipfile.is_zipfile(source_docx_path):
        raise DocxError(f"El documento fuente no es un DOCX válido: {source_docx_path}")

    embed_attr = f'{{{NS["r"]}}}embed'

    # Context managers close both archives even when opening the second fails.
    with zipfile.ZipFile(str(template_path), 'r') as z_tmpl, \
            zipfile.ZipFile(str(source_docx_path), 'r') as z_src:
        # ---- Read XML ----
        tmpl_xml = parse_xml(z_tmpl.read('word/document.xml'))
        src_xml = parse_xml(z_src.read('word/document.xml'))
        src_rids, src_rel = collect_src_relations(z_src)

        body_tmpl = tmpl_xml.find(q('w:body'))
        body_src = src_xml.find(q('w:body'))

        if body_tmpl is None:
            raise DocxError("El template no tiene body")
        if body_src is None:
            raise DocxError("El documento fuente no tiene body")

        children_tmpl = list(body_tmpl)
        children_src = list(body_src)

        # ---- Remap source styles ----
        changes = remap_styles(src_xml, style_map)
        log.info("  Estilos reasignados: %d", changes)

        # ---- Detect boundaries ----
        # The template is expected to have 5 sections: cover, disclaimer, TOC,
        # CONTENT, back cover. With at least 4 section ends available, the
        # third one closes the TOC and the fourth closes the content section.
        tmpl_sections = _find_section_boundaries(body_tmpl)
        use_sections = len(tmpl_sections) >= 4
        if not use_sections:
            log.warning("  Template tiene %d secciones, se esperaban al menos 4. Usando detección por estilos.", len(tmpl_sections)+1)
            tmpl_idx_end = SectionDetector.find_end_of_preface(body_tmpl)
            tmpl_back = SectionDetector.find_back_cover_start(body_tmpl)
        else:
            log.info("  Template: %d secciones detectadas", len(tmpl_sections)+1)
            tmpl_idx_end = tmpl_sections[2]
            tmpl_back = tmpl_sections[3]

        src_start = SectionDetector.find_content_start(body_src)

        log.info("  Template: sección contenido entre hijos %d y %d", tmpl_idx_end+1, tmpl_back)
        log.info("  Source: contenido real empieza en hijo %d", src_start)

        # ---- Title: read from source, write into the template cover ----
        source_title, source_subtitle = extract_source_title(src_xml)
        log.info("  Título del source: %s", source_title)
        if source_subtitle:
            log.info("  Subtítulo del source: %s", source_subtitle)
        update_document_title(tmpl_xml, source_title, source_subtitle)

        # ---- Plan image renames ----
        image_rename_map, rid_rename_map = rename_source_images(
            z_tmpl, z_src, src_rids, src_start, body_src
        )
        log.info("  Imágenes renombradas: %d", len(image_rename_map))

        # ---- Fix TOC field captions in the template ----
        # The template's TOC fields use English caption labels while the
        # source captions use the Spanish ones.
        toc_fixes = {
            '\\c "Figure"': '\\c "Figura"',
            '\\c "Table"': '\\c "Tabla"',
        }
        for instr in tmpl_xml.iter(q('w:instrText')):
            if not instr.text:
                continue
            fixed = instr.text
            # Apply every fix: stopping after the first left a second label
            # in the same field untouched.
            for old, new in toc_fixes.items():
                if old in fixed:
                    fixed = fixed.replace(old, new)
                    log.debug("  Campo TOC corregido: %s -> %s", old, new)
            if fixed != instr.text:
                instr.text = fixed

        # ---- Prepare the imported source content ----
        # rIds are rewritten on the source copies only. Rewriting them on the
        # merged document also hit template blips whose rId happened to equal
        # an old source rId, pointing template images at source pictures.
        imported = []
        for child in children_src[src_start:]:
            clone = copy.deepcopy(child)
            for blip, old_rid in collect_image_refs(clone):
                if old_rid in rid_rename_map:
                    blip.set(embed_attr, rid_rename_map[old_rid])
            imported.append(clone)

        # ---- Merge bodies ----
        for child in children_tmpl:
            body_tmpl.remove(child)

        if use_sections:
            # Keep sections 1-3, import the source content (including its
            # sectPr, if any), then keep what follows the content section end.
            head = children_tmpl[:tmpl_idx_end + 1]
            middle = imported
            tail = children_tmpl[tmpl_back + 1:]
        else:
            # Style-based fallback: the source's body-level sectPr is dropped.
            head = children_tmpl[:tmpl_idx_end + 1]
            middle = [node for node in imported if node.tag != q('w:sectPr')]
            tail = children_tmpl[tmpl_back:]

        for child in head:
            body_tmpl.append(copy.deepcopy(child))
        for child in middle:
            body_tmpl.append(child)
        for child in tail:
            body_tmpl.append(copy.deepcopy(child))

        # ---- Build the output archive in memory ----
        # 1. Start from the template: its parts (including media) are copied as-is.
        # NOTE(review): [Content_Types].xml is copied from the template
        # unchanged; confirm it declares every imported image extension.
        out_data = {}
        for item in z_tmpl.infolist():
            out_data[item.filename] = z_tmpl.read(item.filename)

        # 2. Add the source images under their new names.
        for old_abs, new_abs in image_rename_map.items():
            try:
                out_data[new_abs] = z_src.read(old_abs)
            except KeyError:
                log.warning("  Imagen no encontrada en source: %s (ignorada)", old_abs)

        # 3. Add one relationship per imported image (no duplicates).
        rel_root = parse_xml(out_data['word/_rels/document.xml.rels'])
        existing_rids = {rel.get('Id') for rel in rel_root if rel.get('Id')}
        src_rel_by_rid = {rel.get('Id'): rel for rel in src_rel if rel.get('Id')}

        for old_rid, new_rid in rid_rename_map.items():
            if new_rid in existing_rids:
                continue
            src_rel_elem = src_rel_by_rid.get(old_rid)
            if src_rel_elem is None:
                continue

            # Translate the old Target into the renamed, 'word/'-relative one.
            target = src_rel_elem.get('Target', '')
            old_target_abs = target.replace('../', '')
            if not old_target_abs.startswith('word/'):
                old_target_abs = f'word/{old_target_abs}'
            new_target_abs = image_rename_map.get(old_target_abs, old_target_abs)
            new_target = new_target_abs.replace('word/', '') if new_target_abs.startswith('word/') else new_target_abs

            new_rel = copy.deepcopy(src_rel_elem)
            new_rel.set('Id', new_rid)
            new_rel.set('Target', new_target)
            rel_root.append(new_rel)
            existing_rids.add(new_rid)

        out_data['word/_rels/document.xml.rels'] = etree.tostring(
            rel_root, xml_declaration=True, encoding='UTF-8')
        out_data['word/document.xml'] = etree.tostring(
            tmpl_xml, xml_declaration=True, encoding='UTF-8')

        # ---- Write ----
        with zipfile.ZipFile(str(output_path), 'w', zipfile.ZIP_DEFLATED) as zout:
            for fname, content in out_data.items():
                zout.writestr(fname, content)

    log.info("  ✅ Convertido: %s", output_path)
    return output_path


# ======================================================================
# CLI
# ======================================================================

def setup_logging(verbose: bool = False):
    """
    Configure the module logger.

    Sets the level to DEBUG when verbose, INFO otherwise, and attaches a
    message-only stderr handler unless the logger already has a handler.
    """
    log.setLevel(logging.DEBUG if verbose else logging.INFO)
    if log.handlers:
        return
    stream_handler = logging.StreamHandler(sys.stderr)
    stream_handler.setFormatter(logging.Formatter('%(message)s'))
    log.addHandler(stream_handler)


def validate_docx(path: Path, label: str) -> None:
    """
    Check that path is a readable DOCX archive.

    Args:
        path: file to check.
        label: human-readable name used as the prefix of error messages.

    Raises:
        DocxError: the file does not exist, is not a zip archive, cannot be
            opened, or does not contain 'word/document.xml'.
    """
    if not path.exists():
        raise DocxError(f"{label} no encontrado: {path}")
    if not zipfile.is_zipfile(path):
        raise DocxError(f"{label} no es un DOCX válido: {path}")
    try:
        with zipfile.ZipFile(str(path), 'r') as z:
            has_document = 'word/document.xml' in z.namelist()
    except Exception as e:
        # Any failure while opening/listing the archive is reported as corruption.
        raise DocxError(f"{label} corrupto: {e}") from e
    # Raised outside the try so it is no longer re-wrapped as "corrupto: ...".
    if not has_document:
        raise DocxError(f"{label} no contiene 'word/document.xml'")


def find_docx_files(directory: Path) -> list[Path]:
    """Return the '*.docx' entries directly inside directory (non-recursive), sorted."""
    matches = list(directory.glob('*.docx'))
    matches.sort()
    return matches


def build_output_path(source: Path, output: str | None, suffix: str = '_r360mx') -> Path:
    """Construye la ruta de salida."""
    if output:
        return Path(output)
    return source.parent / f"{source.stem}{suffix}.docx"


def run_single(args) -> int:
    """
    Convert a single document.

    Reads args.documento, args.plantilla, args.output and args.dry_run. Both
    files are validated first; with dry-run nothing is written. Returns 0.
    Validation and conversion errors propagate to the caller.
    """
    source = Path(args.documento)
    template = Path(args.plantilla)
    output = build_output_path(source, args.output)

    banner = (
        ("📄 Template: %s", template),
        ("📄 Documento: %s", source),
        ("📄 Salida:    %s", output),
    )
    for message, value in banner:
        log.info(message, value)

    validate_docx(source, "Documento")
    validate_docx(template, "Plantilla")

    if not args.dry_run:
        replace_content(template, source, output)
    else:
        log.info("  🏁 Dry-run: todo correcto, no se genera nada.")
    return 0


def run_batch(args) -> int:
    """
    Convert every .docx found directly inside args.batch_dir.

    The template path is taken from args.plantilla or, when that is empty,
    from args.documento: with the documented call
    ``--batch DIR plantilla.docx`` the single positional value lands in
    ``documento`` because both positionals are optional.

    Skipped inputs: Word lock files ('~$...'), the template itself, and files
    that are the output path of another input in the same directory (results
    of a previous run, which this run regenerates anyway).

    Returns:
        0 when every document was converted (or nothing was found),
        1 when the directory/template is missing or any conversion failed.

    Raises:
        DocxError: the template is not a valid DOCX.
    """
    input_dir = Path(args.batch_dir)

    if not input_dir.is_dir():
        log.error("El directorio no existe: %s", input_dir)
        return 1

    template_arg = args.plantilla or args.documento
    if not template_arg:
        log.error("Falta la plantilla .docx para el modo batch")
        return 1
    template = Path(template_arg)

    validate_docx(template, "Plantilla")

    candidates = find_docx_files(input_dir)
    planned_outputs = {build_output_path(path, None) for path in candidates}
    template_resolved = template.resolve()

    docx_files = []
    for path in candidates:
        if path.name.startswith('~$'):
            reason = "archivo temporal de Word"
        elif path.resolve() == template_resolved:
            reason = "es la plantilla"
        elif path in planned_outputs:
            reason = "es la salida de otro documento"
        else:
            docx_files.append(path)
            continue
        log.debug("  Omitido: %s (%s)", path.name, reason)

    if not docx_files:
        log.warning("  No se encontraron archivos .docx en %s", input_dir)
        return 0

    total = len(docx_files)
    ok = 0
    failed = 0

    log.info("📦 Procesando %d documento(s) en lote...", total)
    log.info("📄 Template: %s", template)

    for idx, source in enumerate(docx_files, 1):
        output = build_output_path(source, None)
        log.info("[%d/%d] %s -> %s", idx, total, source.name, output.name)

        if args.dry_run:
            ok += 1
            continue

        # One failing document must not abort the rest of the batch.
        try:
            replace_content(template, source, output)
            ok += 1
        except DocxError as e:
            log.error("  ❌ Error: %s", e)
            failed += 1
        except Exception as e:
            log.error("  ❌ Error inesperado: %s", e)
            failed += 1

    log.info("")
    log.info("═══════════════════════════════════")
    log.info("  Resumen: %d OK, %d fallos de %d", ok, failed, total)
    log.info("═══════════════════════════════════")
    return 1 if failed > 0 else 0


def run_dump_styles(args) -> int:
    """
    Log the styles and the leading body structure of a DOCX, for debugging.

    The document path is taken from the value of ``--dump-styles``
    (args.dump_styles). Reading only args.documento failed for the documented
    call ``--dump-styles informe.docx``, where the positional is None.
    args.documento is still used as a fallback.

    Returns:
        0 on success.

    Raises:
        DocxError: no document was given, or it is not a valid DOCX.
    """
    option_value = getattr(args, 'dump_styles', None)
    target = option_value if isinstance(option_value, str) and option_value else args.documento
    if not target:
        raise DocxError("Falta el documento a inspeccionar")

    path = Path(target)
    validate_docx(path, "Documento")

    with zipfile.ZipFile(str(path), 'r') as z:
        if 'word/styles.xml' in z.namelist():
            styles_xml = parse_xml(z.read('word/styles.xml'))
            styles = styles_xml.findall(f'.//{q("w:style")}')
            log.info("Estilos en %s:", path)
            for style in styles:
                style_id = style.get(q('w:styleId'))
                style_type = style.get(q('w:type'))
                name_elem = style.find(q('w:name'))
                name = name_elem.get(q('w:val')) if name_elem is not None else ''
                log.info("  %-20s type=%-10s name=%s", style_id or '', style_type or '', name)
        else:
            log.warning("  No se encontró word/styles.xml")

        # Show the document structure: paragraphs and tables, first 100 only.
        doc_xml = parse_xml(z.read('word/document.xml'))
        body = doc_xml.find(q('w:body'))
        if body is not None:
            paras = get_paras(body)
            log.info("\nEstructura del body (%d elementos):", len(paras))
            for i, child in enumerate(paras[:100]):
                is_paragraph = child.tag == q('w:p')
                style_id = get_style_id(child) if is_paragraph else '[TABLE]'
                text = get_para_text(child)[:80] if is_paragraph else ''
                log.info("  [%4d] %-20s %s", i, style_id or '', text)
    return 0


def build_parser() -> argparse.ArgumentParser:
    """
    Build the command-line parser.

    Options: -v/--verbose, --dry-run, --dump-styles DOCUMENTO,
    --batch DIRECTORIO (stored as batch_dir) and -o/--output.
    Positionals (both optional): documento, plantilla.
    """
    examples = """
Ejemplos:
  %(prog)s informe.docx plantilla.docx
  %(prog)s informe.docx plantilla.docx -o salida.docx -v
  %(prog)s --batch ./informes/ plantilla.docx -v
  %(prog)s --dump-styles informe.docx
  %(prog)s informe.docx plantilla.docx --dry-run -v
        """
    parser = argparse.ArgumentParser(
        description='Convierte documentos ENERGY REPORT al formato corporativo R360MX.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=examples,
    )
    add = parser.add_argument

    # Flags
    add('-v', '--verbose', action='store_true', help='Modo verbose (debug)')
    add('--dry-run', action='store_true', help='Valida sin generar archivos')

    # Implicit sub-commands
    add('--dump-styles', metavar='DOCUMENTO',
        help='Inspecciona los estilos y estructura de un DOCX')
    add('--batch', metavar='DIRECTORIO', dest='batch_dir',
        help='Modo batch: procesa todos los .docx del directorio')

    # Positionals
    add('documento', nargs='?', help='Documento ENERGY REPORT .docx')
    add('plantilla', nargs='?', help='Plantilla R360MX .docx')
    add('-o', '--output', help='Archivo de salida (solo modo single)')

    return parser


def main(argv: list[str] | None = None) -> int:
    """
    Command-line entry point; returns the process exit status.

    Dispatch order: --dump-styles, then --batch, then single-document mode
    (which needs both positionals; otherwise the help is printed and 1 is
    returned). DocxError and unexpected exceptions are logged and yield 1;
    KeyboardInterrupt yields 130.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    setup_logging(args.verbose)

    try:
        if args.dump_styles:
            handler = run_dump_styles
        elif args.batch_dir:
            handler = run_batch
        elif args.documento and args.plantilla:
            handler = run_single
        else:
            parser.print_help()
            return 1
        return handler(args)

    except DocxError as e:
        log.error("❌ %s", e)
        return 1
    except KeyboardInterrupt:
        log.info("\nInterrumpido por el usuario.")
        return 130
    except Exception as e:
        log.exception("❌ Error inesperado: %s", e)
        return 1


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main())