Initial commit: script de conversión de documentos ENERGY REPORT a formato R360MX

2026-04-30 23:00:33 +02:00
parent 32a0789c0e
commit 55ca3e1625
3 changed files with 192 additions and 2 deletions
@@ -1,3 +1,29 @@
-# r360mx-docs-converter
+# R360MX Docs Converter
-Scripts para conversión de documentos ENERGY REPORT al formato R360MX
+Convierte documentos ENERGY REPORT de RatedPower al formato corporativo R360MX,
 aplicando la plantilla oficial de portada, disclaimer, índice y contraportada.
 ## Requisitos
 - Python 3.10+
 - pip install -r requirements.txt
 ## Uso
 ```bash
 python3 apply_template.py <documento_ratedpower.docx> <plantilla_r360mx.docx>
 ```
 Genera un archivo `<documento>_r360mx.docx` en el mismo directorio que el documento de entrada.
 ## Estructura del template
 El template `portada.docx` contiene 5 hojas:
 1. Portada (con campos rellenables)
 2. Disclaimer & Revisions
 3. Índice TDC (se actualiza al abrir en Word)
 4. Contenido (se reemplaza por el del documento original)
 5. Contraportada RENOVABLES 360
 El script detecta automáticamente dónde termina el índice de RatedPower
 y dónde empieza el contenido real.
@@ -0,0 +1,163 @@
 #!/usr/bin/env python3
 """
 Aplica plantilla portada.docx a un ENERGY REPORT.
 Estrategia: partir del DOCX ORIGINAL (que tiene todas sus imágenes y relaciones intactas)
 y reemplazar solo los primeros hijos del body (portada+disclaimer+índice del original)
 por los del template. La contraportada del template se añade al final.
 Así las imágenes del contenido original mantienen sus relaciones intactas.
 """
 import sys, os, shutil, copy, zipfile, re
 from lxml import etree
 # WordprocessingML main namespace URI; interpolated into the Clark-notation
 # ("{uri}tag") element and attribute names used throughout this script.
 w = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
 def get_xml(path):
    """Return the raw bytes of ``word/document.xml`` stored in the DOCX at *path*.

    Raises ``KeyError`` when the archive has no ``word/document.xml`` member.
    """
    with zipfile.ZipFile(path) as archive:
        with archive.open('word/document.xml') as member:
            return member.read()
 def _paragraph_style_and_text(node):
    """Return ``(style_id, text)`` for a ``w:p`` element, or ``None`` for any other node."""
    if node.tag != f'{{{w}}}p':
        return None
    style = node.find(f'.//{{{w}}}pStyle')
    sval = style.get(f'{{{w}}}val') if style is not None else ''
    text = ''.join(t.text or '' for t in node.findall(f'.//{{{w}}}t'))
    return sval, text
 def find_content_start(children, default=69):
    """Find the index of the first real content heading (the one after the index).

    Args:
        children: sequence of body child elements of the source document.
        default: index returned when neither pass finds a heading. It keeps the
            previously hard-coded value (69) so existing callers behave the same.

    Returns:
        The position in *children* of the first matching heading paragraph,
        or *default* when nothing matches.

    Two passes are made:
      1. After a paragraph styled ``Title2Index`` has been seen, the first
         ``Title1`` paragraph whose text starts with a digit or one of ``IVX``
         and either has ``'. '`` within its first six characters or ends in a digit.
      2. Otherwise, the first ``Title1`` paragraph anywhere whose text starts
         with a digit and has ``'. '`` within its first six characters.
    """
    # Extract style and text once per paragraph; both passes reuse the result.
    paragraphs = []
    for i, child in enumerate(children):
        info = _paragraph_style_and_text(child)
        if info is not None:
            paragraphs.append((i, info[0], info[1]))
    found_toc = False
    for i, sval, text in paragraphs:
        if sval == 'Title2Index':
            found_toc = True
            continue
        if found_toc and sval == 'Title1' and text and (text[0].isdigit() or text[0] in 'IVX'):
            if '. ' in text[:6] or text[-1].isdigit():
                return i
    for i, sval, text in paragraphs:
        if sval == 'Title1' and text and text[0].isdigit() and '. ' in text[:6]:
            return i
    return default
 _R_NS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'
 _PKG_RELS_NS = 'http://schemas.openxmlformats.org/package/2006/relationships'
 _CT_NS = 'http://schemas.openxmlformats.org/package/2006/content-types'
 def _serialize(root):
    """Serialize an XML tree the way every XML part of the output package is written."""
    return etree.tostring(root, xml_declaration=True, encoding='UTF-8', standalone=True)
 def _rels_part_for(part):
    """Return the name of the relationships part that belongs to *part*."""
    import posixpath
    folder, leaf = posixpath.split(part)
    return posixpath.join(folder, '_rels', leaf + '.rels')
 def _resolve_target(owner_part, target):
    """Turn a relationship ``Target`` found in *owner_part*'s rels into a package part name."""
    import posixpath
    if target.startswith('/'):
        return target.lstrip('/')
    return posixpath.normpath(posixpath.join(posixpath.dirname(owner_part), target))
 def _relative_target(part, owner_part):
    """Return *part* expressed relative to the folder that holds *owner_part*."""
    import posixpath
    # Both paths are anchored at '/' so the result never depends on the process cwd.
    return posixpath.relpath('/' + part, '/' + posixpath.dirname(owner_part))
 def _import_part(part, tmpl_files, out_data, imported):
    """Copy template part *part* (and the parts it references) into *out_data*.

    *imported* maps template part names to the names they received in the
    output; it is updated in place and also guards against reference cycles.
    A part whose name is already taken in the output by different bytes (or
    that needs its own relationships part) is stored under a ``_tmpl`` name
    so the source part is never overwritten. Returns the output part name.
    """
    import posixpath
    if part in imported:
        return imported[part]
    payload = tmpl_files[part]
    sidecar = _rels_part_for(part)
    dest = part
    if part in out_data and (out_data[part] != payload or sidecar in tmpl_files):
        stem, ext = posixpath.splitext(part)
        dest = f"{stem}_tmpl{ext}"
        serial = 1
        while dest in out_data:
            serial += 1
            dest = f"{stem}_tmpl{serial}{ext}"
    imported[part] = dest
    out_data[dest] = payload
    if sidecar in tmpl_files:
        # The part has relationships of its own (e.g. an image inside a header):
        # import their targets too and point the copied rels at the new names.
        rels_root = etree.fromstring(tmpl_files[sidecar])
        for rel in rels_root.iter(f'{{{_PKG_RELS_NS}}}Relationship'):
            if rel.get('TargetMode') == 'External':
                continue
            child = _resolve_target(part, rel.get('Target', ''))
            if child in tmpl_files:
                child_dest = _import_part(child, tmpl_files, out_data, imported)
                rel.set('Target', _relative_target(child_dest, dest))
        out_data[_rels_part_for(dest)] = _serialize(rels_root)
    return dest
 def _content_type_of(ct_root, part):
    """Look up the content type of *part* in a parsed ``[Content_Types].xml`` (or ``None``)."""
    import posixpath
    wanted = '/' + part
    for override in ct_root.iter(f'{{{_CT_NS}}}Override'):
        if override.get('PartName') == wanted:
            return override.get('ContentType')
    ext = posixpath.splitext(part)[1].lstrip('.').lower()
    for default in ct_root.iter(f'{{{_CT_NS}}}Default'):
        if (default.get('Extension') or '').lower() == ext:
            return default.get('ContentType')
    return None
 def _register_content_types(tmpl_files, out_data, imported):
    """Make sure every imported template part has its content type declared in the output."""
    ct_path = '[Content_Types].xml'
    if not imported or ct_path not in out_data or ct_path not in tmpl_files:
        return
    tmpl_ct = etree.fromstring(tmpl_files[ct_path])
    out_ct = etree.fromstring(out_data[ct_path])
    for part, dest in imported.items():
        ctype = _content_type_of(tmpl_ct, part)
        if ctype is None or _content_type_of(out_ct, dest) == ctype:
            continue
        existing = None
        for override in out_ct.iter(f'{{{_CT_NS}}}Override'):
            if override.get('PartName') == '/' + dest:
                existing = override
                break
        if existing is None:
            existing = etree.SubElement(out_ct, f'{{{_CT_NS}}}Override')
            existing.set('PartName', '/' + dest)
        existing.set('ContentType', ctype)
    out_data[ct_path] = _serialize(out_ct)
 def _fresh_rel_id(used_ids):
    """Return a relationship Id not present in *used_ids* and record it there."""
    n = 1
    while f'rIdTmpl{n}' in used_ids:
        n += 1
    new_id = f'rIdTmpl{n}'
    used_ids.add(new_id)
    return new_id
 def replace_content(template_path, source_docx_path, output_path):
    """Wrap the real content of a source DOCX in the template's front and back matter.

    The output package starts from the SOURCE package, so the source content
    keeps its own images and relationships untouched. The new body is:
      1. template children up to the end of its index (cover + disclaimer + index),
      2. source children from the first real heading on (body-level sectPr dropped),
      3. template children from the back cover to the end.

    Relationship ids used by the template-derived nodes are re-created in the
    source's ``word/_rels/document.xml.rels`` under fresh ids, and the parts
    they point to are copied from the template (renamed with ``_tmpl`` when the
    name is already taken). Previously the SOURCE relationships were retargeted
    to the template images, which replaced content images with template ones.

    NOTE: every other source part (styles, numbering, settings, ...) is kept as
    is; template paragraph styles that the source does not define are not imported.

    Args:
        template_path: path of the template DOCX.
        source_docx_path: path of the document whose content is kept.
        output_path: path of the DOCX to write.

    Returns:
        *output_path*.
    """
    tmpl_xml = etree.fromstring(get_xml(template_path))
    src_xml = etree.fromstring(get_xml(source_docx_path))
    body_tmpl = tmpl_xml.find(f'{{{w}}}body')
    body_src = src_xml.find(f'{{{w}}}body')
    children_tmpl = list(body_tmpl)
    children_src = list(body_src)
    # ===== DETECT BOUNDARIES =====
    # Template: last child of the index. 36 is the fallback when no
    # 'TableContentEnd' paragraph is found (or it is implausibly early).
    tmpl_idx_end = 36
    for i, child in enumerate(children_tmpl):
        if child.tag == f'{{{w}}}p':
            style = child.find(f'.//{{{w}}}pStyle')
            sval = style.get(f'{{{w}}}val') if style is not None else ''
            if sval == 'TableContentEnd':
                tmpl_idx_end = i
            elif sval == 'Ttulo1' and i > tmpl_idx_end:
                break
    if tmpl_idx_end < 10:
        tmpl_idx_end = 36
    # Template: first child of the back cover (fallback 47). Only children after
    # the index are considered, so the same text on the cover page cannot be
    # mistaken for the back cover and duplicate the front matter.
    tmpl_back = 47
    for i, child in enumerate(children_tmpl):
        if i <= tmpl_idx_end or child.tag != f'{{{w}}}p':
            continue
        texts = child.findall(f'.//{{{w}}}t')
        if 'RENOVABLES 360' in ''.join(t.text or '' for t in texts):
            tmpl_back = i
            break
    # Source: where the real content starts.
    src_start = find_content_start(children_src)
    print(f"  Template: índice h. {tmpl_idx_end}, contraportada h. {tmpl_back}")
    print(f"  Original: contenido real empieza en hijo {src_start}")
    # ===== BUILD THE NEW BODY =====
    front = [copy.deepcopy(child) for child in children_tmpl[:tmpl_idx_end + 1]]
    content = [copy.deepcopy(child) for child in children_src[src_start:]
               if child.tag != f'{{{w}}}sectPr']
    back = [copy.deepcopy(child) for child in children_tmpl[tmpl_back:]]
    # ===== LOAD BOTH PACKAGES =====
    with zipfile.ZipFile(source_docx_path, 'r') as z:
        out_data = {item.filename: z.read(item.filename) for item in z.infolist()}
    with zipfile.ZipFile(template_path, 'r') as z:
        tmpl_files = {item.filename: z.read(item.filename) for item in z.infolist()}
    # ===== MERGE TEMPLATE RELATIONSHIPS =====
    doc_part = 'word/document.xml'
    rels_path = 'word/_rels/document.xml.rels'
    if rels_path in out_data:
        rels_xml = etree.fromstring(out_data[rels_path])
    else:
        rels_xml = etree.Element(f'{{{_PKG_RELS_NS}}}Relationships', nsmap={None: _PKG_RELS_NS})
    tmpl_rels = {}
    if rels_path in tmpl_files:
        for rel in etree.fromstring(tmpl_files[rels_path]).iter(f'{{{_PKG_RELS_NS}}}Relationship'):
            tmpl_rels[rel.get('Id')] = rel
    used_ids = {rel.get('Id') for rel in rels_xml.iter(f'{{{_PKG_RELS_NS}}}Relationship')}
    remapped = {}   # template relationship id -> id created in the output rels
    imported = {}   # template part name -> part name in the output package
    r_prefix = f'{{{_R_NS}}}'
    for node in front + back:
        for element in node.iter('*'):
            for attr, value in list(element.attrib.items()):
                # Every attribute in the relationships namespace holds a relationship id.
                if not attr.startswith(r_prefix) or value not in tmpl_rels:
                    continue
                if value not in remapped:
                    tmpl_rel = tmpl_rels[value]
                    target = tmpl_rel.get('Target', '')
                    if tmpl_rel.get('TargetMode') != 'External':
                        part = _resolve_target(doc_part, target)
                        if part in tmpl_files:
                            dest = _import_part(part, tmpl_files, out_data, imported)
                            target = _relative_target(dest, doc_part)
                    new_rel = etree.SubElement(rels_xml, f'{{{_PKG_RELS_NS}}}Relationship')
                    new_rel.set('Id', _fresh_rel_id(used_ids))
                    new_rel.set('Type', tmpl_rel.get('Type', ''))
                    new_rel.set('Target', target)
                    if tmpl_rel.get('TargetMode') is not None:
                        new_rel.set('TargetMode', tmpl_rel.get('TargetMode'))
                    remapped[value] = new_rel.get('Id')
                element.set(attr, remapped[value])
    _register_content_types(tmpl_files, out_data, imported)
    # ===== ASSEMBLE document.xml =====
    for child in list(body_tmpl):
        body_tmpl.remove(child)
    for child in front + content + back:
        body_tmpl.append(child)
    out_data[doc_part] = _serialize(tmpl_xml)
    if rels_path in out_data or remapped:
        out_data[rels_path] = _serialize(rels_xml)
    # ===== WRITE =====
    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zout:
        for fname, payload in out_data.items():
            zout.writestr(fname, payload)
    return output_path
 if __name__ == "__main__":
    # Command-line entry point: convert <documento.docx> using <plantilla.docx>,
    # write "<documento>_r360mx.docx" next to the input, then optionally launch
    # a Nextcloud sync in the background.
    import subprocess
    if len(sys.argv) < 3:
        print("Uso: apply_template.py <documento.docx> <plantilla.docx>")
        sys.exit(1)
    docx_path = sys.argv[1]
    template_path = sys.argv[2]
    base_dir = os.path.dirname(docx_path)
    base_name = os.path.splitext(os.path.basename(docx_path))[0]
    output_path = os.path.join(base_dir, f"{base_name}_r360mx.docx")
    print(f"📄 Template: {template_path}")
    print(f"📄 Documento: {docx_path}")
    print(f"📄 Salida: {output_path}")
    replace_content(template_path, docx_path, output_path)
    print(f"✅ Convertido: {output_path}")
    # SECURITY: the Nextcloud account name and password used to be hard-coded
    # here. They are now read from the environment; the password that was
    # committed must be considered leaked and rotated.
    # NOTE(review): the password is still passed on the command line, where
    # other local users may see it in the process list -- consider a netrc-based
    # login if the installed nextcloudcmd supports it (confirm against its docs).
    nc_user = os.environ.get('NEXTCLOUD_USER')
    nc_password = os.environ.get('NEXTCLOUD_PASSWORD')
    if nc_user and nc_password:
        try:
            # Fire and forget: the sync runs in the background with its output discarded.
            subprocess.Popen(
                ['nextcloudcmd', '--non-interactive', '--user', nc_user, '--password', nc_password,
                 '/home/javi/Nextcloud', 'https://cloud.r360mx.com'],
                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        except OSError as exc:
            # The conversion already succeeded; a missing or unlaunchable
            # nextcloudcmd should not turn that into a traceback.
            print(f"⚠️ No se pudo lanzar nextcloudcmd: {exc}")
    else:
        print("ℹ️ Sincronización con Nextcloud omitida: defina NEXTCLOUD_USER y NEXTCLOUD_PASSWORD")
@@ -0,0 +1 @@
 lxml>=5.0.0