Initial commit: script de conversión de documentos ENERGY REPORT a formato R360MX

2026-04-30 23:00:33 +02:00
parent 32a0789c0e
commit 55ca3e1625
3 changed files with 192 additions and 2 deletions
@@ -1,3 +1,29 @@
-# r360mx-docs-converter
+# R360MX Docs Converter

-Scripts para conversión de documentos ENERGY REPORT al formato R360MX
+Convierte documentos ENERGY REPORT de RatedPower al formato corporativo R360MX,
+aplicando la plantilla oficial de portada, disclaimer, índice y contraportada.
+
+## Requisitos
+
+- Python 3.10+
+- pip install -r requirements.txt
+
+## Uso
+
+```bash
+python3 apply_template.py <documento_ratedpower.docx> <plantilla_r360mx.docx>
+```
+
+Genera un archivo `<documento>_r360mx.docx` en el mismo directorio que el documento de entrada.
+
+## Estructura del template
+
+El template `portada.docx` contiene 5 hojas:
+1. Portada (con campos rellenables)
+2. Disclaimer & Revisions
+3. Índice TDC (se actualiza al abrir en Word)
+4. Contenido (se reemplaza por el del documento original)
+5. Contraportada RENOVABLES 360
+
+El script detecta automáticamente dónde termina el índice de RatedPower
+y dónde empieza el contenido real.
@@ -0,0 +1,163 @@
+#!/usr/bin/env python3
+"""
+Aplica plantilla portada.docx a un ENERGY REPORT.
+Estrategia: partir del DOCX ORIGINAL (que tiene todas sus imágenes y relaciones intactas)
+y reemplazar solo los primeros hijos del body (portada+disclaimer+índice del original)
+por los del template. La contraportada del template se añade al final.
+
+Así las imágenes del contenido original mantienen sus relaciones intactas.
+"""
+import sys, os, shutil, copy, zipfile, re
+from lxml import etree
+
+w = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
+
+def get_xml(path):
+    """Return the raw bytes of ``word/document.xml`` stored in the DOCX at *path*."""
+    archive = zipfile.ZipFile(path, 'r')
+    try:
+        return archive.read('word/document.xml')
+    finally:
+        # Release the file handle whether or not the member could be read.
+        archive.close()
+
+def find_content_start(children):
+    """Return the index in *children* of the first real content heading.
+
+    First pass: once a paragraph styled ``Title2Index`` has been seen, return
+    the first ``Title1`` paragraph whose text starts with a digit or one of
+    the letters ``I``, ``V``, ``X`` and which either has ``'. '`` within its
+    first six characters or ends with a digit.
+
+    Second pass (only if the first found nothing): return the first
+    ``Title1`` paragraph whose text starts with a digit and has ``'. '``
+    within its first six characters.
+
+    If neither pass matches, the hard-coded index 69 is returned.
+    """
+    para_tag = f'{{{w}}}p'
+    style_path = f'.//{{{w}}}pStyle'
+    val_attr = f'{{{w}}}val'
+    text_path = f'.//{{{w}}}t'
+
+    def describe(node):
+        # (style name, concatenated text) of one paragraph element.
+        style_node = node.find(style_path)
+        style_name = '' if style_node is None else style_node.get(val_attr)
+        content = ''.join(run.text or '' for run in node.findall(text_path))
+        return style_name, content
+
+    # Pass 1: headings that come after the index marker.
+    toc_seen = False
+    for pos, node in enumerate(children):
+        if node.tag != para_tag:
+            continue
+        style_name, content = describe(node)
+        if style_name == 'Title2Index':
+            toc_seen = True
+            continue
+        if not (toc_seen and style_name == 'Title1' and content):
+            continue
+        lead = content[0]
+        if not (lead.isdigit() or lead in 'IVX'):
+            continue
+        if '. ' in content[:6] or content[-1].isdigit():
+            return pos
+
+    # Pass 2: no index marker required, but a stricter heading shape.
+    for pos, node in enumerate(children):
+        if node.tag != para_tag:
+            continue
+        style_name, content = describe(node)
+        if style_name == 'Title1' and content and content[0].isdigit() and '. ' in content[:6]:
+            return pos
+
+    # Fallback index used when no heading could be detected.
+    return 69
+
+def replace_content(template_path, source_docx_path, output_path):
+    """Write a DOCX that wraps the source report's content in the template.
+
+    The output body is assembled from three slices:
+
+    1. template body children up to and including the end of its index,
+    2. source body children from the first real content heading onwards
+       (top-level ``sectPr`` elements are skipped),
+    3. template body children from the back cover to the end.
+
+    Package parts start from the source DOCX. Template parts missing from the
+    source are added under their own name; template media whose name already
+    exists in the source is stored next to it with a ``_tmpl`` suffix.
+
+    Args:
+        template_path: Path of the template DOCX.
+        source_docx_path: Path of the report DOCX to convert.
+        output_path: Path of the DOCX to write.
+
+    Returns:
+        ``output_path``, unchanged.
+    """
+    para_tag = f'{{{w}}}p'
+    sect_tag = f'{{{w}}}sectPr'
+    style_path = f'.//{{{w}}}pStyle'
+    val_attr = f'{{{w}}}val'
+    text_path = f'.//{{{w}}}t'
+
+    tmpl_xml = etree.fromstring(get_xml(template_path))
+    src_xml = etree.fromstring(get_xml(source_docx_path))
+
+    body_tmpl = tmpl_xml.find(f'{{{w}}}body')
+    body_src = src_xml.find(f'{{{w}}}body')
+
+    children_tmpl = list(body_tmpl)
+    children_src = list(body_src)
+
+    # ===== DETECT BOUNDARIES =====
+    # Template: the index ends at the last 'TableContentEnd' paragraph seen
+    # before a 'Ttulo1' paragraph located after the current boundary.
+    # 36 is the hard-coded fallback, also used when detection lands below 10.
+    tmpl_idx_end = 36
+    for i, child in enumerate(children_tmpl):
+        if child.tag != para_tag:
+            continue
+        style = child.find(style_path)
+        sval = style.get(val_attr) if style is not None else ''
+        if sval == 'TableContentEnd':
+            tmpl_idx_end = i
+        elif sval == 'Ttulo1' and i > tmpl_idx_end:
+            break
+    if tmpl_idx_end < 10:
+        tmpl_idx_end = 36
+
+    # Template: the back cover starts at the first paragraph containing the
+    # text 'RENOVABLES 360'; 47 is the hard-coded fallback.
+    tmpl_back = 47
+    for i, child in enumerate(children_tmpl):
+        if child.tag != para_tag:
+            continue
+        text = ''.join(t.text or '' for t in child.findall(text_path))
+        if 'RENOVABLES 360' in text:
+            tmpl_back = i
+            break
+
+    # Source: where the real content starts.
+    src_start = find_content_start(children_src)
+
+    print(f"  Template: índice h. {tmpl_idx_end}, contraportada h. {tmpl_back}")
+    print(f"  Original: contenido real empieza en hijo {src_start}")
+
+    # ===== REBUILD THE BODY =====
+    # The template tree is reused as the output document: empty its body and
+    # refill it with copies of the three slices.
+    for child in list(body_tmpl):
+        body_tmpl.remove(child)
+
+    for child in children_tmpl[:tmpl_idx_end + 1]:
+        body_tmpl.append(copy.deepcopy(child))
+    for child in children_src[src_start:]:
+        if child.tag != sect_tag:
+            body_tmpl.append(copy.deepcopy(child))
+    for child in children_tmpl[tmpl_back:]:
+        body_tmpl.append(copy.deepcopy(child))
+
+    # ===== COLLECT PACKAGE PARTS =====
+    # Start from every part of the source DOCX.
+    with zipfile.ZipFile(source_docx_path, 'r') as z:
+        out_data = {item.filename: z.read(item.filename) for item in z.infolist()}
+
+    # Add template parts: missing ones under their own name, colliding media
+    # under a '_tmpl'-suffixed name. Other colliding parts keep the source version.
+    with zipfile.ZipFile(template_path, 'r') as z:
+        for item in z.infolist():
+            fname = item.filename
+            if fname not in out_data:
+                out_data[fname] = z.read(fname)
+            elif 'media/' in fname and '.' in fname:
+                # The '.' guard avoids a ValueError on entries without an
+                # extension (e.g. a 'word/media/' directory entry).
+                base, ext = fname.rsplit('.', 1)
+                new_fname = f"{base}_tmpl.{ext}"
+                if new_fname not in out_data:
+                    out_data[new_fname] = z.read(fname)
+
+    # Replace document.xml with the rebuilt body.
+    out_data['word/document.xml'] = etree.tostring(
+        tmpl_xml, xml_declaration=True, encoding='UTF-8', standalone=True)
+
+    # ===== UPDATE RELATIONSHIPS =====
+    # Any 'media/imageN.ext' target is redirected to its '_tmpl' copy when
+    # such a copy was stored above.
+    # NOTE(review): the relationships part edited here is the one taken from
+    # the source DOCX (the source parts are loaded first and never replaced),
+    # so this also redirects images used by the original content, and the
+    # template's own relationship ids are not merged. Confirm this is the
+    # intended result; a full fix needs a relationship merge with id remapping.
+    rels_path = 'word/_rels/document.xml.rels'
+    if rels_path in out_data:
+        rels_xml = etree.fromstring(out_data[rels_path])
+        for rel in rels_xml:
+            m = re.match(r'media/(image\d+)\.(\w+)', rel.get('Target', ''))
+            if not m:
+                continue
+            new_target = f'media/{m.group(1)}_tmpl.{m.group(2)}'
+            if f'word/{new_target}' in out_data:
+                rel.set('Target', new_target)
+
+        out_data[rels_path] = etree.tostring(
+            rels_xml, xml_declaration=True, encoding='UTF-8', standalone=True)
+
+    # ===== WRITE THE OUTPUT PACKAGE =====
+    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zout:
+        for fname, content in out_data.items():
+            zout.writestr(fname, content)
+
+    return output_path
+
+if __name__ == "__main__":
+    # Usage: apply_template.py <document.docx> <template.docx>
+    if len(sys.argv) < 3:
+        print("Uso: apply_template.py <documento.docx> <plantilla.docx>")
+        sys.exit(1)
+    docx_path, template_path = sys.argv[1], sys.argv[2]
+
+    # The output is written next to the input document as '<name>_r360mx.docx'.
+    base_dir = os.path.dirname(docx_path)
+    base_name = os.path.splitext(os.path.basename(docx_path))[0]
+    output_path = os.path.join(base_dir, f"{base_name}_r360mx.docx")
+    print(f"📄 Template: {template_path}")
+    print(f"📄 Documento: {docx_path}")
+    print(f"📄 Salida: {output_path}")
+    replace_content(template_path, docx_path, output_path)
+    print(f"✅ Convertido: {output_path}")
+
+    # Best-effort background Nextcloud sync.
+    # SECURITY: the password is read from the environment and must never be
+    # committed. The password that was previously hard-coded here has been in
+    # version control and should be rotated.
+    # NOTE(review): the password is still passed on the command line, where
+    # other local users may see it; nextcloudcmd's netrc option (-n) looks
+    # like a safer alternative — confirm before switching.
+    import subprocess
+    nc_user = os.environ.get('NEXTCLOUD_USER', 'JavierBrana')
+    nc_password = os.environ.get('NEXTCLOUD_PASSWORD')
+    nc_local_dir = os.environ.get('NEXTCLOUD_LOCAL_DIR', '/home/javi/Nextcloud')
+    nc_url = os.environ.get('NEXTCLOUD_URL', 'https://cloud.r360mx.com')
+    if nc_password:
+        try:
+            # Fire and forget: the script does not wait for the sync to finish.
+            subprocess.Popen(
+                ['nextcloudcmd', '--non-interactive',
+                 '--user', nc_user, '--password', nc_password,
+                 nc_local_dir, nc_url],
+                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+        except OSError as exc:
+            # nextcloudcmd missing or not executable: the conversion itself
+            # already succeeded, so report it instead of crashing.
+            print(f"⚠️ No se pudo lanzar nextcloudcmd: {exc}")
+    else:
+        print("ℹ️ Sincronización con Nextcloud omitida: defina NEXTCLOUD_PASSWORD")
@@ -0,0 +1 @@
+lxml>=5.0.0