More fixes for RequestPart mixing

2023-07-30 11:39:29 +01:00
parent 6b618f3abe 0732ffa76e
commit f98f089d63
55 changed files with 3789 additions and 653 deletions
@@ -4,9 +4,13 @@ import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipInputStream;
-import java.util.*;
+
 import org.springframework.http.ResponseEntity;
 import org.springframework.web.bind.annotation.PostMapping;
 import org.springframework.web.bind.annotation.RequestPart;
@@ -17,6 +21,7 @@ import io.swagger.v3.oas.annotations.Operation;
 import io.swagger.v3.oas.annotations.tags.Tag;
 import stirling.software.SPDF.utils.GeneralUtils;
 import stirling.software.SPDF.utils.ProcessExecutor;
+import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
 import stirling.software.SPDF.utils.WebResponseUtils;

@RestController
@@ -24,7 +29,7 @@ import stirling.software.SPDF.utils.WebResponseUtils;
 public class ConvertHtmlToPDF {


-	 @PostMapping(consumes = "multipart/form-data", value = "/convert-to-pdf")
+	 @PostMapping(consumes = "multipart/form-data", value = "/html-to-pdf")
 	    @Operation(
 	        summary = "Convert an HTML or ZIP (containing HTML and CSS) to PDF",
 	        description = "This endpoint takes an HTML or ZIP file input and converts it to a PDF format."
@@ -40,61 +45,83 @@ public class ConvertHtmlToPDF {
 	        if (originalFilename == null || (!originalFilename.endsWith(".html") && !originalFilename.endsWith(".zip"))) {
 	            throw new IllegalArgumentException("File must be either .html or .zip format.");
 	        }
-
 	        Path tempOutputFile = Files.createTempFile("output_", ".pdf");
-	        Path tempInputFile;
-
-	        if (originalFilename.endsWith(".html")) {
-	            tempInputFile = Files.createTempFile("input_", ".html");
-	            Files.write(tempInputFile, fileInput.getBytes());
-	        } else {
-	            tempInputFile = unzipAndGetMainHtml(fileInput);
+	        Path tempInputFile = null;
+	        byte[] pdfBytes;
+	        try {
+		        if (originalFilename.endsWith(".html")) {
+		            tempInputFile = Files.createTempFile("input_", ".html");
+		            Files.write(tempInputFile, fileInput.getBytes());
+		        } else {
+		            tempInputFile = unzipAndGetMainHtml(fileInput);
+		        }
+	
+		        List<String> command = new ArrayList<>();
+		        command.add("weasyprint");
+		        command.add(tempInputFile.toString()); 
+		        command.add(tempOutputFile.toString());
+		        ProcessExecutorResult returnCode;
+		        if (originalFilename.endsWith(".zip")) {	        	
+		        	returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
+	                .runCommandWithOutputHandling(command, tempInputFile.getParent().toFile());
+		        } else {
+	
+		        	returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
+		                                        .runCommandWithOutputHandling(command);
+		        }
+	
+		        pdfBytes = Files.readAllBytes(tempOutputFile);
+	        } finally {
+		        // Clean up temporary files
+		        Files.delete(tempOutputFile);
+		        Files.delete(tempInputFile);
+		        
+		        if (originalFilename.endsWith(".zip")) {
+		        	GeneralUtils.deleteDirectory(tempInputFile.getParent());
+		        }
 	        }
-
-	        List<String> command = new ArrayList<>();
-	        command.add("weasyprint");
-	        command.add(tempInputFile.toString()); 
-	        command.add(tempOutputFile.toString());
-	        int returnCode = 0;
-	        if (originalFilename.endsWith(".zip")) {	        	
-	        	returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
-                .runCommandWithOutputHandling(command, tempInputFile.getParent().toFile());
-	        } else {
-
-	        	returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
-	                                        .runCommandWithOutputHandling(command);
-	        }
-
-	        byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
-
-	        // Clean up temporary files
-	        Files.delete(tempOutputFile);
-	        Files.delete(tempInputFile);
-	        if (originalFilename.endsWith(".zip")) {
-	        	GeneralUtils.deleteDirectory(tempInputFile.getParent());
-	        }
-
 	        String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") + ".pdf";  // Remove file extension and append .pdf
 	        return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
 	    }



-    private Path unzipAndGetMainHtml(MultipartFile zipFile) throws IOException {
-        Path tempDirectory = Files.createTempDirectory("unzipped_");
-        try (ZipInputStream zipIn = new ZipInputStream(new ByteArrayInputStream(zipFile.getBytes()))) {
-            ZipEntry entry = zipIn.getNextEntry();
-            while (entry != null) {
-                Path filePath = tempDirectory.resolve(entry.getName());
-                if (!entry.isDirectory()) {
-                    Files.copy(zipIn, filePath);
-                }
-                zipIn.closeEntry();
-                entry = zipIn.getNextEntry();
-            }
-        }
-        return tempDirectory.resolve("index.html");
-    }
+	    /**
+	     * Extracts the uploaded ZIP archive into a fresh temporary directory and selects the
+	     * HTML file that should be converted.
+	     *
+	     * <p>The archive comes from the client and is therefore untrusted: every entry path is
+	     * normalized and must stay inside the extraction directory, otherwise the upload is
+	     * rejected (protection against "Zip Slip" path traversal).
+	     *
+	     * @param zipFile the uploaded ZIP archive
+	     * @return the path of {@code index.html} if one exists anywhere in the extracted tree,
+	     *         otherwise the first {@code .html} file encountered while walking the tree
+	     * @throws IOException if the archive cannot be read or written, if an entry would be
+	     *                     extracted outside the temporary directory, or if the archive
+	     *                     contains no HTML file
+	     */
+	    private Path unzipAndGetMainHtml(MultipartFile zipFile) throws IOException {
+	        Path tempDirectory = Files.createTempDirectory("unzipped_");
+	        // Normalized root used only for the containment check below.
+	        Path extractionRoot = tempDirectory.normalize();
+	        try (ZipInputStream zipIn = new ZipInputStream(new ByteArrayInputStream(zipFile.getBytes()))) {
+	            ZipEntry entry = zipIn.getNextEntry();
+	            while (entry != null) {
+	                // Collapse any ".." segments so the real target location can be verified.
+	                Path filePath = tempDirectory.resolve(entry.getName()).normalize();
+	                if (!filePath.startsWith(extractionRoot)) {
+	                    throw new IOException("ZIP entry is outside of the target directory: " + entry.getName());
+	                }
+	                if (entry.isDirectory()) {
+	                    Files.createDirectories(filePath);  // Explicitly create the directory structure
+	                } else {
+	                    Files.createDirectories(filePath.getParent()); // Create parent directories if they don't exist
+	                    Files.copy(zipIn, filePath);
+	                }
+	                zipIn.closeEntry();
+	                entry = zipIn.getNextEntry();
+	            }
+	        }
+
+	        // Search for the main HTML file; the walk stream must be closed, hence try-with-resources.
+	        try (Stream<Path> walk = Files.walk(tempDirectory)) {
+	            // Only regular files qualify: a directory named "something.html" is not convertible.
+	            List<Path> htmlFiles = walk.filter(Files::isRegularFile)
+	                                       .filter(file -> file.toString().endsWith(".html"))
+	                                       .collect(Collectors.toList());
+
+	            if (htmlFiles.isEmpty()) {
+	                throw new IOException("No HTML files found in the unzipped directory.");
+	            }
+
+	            // Prioritize 'index.html' if it exists, otherwise use the first .html file
+	            for (Path htmlFile : htmlFiles) {
+	                if (htmlFile.getFileName().toString().equals("index.html")) {
+	                    return htmlFile;
+	                }
+	            }
+
+	            return htmlFiles.get(0);
+	        }
+	    }

    
   
@@ -43,7 +43,7 @@ public class ConvertImgPDFController {
            @Parameter(description = "Choose between a single image containing all pages or separate images for each page", schema = @Schema(allowableValues = {"single", "multiple"}))
                    String singleOrMultiple,
            @RequestParam("colorType")
-            @Parameter(description = "The color type of the output image(s)", schema = @Schema(allowableValues = {"rgb", "greyscale", "blackwhite"}))
+            @Parameter(description = "The color type of the output image(s)", schema = @Schema(allowableValues = {"color", "greyscale", "blackwhite"}))
                    String colorType,
            @RequestParam("dpi")
            @Parameter(description = "The DPI (dots per inch) for the output image(s)")
@@ -94,7 +94,7 @@ public class ConvertImgPDFController {
            @Parameter(description = "Whether to stretch the images to fit the PDF page or maintain the aspect ratio", example = "false")
                    boolean stretchToFit,
            @RequestParam("colorType")
-            @Parameter(description = "The color type of the output image(s)", schema = @Schema(allowableValues = {"rgb", "greyscale", "blackwhite"}))
+            @Parameter(description = "The color type of the output image(s)", schema = @Schema(allowableValues = {"color", "greyscale", "blackwhite"}))
                    String colorType,
            @RequestParam(defaultValue = "false", name = "autoRotate")
            @Parameter(description = "Whether to automatically rotate the images to better fit the PDF page", example = "true")
@@ -19,6 +19,7 @@ import io.swagger.v3.oas.annotations.Operation;
 import io.swagger.v3.oas.annotations.Parameter;
 import io.swagger.v3.oas.annotations.tags.Tag;
 import stirling.software.SPDF.utils.ProcessExecutor;
+import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
 import stirling.software.SPDF.utils.WebResponseUtils;

@RestController
@@ -41,7 +42,7 @@ public class ConvertOfficeController {

        // Run the LibreOffice command
        List<String> command = new ArrayList<>(Arrays.asList("unoconv", "-vvv", "-f", "pdf", "-o", tempOutputFile.toString(), tempInputFile.toString()));
-        int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
+        ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);

        // Read the converted PDF file
        byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
@@ -62,10 +63,10 @@ public class ConvertOfficeController {
        summary = "Convert a file to a PDF using LibreOffice",
        description = "This endpoint converts a given file to a PDF using LibreOffice API  Input:Any Output:PDF Type:SISO"
    )
-    public ResponseEntity<byte[]> processPdfWithOCR(
+    public ResponseEntity<byte[]> processFileToPDF(
        @RequestPart(required = true, value = "fileInput")
        @Parameter(
-            description = "The input file to be converted to a PDF file using OCR",
+            description = "The input file to be converted to a PDF file using LibreOffice",
            required = true
        )
            MultipartFile inputFile
@@ -16,6 +16,7 @@ import io.swagger.v3.oas.annotations.Operation;
 import io.swagger.v3.oas.annotations.Parameter;
 import io.swagger.v3.oas.annotations.tags.Tag;
 import stirling.software.SPDF.utils.ProcessExecutor;
+import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
 import stirling.software.SPDF.utils.WebResponseUtils;

@RestController
@@ -49,7 +50,7 @@ public class ConvertPDFToPDFA {
        command.add(tempInputFile.toString());
        command.add(tempOutputFile.toString());

-        int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
+        ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);

        // Read the optimized PDF file
        byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
@@ -8,6 +8,7 @@ import java.util.List;

 import org.springframework.http.ResponseEntity;
 import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestParam;
 import org.springframework.web.bind.annotation.RequestPart;
 import org.springframework.web.bind.annotation.RestController;
 import org.springframework.web.multipart.MultipartFile;
@@ -17,6 +18,7 @@ import io.swagger.v3.oas.annotations.Parameter;
 import io.swagger.v3.oas.annotations.tags.Tag;
 import stirling.software.SPDF.utils.GeneralUtils;
 import stirling.software.SPDF.utils.ProcessExecutor;
+import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
 import stirling.software.SPDF.utils.WebResponseUtils;

@RestController
@@ -29,32 +31,35 @@ public class ConvertWebsiteToPDF {
 	    description = "This endpoint fetches content from a URL and converts it to a PDF format."
 	)
 	public ResponseEntity<byte[]> urlToPdf(
-	    @RequestPart(required = true, value = "urlInput")
+	    @RequestParam(required = true, value = "urlInput")
 	    @Parameter(description = "The input URL to be converted to a PDF file", required = true)
 	        String URL) throws IOException, InterruptedException {

 	    // Validate the URL format
-	    if(!URL.matches("^https?://.*") && GeneralUtils.isValidURL(URL)) {
+	    if(!URL.matches("^https?://.*") || !GeneralUtils.isValidURL(URL)) {
 	        throw new IllegalArgumentException("Invalid URL format provided.");
 	    }
-
-	    // Prepare the output file path
-	    Path tempOutputFile = Files.createTempFile("output_", ".pdf");
-
-	    // Prepare the OCRmyPDF command
-	    List<String> command = new ArrayList<>();
-	    command.add("weasyprint");
-	    command.add(URL);
-	    command.add(tempOutputFile.toString());
-
-	    int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT).runCommandWithOutputHandling(command);
-
-	    // Read the optimized PDF file
-	    byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
-
-	    // Clean up the temporary files
-	    Files.delete(tempOutputFile);
-
+	    Path tempOutputFile = null;
+	    byte[] pdfBytes;
+	    try {
+		    // Prepare the output file path
+		    tempOutputFile = Files.createTempFile("output_", ".pdf");
+	
+		    // Prepare the WeasyPrint command
+		    List<String> command = new ArrayList<>();
+		    command.add("weasyprint");
+		    command.add(URL);
+		    command.add(tempOutputFile.toString());
+	
+		    ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT).runCommandWithOutputHandling(command);
+	
+		    // Read the generated PDF file
+		    pdfBytes = Files.readAllBytes(tempOutputFile);
+	    }
+	    finally {
+		    // Clean up the temporary files
+		    Files.delete(tempOutputFile);
+	    }
 	    // Convert URL to a safe filename
 	    String outputFilename = convertURLToFileName(URL);