html and url to pdf init

2023-07-22 16:57:40 +01:00
parent 749461334d
commit 4367ae7934
5 changed files with 228 additions and 69 deletions
@@ -1,66 +1,103 @@
-package stirling.software.SPDF.controller.api.converters;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.springframework.http.ResponseEntity;
-import org.springframework.web.bind.annotation.PostMapping;
-import org.springframework.web.bind.annotation.RequestPart;
-import org.springframework.web.bind.annotation.RestController;
-import org.springframework.web.multipart.MultipartFile;
-
-import io.swagger.v3.oas.annotations.Operation;
-import io.swagger.v3.oas.annotations.Parameter;
-import io.swagger.v3.oas.annotations.tags.Tag;
-import stirling.software.SPDF.utils.ProcessExecutor;
-import stirling.software.SPDF.utils.WebResponseUtils;
-
-@RestController
-@Tag(name = "Convert", description = "Convert APIs")
-public class ConvertHtmlToPDF {
-
-	@PostMapping(consumes = "multipart/form-data", value = "/pdf-to-pdfa")
-	@Operation(
-	    summary = "Convert a PDF to a PDF/A",
-	    description = "This endpoint converts a PDF file to a PDF/A file. PDF/A is a format designed for long-term archiving of digital documents. Input:PDF Output:PDF Type:SISO"
-	)
-	public ResponseEntity<byte[]> pdfToPdfA(
-	    @RequestPart(required = true, value = "fileInput")
-	    @Parameter(description = "The input PDF file to be converted to a PDF/A file", required = true)
-	        MultipartFile inputFile) throws IOException, InterruptedException {
-
-        // Save the uploaded file to a temporary location
-        Path tempInputFile = Files.createTempFile("input_", ".pdf");
-        inputFile.transferTo(tempInputFile.toFile());
-
-        // Prepare the output file path
-        Path tempOutputFile = Files.createTempFile("output_", ".pdf");
-
-        // Prepare the OCRmyPDF command
-        List<String> command = new ArrayList<>();
-        command.add("ocrmypdf");
-        command.add("--skip-text");
-        command.add("--tesseract-timeout=0");
-        command.add("--output-type");
-        command.add("pdfa");
-        command.add(tempInputFile.toString());
-        command.add(tempOutputFile.toString());
-
-        int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
-
-        // Read the optimized PDF file
-        byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
-
-        // Clean up the temporary files
-        Files.delete(tempInputFile);
-        Files.delete(tempOutputFile);
-
-        // Return the optimized PDF as a response
-        String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_PDFA.pdf";
-        return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
-    }
-
-}
+package stirling.software.SPDF.controller.api.converters;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+import java.util.*;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestPart;
+import org.springframework.web.bind.annotation.RestController;
+import org.springframework.web.multipart.MultipartFile;
+
+import io.swagger.v3.oas.annotations.Operation;
+import io.swagger.v3.oas.annotations.tags.Tag;
+import stirling.software.SPDF.utils.GeneralUtils;
+import stirling.software.SPDF.utils.ProcessExecutor;
+import stirling.software.SPDF.utils.WebResponseUtils;
+
+@RestController
+@Tag(name = "Convert", description = "Convert APIs")
+public class ConvertHtmlToPDF {
+
+	/**
+	 * Converts an uploaded HTML file, or a ZIP archive containing an {@code index.html}
+	 * at its root, to a PDF by invoking the {@code weasyprint} command.
+	 *
+	 * <p>All temporary files (and, for ZIP uploads, the extraction directory) are removed
+	 * whether or not the conversion succeeds.
+	 *
+	 * @param fileInput the uploaded {@code .html} or {@code .zip} file (extension is matched case-insensitively)
+	 * @return the generated PDF, named after the upload with its extension replaced by {@code .pdf}
+	 * @throws IllegalArgumentException if no file is supplied, the extension is not supported,
+	 *                                  or a ZIP upload has no {@code index.html} at its root
+	 * @throws IOException              if a temporary file cannot be written or read, or a ZIP entry is unsafe
+	 * @throws InterruptedException     declared for the external command execution
+	 */
+	@PostMapping(consumes = "multipart/form-data", value = "/convert-to-pdf")
+	@Operation(
+		summary = "Convert an HTML or ZIP (containing HTML and CSS) to PDF",
+		description = "This endpoint takes an HTML or ZIP file input and converts it to a PDF format."
+	)
+	public ResponseEntity<byte[]> HtmlToPdf(
+			@RequestPart(required = true, value = "fileInput") MultipartFile fileInput) throws IOException, InterruptedException {
+
+		if (fileInput == null) {
+			throw new IllegalArgumentException("Please provide an HTML or ZIP file for conversion.");
+		}
+
+		String originalFilename = fileInput.getOriginalFilename();
+		// Compare extensions case-insensitively so that e.g. "PAGE.HTML" is accepted too.
+		String lowerCaseName = originalFilename == null ? "" : originalFilename.toLowerCase(Locale.ROOT);
+		boolean isZip = lowerCaseName.endsWith(".zip");
+		if (!isZip && !lowerCaseName.endsWith(".html")) {
+			throw new IllegalArgumentException("File must be either .html or .zip format.");
+		}
+
+		Path tempOutputFile = null;
+		Path tempInputFile = null;
+		try {
+			tempOutputFile = Files.createTempFile("output_", ".pdf");
+
+			if (isZip) {
+				tempInputFile = unzipAndGetMainHtml(fileInput);
+			} else {
+				tempInputFile = Files.createTempFile("input_", ".html");
+				Files.write(tempInputFile, fileInput.getBytes());
+			}
+
+			List<String> command = new ArrayList<>();
+			command.add("weasyprint");
+			command.add(tempInputFile.toString());
+			command.add(tempOutputFile.toString());
+
+			if (isZip) {
+				// The extraction directory is handed to the executor as well
+				// (presumably as the working directory, so relative asset links resolve - confirm against ProcessExecutor).
+				ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
+						.runCommandWithOutputHandling(command, tempInputFile.getParent().toFile());
+			} else {
+				ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
+						.runCommandWithOutputHandling(command);
+			}
+
+			byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
+
+			// Remove file extension and append .pdf
+			String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") + ".pdf";
+			return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
+		} finally {
+			// Clean up temporary files on success and on failure alike.
+			if (tempOutputFile != null) {
+				Files.deleteIfExists(tempOutputFile);
+			}
+			if (tempInputFile != null) {
+				if (isZip) {
+					// index.html lives inside the extraction directory; removing the directory removes it too.
+					GeneralUtils.deleteDirectory(tempInputFile.getParent());
+				} else {
+					Files.deleteIfExists(tempInputFile);
+				}
+			}
+		}
+	}
+
+	/**
+	 * Extracts the uploaded ZIP archive into a fresh temporary directory and returns the
+	 * path of the {@code index.html} located at the root of that directory.
+	 *
+	 * <p>Entries whose names would resolve outside the extraction directory are rejected,
+	 * and missing parent directories of nested entries are created. If extraction fails for
+	 * any reason the temporary directory is deleted before the exception propagates.
+	 *
+	 * @param zipFile the uploaded ZIP archive
+	 * @return the path to {@code index.html}; its parent is the extraction directory, which the caller must delete
+	 * @throws IOException              if an entry escapes the extraction directory or cannot be written
+	 * @throws IllegalArgumentException if the archive has no {@code index.html} at its root
+	 */
+	private Path unzipAndGetMainHtml(MultipartFile zipFile) throws IOException {
+		// Normalise once so the containment check below compares like with like.
+		Path tempDirectory = Files.createTempDirectory("unzipped_").toAbsolutePath().normalize();
+		boolean extracted = false;
+		try (ZipInputStream zipIn = new ZipInputStream(new ByteArrayInputStream(zipFile.getBytes()))) {
+			ZipEntry entry;
+			while ((entry = zipIn.getNextEntry()) != null) {
+				Path filePath = tempDirectory.resolve(entry.getName()).normalize();
+				// Guard against "Zip Slip": names such as "../../x" must not leave the temp directory.
+				if (!filePath.startsWith(tempDirectory)) {
+					throw new IOException("ZIP entry is outside of the target directory: " + entry.getName());
+				}
+				if (entry.isDirectory()) {
+					Files.createDirectories(filePath);
+				} else {
+					// Archives are not required to list directories before the files inside them.
+					Files.createDirectories(filePath.getParent());
+					Files.copy(zipIn, filePath);
+				}
+				zipIn.closeEntry();
+			}
+
+			Path mainHtml = tempDirectory.resolve("index.html");
+			if (!Files.isRegularFile(mainHtml)) {
+				throw new IllegalArgumentException("ZIP file must contain an index.html at its root.");
+			}
+			extracted = true;
+			return mainHtml;
+		} finally {
+			if (!extracted) {
+				GeneralUtils.deleteDirectory(tempDirectory);
+			}
+		}
+	}
+
+}
@@ -0,0 +1,73 @@
+package stirling.software.SPDF.controller.api.converters;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestPart;
+import org.springframework.web.bind.annotation.RestController;
+import org.springframework.web.multipart.MultipartFile;
+
+import io.swagger.v3.oas.annotations.Operation;
+import io.swagger.v3.oas.annotations.Parameter;
+import io.swagger.v3.oas.annotations.tags.Tag;
+import stirling.software.SPDF.utils.GeneralUtils;
+import stirling.software.SPDF.utils.ProcessExecutor;
+import stirling.software.SPDF.utils.WebResponseUtils;
+
+@RestController
+@Tag(name = "Convert", description = "Convert APIs")
+public class ConvertWebsiteToPDF {
+
+	/**
+	 * Renders the page at the given URL to a PDF by invoking the {@code weasyprint} command.
+	 *
+	 * <p>The URL must start with {@code http://} or {@code https://} and must also be accepted
+	 * by {@code GeneralUtils.isValidURL}; anything else is rejected before the external
+	 * command is built, so local paths and option-like strings never reach the command line.
+	 * The temporary output file is removed whether or not the conversion succeeds.
+	 *
+	 * @param URL the address of the page to convert
+	 * @return the generated PDF, named after the URL (see {@link #convertURLToFileName(String)})
+	 * @throws IllegalArgumentException if the URL is missing or fails validation
+	 * @throws IOException              if the temporary output file cannot be created or read
+	 * @throws InterruptedException     declared for the external command execution
+	 */
+	@PostMapping(consumes = "multipart/form-data", value = "/url-to-pdf")
+	@Operation(
+		summary = "Convert a URL to a PDF",
+		description = "This endpoint fetches content from a URL and converts it to a PDF format."
+	)
+	public ResponseEntity<byte[]> urlToPdf(
+		@RequestPart(required = true, value = "urlInput")
+		@Parameter(description = "The input URL to be converted to a PDF file", required = true)
+			String URL) throws IOException, InterruptedException {
+
+		// Validate the URL format: reject unless it is BOTH an http(s) address AND a well-formed URL.
+		if (URL == null || !URL.matches("^https?://.*") || !GeneralUtils.isValidURL(URL)) {
+			throw new IllegalArgumentException("Invalid URL format provided.");
+		}
+
+		// Prepare the output file path
+		Path tempOutputFile = Files.createTempFile("output_", ".pdf");
+		try {
+			// Prepare the WeasyPrint command
+			List<String> command = new ArrayList<>();
+			command.add("weasyprint");
+			command.add(URL);
+			command.add(tempOutputFile.toString());
+
+			ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT).runCommandWithOutputHandling(command);
+
+			// Read the generated PDF file
+			byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
+
+			// Convert URL to a safe filename
+			String outputFilename = convertURLToFileName(URL);
+
+			return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
+		} finally {
+			// Clean up the temporary file on success and on failure alike
+			Files.deleteIfExists(tempOutputFile);
+		}
+	}
+
+	/**
+	 * Derives a download filename from a URL: every character other than an ASCII letter
+	 * or digit becomes {@code _}, the result is cut to at most 50 characters, and
+	 * {@code .pdf} is appended.
+	 *
+	 * @param url the URL to convert
+	 * @return the sanitised filename ending in {@code .pdf}
+	 */
+	private String convertURLToFileName(String url) {
+		String safeName = url.replaceAll("[^a-zA-Z0-9]", "_");
+		if (safeName.length() > 50) {
+			safeName = safeName.substring(0, 50); // restrict to 50 characters
+		}
+		return safeName + ".pdf";
+	}
+
+}