Fixes and others (#83)

Features
-------------
- Custom application name via the APP_NAME Docker environment variable
(The next three are done with OCRmyPDF)
- Extra OCR features for scanned-page cleanup (tilt/noise fixing)
- Added OCR ability to read a PDF and output to a text file
- Added a dedicated PDF/A conversion page

Bug fixes
--------------
- Fix concurrent calls on LibreOffice and OCRmyPDF
- jbig fix for compression
- Fix for compression metadata issues caused by forced conversions to PDF/A

Other
--------
- Removed the separate UK and US language options in favour of a single "English", due to the extra development time they required
- There is still an issue with concurrent files for PDF to image... will fix later, sorry
2023-04-01 21:02:54 +01:00
parent 0b4e3de455
commit 6d5dbd9729
23 changed files with 531 additions and 537 deletions
--- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertImgPDFController.java
+++ b/src/main/java/stirling/software/SPDF/controller/converters/ConvertImgPDFController.java
@@ -73,7 +73,6 @@ public class ConvertImgPDFController {
        if (singleImage) {
            HttpHeaders headers = new HttpHeaders();
            headers.setContentType(MediaType.parseMediaType(getMediaType(imageFormat)));
-            headers.setCacheControl("must-revalidate, post-check=0, pre-check=0");
            ResponseEntity<Resource> response = new ResponseEntity<>(new ByteArrayResource(result), headers, HttpStatus.OK);
            return response;
        } else {
--- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertOfficeController.java
+++ b/src/main/java/stirling/software/SPDF/controller/converters/ConvertOfficeController.java
@@ -53,7 +53,7 @@ public byte[] convertToPdf(MultipartFile inputFile) throws IOException, Interrup
            "-o",
            tempOutputFile.toString(),
            tempInputFile.toString()));
-    int returnCode = ProcessExecutor.runCommandWithOutputHandling(command);
+    int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);

    // Read the converted PDF file
    byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
--- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertPDFToPDFA.java
+++ b/src/main/java/stirling/software/SPDF/controller/converters/ConvertPDFToPDFA.java
@@ -0,0 +1,75 @@
+package stirling.software.SPDF.controller.converters;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.springframework.http.HttpHeaders;
+import org.springframework.http.MediaType;
+import org.springframework.http.ResponseEntity;
+import org.springframework.stereotype.Controller;
+import org.springframework.ui.Model;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.multipart.MultipartFile;
+
+import com.itextpdf.xmp.XMPException;
+
+import stirling.software.SPDF.utils.PdfUtils;
+import stirling.software.SPDF.utils.ProcessExecutor;
+/** Serves the PDF/A conversion page and converts uploaded PDFs to PDF/A via OCRmyPDF. */
+@Controller
+public class ConvertPDFToPDFA {
+
+    /** Renders the conversion form and marks "pdf-to-pdfa" as the current page. */
+    @GetMapping("/pdf-to-pdfa")
+    public String pdfToPdfAForm(Model model) {
+        model.addAttribute("currentPage", "pdf-to-pdfa");
+        return "convert/pdf-to-pdfa";
+    }
+
+    /**
+     * Converts the uploaded PDF to PDF/A by running {@code ocrmypdf} on temporary copies.
+     *
+     * @param inputFile uploaded PDF (multipart field {@code fileInput})
+     * @return the converted bytes as a PDF attachment named {@code <base name>_PDFA.pdf}
+     * @throws IOException if a temporary file cannot be created, written, read or deleted
+     * @throws InterruptedException declared for the external command run (presumably on interruption)
+     */
+    @PostMapping("/pdf-to-pdfa")
+    public ResponseEntity<byte[]> pdfToPdfA(
+            @RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
+        Path tempInputFile = Files.createTempFile("input_", ".pdf");
+        Path tempOutputFile = null;
+        byte[] pdfBytes;
+        try {
+            inputFile.transferTo(tempInputFile.toFile());
+            tempOutputFile = Files.createTempFile("output_", ".pdf");
+            List<String> command = new ArrayList<>(Arrays.asList(
+                    "ocrmypdf", "--skip-text", "--tesseract-timeout=0", "--output-type", "pdfa",
+                    tempInputFile.toString(), tempOutputFile.toString()));
+            // NOTE(review): the exit code is still discarded; confirm whether non-zero needs handling.
+            ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
+            pdfBytes = Files.readAllBytes(tempOutputFile);
+        } finally {
+            // Remove the temp files even when the transfer or the conversion throws.
+            Files.deleteIfExists(tempInputFile);
+            if (tempOutputFile != null) {
+                Files.deleteIfExists(tempOutputFile);
+            }
+        }
+
+        // getOriginalFilename() can return null; treat that like an empty name instead of an NPE.
+        String originalName = inputFile.getOriginalFilename();
+        String baseName = originalName == null ? "" : originalName.replaceFirst("[.][^.]+$", "");
+        HttpHeaders headers = new HttpHeaders();
+        headers.setContentType(MediaType.APPLICATION_PDF);
+        headers.setContentDispositionFormData("attachment", baseName + "_PDFA.pdf");
+        return ResponseEntity.ok().headers(headers).body(pdfBytes);
+    }
+}