Removal of Ghostscript to use qpdf and tesseract directly (#2338)

* navbar fix multi tool and compress location
* release notes and ghostscript removal
* cleanups
* formatting
* update docs
* more
* more
* docs
* release bump
* Hardening suggestions for Stirling-PDF / ghostscript (#2339)
  * Protect `readLine()` against DoS
  * Sanitized user-provided file names in HTTP multipart uploads
  ---------
  Co-authored-by: pixeebot[bot] <104101892+pixeebot[bot]@users.noreply.github.com>
---------
Co-authored-by: pixeebot[bot] <104101892+pixeebot[bot]@users.noreply.github.com>
2024-11-26 20:50:35 +00:00
parent 654bc94d44
commit 833b3c45c6
69 changed files with 1106 additions and 665 deletions
--- a/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java
+++ b/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java
@@ -188,7 +188,7 @@ public class EndpointConfiguration {
        addEndpointToGroup("OpenCV", "extract-image-scans");

        // LibreOffice
-        addEndpointToGroup("LibreOffice", "repair");
+        addEndpointToGroup("qpdf", "repair");
        addEndpointToGroup("LibreOffice", "file-to-pdf");
        addEndpointToGroup("LibreOffice", "pdf-to-word");
        addEndpointToGroup("LibreOffice", "pdf-to-presentation");
@@ -199,10 +199,11 @@ public class EndpointConfiguration {
        // Unoconv
        addEndpointToGroup("Unoconv", "file-to-pdf");

-        // OCRmyPDF
-        addEndpointToGroup("OCRmyPDF", "compress-pdf");
-        addEndpointToGroup("OCRmyPDF", "pdf-to-pdfa");
-        addEndpointToGroup("OCRmyPDF", "ocr-pdf");
+        // qpdf
+        addEndpointToGroup("qpdf", "compress-pdf");
+        addEndpointToGroup("qpdf", "pdf-to-pdfa");
+
+        addEndpointToGroup("tesseract", "ocr-pdf");

        // Java
        addEndpointToGroup("Java", "merge-pdfs");
@@ -248,10 +249,10 @@ public class EndpointConfiguration {
        addEndpointToGroup("Javascript", "compare");
        addEndpointToGroup("Javascript", "adjust-contrast");

-        // Ghostscript dependent endpoints
-        addEndpointToGroup("Ghostscript", "compress-pdf");
-        addEndpointToGroup("Ghostscript", "pdf-to-pdfa");
-        addEndpointToGroup("Ghostscript", "repair");
+        // qpdf dependent endpoints
+        addEndpointToGroup("qpdf", "compress-pdf");
+        addEndpointToGroup("qpdf", "pdf-to-pdfa");
+        addEndpointToGroup("qpdf", "repair");

        // Weasyprint dependent endpoints
        addEndpointToGroup("Weasyprint", "html-to-pdf");
--- a/src/main/java/stirling/software/SPDF/config/ExternalAppDepConfig.java
+++ b/src/main/java/stirling/software/SPDF/config/ExternalAppDepConfig.java
@@ -37,12 +37,13 @@ public class ExternalAppDepConfig {
    private final Map<String, List<String>> commandToGroupMapping =
            new HashMap<>() {
                {
-                    put("gs", List.of("Ghostscript"));
                    put("soffice", List.of("LibreOffice"));
-                    put("ocrmypdf", List.of("OCRmyPDF"));
                    put("weasyprint", List.of("Weasyprint"));
                    put("pdftohtml", List.of("Pdftohtml"));
                    put("unoconv", List.of("Unoconv"));
+                    put("qpdf", List.of("qpdf"));
+                    put("tesseract", List.of("tesseract"));
+                    
                }
            };

@@ -97,9 +98,9 @@ public class ExternalAppDepConfig {
    public void checkDependencies() {

        // Check core dependencies
-        checkDependencyAndDisableGroup("gs");
+    	checkDependencyAndDisableGroup("tesseract");
        checkDependencyAndDisableGroup("soffice");
-        checkDependencyAndDisableGroup("ocrmypdf");
+        checkDependencyAndDisableGroup("qpdf");
        checkDependencyAndDisableGroup("weasyprint");
        checkDependencyAndDisableGroup("pdftohtml");
        checkDependencyAndDisableGroup("unoconv");
--- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java
+++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java
@@ -1,12 +1,13 @@
 package stirling.software.SPDF.controller.api.converters;

-import java.io.FileOutputStream;
-import java.io.OutputStream;
+import java.io.File;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;

+import org.apache.commons.io.FileUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.springframework.http.MediaType;
@@ -37,59 +38,90 @@ public class ConvertPDFToPDFA {
    @Operation(
            summary = "Convert a PDF to a PDF/A",
            description =
-                    "This endpoint converts a PDF file to a PDF/A file. PDF/A is a format designed for long-term archiving of digital documents. Input:PDF Output:PDF Type:SISO")
+                    "This endpoint converts a PDF file to a PDF/A file using LibreOffice. PDF/A is a format designed for long-term archiving of digital documents. Input:PDF Output:PDF Type:SISO")
    public ResponseEntity<byte[]> pdfToPdfA(@ModelAttribute PdfToPdfARequest request)
            throws Exception {
        MultipartFile inputFile = request.getFileInput();
        String outputFormat = request.getOutputFormat();

-        // Convert MultipartFile to byte[]
-        byte[] pdfBytes = inputFile.getBytes();
-
-        // Save the uploaded file to a temporary location
-        Path tempInputFile = Files.createTempFile("input_", ".pdf");
-        try (OutputStream outputStream = new FileOutputStream(tempInputFile.toFile())) {
-            outputStream.write(pdfBytes);
+        // Validate input file type
+        if (!"application/pdf".equals(inputFile.getContentType())) {
+            logger.error("Invalid input file type: {}", inputFile.getContentType());
+            throw new IllegalArgumentException("Input file must be a PDF");
        }

-        // Prepare the output file path
-        Path tempOutputFile = Files.createTempFile("output_", ".pdf");
-
-        // Prepare the ghostscript command
-        List<String> command = new ArrayList<>();
-        command.add("gs");
-        command.add("-dPDFA=" + ("pdfa".equals(outputFormat) ? "2" : "1"));
-        command.add("-dNOPAUSE");
-        command.add("-dBATCH");
-        command.add("-sColorConversionStrategy=sRGB");
-        command.add("-sDEVICE=pdfwrite");
-        command.add("-dPDFACompatibilityPolicy=2");
-        command.add("-o");
-        command.add(tempOutputFile.toString());
-        command.add(tempInputFile.toString());
-
-        ProcessExecutorResult returnCode =
-                ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
-                        .runCommandWithOutputHandling(command);
-
-        if (returnCode.getRc() != 0) {
-            logger.info(
-                    outputFormat + " conversion failed with return code: " + returnCode.getRc());
+        // Get the original filename without extension
+        String originalFileName = Filenames.toSimpleFileName(inputFile.getOriginalFilename());
+        if (originalFileName == null || originalFileName.trim().isEmpty()) {
+            originalFileName = "output.pdf";
        }
+        String baseFileName =
+                originalFileName.contains(".")
+                        ? originalFileName.substring(0, originalFileName.lastIndexOf('.'))
+                        : originalFileName;
+
+        Path tempInputFile = null;
+        Path tempOutputDir = null;
+        byte[] fileBytes;

        try {
-            byte[] pdfBytesOutput = Files.readAllBytes(tempOutputFile);
-            // Return the optimized PDF as a response
-            String outputFilename =
-                    Filenames.toSimpleFileName(inputFile.getOriginalFilename())
-                                    .replaceFirst("[.][^.]+$", "")
-                            + "_PDFA.pdf";
+            // Save uploaded file to temp location
+            tempInputFile = Files.createTempFile("input_", ".pdf");
+            inputFile.transferTo(tempInputFile);
+
+            // Create temp output directory
+            tempOutputDir = Files.createTempDirectory("output_");
+
+            // Determine PDF/A filter based on requested format
+            String pdfFilter =
+                    "pdfa".equals(outputFormat)
+                            ? "writer_pdf_Export:{'SelectPdfVersion':{'Value':'2'}}:writer_pdf_Export"
+                            : "writer_pdf_Export:{'SelectPdfVersion':{'Value':'1'}}:writer_pdf_Export";
+
+            // Prepare LibreOffice command
+            List<String> command =
+                    new ArrayList<>(
+                            Arrays.asList(
+                                    "soffice",
+                                    "--headless",
+                                    "--nologo",
+                                    "--convert-to",
+                                    "pdf:" + pdfFilter,
+                                    "--outdir",
+                                    tempOutputDir.toString(),
+                                    tempInputFile.toString()));
+
+            ProcessExecutorResult returnCode =
+                    ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE)
+                            .runCommandWithOutputHandling(command);
+
+            if (returnCode.getRc() != 0) {
+                logger.error("PDF/A conversion failed with return code: {}", returnCode.getRc());
+                throw new RuntimeException("PDF/A conversion failed");
+            }
+
+            // Get the output file
+            File[] outputFiles = tempOutputDir.toFile().listFiles();
+            if (outputFiles == null || outputFiles.length != 1) {
+                throw new RuntimeException(
+                        "Expected exactly one output file but found "
+                                + (outputFiles == null ? "none" : outputFiles.length));
+            }
+
+            fileBytes = FileUtils.readFileToByteArray(outputFiles[0]);
+            String outputFilename = baseFileName + "_PDFA.pdf";
+
            return WebResponseUtils.bytesToWebResponse(
-                    pdfBytesOutput, outputFilename, MediaType.APPLICATION_PDF);
+                    fileBytes, outputFilename, MediaType.APPLICATION_PDF);
+
        } finally {
-            // Clean up the temporary files
-            Files.deleteIfExists(tempInputFile);
-            Files.deleteIfExists(tempOutputFile);
+            // Clean up temporary files
+            if (tempInputFile != null) {
+                Files.deleteIfExists(tempInputFile);
+            }
+            if (tempOutputDir != null) {
+                FileUtils.deleteDirectory(tempOutputDir.toFile());
+            }
        }
    }
 }
--- a/src/main/java/stirling/software/SPDF/controller/api/converters/ExtractCSVController.java
+++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ExtractCSVController.java
@@ -20,7 +20,7 @@ import org.springframework.web.bind.annotation.RestController;

 import io.swagger.v3.oas.annotations.Operation;
 import io.swagger.v3.oas.annotations.tags.Tag;
-import stirling.software.SPDF.controller.api.CropController;
+
 import stirling.software.SPDF.model.api.extract.PDFFilePage;
 import stirling.software.SPDF.pdf.FlexibleCSVWriter;
 import technology.tabula.ObjectExtractor;
@@ -37,11 +37,15 @@ public class ExtractCSVController {
    private static final Logger logger = LoggerFactory.getLogger(ExtractCSVController.class);

    @PostMapping(value = "/pdf/csv", consumes = "multipart/form-data")
-    @Operation(summary = "Extracts a CSV document from a PDF", description = "This operation takes an input PDF file and returns CSV file of whole page. Input:PDF Output:CSV Type:SISO")
+    @Operation(
+            summary = "Extracts a CSV document from a PDF",
+            description =
+                    "This operation takes an input PDF file and returns CSV file of whole page. Input:PDF Output:CSV Type:SISO")
    public ResponseEntity<String> PdfToCsv(@ModelAttribute PDFFilePage form) throws Exception {
        StringWriter writer = new StringWriter();
        try (PDDocument document = Loader.loadPDF(form.getFileInput().getBytes())) {
-            CSVFormat format = CSVFormat.EXCEL.builder().setEscape('"').setQuoteMode(QuoteMode.ALL).build();
+            CSVFormat format =
+                    CSVFormat.EXCEL.builder().setEscape('"').setQuoteMode(QuoteMode.ALL).build();
            Writer csvWriter = new FlexibleCSVWriter(format);
            SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm();
            try (ObjectExtractor extractor = new ObjectExtractor(document)) {
@@ -56,8 +60,8 @@ public class ExtractCSVController {
                ContentDisposition.builder("attachment")
                        .filename(
                                form.getFileInput()
-                                        .getOriginalFilename()
-                                        .replaceFirst("[.][^.]+$", "")
+                                                .getOriginalFilename()
+                                                .replaceFirst("[.][^.]+$", "")
                                        + "_extracted.csv")
                        .build());
        headers.setContentType(MediaType.parseMediaType("text/csv"));
--- a/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java
+++ b/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java
@@ -10,7 +10,6 @@ import java.util.List;

 import javax.imageio.ImageIO;

-import org.apache.commons.io.FileUtils;
 import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.pdmodel.PDDocument;
@@ -53,6 +52,54 @@ public class CompressController {
        this.pdfDocumentFactory = pdfDocumentFactory;
    }

+    private void compressImagesInPDF(Path pdfFile, double initialScaleFactor) throws Exception {
+        byte[] fileBytes = Files.readAllBytes(pdfFile);
+        try (PDDocument doc = Loader.loadPDF(fileBytes)) {
+            double scaleFactor = initialScaleFactor;
+
+            for (PDPage page : doc.getPages()) {
+                PDResources res = page.getResources();
+                if (res != null && res.getXObjectNames() != null) {
+                    for (COSName name : res.getXObjectNames()) {
+                        PDXObject xobj = res.getXObject(name);
+                        if (xobj instanceof PDImageXObject) {
+                            PDImageXObject image = (PDImageXObject) xobj;
+                            BufferedImage bufferedImage = image.getImage();
+
+                            int newWidth = (int) (bufferedImage.getWidth() * scaleFactor);
+                            int newHeight = (int) (bufferedImage.getHeight() * scaleFactor);
+
+                            if (newWidth == 0 || newHeight == 0) {
+                                continue;
+                            }
+
+                            Image scaledImage =
+                                    bufferedImage.getScaledInstance(
+                                            newWidth, newHeight, Image.SCALE_SMOOTH);
+
+                            BufferedImage scaledBufferedImage =
+                                    new BufferedImage(
+                                            newWidth, newHeight, BufferedImage.TYPE_INT_RGB);
+                            scaledBufferedImage.getGraphics().drawImage(scaledImage, 0, 0, null);
+
+                            ByteArrayOutputStream compressedImageStream =
+                                    new ByteArrayOutputStream();
+                            ImageIO.write(scaledBufferedImage, "jpeg", compressedImageStream);
+                            byte[] imageBytes = compressedImageStream.toByteArray();
+                            compressedImageStream.close();
+
+                            PDImageXObject compressedImage =
+                                    PDImageXObject.createFromByteArray(
+                                            doc, imageBytes, image.getCOSObject().toString());
+                            res.put(name, compressedImage);
+                        }
+                    }
+                }
+            }
+            doc.save(pdfFile.toString());
+        }
+    }
+
    @PostMapping(consumes = "multipart/form-data", value = "/compress-pdf")
    @Operation(
            summary = "Optimize PDF file",
@@ -75,209 +122,92 @@ public class CompressController {
            autoMode = true;
        }

-        // Save the uploaded file to a temporary location
        Path tempInputFile = Files.createTempFile("input_", ".pdf");
        inputFile.transferTo(tempInputFile.toFile());

        long inputFileSize = Files.size(tempInputFile);

-        // Prepare the output file path
-
        Path tempOutputFile = null;
        byte[] pdfBytes;
        try {
            tempOutputFile = Files.createTempFile("output_", ".pdf");
-            // Determine initial optimization level based on expected size reduction, only if in
-            // autoMode
+
            if (autoMode) {
                double sizeReductionRatio = expectedOutputSize / (double) inputFileSize;
-                if (sizeReductionRatio > 0.7) {
-                    optimizeLevel = 1;
-                } else if (sizeReductionRatio > 0.5) {
-                    optimizeLevel = 2;
-                } else if (sizeReductionRatio > 0.35) {
-                    optimizeLevel = 3;
-                } else {
-                    optimizeLevel = 3;
-                }
+                optimizeLevel = determineOptimizeLevel(sizeReductionRatio);
            }

            boolean sizeMet = false;
-            while (!sizeMet && optimizeLevel <= 4) {
-                // Prepare the Ghostscript command
-                List<String> command = new ArrayList<>();
-                command.add("gs");
-                command.add("-sDEVICE=pdfwrite");
-                command.add("-dCompatibilityLevel=1.5");
+            while (!sizeMet && optimizeLevel <= 9) {

-                switch (optimizeLevel) {
-                    case 1:
-                        command.add("-dPDFSETTINGS=/prepress");
-                        break;
-                    case 2:
-                        command.add("-dPDFSETTINGS=/printer");
-                        break;
-                    case 3:
-                        command.add("-dPDFSETTINGS=/ebook");
-                        break;
-                    case 4:
-                        command.add("-dPDFSETTINGS=/screen");
-                        break;
-                    default:
-                        command.add("-dPDFSETTINGS=/default");
+                // Apply additional image compression for levels 6-9
+                if (optimizeLevel >= 6) {
+                    // Calculate scale factor based on optimization level
+                    double scaleFactor =
+                            switch (optimizeLevel) {
+                                case 6 -> 0.9; // 90% of original size
+                                case 7 -> 0.8; // 80% of original size
+                                case 8 -> 0.65; // 65% of original size
+                                case 9 -> 0.5; // 50% of original size
+                                default -> 1.0;
+                            };
+                    compressImagesInPDF(tempInputFile, scaleFactor);
                }

-                command.add("-dNOPAUSE");
-                command.add("-dQUIET");
-                command.add("-dBATCH");
-                command.add("-sOutputFile=" + tempOutputFile.toString());
+                // Run QPDF optimization
+                List<String> command = new ArrayList<>();
+                command.add("qpdf");
+                if (request.getNormalize()) {
+                    command.add("--normalize-content=y");
+                }
+                if (request.getLinearize()) {
+                    command.add("--linearize");
+                }
+                command.add("--optimize-images");
+                command.add("--recompress-flate");
+                command.add("--compression-level=" + optimizeLevel);
+                command.add("--compress-streams=y");
+                command.add("--object-streams=generate");
                command.add(tempInputFile.toString());
+                command.add(tempOutputFile.toString());

-                ProcessExecutorResult returnCode =
-                        ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
-                                .runCommandWithOutputHandling(command);
+                ProcessExecutorResult returnCode = null;
+                try {
+                    returnCode =
+                            ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF)
+                                    .runCommandWithOutputHandling(command);
+                } catch (Exception e) {
+                    if (returnCode != null && returnCode.getRc() != 3) {
+                        throw e;
+                    }
+                }

-                // Check if file size is within expected size or not auto mode so instantly finish
+                // Check if file size is within expected size or not auto mode
                long outputFileSize = Files.size(tempOutputFile);
                if (outputFileSize <= expectedOutputSize || !autoMode) {
                    sizeMet = true;
                } else {
-                    // Increase optimization level for next iteration
-                    optimizeLevel++;
-                    if (autoMode && optimizeLevel > 4) {
-                        logger.info("Skipping level 5 due to bad results in auto mode");
+                    optimizeLevel =
+                            incrementOptimizeLevel(
+                                    optimizeLevel, outputFileSize, expectedOutputSize);
+                    if (autoMode && optimizeLevel > 9) {
+                        logger.info("Maximum compression level reached in auto mode");
                        sizeMet = true;
-                    } else {
-                        logger.info(
-                                "Increasing ghostscript optimisation level to " + optimizeLevel);
                    }
                }
            }

-            if (expectedOutputSize != null && autoMode) {
-                long outputFileSize = Files.size(tempOutputFile);
-                byte[] fileBytes = Files.readAllBytes(tempOutputFile);
-                if (outputFileSize > expectedOutputSize) {
-                    try (PDDocument doc = Loader.loadPDF(fileBytes)) {
-                        long previousFileSize = 0;
-                        double scaleFactorConst = 0.9f;
-                        double scaleFactor = 0.9f;
-                        while (true) {
-                            for (PDPage page : doc.getPages()) {
-                                PDResources res = page.getResources();
-                                if (res != null && res.getXObjectNames() != null) {
-                                    for (COSName name : res.getXObjectNames()) {
-                                        PDXObject xobj = res.getXObject(name);
-                                        if (xobj != null && xobj instanceof PDImageXObject) {
-                                            PDImageXObject image = (PDImageXObject) xobj;
-
-                                            // Get the image in BufferedImage format
-                                            BufferedImage bufferedImage = image.getImage();
-
-                                            // Calculate the new dimensions
-                                            int newWidth =
-                                                    (int)
-                                                            (bufferedImage.getWidth()
-                                                                    * scaleFactorConst);
-                                            int newHeight =
-                                                    (int)
-                                                            (bufferedImage.getHeight()
-                                                                    * scaleFactorConst);
-
-                                            // If the new dimensions are zero, skip this iteration
-                                            if (newWidth == 0 || newHeight == 0) {
-                                                continue;
-                                            }
-
-                                            // Otherwise, proceed with the scaling
-                                            Image scaledImage =
-                                                    bufferedImage.getScaledInstance(
-                                                            newWidth,
-                                                            newHeight,
-                                                            Image.SCALE_SMOOTH);
-
-                                            // Convert the scaled image back to a BufferedImage
-                                            BufferedImage scaledBufferedImage =
-                                                    new BufferedImage(
-                                                            newWidth,
-                                                            newHeight,
-                                                            BufferedImage.TYPE_INT_RGB);
-                                            scaledBufferedImage
-                                                    .getGraphics()
-                                                    .drawImage(scaledImage, 0, 0, null);
-
-                                            // Compress the scaled image
-                                            ByteArrayOutputStream compressedImageStream =
-                                                    new ByteArrayOutputStream();
-                                            ImageIO.write(
-                                                    scaledBufferedImage,
-                                                    "jpeg",
-                                                    compressedImageStream);
-                                            byte[] imageBytes = compressedImageStream.toByteArray();
-                                            compressedImageStream.close();
-
-                                            PDImageXObject compressedImage =
-                                                    PDImageXObject.createFromByteArray(
-                                                            doc,
-                                                            imageBytes,
-                                                            image.getCOSObject().toString());
-
-                                            // Replace the image in the resources with the
-                                            // compressed
-                                            // version
-                                            res.put(name, compressedImage);
-                                        }
-                                    }
-                                }
-                            }
-
-                            // save the document to tempOutputFile again
-                            doc.save(tempOutputFile.toString());
-
-                            long currentSize = Files.size(tempOutputFile);
-                            // Check if the overall PDF size is still larger than expectedOutputSize
-                            if (currentSize > expectedOutputSize) {
-                                // Log the current file size and scaleFactor
-
-                                logger.info(
-                                        "Current file size: "
-                                                + FileUtils.byteCountToDisplaySize(currentSize));
-                                logger.info("Current scale factor: " + scaleFactor);
-
-                                // The file is still too large, reduce scaleFactor and try again
-                                scaleFactor *= 0.9f; // reduce scaleFactor by 10%
-                                // Avoid scaleFactor being too small, causing the image to shrink to
-                                // 0
-                                if (scaleFactor < 0.2f || previousFileSize == currentSize) {
-                                    throw new RuntimeException(
-                                            "Could not reach the desired size without excessively degrading image quality, lowest size recommended is "
-                                                    + FileUtils.byteCountToDisplaySize(currentSize)
-                                                    + ", "
-                                                    + currentSize
-                                                    + " bytes");
-                                }
-                                previousFileSize = currentSize;
-                            } else {
-                                // The file is small enough, break the loop
-                                break;
-                            }
-                        }
-                    }
-                }
-            }
            // Read the optimized PDF file
            pdfBytes = Files.readAllBytes(tempOutputFile);
            Path finalFile = tempOutputFile;
+
            // Check if optimized file is larger than the original
            if (pdfBytes.length > inputFileSize) {
-                // Log the occurrence
                logger.warn(
                        "Optimized file is larger than the original. Returning the original file instead.");
-
-                // Read the original file again
                finalFile = tempInputFile;
            }
-            // Return the optimized PDF as a response
+
            String outputFilename =
                    Filenames.toSimpleFileName(inputFile.getOriginalFilename())
                                    .replaceFirst("[.][^.]+$", "")
@@ -286,10 +216,31 @@ public class CompressController {
                    pdfDocumentFactory.load(finalFile.toFile()), outputFilename);

        } finally {
-            // Clean up the temporary files
-            // deleted by multipart file handler deu to transferTo?
-            // Files.deleteIfExists(tempInputFile);
            Files.deleteIfExists(tempOutputFile);
        }
    }
+
+    private int determineOptimizeLevel(double sizeReductionRatio) {
+        if (sizeReductionRatio > 0.9) return 1;
+        if (sizeReductionRatio > 0.8) return 2;
+        if (sizeReductionRatio > 0.7) return 3;
+        if (sizeReductionRatio > 0.6) return 4;
+        if (sizeReductionRatio > 0.5) return 5;
+        if (sizeReductionRatio > 0.4) return 6;
+        if (sizeReductionRatio > 0.3) return 7;
+        if (sizeReductionRatio > 0.2) return 8;
+        return 9;
+    }
+
+    private int incrementOptimizeLevel(int currentLevel, long currentSize, long targetSize) {
+        double currentRatio = currentSize / (double) targetSize;
+        logger.info("Current compression ratio: {}", String.format("%.2f", currentRatio));
+
+        if (currentRatio > 2.0) {
+            return Math.min(9, currentLevel + 3);
+        } else if (currentRatio > 1.5) {
+            return Math.min(9, currentLevel + 2);
+        }
+        return Math.min(9, currentLevel + 1);
+    }
 }
--- a/src/main/java/stirling/software/SPDF/controller/api/misc/FakeScanControllerWIP.java
+++ b/src/main/java/stirling/software/SPDF/controller/api/misc/FakeScanControllerWIP.java
@@ -58,7 +58,7 @@ public class FakeScanControllerWIP {
    @Operation(
            summary = "Repair a PDF file",
            description =
-                    "This endpoint repairs a given PDF file by running Ghostscript command. The PDF is first saved to a temporary location, repaired, read back, and then returned as a response.")
+                    "This endpoint repairs a given PDF file by running qpdf command. The PDF is first saved to a temporary location, repaired, read back, and then returned as a response.")
    public ResponseEntity<byte[]> fakeScan(@ModelAttribute PDFFile request) throws IOException {
        MultipartFile inputFile = request.getFileInput();

--- a/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java
+++ b/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java
@@ -1,19 +1,31 @@
 package stirling.software.SPDF.controller.api.misc;

-import java.io.ByteArrayInputStream;
+import io.github.pixee.security.BoundedLineReader;
+import io.github.pixee.security.Filenames;
+import java.awt.image.BufferedImage;
+import java.io.BufferedReader;
 import java.io.File;
-import java.io.FileOutputStream;
+import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.List;
 import java.util.stream.Collectors;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipOutputStream;

+import javax.imageio.ImageIO;
+
+import org.apache.pdfbox.multipdf.PDFMergerUtility;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.rendering.PDFRenderer;
+import org.apache.pdfbox.text.PDFTextStripper;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.http.MediaType;
 import org.springframework.http.ResponseEntity;
@@ -23,24 +35,29 @@ import org.springframework.web.bind.annotation.RequestMapping;
 import org.springframework.web.bind.annotation.RestController;
 import org.springframework.web.multipart.MultipartFile;

-import io.github.pixee.security.Filenames;
-import io.swagger.v3.oas.annotations.Operation;
 import io.swagger.v3.oas.annotations.tags.Tag;

+import lombok.extern.slf4j.Slf4j;
 import stirling.software.SPDF.model.ApplicationProperties;
 import stirling.software.SPDF.model.api.misc.ProcessPdfWithOcrRequest;
 import stirling.software.SPDF.service.CustomPDDocumentFactory;
-import stirling.software.SPDF.utils.ProcessExecutor;
-import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
-import stirling.software.SPDF.utils.WebResponseUtils;

@RestController
@RequestMapping("/api/v1/misc")
@Tag(name = "Misc", description = "Miscellaneous APIs")
+@Slf4j
 public class OCRController {

-    @Autowired ApplicationProperties applicationProperties;
+    @Autowired private ApplicationProperties applicationProperties;

+    private final CustomPDDocumentFactory pdfDocumentFactory;
+
+    /**
+     * Creates the controller with the factory it uses to load PDF documents.
+     *
+     * @param pdfDocumentFactory factory stored in the {@code pdfDocumentFactory} field
+     */
+    @Autowired
+    public OCRController(CustomPDDocumentFactory pdfDocumentFactory) {
+        this.pdfDocumentFactory = pdfDocumentFactory;
+    }
+
+    /** Gets the list of available Tesseract languages from the tessdata directory */
    public List<String> getAvailableTesseractLanguages() {
        String tessdataDir = applicationProperties.getSystem().getTessdataDir();
        File[] files = new File(tessdataDir).listFiles();
@@ -54,196 +71,161 @@ public class OCRController {
                .collect(Collectors.toList());
    }

-    private final CustomPDDocumentFactory pdfDocumentFactory;
-
-    @Autowired
-    public OCRController(CustomPDDocumentFactory pdfDocumentFactory) {
-        this.pdfDocumentFactory = pdfDocumentFactory;
-    }
-
    @PostMapping(consumes = "multipart/form-data", value = "/ocr-pdf")
-    @Operation(
-            summary = "Process a PDF file with OCR",
-            description =
-                    "This endpoint processes a PDF file using OCR (Optical Character Recognition). Users can specify languages, sidecar, deskew, clean, cleanFinal, ocrType, ocrRenderType, and removeImagesAfter options. Input:PDF Output:PDF Type:SI-Conditional")
+    /*
+     * OCRs an uploaded PDF one page at a time and returns the merged result.
+     *
+     * Each page that needs OCR is rendered to a 300 DPI PNG and passed to the `tesseract`
+     * command, which writes a single-page PDF. When ocrType is "skip-text", pages that already
+     * contain extractable text are copied unchanged instead. Any other ocrType (including
+     * null) OCRs every page. The per-page PDFs are merged and returned as an attachment named
+     * "<original name>_OCR.pdf".
+     *
+     * request: carries the uploaded file, the requested languages and the ocrType.
+     * Throws IOException when no language is given, when none of the given languages is
+     * available, or on file errors; RuntimeException when tesseract exits with a non-zero
+     * code; InterruptedException when interrupted while waiting for tesseract.
+     * The temporary working directory is removed before the method returns.
+     */
    public ResponseEntity<byte[]> processPdfWithOCR(
            @ModelAttribute ProcessPdfWithOcrRequest request)
            throws IOException, InterruptedException {
        MultipartFile inputFile = request.getFileInput();
-        List<String> selectedLanguages = request.getLanguages();
-        Boolean sidecar = request.isSidecar();
-        Boolean deskew = request.isDeskew();
-        Boolean clean = request.isClean();
-        Boolean cleanFinal = request.isCleanFinal();
+        List<String> languages = request.getLanguages();
        String ocrType = request.getOcrType();
-        String ocrRenderType = request.getOcrRenderType();
-        Boolean removeImagesAfter = request.isRemoveImagesAfter();
-        // --output-type pdfa
-        if (selectedLanguages == null || selectedLanguages.isEmpty()) {
-            throw new IOException("Please select at least one language.");
-        }

-        if (!"hocr".equals(ocrRenderType) && !"sandwich".equals(ocrRenderType)) {
-            throw new IOException("ocrRenderType wrong");
-        }
+        // Fail fast instead of hitting a NullPointerException in String.join further down.
+        if (languages == null || languages.isEmpty()) {
+            throw new IOException("Please select at least one language.");
+        }
+
+        // Only forward languages that are actually available, so arbitrary request strings
+        // never reach the tesseract command line.
+        List<String> availableLanguages = getAvailableTesseractLanguages();
+        List<String> validLanguages =
+                languages.stream().filter(availableLanguages::contains).toList();
+        if (validLanguages.isEmpty()) {
+            throw new IOException("None of the selected languages are valid.");
+        }
+        String languageOption = String.join("+", validLanguages);
+
+        // Null-safe: a missing or unrecognised ocrType means "OCR every page".
+        boolean skipPagesWithText = "skip-text".equals(ocrType);
+
+        Path tempDir = Files.createTempDirectory("ocr_process");
+        Path tempInputFile = tempDir.resolve("input.pdf");
+        Path tempOutputDir = tempDir.resolve("output");
+        Path tempImagesDir = tempDir.resolve("images");
+        Path finalOutputFile = tempDir.resolve("final_output.pdf");

-        // Get available Tesseract languages
-        List<String> availableLanguages = getAvailableTesseractLanguages();
-
-        // Validate selected languages
-        selectedLanguages =
-                selectedLanguages.stream().filter(availableLanguages::contains).toList();
-
-        if (selectedLanguages.isEmpty()) {
-            throw new IOException("None of the selected languages are valid.");
-        }
-        // Save the uploaded file to a temporary location
-        Path tempInputFile = Files.createTempFile("input_", ".pdf");
-        Path tempOutputFile = Files.createTempFile("output_", ".pdf");
-        Path sidecarTextPath = null;

        try {
+            // Created inside the try so the finally block removes tempDir even if this fails.
+            Files.createDirectories(tempOutputDir);
+            Files.createDirectories(tempImagesDir);
+
+            // Save input file
            inputFile.transferTo(tempInputFile.toFile());
+            PDFMergerUtility merger = new PDFMergerUtility();
+            merger.setDestinationFileName(finalOutputFile.toString());

-            // Run OCR Command
-            String languageOption = String.join("+", selectedLanguages);
+            try (PDDocument document = pdfDocumentFactory.load(tempInputFile.toFile())) {
+                PDFRenderer pdfRenderer = new PDFRenderer(document);
+                int pageCount = document.getNumberOfPages();

-            List<String> command =
-                    new ArrayList<>(
-                            Arrays.asList(
-                                    "ocrmypdf",
-                                    "--verbose",
-                                    "2",
-                                    "--output-type",
-                                    "pdf",
-                                    "--pdf-renderer",
-                                    ocrRenderType));
+                for (int pageNum = 0; pageNum < pageCount; pageNum++) {
+                    PDPage page = document.getPage(pageNum);

-            if (sidecar != null && sidecar) {
-                sidecarTextPath = Files.createTempFile("sidecar", ".txt");
-                command.add("--sidecar");
-                command.add(sidecarTextPath.toString());
-            }
+                    // Check for existing text, but only when the mode depends on it. The
+                    // stripper is limited to this page (1-based, inclusive) of the source
+                    // document, so the page is never attached to a second PDDocument.
+                    boolean hasText = false;
+                    if (skipPagesWithText) {
+                        PDFTextStripper stripper = new PDFTextStripper();
+                        stripper.setStartPage(pageNum + 1);
+                        stripper.setEndPage(pageNum + 1);
+                        hasText = !stripper.getText(document).trim().isEmpty();
+                    }

-            if (deskew != null && deskew) {
-                command.add("--deskew");
-            }
-            if (clean != null && clean) {
-                command.add("--clean");
-            }
-            if (cleanFinal != null && cleanFinal) {
-                command.add("--clean-final");
-            }
-            if (ocrType != null && !"".equals(ocrType)) {
-                if ("skip-text".equals(ocrType)) {
-                    command.add("--skip-text");
-                } else if ("force-ocr".equals(ocrType)) {
-                    command.add("--force-ocr");
-                } else if ("Normal".equals(ocrType)) {
+                    // Skip OCR only for "skip-text" pages that already have text.
+                    boolean shouldOcr = !(skipPagesWithText && hasText);

-                }
-            }
+                    Path pageOutputPath =
+                            tempOutputDir.resolve(String.format("page_%d.pdf", pageNum));

-            command.addAll(
-                    Arrays.asList(
-                            "--language",
-                            languageOption,
-                            tempInputFile.toString(),
-                            tempOutputFile.toString()));
+                    if (shouldOcr) {
+                        // Convert page to image
+                        BufferedImage image = pdfRenderer.renderImageWithDPI(pageNum, 300);
+                        Path imagePath =
+                                tempImagesDir.resolve(String.format("page_%d.png", pageNum));
+                        ImageIO.write(image, "png", imagePath.toFile());

-            // Run CLI command
-            ProcessExecutorResult result =
-                    ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF)
-                            .runCommandWithOutputHandling(command);
-            if (result.getRc() != 0
-                    && result.getMessages().contains("multiprocessing/synchronize.py")
-                    && result.getMessages()
-                            .contains("OSError: [Errno 38] Function not implemented")) {
-                command.add("--jobs");
-                command.add("1");
-                result =
-                        ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF)
-                                .runCommandWithOutputHandling(command);
-            }
+                        // Build OCR command; tesseract appends ".pdf" to the output base name
+                        List<String> command = new ArrayList<>();
+                        command.add("tesseract");
+                        command.add(imagePath.toString());
+                        command.add(
+                                tempOutputDir
+                                        .resolve(String.format("page_%d", pageNum))
+                                        .toString());
+                        command.add("-l");
+                        command.add(languageOption);
+                        command.add("pdf"); // Always output PDF

-            // Remove images from the OCR processed PDF if the flag is set to true
-            if (removeImagesAfter != null && removeImagesAfter) {
-                Path tempPdfWithoutImages = Files.createTempFile("output_", "_no_images.pdf");
+                        ProcessBuilder pb = new ProcessBuilder(command);
+                        // Merge stderr into stdout so one reader drains everything the child
+                        // writes and it can never block on a full, unread pipe.
+                        pb.redirectErrorStream(true);
+                        Process process = pb.start();

-                List<String> gsCommand =
-                        Arrays.asList(
-                                "gs",
-                                "-sDEVICE=pdfwrite",
-                                "-dFILTERIMAGE",
-                                "-o",
-                                tempPdfWithoutImages.toString(),
-                                tempOutputFile.toString());
+                        // Capture the combined process output
+                        try (BufferedReader reader =
+                                new BufferedReader(
+                                        new InputStreamReader(process.getInputStream()))) {
+                            String line;
+                            while ((line = BoundedLineReader.readLine(reader, 5_000_000)) != null) {
+                                log.debug("Tesseract: {}", line);
+                            }
+                        }

-                ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
-                        .runCommandWithOutputHandling(gsCommand);
-                tempOutputFile = tempPdfWithoutImages;
-            }
-            // Read the OCR processed PDF file
-            byte[] pdfBytes = pdfDocumentFactory.loadToBytes(tempOutputFile.toFile());
+                        int exitCode = process.waitFor();
+                        if (exitCode != 0) {
+                            throw new RuntimeException(
+                                    "Tesseract failed with exit code: " + exitCode);
+                        }

-            // Return the OCR processed PDF as a response
-            String outputFilename =
-                    Filenames.toSimpleFileName(inputFile.getOriginalFilename())
-                                    .replaceFirst("[.][^.]+$", "")
-                            + "_OCR.pdf";
-
-            if (sidecar != null && sidecar) {
-                // Create a zip file containing both the PDF and the text file
-                String outputZipFilename =
-                        Filenames.toSimpleFileName(inputFile.getOriginalFilename())
-                                        .replaceFirst("[.][^.]+$", "")
-                                + "_OCR.zip";
-                Path tempZipFile = Files.createTempFile("output_", ".zip");
-
-                try (ZipOutputStream zipOut =
-                        new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
-                    // Add PDF file to the zip
-                    ZipEntry pdfEntry = new ZipEntry(outputFilename);
-                    zipOut.putNextEntry(pdfEntry);
-                    try (ByteArrayInputStream pdfInputStream = new ByteArrayInputStream(pdfBytes)) {
-                        byte[] buffer = new byte[1024];
-                        int length;
-                        while ((length = pdfInputStream.read(buffer)) != -1) {
-                            zipOut.write(buffer, 0, length);
+                        // Add OCR'd PDF to merger
+                        merger.addSource(pageOutputPath.toFile());
+                    } else {
+                        // Save original page without OCR. importPage copies the page into the
+                        // new document; addPage would re-parent the source document's page.
+                        try (PDDocument pageDoc = new PDDocument()) {
+                            pageDoc.importPage(page);
+                            pageDoc.save(pageOutputPath.toFile());
+                            merger.addSource(pageOutputPath.toFile());
                        }
                    }
-                    zipOut.closeEntry();
-
-                    // Add text file to the zip
-                    ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt"));
-                    zipOut.putNextEntry(txtEntry);
-                    Files.copy(sidecarTextPath, zipOut);
-                    zipOut.closeEntry();
                }
-
-                byte[] zipBytes = Files.readAllBytes(tempZipFile);
-
-                // Clean up the temporary zip file
-                Files.deleteIfExists(tempZipFile);
-                Files.deleteIfExists(tempOutputFile);
-                Files.deleteIfExists(sidecarTextPath);
-
-                // Return the zip file containing both the PDF and the text file
-                return WebResponseUtils.bytesToWebResponse(
-                        zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
-            } else {
-                // Return the OCR processed PDF as a response
-                Files.deleteIfExists(tempOutputFile);
-                return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
            }
+
+            // Merge all pages into final PDF
+            merger.mergeDocuments(null);
+
+            // Read the final PDF file
+            byte[] pdfContent = Files.readAllBytes(finalOutputFile);
+            String outputFilename =
+                    Filenames.toSimpleFileName(inputFile.getOriginalFilename()).replaceFirst("[.][^.]+$", "") + "_OCR.pdf";
+
+            return ResponseEntity.ok()
+                    .header(
+                            "Content-Disposition",
+                            "attachment; filename=\"" + outputFilename + "\"")
+                    .contentType(MediaType.APPLICATION_PDF)
+                    .body(pdfContent);
+
        } finally {
-            // Clean up the temporary files
-            Files.deleteIfExists(tempOutputFile);
-            // Comment out as transferTo makes multipart handle cleanup
-            // Files.deleteIfExists(tempInputFile);
-            if (sidecarTextPath != null) {
-                Files.deleteIfExists(sidecarTextPath);
+            // Clean up temporary files
+            deleteDirectory(tempDir);
+        }
+    }
+
+    /**
+     * Writes {@code file} into {@code zipOut} as one entry named {@code filename}.
+     *
+     * <p>When the file does not exist a warning is logged and nothing is written. The zip stream
+     * is not closed here.
+     *
+     * @param file the file whose bytes are copied
+     * @param filename the entry name to use inside the archive
+     * @param zipOut the archive stream to append to
+     * @throws IOException if reading the file or writing the entry fails
+     */
+    private void addFileToZip(File file, String filename, ZipOutputStream zipOut)
+            throws IOException {
+        if (!file.exists()) {
+            log.warn("File {} does not exist, skipping", file);
+            return;
+        }
+
+        try (FileInputStream source = new FileInputStream(file)) {
+            zipOut.putNextEntry(new ZipEntry(filename));
+
+            // Copy in 1 KiB chunks until read() reports end of stream with a negative count.
+            byte[] chunk = new byte[1024];
+            for (int count = source.read(chunk); count >= 0; count = source.read(chunk)) {
+                zipOut.write(chunk, 0, count);
            }
+
+            zipOut.closeEntry();
+        }
+    }
+
+    /**
+     * Deletes {@code directory} and everything beneath it on a best-effort basis.
+     *
+     * <p>Failures are logged and never propagated: a path that cannot be deleted is reported and
+     * the remaining paths are still attempted.
+     *
+     * @param directory root of the tree to remove
+     */
+    private void deleteDirectory(Path directory) {
+        // Files.walk keeps directory handles open until its stream is closed, so the stream is
+        // managed by try-with-resources rather than left for the garbage collector.
+        try (var paths = Files.walk(directory)) {
+            // Reverse order puts children before their parent, so directories are empty when
+            // their turn comes.
+            paths.sorted(Comparator.reverseOrder())
+                    .forEach(
+                            path -> {
+                                try {
+                                    Files.delete(path);
+                                } catch (IOException e) {
+                                    log.error("Error deleting {}: {}", path, e.getMessage());
+                                }
+                            });
+        } catch (IOException e) {
+            log.error("Error walking directory {}: {}", directory, e.getMessage());
        }
    }
 }
--- a/src/main/java/stirling/software/SPDF/controller/api/misc/RepairController.java
+++ b/src/main/java/stirling/software/SPDF/controller/api/misc/RepairController.java
@@ -44,7 +44,7 @@ public class RepairController {
    @Operation(
            summary = "Repair a PDF file",
            description =
-                    "This endpoint repairs a given PDF file by running Ghostscript command. The PDF is first saved to a temporary location, repaired, read back, and then returned as a response. Input:PDF Output:PDF Type:SISO")
+                    "This endpoint repairs a given PDF file by running qpdf command. The PDF is first saved to a temporary location, repaired, read back, and then returned as a response. Input:PDF Output:PDF Type:SISO")
    public ResponseEntity<byte[]> repairPdf(@ModelAttribute PDFFile request)
            throws IOException, InterruptedException {
        MultipartFile inputFile = request.getFileInput();
@@ -56,14 +56,15 @@ public class RepairController {
        try {

            List<String> command = new ArrayList<>();
-            command.add("gs");
-            command.add("-o");
-            command.add(tempOutputFile.toString());
-            command.add("-sDEVICE=pdfwrite");
+            command.add("qpdf");
+            command.add("--replace-input"); // Automatically fixes problems it can
+            command.add("--qdf"); // Linearizes and normalizes PDF structure
+            command.add("--object-streams=disable"); // Can help with some corruptions
            command.add(tempInputFile.toString());
+            command.add(tempOutputFile.toString());

            ProcessExecutorResult returnCode =
-                    ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
+                    ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF)
                            .runCommandWithOutputHandling(command);

            // Read the optimized PDF file
--- a/src/main/java/stirling/software/SPDF/controller/api/security/CertSignController.java
+++ b/src/main/java/stirling/software/SPDF/controller/api/security/CertSignController.java
@@ -98,10 +98,10 @@ public class CertSignController {

        public CreateSignature(KeyStore keystore, char[] pin)
                throws KeyStoreException,
-                UnrecoverableKeyException,
-                NoSuchAlgorithmException,
-                IOException,
-                CertificateException {
+                        UnrecoverableKeyException,
+                        NoSuchAlgorithmException,
+                        IOException,
+                        CertificateException {
            super(keystore, pin);
            ClassPathResource resource = new ClassPathResource("static/images/signature.png");
            try (InputStream is = resource.getInputStream()) {
@@ -160,7 +160,8 @@ public class CertSignController {
                        extState.setNonStrokingAlphaConstant(0.5f);
                        cs.setGraphicsStateParameters(extState);
                        cs.transform(Matrix.getScaleInstance(0.08f, 0.08f));
-                        PDImageXObject img = PDImageXObject.createFromFileByExtension(logoFile, doc);
+                        PDImageXObject img =
+                                PDImageXObject.createFromFileByExtension(logoFile, doc);
                        cs.drawImage(img, 100, 0);
                        cs.restoreGraphicsState();
                    }
@@ -208,7 +209,10 @@ public class CertSignController {
    }

    @PostMapping(consumes = "multipart/form-data", value = "/cert-sign")
-    @Operation(summary = "Sign PDF with a Digital Certificate", description = "This endpoint accepts a PDF file, a digital certificate and related information to sign the PDF. It then returns the digitally signed PDF file. Input:PDF Output:PDF Type:SISO")
+    @Operation(
+            summary = "Sign PDF with a Digital Certificate",
+            description =
+                    "This endpoint accepts a PDF file, a digital certificate and related information to sign the PDF. It then returns the digitally signed PDF file. Input:PDF Output:PDF Type:SISO")
    public ResponseEntity<byte[]> signPDFWithCert(@ModelAttribute SignPDFWithCertRequest request)
            throws Exception {
        MultipartFile pdf = request.getFileInput();
@@ -238,7 +242,7 @@ public class CertSignController {
                PrivateKey privateKey = getPrivateKeyFromPEM(privateKeyFile.getBytes(), password);
                Certificate cert = (Certificate) getCertificateFromPEM(certFile.getBytes());
                ks.setKeyEntry(
-                        "alias", privateKey, password.toCharArray(), new Certificate[] { cert });
+                        "alias", privateKey, password.toCharArray(), new Certificate[] {cert});
                break;
            case "PKCS12":
                ks = KeyStore.getInstance("PKCS12");
@@ -310,19 +314,22 @@ public class CertSignController {

    private PrivateKey getPrivateKeyFromPEM(byte[] pemBytes, String password)
            throws IOException, OperatorCreationException, PKCSException {
-        try (PEMParser pemParser = new PEMParser(new InputStreamReader(new ByteArrayInputStream(pemBytes)))) {
+        try (PEMParser pemParser =
+                new PEMParser(new InputStreamReader(new ByteArrayInputStream(pemBytes)))) {
            Object pemObject = pemParser.readObject();
            JcaPEMKeyConverter converter = new JcaPEMKeyConverter().setProvider("BC");
            PrivateKeyInfo pkInfo;
            if (pemObject instanceof PKCS8EncryptedPrivateKeyInfo) {
-                InputDecryptorProvider decProv = new JceOpenSSLPKCS8DecryptorProviderBuilder()
-                        .build(password.toCharArray());
+                InputDecryptorProvider decProv =
+                        new JceOpenSSLPKCS8DecryptorProviderBuilder().build(password.toCharArray());
                pkInfo = ((PKCS8EncryptedPrivateKeyInfo) pemObject).decryptPrivateKeyInfo(decProv);
            } else if (pemObject instanceof PEMEncryptedKeyPair) {
-                PEMDecryptorProvider decProv = new JcePEMDecryptorProviderBuilder().build(password.toCharArray());
-                pkInfo = ((PEMEncryptedKeyPair) pemObject)
-                        .decryptKeyPair(decProv)
-                        .getPrivateKeyInfo();
+                PEMDecryptorProvider decProv =
+                        new JcePEMDecryptorProviderBuilder().build(password.toCharArray());
+                pkInfo =
+                        ((PEMEncryptedKeyPair) pemObject)
+                                .decryptKeyPair(decProv)
+                                .getPrivateKeyInfo();
            } else {
                pkInfo = ((PEMKeyPair) pemObject).getPrivateKeyInfo();
            }
--- a/src/main/java/stirling/software/SPDF/controller/web/HomeWebController.java
+++ b/src/main/java/stirling/software/SPDF/controller/web/HomeWebController.java
@@ -55,6 +55,11 @@ public class HomeWebController {
        return "licenses";
    }

+    /**
+     * Handles GET requests for {@code /releases}.
+     *
+     * @param model the model passed in for the request; this method does not modify it
+     * @return the view name {@code "releases"}
+     */
+    @GetMapping("/releases")
+    public String getReleaseNotes(Model model) {
+        return "releases";
+    }
+
    @GetMapping("/")
    public String home(Model model) {
        model.addAttribute("currentPage", "home");
--- a/src/main/java/stirling/software/SPDF/model/ApplicationProperties.java
+++ b/src/main/java/stirling/software/SPDF/model/ApplicationProperties.java
@@ -320,12 +320,20 @@ public class ApplicationProperties {
        public static class SessionLimit {
            private int libreOfficeSessionLimit;
            private int pdfToHtmlSessionLimit;
-            private int ocrMyPdfSessionLimit;
            private int pythonOpenCvSessionLimit;
-            private int ghostScriptSessionLimit;
            private int weasyPrintSessionLimit;
            private int installAppSessionLimit;
            private int calibreSessionLimit;
+            private int qpdfSessionLimit;
+            private int tesseractSessionLimit;
+
+            /** Returns the configured qpdf session limit, or 2 when it is not positive. */
+            public int getQpdfSessionLimit() {
+                if (qpdfSessionLimit > 0) {
+                    return qpdfSessionLimit;
+                }
+                return 2;
+            }
+
+            /** Returns the configured tesseract session limit, or 1 when it is not positive. */
+            public int getTesseractSessionLimit() {
+                if (tesseractSessionLimit > 0) {
+                    return tesseractSessionLimit;
+                }
+                return 1;
+            }

            public int getLibreOfficeSessionLimit() {
                return libreOfficeSessionLimit > 0 ? libreOfficeSessionLimit : 1;
@@ -335,18 +343,10 @@ public class ApplicationProperties {
                return pdfToHtmlSessionLimit > 0 ? pdfToHtmlSessionLimit : 1;
            }

-            public int getOcrMyPdfSessionLimit() {
-                return ocrMyPdfSessionLimit > 0 ? ocrMyPdfSessionLimit : 2;
-            }
-
            public int getPythonOpenCvSessionLimit() {
                return pythonOpenCvSessionLimit > 0 ? pythonOpenCvSessionLimit : 8;
            }

-            public int getGhostScriptSessionLimit() {
-                return ghostScriptSessionLimit > 0 ? ghostScriptSessionLimit : 16;
-            }
-
            public int getWeasyPrintSessionLimit() {
                return weasyPrintSessionLimit > 0 ? weasyPrintSessionLimit : 16;
            }
@@ -364,12 +364,20 @@ public class ApplicationProperties {
        public static class TimeoutMinutes {
            private long libreOfficeTimeoutMinutes;
            private long pdfToHtmlTimeoutMinutes;
-            private long ocrMyPdfTimeoutMinutes;
            private long pythonOpenCvTimeoutMinutes;
-            private long ghostScriptTimeoutMinutes;
            private long weasyPrintTimeoutMinutes;
            private long installAppTimeoutMinutes;
            private long calibreTimeoutMinutes;
+            private long tesseractTimeoutMinutes;
+            private long qpdfTimeoutMinutes;
+
+            /** Returns the configured tesseract timeout in minutes, or 30 when it is not positive. */
+            public long getTesseractTimeoutMinutes() {
+                if (tesseractTimeoutMinutes > 0) {
+                    return tesseractTimeoutMinutes;
+                }
+                return 30;
+            }
+
+            /** Returns the configured qpdf timeout in minutes, or 30 when it is not positive. */
+            public long getQpdfTimeoutMinutes() {
+                if (qpdfTimeoutMinutes > 0) {
+                    return qpdfTimeoutMinutes;
+                }
+                return 30;
+            }

            public long getLibreOfficeTimeoutMinutes() {
                return libreOfficeTimeoutMinutes > 0 ? libreOfficeTimeoutMinutes : 30;
@@ -379,18 +387,10 @@ public class ApplicationProperties {
                return pdfToHtmlTimeoutMinutes > 0 ? pdfToHtmlTimeoutMinutes : 20;
            }

-            public long getOcrMyPdfTimeoutMinutes() {
-                return ocrMyPdfTimeoutMinutes > 0 ? ocrMyPdfTimeoutMinutes : 30;
-            }
-
            public long getPythonOpenCvTimeoutMinutes() {
                return pythonOpenCvTimeoutMinutes > 0 ? pythonOpenCvTimeoutMinutes : 30;
            }

-            public long getGhostScriptTimeoutMinutes() {
-                return ghostScriptTimeoutMinutes > 0 ? ghostScriptTimeoutMinutes : 30;
-            }
-
            public long getWeasyPrintTimeoutMinutes() {
                return weasyPrintTimeoutMinutes > 0 ? weasyPrintTimeoutMinutes : 30;
            }
--- a/src/main/java/stirling/software/SPDF/model/api/misc/OptimizePdfRequest.java
+++ b/src/main/java/stirling/software/SPDF/model/api/misc/OptimizePdfRequest.java
@@ -18,4 +18,15 @@ public class OptimizePdfRequest extends PDFFile {

    @Schema(description = "The expected output size, e.g. '100MB', '25KB', etc.")
    private String expectedOutputSize;
+
+    @Schema(
+            description = "Whether to linearize the PDF for faster web viewing. Default is false.",
+            defaultValue = "false")
+    private Boolean linearize = false;
+
+    @Schema(
+            description =
+                    "Whether to normalize the PDF content for better compatibility. Default is true.",
+            defaultValue = "true")
+    private Boolean normalize = true;
 }
--- a/src/main/java/stirling/software/SPDF/model/api/misc/ProcessPdfWithOcrRequest.java
+++ b/src/main/java/stirling/software/SPDF/model/api/misc/ProcessPdfWithOcrRequest.java
@@ -15,18 +15,6 @@ public class ProcessPdfWithOcrRequest extends PDFFile {
    @Schema(description = "List of languages to use in OCR processing")
    private List<String> languages;

-    @Schema(description = "Include OCR text in a sidecar text file if set to true")
-    private boolean sidecar;
-
-    @Schema(description = "Deskew the input file if set to true")
-    private boolean deskew;
-
-    @Schema(description = "Clean the input file if set to true")
-    private boolean clean;
-
-    @Schema(description = "Clean the final output if set to true")
-    private boolean cleanFinal;
-
    @Schema(
            description = "Specify the OCR type, e.g., 'skip-text', 'force-ocr', or 'Normal'",
            allowableValues = {"skip-text", "force-ocr", "Normal"})
@@ -37,7 +25,4 @@ public class ProcessPdfWithOcrRequest extends PDFFile {
            allowableValues = {"hocr", "sandwich"},
            defaultValue = "hocr")
    private String ocrRenderType = "hocr";
-
-    @Schema(description = "Remove images from the output PDF if set to true")
-    private boolean removeImagesAfter;
 }
--- a/src/main/java/stirling/software/SPDF/service/MetricsAggregatorService.java
+++ b/src/main/java/stirling/software/SPDF/service/MetricsAggregatorService.java
@@ -34,17 +34,15 @@ public class MetricsAggregatorService {
                        counter -> {
                            String method = counter.getId().getTag("method");
                            String uri = counter.getId().getTag("uri");
-                            
+
                            // Skip if either method or uri is null
                            if (method == null || uri == null) {
                                return;
                            }
-                
-                            String key = String.format(
-                                "http_requests_%s_%s",
-                                method,
-                                uri.replace("/", "_")
-                            );
+
+                            String key =
+                                    String.format(
+                                            "http_requests_%s_%s", method, uri.replace("/", "_"));

                            double currentCount = counter.count();
                            double lastCount = lastSentMetrics.getOrDefault(key, 0.0);
--- a/src/main/java/stirling/software/SPDF/service/PostHogService.java
+++ b/src/main/java/stirling/software/SPDF/service/PostHogService.java
@@ -31,7 +31,7 @@ public class PostHogService {
    private final ApplicationProperties applicationProperties;
    private final UserServiceInterface userService;
    private final Environment env;
-    
+
    @Autowired
    public PostHogService(
            PostHog postHog,
@@ -71,16 +71,16 @@ public class PostHogService {
        Map<String, Object> metrics = new HashMap<>();

        try {
-        	//Application version
-        	metrics.put("app_version", appVersion);
-        	 String deploymentType = "JAR"; // default
-             if ("true".equalsIgnoreCase(env.getProperty("BROWSER_OPEN"))) {
-                 deploymentType = "EXE";
-             } else if (isRunningInDocker()) {
-                 deploymentType = "DOCKER";
-             }
-             metrics.put("deployment_type", deploymentType);
-        	
+            // Application version
+            metrics.put("app_version", appVersion);
+            String deploymentType = "JAR"; // default
+            if ("true".equalsIgnoreCase(env.getProperty("BROWSER_OPEN"))) {
+                deploymentType = "EXE";
+            } else if (isRunningInDocker()) {
+                deploymentType = "DOCKER";
+            }
+            metrics.put("deployment_type", deploymentType);
+
            // System info
            metrics.put("os_name", System.getProperty("os.name"));
            metrics.put("os_version", System.getProperty("os.version"));
--- a/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java
+++ b/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java
@@ -29,12 +29,12 @@ public class ProcessExecutor {
    public enum Processes {
        LIBRE_OFFICE,
        PDFTOHTML,
-        OCR_MY_PDF,
        PYTHON_OPENCV,
-        GHOSTSCRIPT,
        WEASYPRINT,
        INSTALL_APP,
-        CALIBRE
+        CALIBRE,
+        TESSERACT,
+        QPDF
    }

    private static final Map<Processes, ProcessExecutor> instances = new ConcurrentHashMap<>();
@@ -59,21 +59,11 @@ public class ProcessExecutor {
                                                .getProcessExecutor()
                                                .getSessionLimit()
                                                .getPdfToHtmlSessionLimit();
-                                case OCR_MY_PDF ->
-                                        applicationProperties
-                                                .getProcessExecutor()
-                                                .getSessionLimit()
-                                                .getOcrMyPdfSessionLimit();
                                case PYTHON_OPENCV ->
                                        applicationProperties
                                                .getProcessExecutor()
                                                .getSessionLimit()
                                                .getPythonOpenCvSessionLimit();
-                                case GHOSTSCRIPT ->
-                                        applicationProperties
-                                                .getProcessExecutor()
-                                                .getSessionLimit()
-                                                .getGhostScriptSessionLimit();
                                case WEASYPRINT ->
                                        applicationProperties
                                                .getProcessExecutor()
@@ -84,6 +74,16 @@ public class ProcessExecutor {
                                                .getProcessExecutor()
                                                .getSessionLimit()
                                                .getInstallAppSessionLimit();
+                                case TESSERACT ->
+                                        applicationProperties
+                                                .getProcessExecutor()
+                                                .getSessionLimit()
+                                                .getTesseractSessionLimit();
+                                case QPDF ->
+                                        applicationProperties
+                                                .getProcessExecutor()
+                                                .getSessionLimit()
+                                                .getQpdfSessionLimit();
                                case CALIBRE ->
                                        applicationProperties
                                                .getProcessExecutor()
@@ -103,21 +103,11 @@ public class ProcessExecutor {
                                                .getProcessExecutor()
                                                .getTimeoutMinutes()
                                                .getPdfToHtmlTimeoutMinutes();
-                                case OCR_MY_PDF ->
-                                        applicationProperties
-                                                .getProcessExecutor()
-                                                .getTimeoutMinutes()
-                                                .getOcrMyPdfTimeoutMinutes();
                                case PYTHON_OPENCV ->
                                        applicationProperties
                                                .getProcessExecutor()
                                                .getTimeoutMinutes()
                                                .getPythonOpenCvTimeoutMinutes();
-                                case GHOSTSCRIPT ->
-                                        applicationProperties
-                                                .getProcessExecutor()
-                                                .getTimeoutMinutes()
-                                                .getGhostScriptTimeoutMinutes();
                                case WEASYPRINT ->
                                        applicationProperties
                                                .getProcessExecutor()
@@ -128,6 +118,16 @@ public class ProcessExecutor {
                                                .getProcessExecutor()
                                                .getTimeoutMinutes()
                                                .getInstallAppTimeoutMinutes();
+                                case TESSERACT ->
+                                        applicationProperties
+                                                .getProcessExecutor()
+                                                .getTimeoutMinutes()
+                                                .getTesseractTimeoutMinutes();
+                                case QPDF ->
+                                        applicationProperties
+                                                .getProcessExecutor()
+                                                .getTimeoutMinutes()
+                                                .getQpdfTimeoutMinutes();
                                case CALIBRE ->
                                        applicationProperties
                                                .getProcessExecutor()