ocr remove images

This commit is contained in:
Anthony Stirling
2023-05-01 21:53:10 +01:00
parent 30c56a0ec9
commit 9af537c985
10 changed files with 76 additions and 4 deletions

View File

@@ -47,7 +47,8 @@ public class OCRController {
@RequestParam("languages") List<String> selectedLanguages, @RequestParam(name = "sidecar", required = false) Boolean sidecar,
@RequestParam(name = "deskew", required = false) Boolean deskew, @RequestParam(name = "clean", required = false) Boolean clean,
@RequestParam(name = "clean-final", required = false) Boolean cleanFinal, @RequestParam(name = "ocrType", required = false) String ocrType,
@RequestParam(name = "ocrRenderType", required = false, defaultValue = "hocr") String ocrRenderType)
@RequestParam(name = "ocrRenderType", required = false, defaultValue = "hocr") String ocrRenderType,
@RequestParam(name = "removeImagesAfter", required = false) Boolean removeImagesAfter)
throws IOException, InterruptedException {
// --output-type pdfa
@@ -114,11 +115,24 @@ public class OCRController {
// Run CLI command
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
// Remove images from the OCR processed PDF if the flag is set to true
if (removeImagesAfter != null && removeImagesAfter) {
Path tempPdfWithoutImages = Files.createTempFile("output_", "_no_images.pdf");
List<String> gsCommand = Arrays.asList("gs", "-sDEVICE=pdfwrite", "-dFILTERIMAGE", "-o", tempPdfWithoutImages.toString(), tempOutputFile.toString());
int gsReturnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(gsCommand);
tempOutputFile = tempPdfWithoutImages;
}
// Read the OCR processed PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Clean up the temporary files
Files.delete(tempInputFile);
// Return the OCR processed PDF as a response
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf";

View File

@@ -72,4 +72,13 @@ public class OtherWebController {
model.addAttribute("currentPage", "add-image");
return "other/add-image";
}
@GetMapping("/adjust-contrast")
@Hidden
public String contrast(Model model) {
model.addAttribute("currentPage", "adjust-contrast");
return "other/adjust-contrast";
}
}

View File

@@ -13,7 +13,7 @@ import java.util.concurrent.Semaphore;
public class ProcessExecutor {
public enum Processes {
LIBRE_OFFICE, OCR_MY_PDF, PYTHON_OPENCV
LIBRE_OFFICE, OCR_MY_PDF, PYTHON_OPENCV, GHOSTSCRIPT
}
private static final Map<Processes, ProcessExecutor> instances = new ConcurrentHashMap<>();
@@ -24,6 +24,7 @@ public class ProcessExecutor {
case LIBRE_OFFICE -> 1;
case OCR_MY_PDF -> 2;
case PYTHON_OPENCV -> 8;
case GHOSTSCRIPT -> 16;
};
return new ProcessExecutor(semaphoreLimit);
});