format and move everything, other in own folder
This commit is contained in:
@@ -30,22 +30,6 @@ public class MergeController {
|
||||
return "merge-pdfs";
|
||||
}
|
||||
|
||||
@PostMapping("/merge-pdfs")
|
||||
public ResponseEntity<byte[]> mergePdfs(@RequestParam("fileInput") MultipartFile[] files) throws IOException {
|
||||
// Read the input PDF files into PDDocument objects
|
||||
List<PDDocument> documents = new ArrayList<>();
|
||||
|
||||
// Loop through the files array and read each file into a PDDocument
|
||||
for (MultipartFile file : files) {
|
||||
documents.add(PDDocument.load(file.getInputStream()));
|
||||
}
|
||||
|
||||
PDDocument mergedDoc = mergeDocuments(documents);
|
||||
|
||||
// Return the merged PDF as a response
|
||||
return PdfUtils.pdfDocToWebResponse(mergedDoc, files[0].getOriginalFilename().replaceFirst("[.][^.]+$", "")+ "_merged.pdf");
|
||||
}
|
||||
|
||||
private PDDocument mergeDocuments(List<PDDocument> documents) throws IOException {
|
||||
// Create a new empty document
|
||||
PDDocument mergedDoc = new PDDocument();
|
||||
@@ -64,4 +48,20 @@ public class MergeController {
|
||||
return mergedDoc;
|
||||
}
|
||||
|
||||
@PostMapping("/merge-pdfs")
|
||||
public ResponseEntity<byte[]> mergePdfs(@RequestParam("fileInput") MultipartFile[] files) throws IOException {
|
||||
// Read the input PDF files into PDDocument objects
|
||||
List<PDDocument> documents = new ArrayList<>();
|
||||
|
||||
// Loop through the files array and read each file into a PDDocument
|
||||
for (MultipartFile file : files) {
|
||||
documents.add(PDDocument.load(file.getInputStream()));
|
||||
}
|
||||
|
||||
PDDocument mergedDoc = mergeDocuments(documents);
|
||||
|
||||
// Return the merged PDF as a response
|
||||
return PdfUtils.pdfDocToWebResponse(mergedDoc, files[0].getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_merged.pdf");
|
||||
}
|
||||
|
||||
}
|
||||
@@ -9,12 +9,12 @@ import org.springframework.web.bind.annotation.GetMapping;
|
||||
@Controller
|
||||
public class MultiToolController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(MultiToolController.class);
|
||||
private static final Logger logger = LoggerFactory.getLogger(MultiToolController.class);
|
||||
|
||||
@GetMapping("/multi-tool")
|
||||
public String multiToolForm(Model model) {
|
||||
model.addAttribute("currentPage", "multi-tool");
|
||||
return "multi-tool";
|
||||
}
|
||||
@GetMapping("/multi-tool")
|
||||
public String multiToolForm(Model model) {
|
||||
model.addAttribute("currentPage", "multi-tool");
|
||||
return "multi-tool";
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,180 +0,0 @@
|
||||
package stirling.software.SPDF.controller;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.web.servlet.ModelAndView;
|
||||
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
@Controller
|
||||
public class OCRController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(OCRController.class);
|
||||
|
||||
@GetMapping("/ocr-pdf")
|
||||
public ModelAndView ocrPdfPage() {
|
||||
ModelAndView modelAndView = new ModelAndView("ocr-pdf");
|
||||
modelAndView.addObject("languages", getAvailableTesseractLanguages());
|
||||
modelAndView.addObject("currentPage", "ocr-pdf");
|
||||
return modelAndView;
|
||||
}
|
||||
|
||||
@PostMapping("/ocr-pdf")
|
||||
public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile,
|
||||
@RequestParam("languages") List<String> selectedLanguages,
|
||||
@RequestParam(name = "sidecar", required = false) Boolean sidecar,
|
||||
@RequestParam(name = "deskew", required = false) Boolean deskew,
|
||||
@RequestParam(name = "clean", required = false) Boolean clean,
|
||||
@RequestParam(name = "clean-final", required = false) Boolean cleanFinal,
|
||||
@RequestParam(name = "ocrType", required = false) String ocrType) throws IOException, InterruptedException {
|
||||
|
||||
|
||||
//--output-type pdfa
|
||||
if (selectedLanguages == null || selectedLanguages.size() < 1) {
|
||||
throw new IOException("Please select at least one language.");
|
||||
}
|
||||
|
||||
// Validate and sanitize selected languages using regex
|
||||
String languagePattern = "^[a-zA-Z]{3}$"; // Regex pattern for three-letter language codes
|
||||
selectedLanguages = selectedLanguages.stream()
|
||||
.filter(lang -> Pattern.matches(languagePattern, lang))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
|
||||
if (selectedLanguages.isEmpty()) {
|
||||
throw new IOException("None of the selected languages are valid.");
|
||||
}
|
||||
// Save the uploaded file to a temporary location
|
||||
Path tempInputFile = Files.createTempFile("input_", ".pdf");
|
||||
Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
|
||||
|
||||
// Prepare the output file path
|
||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
|
||||
// Prepare the output file path
|
||||
Path sidecarTextPath = null;
|
||||
|
||||
// Run OCR Command
|
||||
String languageOption = String.join("+", selectedLanguages);
|
||||
|
||||
List<String> command = new ArrayList<>(Arrays.asList("ocrmypdf","--verbose", "2", "--output-type", "pdf"));
|
||||
|
||||
|
||||
if (sidecar != null && sidecar) {
|
||||
sidecarTextPath = Files.createTempFile("sidecar", ".txt");
|
||||
command.add("--sidecar");
|
||||
command.add(sidecarTextPath.toString());
|
||||
}
|
||||
|
||||
if (deskew != null && deskew) {
|
||||
command.add("--deskew");
|
||||
}
|
||||
if (clean != null && clean) {
|
||||
command.add("--clean");
|
||||
}
|
||||
if (cleanFinal != null && cleanFinal) {
|
||||
command.add("--clean-final");
|
||||
}
|
||||
if (ocrType != null && !ocrType.equals("")) {
|
||||
if("skip-text".equals(ocrType)) {
|
||||
command.add("--skip-text");
|
||||
} else if("force-ocr".equals(ocrType)) {
|
||||
command.add("--force-ocr");
|
||||
} else if("Normal".equals(ocrType)) {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
command.addAll(Arrays.asList("--language", languageOption,
|
||||
tempInputFile.toString(), tempOutputFile.toString()));
|
||||
|
||||
//Run CLI command
|
||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the OCR processed PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
|
||||
// Clean up the temporary files
|
||||
Files.delete(tempInputFile);
|
||||
// Return the OCR processed PDF as a response
|
||||
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf";
|
||||
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
|
||||
if (sidecar != null && sidecar) {
|
||||
// Create a zip file containing both the PDF and the text file
|
||||
String outputZipFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.zip";
|
||||
Path tempZipFile = Files.createTempFile("output_", ".zip");
|
||||
|
||||
try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
|
||||
// Add PDF file to the zip
|
||||
ZipEntry pdfEntry = new ZipEntry(outputFilename);
|
||||
zipOut.putNextEntry(pdfEntry);
|
||||
Files.copy(tempOutputFile, zipOut);
|
||||
zipOut.closeEntry();
|
||||
|
||||
// Add text file to the zip
|
||||
ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt"));
|
||||
zipOut.putNextEntry(txtEntry);
|
||||
Files.copy(sidecarTextPath, zipOut);
|
||||
zipOut.closeEntry();
|
||||
}
|
||||
|
||||
byte[] zipBytes = Files.readAllBytes(tempZipFile);
|
||||
|
||||
// Clean up the temporary zip file
|
||||
Files.delete(tempZipFile);
|
||||
Files.delete(tempOutputFile);
|
||||
Files.delete(sidecarTextPath);
|
||||
|
||||
// Return the zip file containing both the PDF and the text file
|
||||
headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
|
||||
headers.setContentDispositionFormData("attachment", outputZipFilename);
|
||||
return ResponseEntity.ok().headers(headers).body(zipBytes);
|
||||
} else {
|
||||
// Return the OCR processed PDF as a response
|
||||
Files.delete(tempOutputFile);
|
||||
headers.setContentType(MediaType.APPLICATION_PDF);
|
||||
headers.setContentDispositionFormData("attachment", outputFilename);
|
||||
return ResponseEntity.ok().headers(headers).body(pdfBytes);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public List<String> getAvailableTesseractLanguages() {
|
||||
String tessdataDir = "/usr/share/tesseract-ocr/4.00/tessdata";
|
||||
File[] files = new File(tessdataDir).listFiles();
|
||||
if (files == null) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
return Arrays.stream(files)
|
||||
.filter(file -> file.getName().endsWith(".traineddata"))
|
||||
.map(file -> file.getName().replace(".traineddata", ""))
|
||||
.filter(lang -> !lang.equalsIgnoreCase("osd"))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
}
|
||||
@@ -10,11 +10,6 @@ import org.springframework.web.bind.annotation.GetMapping;
|
||||
public class PdfController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(PdfController.class);
|
||||
|
||||
@GetMapping("/home")
|
||||
public String root(Model model) {
|
||||
return "redirect:/";
|
||||
}
|
||||
|
||||
@GetMapping("/")
|
||||
public String home(Model model) {
|
||||
@@ -22,6 +17,9 @@ public class PdfController {
|
||||
return "home";
|
||||
}
|
||||
|
||||
@GetMapping("/home")
|
||||
public String root(Model model) {
|
||||
return "redirect:/";
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@@ -23,18 +23,6 @@ public class RearrangePagesPDFController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(RearrangePagesPDFController.class);
|
||||
|
||||
@GetMapping("/pdf-organizer")
|
||||
public String pageOrganizer(Model model) {
|
||||
model.addAttribute("currentPage", "pdf-organizer");
|
||||
return "pdf-organizer";
|
||||
}
|
||||
|
||||
@GetMapping("/remove-pages")
|
||||
public String pageDeleter(Model model) {
|
||||
model.addAttribute("currentPage", "remove-pages");
|
||||
return "remove-pages";
|
||||
}
|
||||
|
||||
@PostMapping("/remove-pages")
|
||||
public ResponseEntity<byte[]> deletePages(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("pagesToDelete") String pagesToDelete) throws IOException {
|
||||
|
||||
@@ -53,6 +41,12 @@ public class RearrangePagesPDFController {
|
||||
|
||||
}
|
||||
|
||||
@GetMapping("/remove-pages")
|
||||
public String pageDeleter(Model model) {
|
||||
model.addAttribute("currentPage", "remove-pages");
|
||||
return "remove-pages";
|
||||
}
|
||||
|
||||
private List<Integer> pageOrderToString(String[] pageOrderArr, int totalPages) {
|
||||
List<Integer> newPageOrder = new ArrayList<>();
|
||||
// loop through the page order array
|
||||
@@ -81,6 +75,12 @@ public class RearrangePagesPDFController {
|
||||
return newPageOrder;
|
||||
}
|
||||
|
||||
@GetMapping("/pdf-organizer")
|
||||
public String pageOrganizer(Model model) {
|
||||
model.addAttribute("currentPage", "pdf-organizer");
|
||||
return "pdf-organizer";
|
||||
}
|
||||
|
||||
@PostMapping("/rearrange-pages")
|
||||
public ResponseEntity<byte[]> rearrangePages(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("pageOrder") String pageOrder) {
|
||||
try {
|
||||
|
||||
@@ -22,12 +22,6 @@ public class RotationController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(RotationController.class);
|
||||
|
||||
@GetMapping("/rotate-pdf")
|
||||
public String rotatePdfForm(Model model) {
|
||||
model.addAttribute("currentPage", "rotate-pdf");
|
||||
return "rotate-pdf";
|
||||
}
|
||||
|
||||
@PostMapping("/rotate-pdf")
|
||||
public ResponseEntity<byte[]> rotatePDF(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("angle") Integer angle) throws IOException {
|
||||
|
||||
@@ -45,4 +39,10 @@ public class RotationController {
|
||||
|
||||
}
|
||||
|
||||
@GetMapping("/rotate-pdf")
|
||||
public String rotatePdfForm(Model model) {
|
||||
model.addAttribute("currentPage", "rotate-pdf");
|
||||
return "rotate-pdf";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -27,17 +27,12 @@ import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
@Controller
|
||||
public class SplitPDFController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(SplitPDFController.class);
|
||||
|
||||
@GetMapping("/split-pdfs")
|
||||
public String splitPdfForm(Model model) {
|
||||
model.addAttribute("currentPage", "split-pdfs");
|
||||
return "split-pdfs";
|
||||
}
|
||||
|
||||
@PostMapping("/split-pages")
|
||||
public ResponseEntity<Resource> splitPdf(@RequestParam("fileInput") MultipartFile file, @RequestParam("pages") String pages) throws IOException {
|
||||
// parse user input
|
||||
@@ -128,7 +123,13 @@ public class SplitPDFController {
|
||||
Files.delete(zipFile);
|
||||
|
||||
// return the Resource in the response
|
||||
return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_split.zip").contentType(MediaType.APPLICATION_OCTET_STREAM)
|
||||
.contentLength(resource.contentLength()).body(resource);
|
||||
return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_split.zip")
|
||||
.contentType(MediaType.APPLICATION_OCTET_STREAM).contentLength(resource.contentLength()).body(resource);
|
||||
}
|
||||
|
||||
@GetMapping("/split-pdfs")
|
||||
public String splitPdfForm(Model model) {
|
||||
model.addAttribute("currentPage", "split-pdfs");
|
||||
return "split-pdfs";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,28 +25,6 @@ public class ConvertImgPDFController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(ConvertImgPDFController.class);
|
||||
|
||||
@GetMapping("/img-to-pdf")
|
||||
public String convertToPdfForm(Model model) {
|
||||
model.addAttribute("currentPage", "img-to-pdf");
|
||||
return "convert/img-to-pdf";
|
||||
}
|
||||
|
||||
@GetMapping("/pdf-to-img")
|
||||
public String pdfToimgForm(Model model) {
|
||||
model.addAttribute("currentPage", "pdf-to-img");
|
||||
return "convert/pdf-to-img";
|
||||
}
|
||||
|
||||
@PostMapping("/img-to-pdf")
|
||||
public ResponseEntity<byte[]> convertToPdf(@RequestParam("fileInput") MultipartFile[] file,
|
||||
@RequestParam(defaultValue = "false", name = "stretchToFit") boolean stretchToFit,
|
||||
@RequestParam(defaultValue = "true", name = "autoRotate") boolean autoRotate) throws IOException {
|
||||
// Convert the file to PDF and get the resulting bytes
|
||||
System.out.println(stretchToFit);
|
||||
byte[] bytes = PdfUtils.imageToPdf(file, stretchToFit, autoRotate);
|
||||
return PdfUtils.bytesToWebResponse(bytes, file[0].getOriginalFilename().replaceFirst("[.][^.]+$", "")+ "_coverted.pdf");
|
||||
}
|
||||
|
||||
@PostMapping("/pdf-to-img")
|
||||
public ResponseEntity<Resource> convertToImage(@RequestParam("fileInput") MultipartFile file, @RequestParam("imageFormat") String imageFormat,
|
||||
@RequestParam("singleOrMultiple") String singleOrMultiple, @RequestParam("colorType") String colorType, @RequestParam("dpi") String dpi) throws IOException {
|
||||
@@ -78,11 +56,27 @@ public class ConvertImgPDFController {
|
||||
} else {
|
||||
ByteArrayResource resource = new ByteArrayResource(result);
|
||||
// return the Resource in the response
|
||||
return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename="+ file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToImages.zip").contentType(MediaType.APPLICATION_OCTET_STREAM)
|
||||
.contentLength(resource.contentLength()).body(resource);
|
||||
return ResponseEntity.ok()
|
||||
.header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToImages.zip")
|
||||
.contentType(MediaType.APPLICATION_OCTET_STREAM).contentLength(resource.contentLength()).body(resource);
|
||||
}
|
||||
}
|
||||
|
||||
@PostMapping("/img-to-pdf")
|
||||
public ResponseEntity<byte[]> convertToPdf(@RequestParam("fileInput") MultipartFile[] file, @RequestParam(defaultValue = "false", name = "stretchToFit") boolean stretchToFit,
|
||||
@RequestParam(defaultValue = "true", name = "autoRotate") boolean autoRotate) throws IOException {
|
||||
// Convert the file to PDF and get the resulting bytes
|
||||
System.out.println(stretchToFit);
|
||||
byte[] bytes = PdfUtils.imageToPdf(file, stretchToFit, autoRotate);
|
||||
return PdfUtils.bytesToWebResponse(bytes, file[0].getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_coverted.pdf");
|
||||
}
|
||||
|
||||
@GetMapping("/img-to-pdf")
|
||||
public String convertToPdfForm(Model model) {
|
||||
model.addAttribute("currentPage", "img-to-pdf");
|
||||
return "convert/img-to-pdf";
|
||||
}
|
||||
|
||||
private String getMediaType(String imageFormat) {
|
||||
if (imageFormat.equalsIgnoreCase("PNG"))
|
||||
return "image/png";
|
||||
@@ -94,4 +88,10 @@ public class ConvertImgPDFController {
|
||||
return "application/octet-stream";
|
||||
}
|
||||
|
||||
@GetMapping("/pdf-to-img")
|
||||
public String pdfToimgForm(Model model) {
|
||||
model.addAttribute("currentPage", "pdf-to-img");
|
||||
return "convert/pdf-to-img";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -19,62 +19,57 @@ import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
|
||||
@Controller
|
||||
public class ConvertOfficeController {
|
||||
|
||||
|
||||
@GetMapping("/file-to-pdf")
|
||||
public byte[] convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException {
|
||||
// Check for valid file extension
|
||||
String originalFilename = inputFile.getOriginalFilename();
|
||||
if (originalFilename == null || !isValidFileExtension(FilenameUtils.getExtension(originalFilename))) {
|
||||
throw new IllegalArgumentException("Invalid file extension");
|
||||
}
|
||||
|
||||
// Save the uploaded file to a temporary location
|
||||
Path tempInputFile = Files.createTempFile("input_", "." + FilenameUtils.getExtension(originalFilename));
|
||||
Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
|
||||
|
||||
// Prepare the output file path
|
||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
|
||||
// Run the LibreOffice command
|
||||
List<String> command = new ArrayList<>(Arrays.asList("unoconv", "-vvv", "-f", "pdf", "-o", tempOutputFile.toString(), tempInputFile.toString()));
|
||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the converted PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
// Clean up the temporary files
|
||||
Files.delete(tempInputFile);
|
||||
Files.delete(tempOutputFile);
|
||||
|
||||
return pdfBytes;
|
||||
}
|
||||
|
||||
@GetMapping("/file-to-pdf")
|
||||
public String convertToPdfForm(Model model) {
|
||||
model.addAttribute("currentPage", "file-to-pdf");
|
||||
return "convert/file-to-pdf";
|
||||
}
|
||||
|
||||
@PostMapping("/file-to-pdf")
|
||||
public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
|
||||
|
||||
//unused but can start server instance if startup time is to long
|
||||
//LibreOfficeListener.getInstance().start();
|
||||
|
||||
byte[] pdfByteArray = convertToPdf(inputFile);
|
||||
return PdfUtils.bytesToWebResponse(pdfByteArray, inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
|
||||
}
|
||||
|
||||
|
||||
public byte[] convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException {
|
||||
// Check for valid file extension
|
||||
String originalFilename = inputFile.getOriginalFilename();
|
||||
if (originalFilename == null || !isValidFileExtension(FilenameUtils.getExtension(originalFilename))) {
|
||||
throw new IllegalArgumentException("Invalid file extension");
|
||||
private boolean isValidFileExtension(String fileExtension) {
|
||||
String extensionPattern = "^(?i)[a-z0-9]{2,4}$";
|
||||
return fileExtension.matches(extensionPattern);
|
||||
}
|
||||
|
||||
// Save the uploaded file to a temporary location
|
||||
Path tempInputFile = Files.createTempFile("input_", "." + FilenameUtils.getExtension(originalFilename));
|
||||
Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
|
||||
@PostMapping("/file-to-pdf")
|
||||
public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
|
||||
|
||||
// Prepare the output file path
|
||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
// unused but can start server instance if startup time is to long
|
||||
// LibreOfficeListener.getInstance().start();
|
||||
|
||||
// Run the LibreOffice command
|
||||
List<String> command = new ArrayList<>(Arrays.asList("unoconv", "-vvv",
|
||||
"-f",
|
||||
"pdf",
|
||||
"-o",
|
||||
tempOutputFile.toString(),
|
||||
tempInputFile.toString()));
|
||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the converted PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
// Clean up the temporary files
|
||||
Files.delete(tempInputFile);
|
||||
Files.delete(tempOutputFile);
|
||||
|
||||
return pdfBytes;
|
||||
}
|
||||
private boolean isValidFileExtension(String fileExtension) {
|
||||
String extensionPattern = "^(?i)[a-z0-9]{2,4}$";
|
||||
return fileExtension.matches(extensionPattern);
|
||||
}
|
||||
byte[] pdfByteArray = convertToPdf(inputFile);
|
||||
return PdfUtils.bytesToWebResponse(pdfByteArray, inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -15,12 +15,10 @@ import stirling.software.SPDF.utils.PDFToFile;
|
||||
@Controller
|
||||
public class ConvertPDFToOffice {
|
||||
|
||||
|
||||
|
||||
@GetMapping("/pdf-to-word")
|
||||
public ModelAndView pdfToWord() {
|
||||
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-word");
|
||||
modelAndView.addObject("currentPage", "pdf-to-word");
|
||||
@GetMapping("/pdf-to-html")
|
||||
public ModelAndView pdfToHTML() {
|
||||
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-html");
|
||||
modelAndView.addObject("currentPage", "pdf-to-html");
|
||||
return modelAndView;
|
||||
}
|
||||
|
||||
@@ -38,10 +36,10 @@ public class ConvertPDFToOffice {
|
||||
return modelAndView;
|
||||
}
|
||||
|
||||
@GetMapping("/pdf-to-html")
|
||||
public ModelAndView pdfToHTML() {
|
||||
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-html");
|
||||
modelAndView.addObject("currentPage", "pdf-to-html");
|
||||
@GetMapping("/pdf-to-word")
|
||||
public ModelAndView pdfToWord() {
|
||||
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-word");
|
||||
modelAndView.addObject("currentPage", "pdf-to-word");
|
||||
return modelAndView;
|
||||
}
|
||||
|
||||
@@ -52,46 +50,37 @@ public class ConvertPDFToOffice {
|
||||
return modelAndView;
|
||||
}
|
||||
|
||||
|
||||
@PostMapping("/pdf-to-word")
|
||||
public ResponseEntity<byte[]> processPdfToWord(@RequestParam("fileInput") MultipartFile inputFile,
|
||||
@RequestParam("outputFormat") String outputFormat) throws IOException, InterruptedException {
|
||||
PDFToFile pdfToFile = new PDFToFile();
|
||||
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
|
||||
}
|
||||
|
||||
@PostMapping("/pdf-to-presentation")
|
||||
public ResponseEntity<byte[]> processPdfToPresentation(@RequestParam("fileInput") MultipartFile inputFile,
|
||||
@RequestParam("outputFormat") String outputFormat) throws IOException, InterruptedException {
|
||||
PDFToFile pdfToFile = new PDFToFile();
|
||||
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "impress_pdf_import");
|
||||
}
|
||||
|
||||
@PostMapping("/pdf-to-text")
|
||||
public ResponseEntity<byte[]> processPdfToRTForTXT(@RequestParam("fileInput") MultipartFile inputFile,
|
||||
@RequestParam("outputFormat") String outputFormat) throws IOException, InterruptedException {
|
||||
PDFToFile pdfToFile = new PDFToFile();
|
||||
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
|
||||
}
|
||||
|
||||
|
||||
@PostMapping("/pdf-to-html")
|
||||
public ResponseEntity<byte[]> processPdfToHTML(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
|
||||
PDFToFile pdfToFile = new PDFToFile();
|
||||
return pdfToFile.processPdfToOfficeFormat(inputFile, "html", "writer_pdf_import");
|
||||
}
|
||||
|
||||
@PostMapping("/pdf-to-presentation")
|
||||
public ResponseEntity<byte[]> processPdfToPresentation(@RequestParam("fileInput") MultipartFile inputFile, @RequestParam("outputFormat") String outputFormat)
|
||||
throws IOException, InterruptedException {
|
||||
PDFToFile pdfToFile = new PDFToFile();
|
||||
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "impress_pdf_import");
|
||||
}
|
||||
|
||||
@PostMapping("/pdf-to-text")
|
||||
public ResponseEntity<byte[]> processPdfToRTForTXT(@RequestParam("fileInput") MultipartFile inputFile, @RequestParam("outputFormat") String outputFormat)
|
||||
throws IOException, InterruptedException {
|
||||
PDFToFile pdfToFile = new PDFToFile();
|
||||
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
|
||||
}
|
||||
|
||||
@PostMapping("/pdf-to-word")
|
||||
public ResponseEntity<byte[]> processPdfToWord(@RequestParam("fileInput") MultipartFile inputFile, @RequestParam("outputFormat") String outputFormat)
|
||||
throws IOException, InterruptedException {
|
||||
PDFToFile pdfToFile = new PDFToFile();
|
||||
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
|
||||
}
|
||||
|
||||
@PostMapping("/pdf-to-xml")
|
||||
public ResponseEntity<byte[]> processPdfToXML(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
|
||||
PDFToFile pdfToFile = new PDFToFile();
|
||||
return pdfToFile.processPdfToOfficeFormat(inputFile, "xml", "writer_pdf_import");
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -17,21 +17,13 @@ import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
|
||||
@Controller
|
||||
public class ConvertPDFToPDFA {
|
||||
|
||||
@GetMapping("/pdf-to-pdfa")
|
||||
public String pdfToPdfAForm(Model model) {
|
||||
model.addAttribute("currentPage", "pdf-to-pdfa");
|
||||
return "convert/pdf-to-pdfa";
|
||||
}
|
||||
|
||||
|
||||
@PostMapping("/pdf-to-pdfa")
|
||||
public ResponseEntity<byte[]> pdfToPdfA(
|
||||
@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
|
||||
public ResponseEntity<byte[]> pdfToPdfA(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
|
||||
|
||||
|
||||
// Save the uploaded file to a temporary location
|
||||
Path tempInputFile = Files.createTempFile("input_", ".pdf");
|
||||
inputFile.transferTo(tempInputFile.toFile());
|
||||
@@ -50,7 +42,7 @@ public class ConvertPDFToPDFA {
|
||||
command.add(tempOutputFile.toString());
|
||||
|
||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
|
||||
|
||||
|
||||
// Read the optimized PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
@@ -64,7 +56,12 @@ public class ConvertPDFToPDFA {
|
||||
headers.setContentType(MediaType.APPLICATION_PDF);
|
||||
headers.setContentDispositionFormData("attachment", outputFilename);
|
||||
return ResponseEntity.ok().headers(headers).body(pdfBytes);
|
||||
}
|
||||
}
|
||||
|
||||
@GetMapping("/pdf-to-pdfa")
|
||||
public String pdfToPdfAForm(Model model) {
|
||||
model.addAttribute("currentPage", "pdf-to-pdfa");
|
||||
return "convert/pdf-to-pdfa";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package stirling.software.SPDF.controller;
|
||||
package stirling.software.SPDF.controller.other;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
@@ -20,7 +20,6 @@ import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
|
||||
|
||||
@Controller
|
||||
public class CompressController {
|
||||
|
||||
@@ -29,16 +28,13 @@ public class CompressController {
|
||||
@GetMapping("/compress-pdf")
|
||||
public String compressPdfForm(Model model) {
|
||||
model.addAttribute("currentPage", "compress-pdf");
|
||||
return "compress-pdf";
|
||||
return "other/compress-pdf";
|
||||
}
|
||||
|
||||
|
||||
@PostMapping("/compress-pdf")
|
||||
public ResponseEntity<byte[]> optimizePdf(
|
||||
@RequestParam("fileInput") MultipartFile inputFile,
|
||||
@RequestParam("optimizeLevel") int optimizeLevel,
|
||||
@RequestParam(name = "fastWebView", required = false) Boolean fastWebView,
|
||||
@RequestParam(name = "jbig2Lossy", required = false) Boolean jbig2Lossy) throws IOException, InterruptedException {
|
||||
public ResponseEntity<byte[]> optimizePdf(@RequestParam("fileInput") MultipartFile inputFile, @RequestParam("optimizeLevel") int optimizeLevel,
|
||||
@RequestParam(name = "fastWebView", required = false) Boolean fastWebView, @RequestParam(name = "jbig2Lossy", required = false) Boolean jbig2Lossy)
|
||||
throws IOException, InterruptedException {
|
||||
|
||||
// Save the uploaded file to a temporary location
|
||||
Path tempInputFile = Files.createTempFile("input_", ".pdf");
|
||||
@@ -57,7 +53,6 @@ public class CompressController {
|
||||
command.add("--output-type");
|
||||
command.add("pdf");
|
||||
|
||||
|
||||
if (fastWebView != null && fastWebView) {
|
||||
long fileSize = inputFile.getSize();
|
||||
long fastWebViewSize = (long) (fileSize * 1.25); // 25% higher than file size
|
||||
@@ -73,7 +68,7 @@ public class CompressController {
|
||||
command.add(tempOutputFile.toString());
|
||||
|
||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
|
||||
|
||||
|
||||
// Read the optimized PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
@@ -87,6 +82,6 @@ public class CompressController {
|
||||
headers.setContentType(MediaType.APPLICATION_PDF);
|
||||
headers.setContentDispositionFormData("attachment", outputFilename);
|
||||
return ResponseEntity.ok().headers(headers).body(pdfBytes);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package stirling.software.SPDF.controller;
|
||||
package stirling.software.SPDF.controller.other;
|
||||
|
||||
import java.awt.Graphics2D;
|
||||
import java.awt.Image;
|
||||
@@ -36,15 +36,9 @@ public class ExtractImagesController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(ExtractImagesController.class);
|
||||
|
||||
@GetMapping("/extract-images")
|
||||
public String extractImagesForm(Model model) {
|
||||
model.addAttribute("currentPage", "extract-images");
|
||||
return "extract-images";
|
||||
}
|
||||
|
||||
@PostMapping("/extract-images")
|
||||
public ResponseEntity<Resource> extractImages(@RequestParam("fileInput") MultipartFile file, @RequestParam("format") String format) throws IOException {
|
||||
|
||||
|
||||
System.out.println(System.currentTimeMillis() + "file=" + file.getName() + ", format=" + format);
|
||||
PDDocument document = PDDocument.load(file.getBytes());
|
||||
|
||||
@@ -58,7 +52,7 @@ public class ExtractImagesController {
|
||||
zos.setLevel(Deflater.BEST_COMPRESSION);
|
||||
|
||||
int imageIndex = 1;
|
||||
|
||||
|
||||
int pageNum = 1;
|
||||
// Iterate over each page
|
||||
for (PDPage page : document.getPages()) {
|
||||
@@ -72,21 +66,18 @@ public class ExtractImagesController {
|
||||
RenderedImage renderedImage = image.getImage();
|
||||
BufferedImage bufferedImage = null;
|
||||
if (format.equalsIgnoreCase("png")) {
|
||||
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(),
|
||||
BufferedImage.TYPE_INT_ARGB);
|
||||
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), BufferedImage.TYPE_INT_ARGB);
|
||||
} else if (format.equalsIgnoreCase("jpeg") || format.equalsIgnoreCase("jpg")) {
|
||||
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(),
|
||||
BufferedImage.TYPE_INT_RGB);
|
||||
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), BufferedImage.TYPE_INT_RGB);
|
||||
} else if (format.equalsIgnoreCase("gif")) {
|
||||
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(),
|
||||
BufferedImage.TYPE_BYTE_INDEXED);
|
||||
}
|
||||
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), BufferedImage.TYPE_BYTE_INDEXED);
|
||||
}
|
||||
|
||||
// Write image to zip file
|
||||
String imageName = "Image " + imageIndex + " (Page " + pageNum + ")." + format;
|
||||
ZipEntry zipEntry = new ZipEntry(imageName);
|
||||
zos.putNextEntry(zipEntry);
|
||||
|
||||
|
||||
Graphics2D g = bufferedImage.createGraphics();
|
||||
g.drawImage((Image) renderedImage, 0, 0, null);
|
||||
g.dispose();
|
||||
@@ -94,12 +85,11 @@ public class ExtractImagesController {
|
||||
ByteArrayOutputStream imageBaos = new ByteArrayOutputStream();
|
||||
ImageIO.write(bufferedImage, format, imageBaos);
|
||||
zos.write(imageBaos.toByteArray());
|
||||
|
||||
|
||||
|
||||
zos.closeEntry();
|
||||
imageIndex++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Close ZipOutputStream and PDDocument
|
||||
@@ -110,20 +100,22 @@ public class ExtractImagesController {
|
||||
byte[] zipContents = baos.toByteArray();
|
||||
ByteArrayResource resource = new ByteArrayResource(zipContents);
|
||||
|
||||
// Set content disposition header to indicate that the response should be downloaded as a file
|
||||
// Set content disposition header to indicate that the response should be
|
||||
// downloaded as a file
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentLength(zipContents.length);
|
||||
headers.add(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_extracted-images.zip");
|
||||
|
||||
|
||||
// Return ResponseEntity with ByteArrayResource and headers
|
||||
return ResponseEntity
|
||||
.status(HttpStatus.OK)
|
||||
.headers(headers)
|
||||
|
||||
.header("Cache-Control", "no-cache")
|
||||
.contentType(MediaType.APPLICATION_OCTET_STREAM)
|
||||
.body(resource);
|
||||
return ResponseEntity.status(HttpStatus.OK).headers(headers)
|
||||
|
||||
.header("Cache-Control", "no-cache").contentType(MediaType.APPLICATION_OCTET_STREAM).body(resource);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@GetMapping("/extract-images")
|
||||
public String extractImagesForm(Model model) {
|
||||
model.addAttribute("currentPage", "extract-images");
|
||||
return "other/extract-images";
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package stirling.software.SPDF.controller.security;
|
||||
package stirling.software.SPDF.controller.other;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.ParseException;
|
||||
@@ -26,19 +26,20 @@ public class MetadataController {
|
||||
@GetMapping("/change-metadata")
|
||||
public String addWatermarkForm(Model model) {
|
||||
model.addAttribute("currentPage", "change-metadata");
|
||||
return "security/change-metadata";
|
||||
return "other/change-metadata";
|
||||
}
|
||||
|
||||
private String checkUndefined(String entry) {
|
||||
// Check if the string is "undefined"
|
||||
if("undefined".equals(entry)) {
|
||||
if ("undefined".equals(entry)) {
|
||||
// Return null if it is
|
||||
return null;
|
||||
}
|
||||
// Return the original string if it's not "undefined"
|
||||
return entry;
|
||||
|
||||
|
||||
}
|
||||
|
||||
@PostMapping("/update-metadata")
|
||||
public ResponseEntity<byte[]> metadata(@RequestParam("fileInput") MultipartFile pdfFile,
|
||||
@RequestParam(value = "deleteAll", required = false, defaultValue = "false") Boolean deleteAll, @RequestParam(value = "author", required = false) String author,
|
||||
@@ -50,10 +51,10 @@ public class MetadataController {
|
||||
|
||||
// Load the PDF file into a PDDocument
|
||||
PDDocument document = PDDocument.load(pdfFile.getBytes());
|
||||
|
||||
|
||||
// Get the document information from the PDF
|
||||
PDDocumentInformation info = document.getDocumentInformation();
|
||||
|
||||
|
||||
// Check if each metadata value is "undefined" and set it to null if it is
|
||||
author = checkUndefined(author);
|
||||
creationDate = checkUndefined(creationDate);
|
||||
@@ -64,8 +65,9 @@ public class MetadataController {
|
||||
subject = checkUndefined(subject);
|
||||
title = checkUndefined(title);
|
||||
trapped = checkUndefined(trapped);
|
||||
|
||||
// If the "deleteAll" flag is set, remove all metadata from the document information
|
||||
|
||||
// If the "deleteAll" flag is set, remove all metadata from the document
|
||||
// information
|
||||
if (deleteAll) {
|
||||
for (String key : info.getMetadataKeys()) {
|
||||
info.setCustomMetadataValue(key, null);
|
||||
@@ -83,7 +85,7 @@ public class MetadataController {
|
||||
title = null;
|
||||
trapped = null;
|
||||
} else {
|
||||
// Iterate through the request parameters and set the metadata values
|
||||
// Iterate through the request parameters and set the metadata values
|
||||
for (Entry<String, String> entry : allRequestParams.entrySet()) {
|
||||
String key = entry.getKey();
|
||||
// Check if the key is a standard metadata key
|
||||
@@ -128,11 +130,9 @@ public class MetadataController {
|
||||
info.setSubject(subject);
|
||||
info.setTitle(title);
|
||||
info.setTrapped(trapped);
|
||||
|
||||
|
||||
document.setDocumentInformation(info);
|
||||
return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_metadata.pdf");
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,168 @@
|
||||
package stirling.software.SPDF.controller.other;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.web.servlet.ModelAndView;
|
||||
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
|
||||
@Controller
|
||||
public class OCRController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(OCRController.class);
|
||||
|
||||
public List<String> getAvailableTesseractLanguages() {
|
||||
String tessdataDir = "/usr/share/tesseract-ocr/4.00/tessdata";
|
||||
File[] files = new File(tessdataDir).listFiles();
|
||||
if (files == null) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
return Arrays.stream(files).filter(file -> file.getName().endsWith(".traineddata")).map(file -> file.getName().replace(".traineddata", ""))
|
||||
.filter(lang -> !lang.equalsIgnoreCase("osd")).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@GetMapping("/ocr-pdf")
|
||||
public ModelAndView ocrPdfPage() {
|
||||
ModelAndView modelAndView = new ModelAndView("other/ocr-pdf");
|
||||
modelAndView.addObject("languages", getAvailableTesseractLanguages());
|
||||
modelAndView.addObject("currentPage", "ocr-pdf");
|
||||
return modelAndView;
|
||||
}
|
||||
|
||||
@PostMapping("/ocr-pdf")
|
||||
public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile, @RequestParam("languages") List<String> selectedLanguages,
|
||||
@RequestParam(name = "sidecar", required = false) Boolean sidecar, @RequestParam(name = "deskew", required = false) Boolean deskew,
|
||||
@RequestParam(name = "clean", required = false) Boolean clean, @RequestParam(name = "clean-final", required = false) Boolean cleanFinal,
|
||||
@RequestParam(name = "ocrType", required = false) String ocrType) throws IOException, InterruptedException {
|
||||
|
||||
// --output-type pdfa
|
||||
if (selectedLanguages == null || selectedLanguages.size() < 1) {
|
||||
throw new IOException("Please select at least one language.");
|
||||
}
|
||||
|
||||
// Validate and sanitize selected languages using regex
|
||||
String languagePattern = "^[a-zA-Z]{3}$"; // Regex pattern for three-letter language codes
|
||||
selectedLanguages = selectedLanguages.stream().filter(lang -> Pattern.matches(languagePattern, lang)).collect(Collectors.toList());
|
||||
|
||||
if (selectedLanguages.isEmpty()) {
|
||||
throw new IOException("None of the selected languages are valid.");
|
||||
}
|
||||
// Save the uploaded file to a temporary location
|
||||
Path tempInputFile = Files.createTempFile("input_", ".pdf");
|
||||
Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
|
||||
|
||||
// Prepare the output file path
|
||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
|
||||
// Prepare the output file path
|
||||
Path sidecarTextPath = null;
|
||||
|
||||
// Run OCR Command
|
||||
String languageOption = String.join("+", selectedLanguages);
|
||||
|
||||
List<String> command = new ArrayList<>(Arrays.asList("ocrmypdf", "--verbose", "2", "--output-type", "pdf"));
|
||||
|
||||
if (sidecar != null && sidecar) {
|
||||
sidecarTextPath = Files.createTempFile("sidecar", ".txt");
|
||||
command.add("--sidecar");
|
||||
command.add(sidecarTextPath.toString());
|
||||
}
|
||||
|
||||
if (deskew != null && deskew) {
|
||||
command.add("--deskew");
|
||||
}
|
||||
if (clean != null && clean) {
|
||||
command.add("--clean");
|
||||
}
|
||||
if (cleanFinal != null && cleanFinal) {
|
||||
command.add("--clean-final");
|
||||
}
|
||||
if (ocrType != null && !ocrType.equals("")) {
|
||||
if ("skip-text".equals(ocrType)) {
|
||||
command.add("--skip-text");
|
||||
} else if ("force-ocr".equals(ocrType)) {
|
||||
command.add("--force-ocr");
|
||||
} else if ("Normal".equals(ocrType)) {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
command.addAll(Arrays.asList("--language", languageOption, tempInputFile.toString(), tempOutputFile.toString()));
|
||||
|
||||
// Run CLI command
|
||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the OCR processed PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
// Clean up the temporary files
|
||||
Files.delete(tempInputFile);
|
||||
// Return the OCR processed PDF as a response
|
||||
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf";
|
||||
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
|
||||
if (sidecar != null && sidecar) {
|
||||
// Create a zip file containing both the PDF and the text file
|
||||
String outputZipFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.zip";
|
||||
Path tempZipFile = Files.createTempFile("output_", ".zip");
|
||||
|
||||
try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
|
||||
// Add PDF file to the zip
|
||||
ZipEntry pdfEntry = new ZipEntry(outputFilename);
|
||||
zipOut.putNextEntry(pdfEntry);
|
||||
Files.copy(tempOutputFile, zipOut);
|
||||
zipOut.closeEntry();
|
||||
|
||||
// Add text file to the zip
|
||||
ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt"));
|
||||
zipOut.putNextEntry(txtEntry);
|
||||
Files.copy(sidecarTextPath, zipOut);
|
||||
zipOut.closeEntry();
|
||||
}
|
||||
|
||||
byte[] zipBytes = Files.readAllBytes(tempZipFile);
|
||||
|
||||
// Clean up the temporary zip file
|
||||
Files.delete(tempZipFile);
|
||||
Files.delete(tempOutputFile);
|
||||
Files.delete(sidecarTextPath);
|
||||
|
||||
// Return the zip file containing both the PDF and the text file
|
||||
headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
|
||||
headers.setContentDispositionFormData("attachment", outputZipFilename);
|
||||
return ResponseEntity.ok().headers(headers).body(zipBytes);
|
||||
} else {
|
||||
// Return the OCR processed PDF as a response
|
||||
Files.delete(tempOutputFile);
|
||||
headers.setContentType(MediaType.APPLICATION_PDF);
|
||||
headers.setContentDispositionFormData("attachment", outputFilename);
|
||||
return ResponseEntity.ok().headers(headers).body(pdfBytes);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package stirling.software.SPDF.controller;
|
||||
package stirling.software.SPDF.controller.other;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
@@ -23,7 +23,7 @@ public class OverlayImageController {
|
||||
@GetMapping("/add-image")
|
||||
public String overlayImage(Model model) {
|
||||
model.addAttribute("currentPage", "add-image");
|
||||
return "add-image";
|
||||
return "other/add-image";
|
||||
}
|
||||
|
||||
@PostMapping("/add-image")
|
||||
@@ -28,23 +28,11 @@ public class PasswordController {
|
||||
return "security/add-password";
|
||||
}
|
||||
|
||||
@GetMapping("/remove-password")
|
||||
public String removePasswordForm(Model model) {
|
||||
model.addAttribute("currentPage", "remove-password");
|
||||
return "security/remove-password";
|
||||
}
|
||||
|
||||
@GetMapping("/change-permissions")
|
||||
public String permissionsForm(Model model) {
|
||||
model.addAttribute("currentPage", "change-permissions");
|
||||
return "security/change-permissions";
|
||||
}
|
||||
|
||||
@PostMapping("/remove-password")
|
||||
public ResponseEntity<byte[]> compressPDF(@RequestParam("fileInput") MultipartFile fileInput, @RequestParam(name = "password") String password) throws IOException {
|
||||
PDDocument document = PDDocument.load(fileInput.getBytes(), password);
|
||||
document.setAllSecurityToBeRemoved(true);
|
||||
return PdfUtils.pdfDocToWebResponse(document, fileInput.getOriginalFilename().replaceFirst("[.][^.]+$", "")+ "_password_removed.pdf");
|
||||
return PdfUtils.pdfDocToWebResponse(document, fileInput.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_password_removed.pdf");
|
||||
}
|
||||
|
||||
@PostMapping("/add-password")
|
||||
@@ -75,7 +63,19 @@ public class PasswordController {
|
||||
|
||||
document.protect(spp);
|
||||
|
||||
return PdfUtils.pdfDocToWebResponse(document, fileInput.getOriginalFilename().replaceFirst("[.][^.]+$", "")+ "_passworded.pdf");
|
||||
return PdfUtils.pdfDocToWebResponse(document, fileInput.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_passworded.pdf");
|
||||
}
|
||||
|
||||
@GetMapping("/change-permissions")
|
||||
public String permissionsForm(Model model) {
|
||||
model.addAttribute("currentPage", "change-permissions");
|
||||
return "security/change-permissions";
|
||||
}
|
||||
|
||||
@GetMapping("/remove-password")
|
||||
public String removePasswordForm(Model model) {
|
||||
model.addAttribute("currentPage", "remove-password");
|
||||
return "security/remove-password";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -31,34 +31,18 @@ import stirling.software.SPDF.utils.WatermarkRemover;
|
||||
@Controller
|
||||
public class WatermarkController {
|
||||
|
||||
@GetMapping("/add-watermark")
|
||||
public String addWatermarkForm(Model model) {
|
||||
model.addAttribute("currentPage", "add-watermark");
|
||||
return "security/add-watermark";
|
||||
}
|
||||
|
||||
@GetMapping("/remove-watermark")
|
||||
public String removeWatermarkForm(Model model) {
|
||||
model.addAttribute("currentPage", "remove-watermark");
|
||||
return "security/remove-watermark";
|
||||
}
|
||||
|
||||
@PostMapping("/add-watermark")
|
||||
public ResponseEntity<byte[]> addWatermark(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("watermarkText") String watermarkText,
|
||||
@RequestParam(defaultValue = "30", name = "fontSize") float fontSize, @RequestParam(defaultValue = "0", name = "rotation") float rotation,
|
||||
@RequestParam(defaultValue = "0.5", name = "opacity") float opacity,
|
||||
@RequestParam(defaultValue = "50", name = "widthSpacer") int widthSpacer, @RequestParam(defaultValue = "50", name = "heightSpacer") int heightSpacer)
|
||||
throws IOException {
|
||||
@RequestParam(defaultValue = "0.5", name = "opacity") float opacity, @RequestParam(defaultValue = "50", name = "widthSpacer") int widthSpacer,
|
||||
@RequestParam(defaultValue = "50", name = "heightSpacer") int heightSpacer) throws IOException {
|
||||
|
||||
// Load the input PDF
|
||||
PDDocument document = PDDocument.load(pdfFile.getInputStream());
|
||||
|
||||
// Create a page in the document
|
||||
for (PDPage page : document.getPages()) {
|
||||
|
||||
|
||||
|
||||
|
||||
// Get the page's content stream
|
||||
PDPageContentStream contentStream = new PDPageContentStream(document, page, PDPageContentStream.AppendMode.APPEND, true);
|
||||
|
||||
@@ -66,7 +50,7 @@ public class WatermarkController {
|
||||
PDExtendedGraphicsState graphicsState = new PDExtendedGraphicsState();
|
||||
graphicsState.setNonStrokingAlphaConstant(opacity);
|
||||
contentStream.setGraphicsStateParameters(graphicsState);
|
||||
|
||||
|
||||
// Set font of watermark
|
||||
PDFont font = PDType1Font.HELVETICA_BOLD;
|
||||
contentStream.beginText();
|
||||
@@ -94,19 +78,22 @@ public class WatermarkController {
|
||||
// Close the content stream
|
||||
contentStream.close();
|
||||
}
|
||||
return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_watermarked.pdf");
|
||||
return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_watermarked.pdf");
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@GetMapping("/add-watermark")
|
||||
public String addWatermarkForm(Model model) {
|
||||
model.addAttribute("currentPage", "add-watermark");
|
||||
return "security/add-watermark";
|
||||
}
|
||||
|
||||
@PostMapping("/remove-watermark")
|
||||
public ResponseEntity<byte[]> removeWatermark(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("watermarkText") String watermarkText) throws Exception {
|
||||
|
||||
|
||||
// Load the input PDF
|
||||
PDDocument document = PDDocument.load(pdfFile.getInputStream());
|
||||
|
||||
// Create a new PDF document for the output
|
||||
// Create a new PDF document for the output
|
||||
PDDocument outputDocument = new PDDocument();
|
||||
|
||||
// Loop through the pages
|
||||
@@ -115,7 +102,8 @@ public class WatermarkController {
|
||||
PDPage page = document.getPage(i);
|
||||
|
||||
// Process the content stream to remove the watermark text
|
||||
WatermarkRemover editor = new WatermarkRemover(watermarkText) {};
|
||||
WatermarkRemover editor = new WatermarkRemover(watermarkText) {
|
||||
};
|
||||
editor.processPage(page);
|
||||
editor.processPage(page);
|
||||
// Add the page to the output document
|
||||
@@ -149,9 +137,14 @@ public class WatermarkController {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return PdfUtils.pdfDocToWebResponse(outputDocument, "removed.pdf");
|
||||
}
|
||||
|
||||
|
||||
@GetMapping("/remove-watermark")
|
||||
public String removeWatermarkForm(Model model) {
|
||||
model.addAttribute("currentPage", "remove-watermark");
|
||||
return "security/remove-watermark";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user