format and move everything, other in own folder

This commit is contained in:
Anthony Stirling
2023-04-22 12:51:01 +01:00
parent af6cd2e38b
commit 78d3fd3768
33 changed files with 702 additions and 763 deletions

View File

@@ -30,22 +30,6 @@ public class MergeController {
return "merge-pdfs";
}
@PostMapping("/merge-pdfs")
public ResponseEntity<byte[]> mergePdfs(@RequestParam("fileInput") MultipartFile[] files) throws IOException {
// Read the input PDF files into PDDocument objects
List<PDDocument> documents = new ArrayList<>();
// Loop through the files array and read each file into a PDDocument
for (MultipartFile file : files) {
documents.add(PDDocument.load(file.getInputStream()));
}
PDDocument mergedDoc = mergeDocuments(documents);
// Return the merged PDF as a response
return PdfUtils.pdfDocToWebResponse(mergedDoc, files[0].getOriginalFilename().replaceFirst("[.][^.]+$", "")+ "_merged.pdf");
}
private PDDocument mergeDocuments(List<PDDocument> documents) throws IOException {
// Create a new empty document
PDDocument mergedDoc = new PDDocument();
@@ -64,4 +48,20 @@ public class MergeController {
return mergedDoc;
}
@PostMapping("/merge-pdfs")
public ResponseEntity<byte[]> mergePdfs(@RequestParam("fileInput") MultipartFile[] files) throws IOException {
// Read the input PDF files into PDDocument objects
List<PDDocument> documents = new ArrayList<>();
// Loop through the files array and read each file into a PDDocument
for (MultipartFile file : files) {
documents.add(PDDocument.load(file.getInputStream()));
}
PDDocument mergedDoc = mergeDocuments(documents);
// Return the merged PDF as a response
return PdfUtils.pdfDocToWebResponse(mergedDoc, files[0].getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_merged.pdf");
}
}

View File

@@ -9,12 +9,12 @@ import org.springframework.web.bind.annotation.GetMapping;
@Controller
public class MultiToolController {
private static final Logger logger = LoggerFactory.getLogger(MultiToolController.class);
private static final Logger logger = LoggerFactory.getLogger(MultiToolController.class);
@GetMapping("/multi-tool")
public String multiToolForm(Model model) {
model.addAttribute("currentPage", "multi-tool");
return "multi-tool";
}
@GetMapping("/multi-tool")
public String multiToolForm(Model model) {
model.addAttribute("currentPage", "multi-tool");
return "multi-tool";
}
}

View File

@@ -1,180 +0,0 @@
package stirling.software.SPDF.controller;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.servlet.ModelAndView;
import stirling.software.SPDF.utils.ProcessExecutor;
@Controller
public class OCRController {
private static final Logger logger = LoggerFactory.getLogger(OCRController.class);
@GetMapping("/ocr-pdf")
public ModelAndView ocrPdfPage() {
ModelAndView modelAndView = new ModelAndView("ocr-pdf");
modelAndView.addObject("languages", getAvailableTesseractLanguages());
modelAndView.addObject("currentPage", "ocr-pdf");
return modelAndView;
}
@PostMapping("/ocr-pdf")
public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile,
@RequestParam("languages") List<String> selectedLanguages,
@RequestParam(name = "sidecar", required = false) Boolean sidecar,
@RequestParam(name = "deskew", required = false) Boolean deskew,
@RequestParam(name = "clean", required = false) Boolean clean,
@RequestParam(name = "clean-final", required = false) Boolean cleanFinal,
@RequestParam(name = "ocrType", required = false) String ocrType) throws IOException, InterruptedException {
//--output-type pdfa
if (selectedLanguages == null || selectedLanguages.size() < 1) {
throw new IOException("Please select at least one language.");
}
// Validate and sanitize selected languages using regex
String languagePattern = "^[a-zA-Z]{3}$"; // Regex pattern for three-letter language codes
selectedLanguages = selectedLanguages.stream()
.filter(lang -> Pattern.matches(languagePattern, lang))
.collect(Collectors.toList());
if (selectedLanguages.isEmpty()) {
throw new IOException("None of the selected languages are valid.");
}
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", ".pdf");
Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
// Prepare the output file path
Path sidecarTextPath = null;
// Run OCR Command
String languageOption = String.join("+", selectedLanguages);
List<String> command = new ArrayList<>(Arrays.asList("ocrmypdf","--verbose", "2", "--output-type", "pdf"));
if (sidecar != null && sidecar) {
sidecarTextPath = Files.createTempFile("sidecar", ".txt");
command.add("--sidecar");
command.add(sidecarTextPath.toString());
}
if (deskew != null && deskew) {
command.add("--deskew");
}
if (clean != null && clean) {
command.add("--clean");
}
if (cleanFinal != null && cleanFinal) {
command.add("--clean-final");
}
if (ocrType != null && !ocrType.equals("")) {
if("skip-text".equals(ocrType)) {
command.add("--skip-text");
} else if("force-ocr".equals(ocrType)) {
command.add("--force-ocr");
} else if("Normal".equals(ocrType)) {
}
}
command.addAll(Arrays.asList("--language", languageOption,
tempInputFile.toString(), tempOutputFile.toString()));
//Run CLI command
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
// Read the OCR processed PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Clean up the temporary files
Files.delete(tempInputFile);
// Return the OCR processed PDF as a response
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf";
HttpHeaders headers = new HttpHeaders();
if (sidecar != null && sidecar) {
// Create a zip file containing both the PDF and the text file
String outputZipFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.zip";
Path tempZipFile = Files.createTempFile("output_", ".zip");
try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
// Add PDF file to the zip
ZipEntry pdfEntry = new ZipEntry(outputFilename);
zipOut.putNextEntry(pdfEntry);
Files.copy(tempOutputFile, zipOut);
zipOut.closeEntry();
// Add text file to the zip
ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt"));
zipOut.putNextEntry(txtEntry);
Files.copy(sidecarTextPath, zipOut);
zipOut.closeEntry();
}
byte[] zipBytes = Files.readAllBytes(tempZipFile);
// Clean up the temporary zip file
Files.delete(tempZipFile);
Files.delete(tempOutputFile);
Files.delete(sidecarTextPath);
// Return the zip file containing both the PDF and the text file
headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
headers.setContentDispositionFormData("attachment", outputZipFilename);
return ResponseEntity.ok().headers(headers).body(zipBytes);
} else {
// Return the OCR processed PDF as a response
Files.delete(tempOutputFile);
headers.setContentType(MediaType.APPLICATION_PDF);
headers.setContentDispositionFormData("attachment", outputFilename);
return ResponseEntity.ok().headers(headers).body(pdfBytes);
}
}
public List<String> getAvailableTesseractLanguages() {
String tessdataDir = "/usr/share/tesseract-ocr/4.00/tessdata";
File[] files = new File(tessdataDir).listFiles();
if (files == null) {
return Collections.emptyList();
}
return Arrays.stream(files)
.filter(file -> file.getName().endsWith(".traineddata"))
.map(file -> file.getName().replace(".traineddata", ""))
.filter(lang -> !lang.equalsIgnoreCase("osd"))
.collect(Collectors.toList());
}
}

View File

@@ -10,11 +10,6 @@ import org.springframework.web.bind.annotation.GetMapping;
public class PdfController {
private static final Logger logger = LoggerFactory.getLogger(PdfController.class);
@GetMapping("/home")
public String root(Model model) {
return "redirect:/";
}
@GetMapping("/")
public String home(Model model) {
@@ -22,6 +17,9 @@ public class PdfController {
return "home";
}
@GetMapping("/home")
public String root(Model model) {
return "redirect:/";
}
}

View File

@@ -23,18 +23,6 @@ public class RearrangePagesPDFController {
private static final Logger logger = LoggerFactory.getLogger(RearrangePagesPDFController.class);
@GetMapping("/pdf-organizer")
public String pageOrganizer(Model model) {
model.addAttribute("currentPage", "pdf-organizer");
return "pdf-organizer";
}
@GetMapping("/remove-pages")
public String pageDeleter(Model model) {
model.addAttribute("currentPage", "remove-pages");
return "remove-pages";
}
@PostMapping("/remove-pages")
public ResponseEntity<byte[]> deletePages(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("pagesToDelete") String pagesToDelete) throws IOException {
@@ -53,6 +41,12 @@ public class RearrangePagesPDFController {
}
@GetMapping("/remove-pages")
public String pageDeleter(Model model) {
model.addAttribute("currentPage", "remove-pages");
return "remove-pages";
}
private List<Integer> pageOrderToString(String[] pageOrderArr, int totalPages) {
List<Integer> newPageOrder = new ArrayList<>();
// loop through the page order array
@@ -81,6 +75,12 @@ public class RearrangePagesPDFController {
return newPageOrder;
}
@GetMapping("/pdf-organizer")
public String pageOrganizer(Model model) {
model.addAttribute("currentPage", "pdf-organizer");
return "pdf-organizer";
}
@PostMapping("/rearrange-pages")
public ResponseEntity<byte[]> rearrangePages(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("pageOrder") String pageOrder) {
try {

View File

@@ -22,12 +22,6 @@ public class RotationController {
private static final Logger logger = LoggerFactory.getLogger(RotationController.class);
@GetMapping("/rotate-pdf")
public String rotatePdfForm(Model model) {
model.addAttribute("currentPage", "rotate-pdf");
return "rotate-pdf";
}
@PostMapping("/rotate-pdf")
public ResponseEntity<byte[]> rotatePDF(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("angle") Integer angle) throws IOException {
@@ -45,4 +39,10 @@ public class RotationController {
}
@GetMapping("/rotate-pdf")
public String rotatePdfForm(Model model) {
model.addAttribute("currentPage", "rotate-pdf");
return "rotate-pdf";
}
}

View File

@@ -27,17 +27,12 @@ import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
@Controller
public class SplitPDFController {
private static final Logger logger = LoggerFactory.getLogger(SplitPDFController.class);
@GetMapping("/split-pdfs")
public String splitPdfForm(Model model) {
model.addAttribute("currentPage", "split-pdfs");
return "split-pdfs";
}
@PostMapping("/split-pages")
public ResponseEntity<Resource> splitPdf(@RequestParam("fileInput") MultipartFile file, @RequestParam("pages") String pages) throws IOException {
// parse user input
@@ -128,7 +123,13 @@ public class SplitPDFController {
Files.delete(zipFile);
// return the Resource in the response
return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_split.zip").contentType(MediaType.APPLICATION_OCTET_STREAM)
.contentLength(resource.contentLength()).body(resource);
return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_split.zip")
.contentType(MediaType.APPLICATION_OCTET_STREAM).contentLength(resource.contentLength()).body(resource);
}
@GetMapping("/split-pdfs")
public String splitPdfForm(Model model) {
model.addAttribute("currentPage", "split-pdfs");
return "split-pdfs";
}
}

View File

@@ -25,28 +25,6 @@ public class ConvertImgPDFController {
private static final Logger logger = LoggerFactory.getLogger(ConvertImgPDFController.class);
@GetMapping("/img-to-pdf")
public String convertToPdfForm(Model model) {
model.addAttribute("currentPage", "img-to-pdf");
return "convert/img-to-pdf";
}
@GetMapping("/pdf-to-img")
public String pdfToimgForm(Model model) {
model.addAttribute("currentPage", "pdf-to-img");
return "convert/pdf-to-img";
}
@PostMapping("/img-to-pdf")
public ResponseEntity<byte[]> convertToPdf(@RequestParam("fileInput") MultipartFile[] file,
@RequestParam(defaultValue = "false", name = "stretchToFit") boolean stretchToFit,
@RequestParam(defaultValue = "true", name = "autoRotate") boolean autoRotate) throws IOException {
// Convert the file to PDF and get the resulting bytes
System.out.println(stretchToFit);
byte[] bytes = PdfUtils.imageToPdf(file, stretchToFit, autoRotate);
return PdfUtils.bytesToWebResponse(bytes, file[0].getOriginalFilename().replaceFirst("[.][^.]+$", "")+ "_coverted.pdf");
}
@PostMapping("/pdf-to-img")
public ResponseEntity<Resource> convertToImage(@RequestParam("fileInput") MultipartFile file, @RequestParam("imageFormat") String imageFormat,
@RequestParam("singleOrMultiple") String singleOrMultiple, @RequestParam("colorType") String colorType, @RequestParam("dpi") String dpi) throws IOException {
@@ -78,11 +56,27 @@ public class ConvertImgPDFController {
} else {
ByteArrayResource resource = new ByteArrayResource(result);
// return the Resource in the response
return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename="+ file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToImages.zip").contentType(MediaType.APPLICATION_OCTET_STREAM)
.contentLength(resource.contentLength()).body(resource);
return ResponseEntity.ok()
.header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToImages.zip")
.contentType(MediaType.APPLICATION_OCTET_STREAM).contentLength(resource.contentLength()).body(resource);
}
}
@PostMapping("/img-to-pdf")
public ResponseEntity<byte[]> convertToPdf(@RequestParam("fileInput") MultipartFile[] file, @RequestParam(defaultValue = "false", name = "stretchToFit") boolean stretchToFit,
@RequestParam(defaultValue = "true", name = "autoRotate") boolean autoRotate) throws IOException {
// Convert the file to PDF and get the resulting bytes
System.out.println(stretchToFit);
byte[] bytes = PdfUtils.imageToPdf(file, stretchToFit, autoRotate);
return PdfUtils.bytesToWebResponse(bytes, file[0].getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_coverted.pdf");
}
@GetMapping("/img-to-pdf")
public String convertToPdfForm(Model model) {
model.addAttribute("currentPage", "img-to-pdf");
return "convert/img-to-pdf";
}
private String getMediaType(String imageFormat) {
if (imageFormat.equalsIgnoreCase("PNG"))
return "image/png";
@@ -94,4 +88,10 @@ public class ConvertImgPDFController {
return "application/octet-stream";
}
@GetMapping("/pdf-to-img")
public String pdfToimgForm(Model model) {
model.addAttribute("currentPage", "pdf-to-img");
return "convert/pdf-to-img";
}
}

View File

@@ -19,62 +19,57 @@ import org.springframework.web.multipart.MultipartFile;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
@Controller
public class ConvertOfficeController {
@GetMapping("/file-to-pdf")
public byte[] convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException {
// Check for valid file extension
String originalFilename = inputFile.getOriginalFilename();
if (originalFilename == null || !isValidFileExtension(FilenameUtils.getExtension(originalFilename))) {
throw new IllegalArgumentException("Invalid file extension");
}
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", "." + FilenameUtils.getExtension(originalFilename));
Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
// Run the LibreOffice command
List<String> command = new ArrayList<>(Arrays.asList("unoconv", "-vvv", "-f", "pdf", "-o", tempOutputFile.toString(), tempInputFile.toString()));
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
// Read the converted PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Clean up the temporary files
Files.delete(tempInputFile);
Files.delete(tempOutputFile);
return pdfBytes;
}
@GetMapping("/file-to-pdf")
public String convertToPdfForm(Model model) {
model.addAttribute("currentPage", "file-to-pdf");
return "convert/file-to-pdf";
}
@PostMapping("/file-to-pdf")
public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
//unused but can start server instance if startup time is to long
//LibreOfficeListener.getInstance().start();
byte[] pdfByteArray = convertToPdf(inputFile);
return PdfUtils.bytesToWebResponse(pdfByteArray, inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
}
public byte[] convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException {
// Check for valid file extension
String originalFilename = inputFile.getOriginalFilename();
if (originalFilename == null || !isValidFileExtension(FilenameUtils.getExtension(originalFilename))) {
throw new IllegalArgumentException("Invalid file extension");
private boolean isValidFileExtension(String fileExtension) {
String extensionPattern = "^(?i)[a-z0-9]{2,4}$";
return fileExtension.matches(extensionPattern);
}
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", "." + FilenameUtils.getExtension(originalFilename));
Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
@PostMapping("/file-to-pdf")
public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
// unused but can start server instance if startup time is to long
// LibreOfficeListener.getInstance().start();
// Run the LibreOffice command
List<String> command = new ArrayList<>(Arrays.asList("unoconv", "-vvv",
"-f",
"pdf",
"-o",
tempOutputFile.toString(),
tempInputFile.toString()));
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
// Read the converted PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Clean up the temporary files
Files.delete(tempInputFile);
Files.delete(tempOutputFile);
return pdfBytes;
}
private boolean isValidFileExtension(String fileExtension) {
String extensionPattern = "^(?i)[a-z0-9]{2,4}$";
return fileExtension.matches(extensionPattern);
}
byte[] pdfByteArray = convertToPdf(inputFile);
return PdfUtils.bytesToWebResponse(pdfByteArray, inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
}
}

View File

@@ -15,12 +15,10 @@ import stirling.software.SPDF.utils.PDFToFile;
@Controller
public class ConvertPDFToOffice {
@GetMapping("/pdf-to-word")
public ModelAndView pdfToWord() {
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-word");
modelAndView.addObject("currentPage", "pdf-to-word");
@GetMapping("/pdf-to-html")
public ModelAndView pdfToHTML() {
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-html");
modelAndView.addObject("currentPage", "pdf-to-html");
return modelAndView;
}
@@ -38,10 +36,10 @@ public class ConvertPDFToOffice {
return modelAndView;
}
@GetMapping("/pdf-to-html")
public ModelAndView pdfToHTML() {
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-html");
modelAndView.addObject("currentPage", "pdf-to-html");
@GetMapping("/pdf-to-word")
public ModelAndView pdfToWord() {
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-word");
modelAndView.addObject("currentPage", "pdf-to-word");
return modelAndView;
}
@@ -52,46 +50,37 @@ public class ConvertPDFToOffice {
return modelAndView;
}
@PostMapping("/pdf-to-word")
public ResponseEntity<byte[]> processPdfToWord(@RequestParam("fileInput") MultipartFile inputFile,
@RequestParam("outputFormat") String outputFormat) throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
}
@PostMapping("/pdf-to-presentation")
public ResponseEntity<byte[]> processPdfToPresentation(@RequestParam("fileInput") MultipartFile inputFile,
@RequestParam("outputFormat") String outputFormat) throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "impress_pdf_import");
}
@PostMapping("/pdf-to-text")
public ResponseEntity<byte[]> processPdfToRTForTXT(@RequestParam("fileInput") MultipartFile inputFile,
@RequestParam("outputFormat") String outputFormat) throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
}
@PostMapping("/pdf-to-html")
public ResponseEntity<byte[]> processPdfToHTML(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, "html", "writer_pdf_import");
}
@PostMapping("/pdf-to-presentation")
public ResponseEntity<byte[]> processPdfToPresentation(@RequestParam("fileInput") MultipartFile inputFile, @RequestParam("outputFormat") String outputFormat)
throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "impress_pdf_import");
}
@PostMapping("/pdf-to-text")
public ResponseEntity<byte[]> processPdfToRTForTXT(@RequestParam("fileInput") MultipartFile inputFile, @RequestParam("outputFormat") String outputFormat)
throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
}
@PostMapping("/pdf-to-word")
public ResponseEntity<byte[]> processPdfToWord(@RequestParam("fileInput") MultipartFile inputFile, @RequestParam("outputFormat") String outputFormat)
throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
}
@PostMapping("/pdf-to-xml")
public ResponseEntity<byte[]> processPdfToXML(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, "xml", "writer_pdf_import");
}
}

View File

@@ -17,21 +17,13 @@ import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import stirling.software.SPDF.utils.ProcessExecutor;
@Controller
public class ConvertPDFToPDFA {
@GetMapping("/pdf-to-pdfa")
public String pdfToPdfAForm(Model model) {
model.addAttribute("currentPage", "pdf-to-pdfa");
return "convert/pdf-to-pdfa";
}
@PostMapping("/pdf-to-pdfa")
public ResponseEntity<byte[]> pdfToPdfA(
@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
public ResponseEntity<byte[]> pdfToPdfA(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", ".pdf");
inputFile.transferTo(tempInputFile.toFile());
@@ -50,7 +42,7 @@ public class ConvertPDFToPDFA {
command.add(tempOutputFile.toString());
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
// Read the optimized PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
@@ -64,7 +56,12 @@ public class ConvertPDFToPDFA {
headers.setContentType(MediaType.APPLICATION_PDF);
headers.setContentDispositionFormData("attachment", outputFilename);
return ResponseEntity.ok().headers(headers).body(pdfBytes);
}
}
@GetMapping("/pdf-to-pdfa")
public String pdfToPdfAForm(Model model) {
model.addAttribute("currentPage", "pdf-to-pdfa");
return "convert/pdf-to-pdfa";
}
}

View File

@@ -1,4 +1,4 @@
package stirling.software.SPDF.controller;
package stirling.software.SPDF.controller.other;
import java.io.IOException;
import java.nio.file.Files;
@@ -20,7 +20,6 @@ import org.springframework.web.multipart.MultipartFile;
import stirling.software.SPDF.utils.ProcessExecutor;
@Controller
public class CompressController {
@@ -29,16 +28,13 @@ public class CompressController {
@GetMapping("/compress-pdf")
public String compressPdfForm(Model model) {
model.addAttribute("currentPage", "compress-pdf");
return "compress-pdf";
return "other/compress-pdf";
}
@PostMapping("/compress-pdf")
public ResponseEntity<byte[]> optimizePdf(
@RequestParam("fileInput") MultipartFile inputFile,
@RequestParam("optimizeLevel") int optimizeLevel,
@RequestParam(name = "fastWebView", required = false) Boolean fastWebView,
@RequestParam(name = "jbig2Lossy", required = false) Boolean jbig2Lossy) throws IOException, InterruptedException {
public ResponseEntity<byte[]> optimizePdf(@RequestParam("fileInput") MultipartFile inputFile, @RequestParam("optimizeLevel") int optimizeLevel,
@RequestParam(name = "fastWebView", required = false) Boolean fastWebView, @RequestParam(name = "jbig2Lossy", required = false) Boolean jbig2Lossy)
throws IOException, InterruptedException {
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", ".pdf");
@@ -57,7 +53,6 @@ public class CompressController {
command.add("--output-type");
command.add("pdf");
if (fastWebView != null && fastWebView) {
long fileSize = inputFile.getSize();
long fastWebViewSize = (long) (fileSize * 1.25); // 25% higher than file size
@@ -73,7 +68,7 @@ public class CompressController {
command.add(tempOutputFile.toString());
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
// Read the optimized PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
@@ -87,6 +82,6 @@ public class CompressController {
headers.setContentType(MediaType.APPLICATION_PDF);
headers.setContentDispositionFormData("attachment", outputFilename);
return ResponseEntity.ok().headers(headers).body(pdfBytes);
}
}
}

View File

@@ -1,4 +1,4 @@
package stirling.software.SPDF.controller;
package stirling.software.SPDF.controller.other;
import java.awt.Graphics2D;
import java.awt.Image;
@@ -36,15 +36,9 @@ public class ExtractImagesController {
private static final Logger logger = LoggerFactory.getLogger(ExtractImagesController.class);
@GetMapping("/extract-images")
public String extractImagesForm(Model model) {
model.addAttribute("currentPage", "extract-images");
return "extract-images";
}
@PostMapping("/extract-images")
public ResponseEntity<Resource> extractImages(@RequestParam("fileInput") MultipartFile file, @RequestParam("format") String format) throws IOException {
System.out.println(System.currentTimeMillis() + "file=" + file.getName() + ", format=" + format);
PDDocument document = PDDocument.load(file.getBytes());
@@ -58,7 +52,7 @@ public class ExtractImagesController {
zos.setLevel(Deflater.BEST_COMPRESSION);
int imageIndex = 1;
int pageNum = 1;
// Iterate over each page
for (PDPage page : document.getPages()) {
@@ -72,21 +66,18 @@ public class ExtractImagesController {
RenderedImage renderedImage = image.getImage();
BufferedImage bufferedImage = null;
if (format.equalsIgnoreCase("png")) {
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(),
BufferedImage.TYPE_INT_ARGB);
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), BufferedImage.TYPE_INT_ARGB);
} else if (format.equalsIgnoreCase("jpeg") || format.equalsIgnoreCase("jpg")) {
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(),
BufferedImage.TYPE_INT_RGB);
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), BufferedImage.TYPE_INT_RGB);
} else if (format.equalsIgnoreCase("gif")) {
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(),
BufferedImage.TYPE_BYTE_INDEXED);
}
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), BufferedImage.TYPE_BYTE_INDEXED);
}
// Write image to zip file
String imageName = "Image " + imageIndex + " (Page " + pageNum + ")." + format;
ZipEntry zipEntry = new ZipEntry(imageName);
zos.putNextEntry(zipEntry);
Graphics2D g = bufferedImage.createGraphics();
g.drawImage((Image) renderedImage, 0, 0, null);
g.dispose();
@@ -94,12 +85,11 @@ public class ExtractImagesController {
ByteArrayOutputStream imageBaos = new ByteArrayOutputStream();
ImageIO.write(bufferedImage, format, imageBaos);
zos.write(imageBaos.toByteArray());
zos.closeEntry();
imageIndex++;
}
}
}
}
// Close ZipOutputStream and PDDocument
@@ -110,20 +100,22 @@ public class ExtractImagesController {
byte[] zipContents = baos.toByteArray();
ByteArrayResource resource = new ByteArrayResource(zipContents);
// Set content disposition header to indicate that the response should be downloaded as a file
// Set content disposition header to indicate that the response should be
// downloaded as a file
HttpHeaders headers = new HttpHeaders();
headers.setContentLength(zipContents.length);
headers.add(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_extracted-images.zip");
// Return ResponseEntity with ByteArrayResource and headers
return ResponseEntity
.status(HttpStatus.OK)
.headers(headers)
.header("Cache-Control", "no-cache")
.contentType(MediaType.APPLICATION_OCTET_STREAM)
.body(resource);
return ResponseEntity.status(HttpStatus.OK).headers(headers)
.header("Cache-Control", "no-cache").contentType(MediaType.APPLICATION_OCTET_STREAM).body(resource);
}
@GetMapping("/extract-images")
public String extractImagesForm(Model model) {
model.addAttribute("currentPage", "extract-images");
return "other/extract-images";
}
}

View File

@@ -1,4 +1,4 @@
package stirling.software.SPDF.controller.security;
package stirling.software.SPDF.controller.other;
import java.io.IOException;
import java.text.ParseException;
@@ -26,19 +26,20 @@ public class MetadataController {
@GetMapping("/change-metadata")
public String addWatermarkForm(Model model) {
model.addAttribute("currentPage", "change-metadata");
return "security/change-metadata";
return "other/change-metadata";
}
private String checkUndefined(String entry) {
// Check if the string is "undefined"
if("undefined".equals(entry)) {
if ("undefined".equals(entry)) {
// Return null if it is
return null;
}
// Return the original string if it's not "undefined"
return entry;
}
@PostMapping("/update-metadata")
public ResponseEntity<byte[]> metadata(@RequestParam("fileInput") MultipartFile pdfFile,
@RequestParam(value = "deleteAll", required = false, defaultValue = "false") Boolean deleteAll, @RequestParam(value = "author", required = false) String author,
@@ -50,10 +51,10 @@ public class MetadataController {
// Load the PDF file into a PDDocument
PDDocument document = PDDocument.load(pdfFile.getBytes());
// Get the document information from the PDF
PDDocumentInformation info = document.getDocumentInformation();
// Check if each metadata value is "undefined" and set it to null if it is
author = checkUndefined(author);
creationDate = checkUndefined(creationDate);
@@ -64,8 +65,9 @@ public class MetadataController {
subject = checkUndefined(subject);
title = checkUndefined(title);
trapped = checkUndefined(trapped);
// If the "deleteAll" flag is set, remove all metadata from the document information
// If the "deleteAll" flag is set, remove all metadata from the document
// information
if (deleteAll) {
for (String key : info.getMetadataKeys()) {
info.setCustomMetadataValue(key, null);
@@ -83,7 +85,7 @@ public class MetadataController {
title = null;
trapped = null;
} else {
// Iterate through the request parameters and set the metadata values
// Iterate through the request parameters and set the metadata values
for (Entry<String, String> entry : allRequestParams.entrySet()) {
String key = entry.getKey();
// Check if the key is a standard metadata key
@@ -128,11 +130,9 @@ public class MetadataController {
info.setSubject(subject);
info.setTitle(title);
info.setTrapped(trapped);
document.setDocumentInformation(info);
return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_metadata.pdf");
}
}

View File

@@ -0,0 +1,168 @@
package stirling.software.SPDF.controller.other;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.servlet.ModelAndView;
import stirling.software.SPDF.utils.ProcessExecutor;
@Controller
public class OCRController {
private static final Logger logger = LoggerFactory.getLogger(OCRController.class);
public List<String> getAvailableTesseractLanguages() {
String tessdataDir = "/usr/share/tesseract-ocr/4.00/tessdata";
File[] files = new File(tessdataDir).listFiles();
if (files == null) {
return Collections.emptyList();
}
return Arrays.stream(files).filter(file -> file.getName().endsWith(".traineddata")).map(file -> file.getName().replace(".traineddata", ""))
.filter(lang -> !lang.equalsIgnoreCase("osd")).collect(Collectors.toList());
}
@GetMapping("/ocr-pdf")
public ModelAndView ocrPdfPage() {
ModelAndView modelAndView = new ModelAndView("other/ocr-pdf");
modelAndView.addObject("languages", getAvailableTesseractLanguages());
modelAndView.addObject("currentPage", "ocr-pdf");
return modelAndView;
}
@PostMapping("/ocr-pdf")
public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile, @RequestParam("languages") List<String> selectedLanguages,
@RequestParam(name = "sidecar", required = false) Boolean sidecar, @RequestParam(name = "deskew", required = false) Boolean deskew,
@RequestParam(name = "clean", required = false) Boolean clean, @RequestParam(name = "clean-final", required = false) Boolean cleanFinal,
@RequestParam(name = "ocrType", required = false) String ocrType) throws IOException, InterruptedException {
// --output-type pdfa
if (selectedLanguages == null || selectedLanguages.size() < 1) {
throw new IOException("Please select at least one language.");
}
// Validate and sanitize selected languages using regex
String languagePattern = "^[a-zA-Z]{3}$"; // Regex pattern for three-letter language codes
selectedLanguages = selectedLanguages.stream().filter(lang -> Pattern.matches(languagePattern, lang)).collect(Collectors.toList());
if (selectedLanguages.isEmpty()) {
throw new IOException("None of the selected languages are valid.");
}
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", ".pdf");
Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
// Prepare the output file path
Path sidecarTextPath = null;
// Run OCR Command
String languageOption = String.join("+", selectedLanguages);
List<String> command = new ArrayList<>(Arrays.asList("ocrmypdf", "--verbose", "2", "--output-type", "pdf"));
if (sidecar != null && sidecar) {
sidecarTextPath = Files.createTempFile("sidecar", ".txt");
command.add("--sidecar");
command.add(sidecarTextPath.toString());
}
if (deskew != null && deskew) {
command.add("--deskew");
}
if (clean != null && clean) {
command.add("--clean");
}
if (cleanFinal != null && cleanFinal) {
command.add("--clean-final");
}
if (ocrType != null && !ocrType.equals("")) {
if ("skip-text".equals(ocrType)) {
command.add("--skip-text");
} else if ("force-ocr".equals(ocrType)) {
command.add("--force-ocr");
} else if ("Normal".equals(ocrType)) {
}
}
command.addAll(Arrays.asList("--language", languageOption, tempInputFile.toString(), tempOutputFile.toString()));
// Run CLI command
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
// Read the OCR processed PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Clean up the temporary files
Files.delete(tempInputFile);
// Return the OCR processed PDF as a response
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf";
HttpHeaders headers = new HttpHeaders();
if (sidecar != null && sidecar) {
// Create a zip file containing both the PDF and the text file
String outputZipFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.zip";
Path tempZipFile = Files.createTempFile("output_", ".zip");
try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
// Add PDF file to the zip
ZipEntry pdfEntry = new ZipEntry(outputFilename);
zipOut.putNextEntry(pdfEntry);
Files.copy(tempOutputFile, zipOut);
zipOut.closeEntry();
// Add text file to the zip
ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt"));
zipOut.putNextEntry(txtEntry);
Files.copy(sidecarTextPath, zipOut);
zipOut.closeEntry();
}
byte[] zipBytes = Files.readAllBytes(tempZipFile);
// Clean up the temporary zip file
Files.delete(tempZipFile);
Files.delete(tempOutputFile);
Files.delete(sidecarTextPath);
// Return the zip file containing both the PDF and the text file
headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
headers.setContentDispositionFormData("attachment", outputZipFilename);
return ResponseEntity.ok().headers(headers).body(zipBytes);
} else {
// Return the OCR processed PDF as a response
Files.delete(tempOutputFile);
headers.setContentType(MediaType.APPLICATION_PDF);
headers.setContentDispositionFormData("attachment", outputFilename);
return ResponseEntity.ok().headers(headers).body(pdfBytes);
}
}
}

View File

@@ -1,4 +1,4 @@
package stirling.software.SPDF.controller;
package stirling.software.SPDF.controller.other;
import java.io.IOException;
@@ -23,7 +23,7 @@ public class OverlayImageController {
@GetMapping("/add-image")
public String overlayImage(Model model) {
model.addAttribute("currentPage", "add-image");
return "add-image";
return "other/add-image";
}
@PostMapping("/add-image")

View File

@@ -28,23 +28,11 @@ public class PasswordController {
return "security/add-password";
}
@GetMapping("/remove-password")
public String removePasswordForm(Model model) {
model.addAttribute("currentPage", "remove-password");
return "security/remove-password";
}
@GetMapping("/change-permissions")
public String permissionsForm(Model model) {
model.addAttribute("currentPage", "change-permissions");
return "security/change-permissions";
}
@PostMapping("/remove-password")
public ResponseEntity<byte[]> compressPDF(@RequestParam("fileInput") MultipartFile fileInput, @RequestParam(name = "password") String password) throws IOException {
PDDocument document = PDDocument.load(fileInput.getBytes(), password);
document.setAllSecurityToBeRemoved(true);
return PdfUtils.pdfDocToWebResponse(document, fileInput.getOriginalFilename().replaceFirst("[.][^.]+$", "")+ "_password_removed.pdf");
return PdfUtils.pdfDocToWebResponse(document, fileInput.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_password_removed.pdf");
}
@PostMapping("/add-password")
@@ -75,7 +63,19 @@ public class PasswordController {
document.protect(spp);
return PdfUtils.pdfDocToWebResponse(document, fileInput.getOriginalFilename().replaceFirst("[.][^.]+$", "")+ "_passworded.pdf");
return PdfUtils.pdfDocToWebResponse(document, fileInput.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_passworded.pdf");
}
@GetMapping("/change-permissions")
public String permissionsForm(Model model) {
model.addAttribute("currentPage", "change-permissions");
return "security/change-permissions";
}
@GetMapping("/remove-password")
public String removePasswordForm(Model model) {
model.addAttribute("currentPage", "remove-password");
return "security/remove-password";
}
}

View File

@@ -31,34 +31,18 @@ import stirling.software.SPDF.utils.WatermarkRemover;
@Controller
public class WatermarkController {
@GetMapping("/add-watermark")
public String addWatermarkForm(Model model) {
model.addAttribute("currentPage", "add-watermark");
return "security/add-watermark";
}
@GetMapping("/remove-watermark")
public String removeWatermarkForm(Model model) {
model.addAttribute("currentPage", "remove-watermark");
return "security/remove-watermark";
}
@PostMapping("/add-watermark")
public ResponseEntity<byte[]> addWatermark(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("watermarkText") String watermarkText,
@RequestParam(defaultValue = "30", name = "fontSize") float fontSize, @RequestParam(defaultValue = "0", name = "rotation") float rotation,
@RequestParam(defaultValue = "0.5", name = "opacity") float opacity,
@RequestParam(defaultValue = "50", name = "widthSpacer") int widthSpacer, @RequestParam(defaultValue = "50", name = "heightSpacer") int heightSpacer)
throws IOException {
@RequestParam(defaultValue = "0.5", name = "opacity") float opacity, @RequestParam(defaultValue = "50", name = "widthSpacer") int widthSpacer,
@RequestParam(defaultValue = "50", name = "heightSpacer") int heightSpacer) throws IOException {
// Load the input PDF
PDDocument document = PDDocument.load(pdfFile.getInputStream());
// Create a page in the document
for (PDPage page : document.getPages()) {
// Get the page's content stream
PDPageContentStream contentStream = new PDPageContentStream(document, page, PDPageContentStream.AppendMode.APPEND, true);
@@ -66,7 +50,7 @@ public class WatermarkController {
PDExtendedGraphicsState graphicsState = new PDExtendedGraphicsState();
graphicsState.setNonStrokingAlphaConstant(opacity);
contentStream.setGraphicsStateParameters(graphicsState);
// Set font of watermark
PDFont font = PDType1Font.HELVETICA_BOLD;
contentStream.beginText();
@@ -94,19 +78,22 @@ public class WatermarkController {
// Close the content stream
contentStream.close();
}
return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_watermarked.pdf");
return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_watermarked.pdf");
}
@GetMapping("/add-watermark")
public String addWatermarkForm(Model model) {
model.addAttribute("currentPage", "add-watermark");
return "security/add-watermark";
}
@PostMapping("/remove-watermark")
public ResponseEntity<byte[]> removeWatermark(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("watermarkText") String watermarkText) throws Exception {
// Load the input PDF
PDDocument document = PDDocument.load(pdfFile.getInputStream());
// Create a new PDF document for the output
// Create a new PDF document for the output
PDDocument outputDocument = new PDDocument();
// Loop through the pages
@@ -115,7 +102,8 @@ public class WatermarkController {
PDPage page = document.getPage(i);
// Process the content stream to remove the watermark text
WatermarkRemover editor = new WatermarkRemover(watermarkText) {};
WatermarkRemover editor = new WatermarkRemover(watermarkText) {
};
editor.processPage(page);
editor.processPage(page);
// Add the page to the output document
@@ -149,9 +137,14 @@ public class WatermarkController {
}
}
}
return PdfUtils.pdfDocToWebResponse(outputDocument, "removed.pdf");
}
@GetMapping("/remove-watermark")
public String removeWatermarkForm(Model model) {
model.addAttribute("currentPage", "remove-watermark");
return "security/remove-watermark";
}
}