Metadata handling for all PDF endpoints (#1894)

* Add image support to multi-tool page

Related to #278

* changes to support image types

* final touches

* final touches

* final touches

Signed-off-by: a <a>

* final touches

Signed-off-by: a <a>

* final touches

Signed-off-by: a <a>

* final touches

Signed-off-by: a <a>

* final touches

Signed-off-by: a <a>

* final touches

Signed-off-by: a <a>

* final touches

Signed-off-by: a <a>

* Update translation files (#1888)

Signed-off-by: GitHub Action <action@github.com>
Co-authored-by: GitHub Action <action@github.com>

* final touches

Signed-off-by: a <a>

---------

Signed-off-by: a <a>
Signed-off-by: GitHub Action <action@github.com>
Co-authored-by: a <a>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: GitHub Action <action@github.com>
This commit is contained in:
Anthony Stirling
2024-09-14 16:29:39 +01:00
committed by GitHub
parent bb1c859e0d
commit de4144a1a4
43 changed files with 696 additions and 284 deletions

View File

@@ -1,30 +1,36 @@
package stirling.software.SPDF.controller.api.converters;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.GeneralFile;
import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.FileToPdf;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@Tag(name = "Convert", description = "Convert APIs")
@RequestMapping("/api/v1/convert")
// Disabled for now
// @RestController
// @Tag(name = "Convert", description = "Convert APIs")
// @RequestMapping("/api/v1/convert")
public class ConvertBookToPDFController {
@Autowired
@Qualifier("bookAndHtmlFormatsInstalled")
private boolean bookAndHtmlFormatsInstalled;
private final boolean bookAndHtmlFormatsInstalled;
private final CustomPDDocumentFactory pdfDocumentFactory;
// @Autowired
public ConvertBookToPDFController(
CustomPDDocumentFactory pdfDocumentFactory,
@Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled) {
this.pdfDocumentFactory = pdfDocumentFactory;
this.bookAndHtmlFormatsInstalled = bookAndHtmlFormatsInstalled;
}
@PostMapping(consumes = "multipart/form-data", value = "/book/pdf")
@Operation(

View File

@@ -1,28 +1,25 @@
package stirling.software.SPDF.controller.api.converters;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.converters.HTMLToPdfRequest;
import stirling.software.SPDF.utils.FileToPdf;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@Tag(name = "Convert", description = "Convert APIs")
@RequestMapping("/api/v1/convert")
// Disabled for now
// @RestController
// @Tag(name = "Convert", description = "Convert APIs")
// @RequestMapping("/api/v1/convert")
public class ConvertHtmlToPDF {
@Autowired
// @Autowired
@Qualifier("bookAndHtmlFormatsInstalled")
private boolean bookAndHtmlFormatsInstalled;

View File

@@ -16,6 +16,7 @@ import org.apache.commons.io.FileUtils;
import org.apache.pdfbox.rendering.ImageType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
@@ -30,6 +31,7 @@ import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.converters.ConvertToImageRequest;
import stirling.software.SPDF.model.api.converters.ConvertToPdfRequest;
import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.CheckProgramInstall;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
@@ -43,6 +45,13 @@ public class ConvertImgPDFController {
private static final Logger logger = LoggerFactory.getLogger(ConvertImgPDFController.class);
private final CustomPDDocumentFactory pdfDocumentFactory;
@Autowired
public ConvertImgPDFController(CustomPDDocumentFactory pdfDocumentFactory) {
this.pdfDocumentFactory = pdfDocumentFactory;
}
@PostMapping(consumes = "multipart/form-data", value = "/pdf/img")
@Operation(
summary = "Convert PDF to image(s)",
@@ -178,7 +187,8 @@ public class ConvertImgPDFController {
boolean autoRotate = request.isAutoRotate();
// Convert the file to PDF and get the resulting bytes
byte[] bytes = PdfUtils.imageToPdf(file, fitOption, autoRotate, colorType);
byte[] bytes =
PdfUtils.imageToPdf(file, fitOption, autoRotate, colorType, pdfDocumentFactory);
return WebResponseUtils.bytesToWebResponse(
bytes,
file[0].getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_converted.pdf");

View File

@@ -10,29 +10,26 @@ import org.commonmark.node.Node;
import org.commonmark.parser.Parser;
import org.commonmark.renderer.html.AttributeProvider;
import org.commonmark.renderer.html.HtmlRenderer;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.GeneralFile;
import stirling.software.SPDF.utils.FileToPdf;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@Tag(name = "Convert", description = "Convert APIs")
@RequestMapping("/api/v1/convert")
// Disabled for now
// @RestController
// @Tag(name = "Convert", description = "Convert APIs")
// @RequestMapping("/api/v1/convert")
public class ConvertMarkdownToPdf {
@Autowired
// @Autowired
@Qualifier("bookAndHtmlFormatsInstalled")
private boolean bookAndHtmlFormatsInstalled;

View File

@@ -1,5 +1,6 @@
package stirling.software.SPDF.controller.api.converters;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
@@ -8,6 +9,8 @@ import java.util.Arrays;
import java.util.List;
import org.apache.commons.io.FilenameUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
@@ -20,6 +23,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.GeneralFile;
import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@@ -29,7 +33,7 @@ import stirling.software.SPDF.utils.WebResponseUtils;
@RequestMapping("/api/v1/convert")
public class ConvertOfficeController {
public byte[] convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException {
public File convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException {
// Check for valid file extension
String originalFilename = Filenames.toSimpleFileName(inputFile.getOriginalFilename());
if (originalFilename == null
@@ -62,12 +66,10 @@ public class ConvertOfficeController {
.runCommandWithOutputHandling(command);
// Read the converted PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
return pdfBytes;
return tempOutputFile.toFile();
} finally {
// Clean up the temporary files
if (tempInputFile != null) Files.deleteIfExists(tempInputFile);
Files.deleteIfExists(tempOutputFile);
}
}
@@ -76,6 +78,13 @@ public class ConvertOfficeController {
return fileExtension.matches(extensionPattern);
}
private final CustomPDDocumentFactory pdfDocumentFactory;
@Autowired
public ConvertOfficeController(CustomPDDocumentFactory pdfDocumentFactory) {
this.pdfDocumentFactory = pdfDocumentFactory;
}
@PostMapping(consumes = "multipart/form-data", value = "/file/pdf")
@Operation(
summary = "Convert a file to a PDF using LibreOffice",
@@ -86,12 +95,18 @@ public class ConvertOfficeController {
MultipartFile inputFile = request.getFileInput();
// unused but can start server instance if startup time is to long
// LibreOfficeListener.getInstance().start();
File file = null;
try {
file = convertToPdf(inputFile);
byte[] pdfByteArray = convertToPdf(inputFile);
return WebResponseUtils.bytesToWebResponse(
pdfByteArray,
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_convertedToPDF.pdf");
PDDocument doc = pdfDocumentFactory.load(file);
return WebResponseUtils.pdfDocToWebResponse(
doc,
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_convertedToPDF.pdf");
} finally {
if (file != null) file.delete();
}
}
}

View File

@@ -6,30 +6,27 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.converters.PdfToBookRequest;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@Tag(name = "Convert", description = "Convert APIs")
@RequestMapping("/api/v1/convert")
// Disabled for now
// @RestController
// @Tag(name = "Convert", description = "Convert APIs")
// @RequestMapping("/api/v1/convert")
public class ConvertPDFToBookController {
@Autowired
// @Autowired
@Qualifier("bookAndHtmlFormatsInstalled")
private boolean bookAndHtmlFormatsInstalled;

View File

@@ -9,7 +9,6 @@ import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
@@ -17,6 +16,7 @@ import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
@@ -29,6 +29,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.converters.PdfToPdfARequest;
import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@@ -40,6 +41,13 @@ public class ConvertPDFToPDFA {
private static final Logger logger = LoggerFactory.getLogger(ConvertPDFToPDFA.class);
private final CustomPDDocumentFactory pdfDocumentFactory;
@Autowired
public ConvertPDFToPDFA(CustomPDDocumentFactory pdfDocumentFactory) {
this.pdfDocumentFactory = pdfDocumentFactory;
}
@PostMapping(consumes = "multipart/form-data", value = "/pdf/pdfa")
@Operation(
summary = "Convert a PDF to a PDF/A",
@@ -54,7 +62,7 @@ public class ConvertPDFToPDFA {
byte[] pdfBytes = inputFile.getBytes();
// Load the PDF document
PDDocument document = Loader.loadPDF(pdfBytes);
PDDocument document = pdfDocumentFactory.load(pdfBytes);
// Get the document catalog
PDDocumentCatalog catalog = document.getDocumentCatalog();
@@ -101,18 +109,18 @@ public class ConvertPDFToPDFA {
ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF)
.runCommandWithOutputHandling(command);
// Read the optimized PDF file
byte[] optimizedPdfBytes = Files.readAllBytes(tempOutputFile);
// Clean up the temporary files
Files.deleteIfExists(tempInputFile);
Files.deleteIfExists(tempOutputFile);
// Return the optimized PDF as a response
String outputFilename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_PDFA.pdf";
return WebResponseUtils.bytesToWebResponse(optimizedPdfBytes, outputFilename);
try {
PDDocument doc = pdfDocumentFactory.load(tempOutputFile.toFile());
// Return the optimized PDF as a response
String outputFilename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_PDFA.pdf";
return WebResponseUtils.pdfDocToWebResponse(doc, outputFilename);
} finally {
// Clean up the temporary files
Files.deleteIfExists(tempInputFile);
Files.deleteIfExists(tempOutputFile);
}
}
}

View File

@@ -6,6 +6,10 @@ import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
@@ -16,6 +20,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.converters.UrlToPdfRequest;
import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.GeneralUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
@@ -26,6 +31,15 @@ import stirling.software.SPDF.utils.WebResponseUtils;
@RequestMapping("/api/v1/convert")
public class ConvertWebsiteToPDF {
private static final Logger logger = LoggerFactory.getLogger(ConvertWebsiteToPDF.class);
private final CustomPDDocumentFactory pdfDocumentFactory;
@Autowired
public ConvertWebsiteToPDF(CustomPDDocumentFactory pdfDocumentFactory) {
this.pdfDocumentFactory = pdfDocumentFactory;
}
@PostMapping(consumes = "multipart/form-data", value = "/url/pdf")
@Operation(
summary = "Convert a URL to a PDF",
@@ -46,12 +60,12 @@ public class ConvertWebsiteToPDF {
}
Path tempOutputFile = null;
byte[] pdfBytes;
PDDocument doc = null;
try {
// Prepare the output file path
tempOutputFile = Files.createTempFile("output_", ".pdf");
// Prepare the OCRmyPDF command
// Prepare the WeasyPrint command
List<String> command = new ArrayList<>();
command.add("weasyprint");
command.add(URL);
@@ -61,16 +75,23 @@ public class ConvertWebsiteToPDF {
ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
.runCommandWithOutputHandling(command);
// Read the optimized PDF file
pdfBytes = Files.readAllBytes(tempOutputFile);
} finally {
// Clean up the temporary files
Files.deleteIfExists(tempOutputFile);
}
// Convert URL to a safe filename
String outputFilename = convertURLToFileName(URL);
// Load the PDF using pdfDocumentFactory
doc = pdfDocumentFactory.load(tempOutputFile.toFile());
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
// Convert URL to a safe filename
String outputFilename = convertURLToFileName(URL);
return WebResponseUtils.pdfDocToWebResponse(doc, outputFilename);
} finally {
if (tempOutputFile != null) {
try {
Files.deleteIfExists(tempOutputFile);
} catch (IOException e) {
logger.error("Error deleting temporary output file", e);
}
}
}
}
private String convertURLToFileName(String url) {

View File

@@ -30,13 +30,13 @@ import stirling.software.SPDF.model.api.extract.PDFFilePage;
@RestController
@RequestMapping("/api/v1/convert")
@Tag(name = "Convert", description = "Convert APIs")
public class ExtractController {
public class ExtractCSVController {
private static final Logger logger = LoggerFactory.getLogger(CropController.class);
@PostMapping(value = "/pdf/csv", consumes = "multipart/form-data")
@Operation(
summary = "Extracts a PDF document to csv",
summary = "Extracts a CSV document from a PDF",
description =
"This operation takes an input PDF file and returns CSV file of whole page. Input:PDF Output:CSV Type:SISO")
public ResponseEntity<String> PdfToCsv(@ModelAttribute PDFFilePage form) throws Exception {