Metadata handling for all PDF endpoints (#1894)

* Add image support to multi-tool page

Related to #278

* changes to support image types

* final touches

* final touches

* final touches

Signed-off-by: a <a>

* final touches

Signed-off-by: a <a>

* final touches

Signed-off-by: a <a>

* final touches

Signed-off-by: a <a>

* final touches

Signed-off-by: a <a>

* final touches

Signed-off-by: a <a>

* final touches

Signed-off-by: a <a>

* Update translation files (#1888)

Signed-off-by: GitHub Action <action@github.com>
Co-authored-by: GitHub Action <action@github.com>

* final touches

Signed-off-by: a <a>

---------

Signed-off-by: a <a>
Signed-off-by: GitHub Action <action@github.com>
Co-authored-by: a <a>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: GitHub Action <action@github.com>
This commit is contained in:
Anthony Stirling
2024-09-14 16:29:39 +01:00
committed by GitHub
parent bb1c859e0d
commit de4144a1a4
43 changed files with 696 additions and 284 deletions

View File

@@ -12,11 +12,11 @@ import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
@@ -38,6 +38,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.misc.AutoSplitPdfRequest;
import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@@ -49,6 +50,13 @@ public class AutoSplitPdfController {
private static final String QR_CONTENT = "https://github.com/Stirling-Tools/Stirling-PDF";
private static final String QR_CONTENT_OLD = "https://github.com/Frooodle/Stirling-PDF";
private final CustomPDDocumentFactory pdfDocumentFactory;
@Autowired
public AutoSplitPdfController(CustomPDDocumentFactory pdfDocumentFactory) {
this.pdfDocumentFactory = pdfDocumentFactory;
}
@PostMapping(value = "/auto-split-pdf", consumes = "multipart/form-data")
@Operation(
summary = "Auto split PDF pages into separate documents",
@@ -59,73 +67,93 @@ public class AutoSplitPdfController {
MultipartFile file = request.getFileInput();
boolean duplexMode = request.isDuplexMode();
PDDocument document = Loader.loadPDF(file.getBytes());
PDFRenderer pdfRenderer = new PDFRenderer(document);
pdfRenderer.setSubsamplingAllowed(true);
PDDocument document = null;
List<PDDocument> splitDocuments = new ArrayList<>();
List<ByteArrayOutputStream> splitDocumentsBoas = new ArrayList<>();
Path zipFile = null;
byte[] data = null;
for (int page = 0; page < document.getNumberOfPages(); ++page) {
BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 150);
String result = decodeQRCode(bim);
if ((QR_CONTENT.equals(result) || QR_CONTENT_OLD.equals(result)) && page != 0) {
splitDocuments.add(new PDDocument());
try {
document = pdfDocumentFactory.load(file.getInputStream());
PDFRenderer pdfRenderer = new PDFRenderer(document);
pdfRenderer.setSubsamplingAllowed(true);
for (int page = 0; page < document.getNumberOfPages(); ++page) {
BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 150);
String result = decodeQRCode(bim);
if ((QR_CONTENT.equals(result) || QR_CONTENT_OLD.equals(result)) && page != 0) {
splitDocuments.add(new PDDocument());
}
if (!splitDocuments.isEmpty()
&& !QR_CONTENT.equals(result)
&& !QR_CONTENT_OLD.equals(result)) {
splitDocuments.get(splitDocuments.size() - 1).addPage(document.getPage(page));
} else if (page == 0) {
PDDocument firstDocument = new PDDocument();
firstDocument.addPage(document.getPage(page));
splitDocuments.add(firstDocument);
}
// If duplexMode is true and current page is a divider, then skip next page
if (duplexMode && (QR_CONTENT.equals(result) || QR_CONTENT_OLD.equals(result))) {
page++;
}
}
if (!splitDocuments.isEmpty()
&& !QR_CONTENT.equals(result)
&& !QR_CONTENT_OLD.equals(result)) {
splitDocuments.get(splitDocuments.size() - 1).addPage(document.getPage(page));
} else if (page == 0) {
PDDocument firstDocument = new PDDocument();
firstDocument.addPage(document.getPage(page));
splitDocuments.add(firstDocument);
// Remove split documents that have no pages
splitDocuments.removeIf(pdDocument -> pdDocument.getNumberOfPages() == 0);
zipFile = Files.createTempFile("split_documents", ".zip");
String filename =
Filenames.toSimpleFileName(file.getOriginalFilename())
.replaceFirst("[.][^.]+$", "");
try (ZipOutputStream zipOut = new ZipOutputStream(Files.newOutputStream(zipFile))) {
for (int i = 0; i < splitDocuments.size(); i++) {
String fileName = filename + "_" + (i + 1) + ".pdf";
PDDocument splitDocument = splitDocuments.get(i);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
splitDocument.save(baos);
byte[] pdf = baos.toByteArray();
ZipEntry pdfEntry = new ZipEntry(fileName);
zipOut.putNextEntry(pdfEntry);
zipOut.write(pdf);
zipOut.closeEntry();
}
}
// If duplexMode is true and current page is a divider, then skip next page
if (duplexMode && (QR_CONTENT.equals(result) || QR_CONTENT_OLD.equals(result))) {
page++;
}
}
// Remove split documents that have no pages
splitDocuments.removeIf(pdDocument -> pdDocument.getNumberOfPages() == 0);
for (PDDocument splitDocument : splitDocuments) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
splitDocument.save(baos);
splitDocumentsBoas.add(baos);
splitDocument.close();
}
document.close();
Path zipFile = Files.createTempFile("split_documents", ".zip");
String filename =
Filenames.toSimpleFileName(file.getOriginalFilename())
.replaceFirst("[.][^.]+$", "");
byte[] data;
try (ZipOutputStream zipOut = new ZipOutputStream(Files.newOutputStream(zipFile))) {
for (int i = 0; i < splitDocumentsBoas.size(); i++) {
String fileName = filename + "_" + (i + 1) + ".pdf";
ByteArrayOutputStream baos = splitDocumentsBoas.get(i);
byte[] pdf = baos.toByteArray();
ZipEntry pdfEntry = new ZipEntry(fileName);
zipOut.putNextEntry(pdfEntry);
zipOut.write(pdf);
zipOut.closeEntry();
}
} catch (Exception e) {
logger.error("exception", e);
} finally {
data = Files.readAllBytes(zipFile);
Files.deleteIfExists(zipFile);
}
return WebResponseUtils.bytesToWebResponse(
data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM);
return WebResponseUtils.bytesToWebResponse(
data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM);
} finally {
// Clean up resources
if (document != null) {
try {
document.close();
} catch (IOException e) {
logger.error("Error closing main PDDocument", e);
}
}
for (PDDocument splitDoc : splitDocuments) {
try {
splitDoc.close();
} catch (IOException e) {
logger.error("Error closing split PDDocument", e);
}
}
if (zipFile != null) {
try {
Files.deleteIfExists(zipFile);
} catch (IOException e) {
logger.error("Error deleting temporary zip file", e);
}
}
}
}
private static String decodeQRCode(BufferedImage bufferedImage) {

View File

@@ -16,6 +16,7 @@ import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.text.PDFTextStripper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
@@ -30,6 +31,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.misc.RemoveBlankPagesRequest;
import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.WebResponseUtils;
@@ -40,6 +42,13 @@ public class BlankPageController {
private static final Logger logger = LoggerFactory.getLogger(BlankPageController.class);
private final CustomPDDocumentFactory pdfDocumentFactory;
@Autowired
public BlankPageController(CustomPDDocumentFactory pdfDocumentFactory) {
this.pdfDocumentFactory = pdfDocumentFactory;
}
@PostMapping(consumes = "multipart/form-data", value = "/remove-blanks")
@Operation(
summary = "Remove blank pages from a PDF file",
@@ -124,7 +133,7 @@ public class BlankPageController {
public void createZipEntry(ZipOutputStream zos, List<PDPage> pages, String entryName)
throws IOException {
try (PDDocument document = new PDDocument()) {
try (PDDocument document = pdfDocumentFactory.createNewDocument()) {
for (PDPage page : pages) {
document.addPage(page);

View File

@@ -20,6 +20,7 @@ import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
@@ -32,6 +33,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.misc.OptimizePdfRequest;
import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.GeneralUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
@@ -44,6 +46,13 @@ public class CompressController {
private static final Logger logger = LoggerFactory.getLogger(CompressController.class);
private final CustomPDDocumentFactory pdfDocumentFactory;
@Autowired
public CompressController(CustomPDDocumentFactory pdfDocumentFactory) {
this.pdfDocumentFactory = pdfDocumentFactory;
}
@PostMapping(consumes = "multipart/form-data", value = "/compress-pdf")
@Operation(
summary = "Optimize PDF file",
@@ -258,7 +267,7 @@ public class CompressController {
}
// Read the optimized PDF file
pdfBytes = Files.readAllBytes(tempOutputFile);
Path finalFile = tempOutputFile;
// Check if optimized file is larger than the original
if (pdfBytes.length > inputFileSize) {
// Log the occurrence
@@ -266,14 +275,15 @@ public class CompressController {
"Optimized file is larger than the original. Returning the original file instead.");
// Read the original file again
pdfBytes = Files.readAllBytes(tempInputFile);
finalFile = tempInputFile;
}
// Return the optimized PDF as a response
String outputFilename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_Optimized.pdf";
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
return WebResponseUtils.pdfDocToWebResponse(
pdfDocumentFactory.load(finalFile.toFile()), outputFilename);
} finally {
// Clean up the temporary files

View File

@@ -14,6 +14,7 @@ import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
@@ -25,9 +26,8 @@ import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.config.PdfMetadataService;
import stirling.software.SPDF.model.PdfMetadata;
import stirling.software.SPDF.model.api.misc.FlattenRequest;
import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@@ -37,6 +37,13 @@ public class FlattenController {
private static final Logger logger = LoggerFactory.getLogger(FlattenController.class);
private final CustomPDDocumentFactory pdfDocumentFactory;
@Autowired
public FlattenController(CustomPDDocumentFactory pdfDocumentFactory) {
this.pdfDocumentFactory = pdfDocumentFactory;
}
@PostMapping(consumes = "multipart/form-data", value = "/flatten")
@Operation(
summary = "Flatten PDF form fields or full page",
@@ -46,7 +53,6 @@ public class FlattenController {
MultipartFile file = request.getFileInput();
PDDocument document = Loader.loadPDF(file.getBytes());
PdfMetadata metadata = PdfMetadataService.extractMetadataFromPdf(document);
Boolean flattenOnlyForms = request.getFlattenOnlyForms();
if (Boolean.TRUE.equals(flattenOnlyForms)) {
@@ -60,7 +66,8 @@ public class FlattenController {
// flatten whole page aka convert each page to image and readd it (making text
// unselectable)
PDFRenderer pdfRenderer = new PDFRenderer(document);
PDDocument newDocument = new PDDocument();
PDDocument newDocument =
pdfDocumentFactory.createNewDocumentBasedOnOldDocument(document);
int numPages = document.getNumberOfPages();
for (int i = 0; i < numPages; i++) {
try {
@@ -80,7 +87,6 @@ public class FlattenController {
logger.error("exception", e);
}
}
PdfMetadataService.setMetadataToPdf(newDocument, metadata);
return WebResponseUtils.pdfDocToWebResponse(
newDocument, Filenames.toSimpleFileName(file.getOriginalFilename()));
}

View File

@@ -1,5 +1,6 @@
package stirling.software.SPDF.controller.api.misc;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
@@ -28,6 +29,7 @@ import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.ApplicationProperties;
import stirling.software.SPDF.model.api.misc.ProcessPdfWithOcrRequest;
import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@@ -52,6 +54,13 @@ public class OCRController {
.collect(Collectors.toList());
}
private final CustomPDDocumentFactory pdfDocumentFactory;
@Autowired
public OCRController(CustomPDDocumentFactory pdfDocumentFactory) {
this.pdfDocumentFactory = pdfDocumentFactory;
}
@PostMapping(consumes = "multipart/form-data", value = "/ocr-pdf")
@Operation(
summary = "Process a PDF file with OCR",
@@ -175,7 +184,7 @@ public class OCRController {
tempOutputFile = tempPdfWithoutImages;
}
// Read the OCR processed PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
byte[] pdfBytes = pdfDocumentFactory.loadToBytes(tempOutputFile.toFile());
// Return the OCR processed PDF as a response
String outputFilename =
@@ -196,7 +205,13 @@ public class OCRController {
// Add PDF file to the zip
ZipEntry pdfEntry = new ZipEntry(outputFilename);
zipOut.putNextEntry(pdfEntry);
Files.copy(tempOutputFile, zipOut);
try (ByteArrayInputStream pdfInputStream = new ByteArrayInputStream(pdfBytes)) {
byte[] buffer = new byte[1024];
int length;
while ((length = pdfInputStream.read(buffer)) != -1) {
zipOut.write(buffer, 0, length);
}
}
zipOut.closeEntry();
// Add text file to the zip

View File

@@ -4,6 +4,7 @@ import java.io.IOException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
@@ -17,6 +18,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.misc.OverlayImageRequest;
import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.WebResponseUtils;
@@ -27,6 +29,13 @@ public class OverlayImageController {
private static final Logger logger = LoggerFactory.getLogger(OverlayImageController.class);
private final CustomPDDocumentFactory pdfDocumentFactory;
@Autowired
public OverlayImageController(CustomPDDocumentFactory pdfDocumentFactory) {
this.pdfDocumentFactory = pdfDocumentFactory;
}
@PostMapping(consumes = "multipart/form-data", value = "/add-image")
@Operation(
summary = "Overlay image onto a PDF file",
@@ -41,7 +50,9 @@ public class OverlayImageController {
try {
byte[] pdfBytes = pdfFile.getBytes();
byte[] imageBytes = imageFile.getBytes();
byte[] result = PdfUtils.overlayImage(pdfBytes, imageBytes, x, y, everyPage);
byte[] result =
PdfUtils.overlayImage(
pdfDocumentFactory, pdfBytes, imageBytes, x, y, everyPage);
return WebResponseUtils.bytesToWebResponse(
result,

View File

@@ -4,7 +4,6 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
@@ -13,6 +12,7 @@ import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
@@ -26,6 +26,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.misc.AddPageNumbersRequest;
import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.GeneralUtils;
import stirling.software.SPDF.utils.WebResponseUtils;
@@ -36,6 +37,13 @@ public class PageNumbersController {
private static final Logger logger = LoggerFactory.getLogger(PageNumbersController.class);
private final CustomPDDocumentFactory pdfDocumentFactory;
@Autowired
public PageNumbersController(CustomPDDocumentFactory pdfDocumentFactory) {
this.pdfDocumentFactory = pdfDocumentFactory;
}
@PostMapping(value = "/add-page-numbers", consumes = "multipart/form-data")
@Operation(
summary = "Add page numbers to a PDF document",
@@ -52,7 +60,7 @@ public class PageNumbersController {
String customText = request.getCustomText();
int pageNumber = startingNumber;
byte[] fileBytes = file.getBytes();
PDDocument document = Loader.loadPDF(fileBytes);
PDDocument document = pdfDocumentFactory.load(fileBytes);
float font_size = request.getFontSize();
String font_type = request.getFontType();
float marginFactor;

View File

@@ -8,6 +8,7 @@ import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
@@ -20,6 +21,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.PDFFile;
import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@@ -31,6 +33,13 @@ public class RepairController {
private static final Logger logger = LoggerFactory.getLogger(RepairController.class);
private final CustomPDDocumentFactory pdfDocumentFactory;
@Autowired
public RepairController(CustomPDDocumentFactory pdfDocumentFactory) {
this.pdfDocumentFactory = pdfDocumentFactory;
}
@PostMapping(consumes = "multipart/form-data", value = "/repair")
@Operation(
summary = "Repair a PDF file",
@@ -58,7 +67,7 @@ public class RepairController {
.runCommandWithOutputHandling(command);
// Read the optimized PDF file
pdfBytes = Files.readAllBytes(tempOutputFile);
pdfBytes = pdfDocumentFactory.loadToBytes(tempOutputFile.toFile());
// Return the optimized PDF as a response
String outputFilename =

View File

@@ -12,7 +12,6 @@ import java.util.List;
import javax.imageio.ImageIO;
import org.apache.commons.io.IOUtils;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
@@ -25,6 +24,7 @@ import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState;
import org.apache.pdfbox.util.Matrix;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.ClassPathResource;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
@@ -38,6 +38,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.misc.AddStampRequest;
import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@@ -45,6 +46,13 @@ import stirling.software.SPDF.utils.WebResponseUtils;
@Tag(name = "Misc", description = "Miscellaneous APIs")
public class StampController {
private final CustomPDDocumentFactory pdfDocumentFactory;
@Autowired
public StampController(CustomPDDocumentFactory pdfDocumentFactory) {
this.pdfDocumentFactory = pdfDocumentFactory;
}
@PostMapping(consumes = "multipart/form-data", value = "/add-stamp")
@Operation(
summary = "Add stamp to a PDF file",
@@ -86,7 +94,7 @@ public class StampController {
}
// Load the input PDF
PDDocument document = Loader.loadPDF(pdfFile.getBytes());
PDDocument document = pdfDocumentFactory.load(pdfFile);
List<Integer> pageNumbers = request.getPageNumbersList(document, true);