misc beginings
This commit is contained in:
@@ -0,0 +1,129 @@
|
||||
package stirling.software.SPDF.controller.api.misc;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.text.PDFTextStripper;
|
||||
import org.apache.pdfbox.text.TextPosition;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.ModelAttribute;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RequestPart;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.Parameter;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.model.api.misc.ExtractHeaderRequest;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
@RestController
|
||||
@Tag(name = "Other", description = "Other APIs")
|
||||
public class AutoRenameController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(AutoRenameController.class);
|
||||
|
||||
private static final float TITLE_FONT_SIZE_THRESHOLD = 20.0f;
|
||||
private static final int LINE_LIMIT = 11;
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/auto-rename")
|
||||
@Operation(summary = "Extract header from PDF file", description = "This endpoint accepts a PDF file and attempts to extract its title or header based on heuristics. Input:PDF Output:PDF Type:SISO")
|
||||
public ResponseEntity<byte[]> extractHeader(@ModelAttribute ExtractHeaderRequest request) throws Exception {
|
||||
MultipartFile file = request.getFileInput();
|
||||
Boolean useFirstTextAsFallback = request.getUseFirstTextAsFallback();
|
||||
|
||||
PDDocument document = PDDocument.load(file.getInputStream());
|
||||
PDFTextStripper reader = new PDFTextStripper() {
|
||||
class LineInfo {
|
||||
String text;
|
||||
float fontSize;
|
||||
|
||||
LineInfo(String text, float fontSize) {
|
||||
this.text = text;
|
||||
this.fontSize = fontSize;
|
||||
}
|
||||
}
|
||||
|
||||
List<LineInfo> lineInfos = new ArrayList<>();
|
||||
StringBuilder lineBuilder = new StringBuilder();
|
||||
float lastY = -1;
|
||||
float maxFontSizeInLine = 0.0f;
|
||||
int lineCount = 0;
|
||||
|
||||
@Override
|
||||
protected void processTextPosition(TextPosition text) {
|
||||
if (lastY != text.getY() && lineCount < LINE_LIMIT) {
|
||||
processLine();
|
||||
lineBuilder = new StringBuilder(text.getUnicode());
|
||||
maxFontSizeInLine = text.getFontSizeInPt();
|
||||
lastY = text.getY();
|
||||
lineCount++;
|
||||
} else if (lineCount < LINE_LIMIT) {
|
||||
lineBuilder.append(text.getUnicode());
|
||||
if (text.getFontSizeInPt() > maxFontSizeInLine) {
|
||||
maxFontSizeInLine = text.getFontSizeInPt();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void processLine() {
|
||||
if (lineBuilder.length() > 0 && lineCount < LINE_LIMIT) {
|
||||
lineInfos.add(new LineInfo(lineBuilder.toString(), maxFontSizeInLine));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getText(PDDocument doc) throws IOException {
|
||||
this.lineInfos.clear();
|
||||
this.lineBuilder = new StringBuilder();
|
||||
this.lastY = -1;
|
||||
this.maxFontSizeInLine = 0.0f;
|
||||
this.lineCount = 0;
|
||||
super.getText(doc);
|
||||
processLine(); // Process the last line
|
||||
|
||||
// Merge lines with same font size
|
||||
List<LineInfo> mergedLineInfos = new ArrayList<>();
|
||||
for (int i = 0; i < lineInfos.size(); i++) {
|
||||
String mergedText = lineInfos.get(i).text;
|
||||
float fontSize = lineInfos.get(i).fontSize;
|
||||
while (i + 1 < lineInfos.size() && lineInfos.get(i + 1).fontSize == fontSize) {
|
||||
mergedText += " " + lineInfos.get(i + 1).text;
|
||||
i++;
|
||||
}
|
||||
mergedLineInfos.add(new LineInfo(mergedText, fontSize));
|
||||
}
|
||||
|
||||
// Sort lines by font size in descending order and get the first one
|
||||
mergedLineInfos.sort(Comparator.comparing((LineInfo li) -> li.fontSize).reversed());
|
||||
String title = mergedLineInfos.isEmpty() ? null : mergedLineInfos.get(0).text;
|
||||
|
||||
return title != null ? title : (useFirstTextAsFallback ? (mergedLineInfos.isEmpty() ? null : mergedLineInfos.get(mergedLineInfos.size() - 1).text) : null);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
String header = reader.getText(document);
|
||||
|
||||
|
||||
|
||||
// Sanitize the header string by removing characters not allowed in a filename.
|
||||
if (header != null && header.length() < 255) {
|
||||
header = header.replaceAll("[/\\\\?%*:|\"<>]", "");
|
||||
return WebResponseUtils.pdfDocToWebResponse(document, header + ".pdf");
|
||||
} else {
|
||||
logger.info("File has no good title to be found");
|
||||
return WebResponseUtils.pdfDocToWebResponse(document, file.getOriginalFilename());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,142 @@
|
||||
package stirling.software.SPDF.controller.api.misc;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.awt.image.DataBufferByte;
|
||||
import java.awt.image.DataBufferInt;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.rendering.PDFRenderer;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.ModelAttribute;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import com.google.zxing.BinaryBitmap;
|
||||
import com.google.zxing.LuminanceSource;
|
||||
import com.google.zxing.MultiFormatReader;
|
||||
import com.google.zxing.NotFoundException;
|
||||
import com.google.zxing.PlanarYUVLuminanceSource;
|
||||
import com.google.zxing.Result;
|
||||
import com.google.zxing.common.HybridBinarizer;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.Parameter;
|
||||
import stirling.software.SPDF.model.api.misc.AutoSplitPdfRequest;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
public class AutoSplitPdfController {
|
||||
|
||||
private static final String QR_CONTENT = "https://github.com/Frooodle/Stirling-PDF";
|
||||
|
||||
@PostMapping(value = "/auto-split-pdf", consumes = "multipart/form-data")
|
||||
@Operation(summary = "Auto split PDF pages into separate documents", description = "This endpoint accepts a PDF file, scans each page for a specific QR code, and splits the document at the QR code boundaries. The output is a zip file containing each separate PDF document. Input:PDF Output:ZIP Type:SISO")
|
||||
public ResponseEntity<byte[]> autoSplitPdf(@ModelAttribute AutoSplitPdfRequest request) throws IOException {
|
||||
MultipartFile file = request.getFileInput();
|
||||
boolean duplexMode = request.isDuplexMode();
|
||||
|
||||
InputStream inputStream = file.getInputStream();
|
||||
PDDocument document = PDDocument.load(inputStream);
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||
|
||||
List<PDDocument> splitDocuments = new ArrayList<>();
|
||||
List<ByteArrayOutputStream> splitDocumentsBoas = new ArrayList<>();
|
||||
|
||||
for (int page = 0; page < document.getNumberOfPages(); ++page) {
|
||||
BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 150);
|
||||
String result = decodeQRCode(bim);
|
||||
|
||||
if (QR_CONTENT.equals(result) && page != 0) {
|
||||
splitDocuments.add(new PDDocument());
|
||||
}
|
||||
|
||||
if (!splitDocuments.isEmpty() && !QR_CONTENT.equals(result)) {
|
||||
splitDocuments.get(splitDocuments.size() - 1).addPage(document.getPage(page));
|
||||
} else if (page == 0) {
|
||||
PDDocument firstDocument = new PDDocument();
|
||||
firstDocument.addPage(document.getPage(page));
|
||||
splitDocuments.add(firstDocument);
|
||||
}
|
||||
|
||||
// If duplexMode is true and current page is a divider, then skip next page
|
||||
if (duplexMode && QR_CONTENT.equals(result)) {
|
||||
page++;
|
||||
}
|
||||
}
|
||||
|
||||
// Remove split documents that have no pages
|
||||
splitDocuments.removeIf(pdDocument -> pdDocument.getNumberOfPages() == 0);
|
||||
|
||||
for (PDDocument splitDocument : splitDocuments) {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
splitDocument.save(baos);
|
||||
splitDocumentsBoas.add(baos);
|
||||
splitDocument.close();
|
||||
}
|
||||
|
||||
document.close();
|
||||
|
||||
Path zipFile = Files.createTempFile("split_documents", ".zip");
|
||||
String filename = file.getOriginalFilename().replaceFirst("[.][^.]+$", "");
|
||||
byte[] data;
|
||||
|
||||
try (ZipOutputStream zipOut = new ZipOutputStream(Files.newOutputStream(zipFile))) {
|
||||
for (int i = 0; i < splitDocumentsBoas.size(); i++) {
|
||||
String fileName = filename + "_" + (i + 1) + ".pdf";
|
||||
ByteArrayOutputStream baos = splitDocumentsBoas.get(i);
|
||||
byte[] pdf = baos.toByteArray();
|
||||
|
||||
ZipEntry pdfEntry = new ZipEntry(fileName);
|
||||
zipOut.putNextEntry(pdfEntry);
|
||||
zipOut.write(pdf);
|
||||
zipOut.closeEntry();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
data = Files.readAllBytes(zipFile);
|
||||
Files.delete(zipFile);
|
||||
}
|
||||
|
||||
return WebResponseUtils.bytesToWebResponse(data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM);
|
||||
}
|
||||
|
||||
|
||||
private static String decodeQRCode(BufferedImage bufferedImage) {
|
||||
LuminanceSource source;
|
||||
|
||||
if (bufferedImage.getRaster().getDataBuffer() instanceof DataBufferByte) {
|
||||
byte[] pixels = ((DataBufferByte) bufferedImage.getRaster().getDataBuffer()).getData();
|
||||
source = new PlanarYUVLuminanceSource(pixels, bufferedImage.getWidth(), bufferedImage.getHeight(), 0, 0, bufferedImage.getWidth(), bufferedImage.getHeight(), false);
|
||||
} else if (bufferedImage.getRaster().getDataBuffer() instanceof DataBufferInt) {
|
||||
int[] pixels = ((DataBufferInt) bufferedImage.getRaster().getDataBuffer()).getData();
|
||||
byte[] newPixels = new byte[pixels.length];
|
||||
for (int i = 0; i < pixels.length; i++) {
|
||||
newPixels[i] = (byte) (pixels[i] & 0xff);
|
||||
}
|
||||
source = new PlanarYUVLuminanceSource(newPixels, bufferedImage.getWidth(), bufferedImage.getHeight(), 0, 0, bufferedImage.getWidth(), bufferedImage.getHeight(), false);
|
||||
} else {
|
||||
throw new IllegalArgumentException("BufferedImage must have 8-bit gray scale, 24-bit RGB, 32-bit ARGB (packed int), byte gray, or 3-byte/4-byte RGB image data");
|
||||
}
|
||||
|
||||
BinaryBitmap bitmap = new BinaryBitmap(new HybridBinarizer(source));
|
||||
|
||||
try {
|
||||
Result result = new MultiFormatReader().decode(bitmap);
|
||||
return result.getText();
|
||||
} catch (NotFoundException e) {
|
||||
return null; // there is no QR code in the image
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,123 @@
|
||||
package stirling.software.SPDF.controller.api.misc;
|
||||
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageTree;
|
||||
import org.apache.pdfbox.rendering.PDFRenderer;
|
||||
import org.apache.pdfbox.text.PDFTextStripper;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.ModelAttribute;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RequestPart;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.Parameter;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.model.api.misc.RemoveBlankPagesRequest;
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@Tag(name = "Other", description = "Other APIs")
|
||||
public class BlankPageController {
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/remove-blanks")
|
||||
@Operation(
|
||||
summary = "Remove blank pages from a PDF file",
|
||||
description = "This endpoint removes blank pages from a given PDF file. Users can specify the threshold and white percentage to tune the detection of blank pages. Input:PDF Output:PDF Type:SISO"
|
||||
)
|
||||
public ResponseEntity<byte[]> removeBlankPages(@ModelAttribute RemoveBlankPagesRequest request) throws IOException, InterruptedException {
|
||||
MultipartFile inputFile = request.getFileInput();
|
||||
int threshold = request.getThreshold();
|
||||
float whitePercent = request.getWhitePercent();
|
||||
|
||||
PDDocument document = null;
|
||||
try {
|
||||
document = PDDocument.load(inputFile.getInputStream());
|
||||
PDPageTree pages = document.getDocumentCatalog().getPages();
|
||||
PDFTextStripper textStripper = new PDFTextStripper();
|
||||
|
||||
List<Integer> pagesToKeepIndex = new ArrayList<>();
|
||||
int pageIndex = 0;
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||
|
||||
for (PDPage page : pages) {
|
||||
System.out.println("checking page " + pageIndex);
|
||||
textStripper.setStartPage(pageIndex + 1);
|
||||
textStripper.setEndPage(pageIndex + 1);
|
||||
String pageText = textStripper.getText(document);
|
||||
boolean hasText = !pageText.trim().isEmpty();
|
||||
if (hasText) {
|
||||
pagesToKeepIndex.add(pageIndex);
|
||||
System.out.println("page " + pageIndex + " has text");
|
||||
} else {
|
||||
boolean hasImages = PdfUtils.hasImagesOnPage(page);
|
||||
if (hasImages) {
|
||||
System.out.println("page " + pageIndex + " has image");
|
||||
|
||||
Path tempFile = Files.createTempFile("image_", ".png");
|
||||
|
||||
// Render image and save as temp file
|
||||
BufferedImage image = pdfRenderer.renderImageWithDPI(pageIndex, 300);
|
||||
ImageIO.write(image, "png", tempFile.toFile());
|
||||
|
||||
List<String> command = new ArrayList<>(Arrays.asList("python3", System.getProperty("user.dir") + "/scripts/detect-blank-pages.py", tempFile.toString() ,"--threshold", String.valueOf(threshold), "--white_percent", String.valueOf(whitePercent)));
|
||||
|
||||
// Run CLI command
|
||||
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command);
|
||||
|
||||
// does contain data
|
||||
if (returnCode.getRc() == 0) {
|
||||
System.out.println("page " + pageIndex + " has image which is not blank");
|
||||
pagesToKeepIndex.add(pageIndex);
|
||||
} else {
|
||||
System.out.println("Skipping, Image was blank for page #" + pageIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
pageIndex++;
|
||||
|
||||
}
|
||||
System.out.print("pagesToKeep=" + pagesToKeepIndex.size());
|
||||
|
||||
// Remove pages not present in pagesToKeepIndex
|
||||
List<Integer> pageIndices = IntStream.range(0, pages.getCount()).boxed().collect(Collectors.toList());
|
||||
Collections.reverse(pageIndices); // Reverse to prevent index shifting during removal
|
||||
for (Integer i : pageIndices) {
|
||||
if (!pagesToKeepIndex.contains(i)) {
|
||||
pages.remove(i);
|
||||
}
|
||||
}
|
||||
|
||||
return WebResponseUtils.pdfDocToWebResponse(document, inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_blanksRemoved.pdf");
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
return new ResponseEntity<>(HttpStatus.INTERNAL_SERVER_ERROR);
|
||||
} finally {
|
||||
if (document != null)
|
||||
document.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,244 @@
|
||||
package stirling.software.SPDF.controller.api.misc;
|
||||
|
||||
import java.awt.Image;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.pdfbox.cos.COSName;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDResources;
|
||||
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.ModelAttribute;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RequestPart;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.Parameter;
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.model.api.misc.OptimizePdfRequest;
|
||||
import stirling.software.SPDF.utils.GeneralUtils;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@Tag(name = "Other", description = "Other APIs")
|
||||
public class CompressController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(CompressController.class);
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/compress-pdf")
|
||||
@Operation(summary = "Optimize PDF file", description = "This endpoint accepts a PDF file and optimizes it based on the provided parameters. Input:PDF Output:PDF Type:SISO")
|
||||
public ResponseEntity<byte[]> optimizePdf(@ModelAttribute OptimizePdfRequest request) throws Exception {
|
||||
MultipartFile inputFile = request.getFileInput();
|
||||
Integer optimizeLevel = request.getOptimizeLevel();
|
||||
String expectedOutputSizeString = request.getExpectedOutputSizeString();
|
||||
|
||||
|
||||
if(expectedOutputSizeString == null && optimizeLevel == null) {
|
||||
throw new Exception("Both expected output size and optimize level are not specified");
|
||||
}
|
||||
|
||||
Long expectedOutputSize = 0L;
|
||||
boolean autoMode = false;
|
||||
if (expectedOutputSizeString != null && expectedOutputSizeString.length() > 1 ) {
|
||||
expectedOutputSize = GeneralUtils.convertSizeToBytes(expectedOutputSizeString);
|
||||
autoMode = true;
|
||||
}
|
||||
|
||||
// Save the uploaded file to a temporary location
|
||||
Path tempInputFile = Files.createTempFile("input_", ".pdf");
|
||||
inputFile.transferTo(tempInputFile.toFile());
|
||||
|
||||
long inputFileSize = Files.size(tempInputFile);
|
||||
|
||||
// Prepare the output file path
|
||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
|
||||
// Determine initial optimization level based on expected size reduction, only if in autoMode
|
||||
if(autoMode) {
|
||||
double sizeReductionRatio = expectedOutputSize / (double) inputFileSize;
|
||||
if (sizeReductionRatio > 0.7) {
|
||||
optimizeLevel = 1;
|
||||
} else if (sizeReductionRatio > 0.5) {
|
||||
optimizeLevel = 2;
|
||||
} else if (sizeReductionRatio > 0.35) {
|
||||
optimizeLevel = 3;
|
||||
} else {
|
||||
optimizeLevel = 3;
|
||||
}
|
||||
}
|
||||
|
||||
boolean sizeMet = false;
|
||||
while (!sizeMet && optimizeLevel <= 4) {
|
||||
// Prepare the Ghostscript command
|
||||
List<String> command = new ArrayList<>();
|
||||
command.add("gs");
|
||||
command.add("-sDEVICE=pdfwrite");
|
||||
command.add("-dCompatibilityLevel=1.4");
|
||||
|
||||
switch (optimizeLevel) {
|
||||
case 1:
|
||||
command.add("-dPDFSETTINGS=/prepress");
|
||||
break;
|
||||
case 2:
|
||||
command.add("-dPDFSETTINGS=/printer");
|
||||
break;
|
||||
case 3:
|
||||
command.add("-dPDFSETTINGS=/ebook");
|
||||
break;
|
||||
case 4:
|
||||
command.add("-dPDFSETTINGS=/screen");
|
||||
break;
|
||||
default:
|
||||
command.add("-dPDFSETTINGS=/default");
|
||||
}
|
||||
|
||||
command.add("-dNOPAUSE");
|
||||
command.add("-dQUIET");
|
||||
command.add("-dBATCH");
|
||||
command.add("-sOutputFile=" + tempOutputFile.toString());
|
||||
command.add(tempInputFile.toString());
|
||||
|
||||
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command);
|
||||
|
||||
// Check if file size is within expected size or not auto mode so instantly finish
|
||||
long outputFileSize = Files.size(tempOutputFile);
|
||||
if (outputFileSize <= expectedOutputSize || !autoMode) {
|
||||
sizeMet = true;
|
||||
} else {
|
||||
// Increase optimization level for next iteration
|
||||
optimizeLevel++;
|
||||
if(autoMode && optimizeLevel > 3) {
|
||||
System.out.println("Skipping level 4 due to bad results in auto mode");
|
||||
sizeMet = true;
|
||||
} else if(optimizeLevel == 5) {
|
||||
|
||||
} else {
|
||||
System.out.println("Increasing ghostscript optimisation level to " + optimizeLevel);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (expectedOutputSize != null && autoMode) {
|
||||
long outputFileSize = Files.size(tempOutputFile);
|
||||
if (outputFileSize > expectedOutputSize) {
|
||||
try (PDDocument doc = PDDocument.load(new File(tempOutputFile.toString()))) {
|
||||
long previousFileSize = 0;
|
||||
double scaleFactor = 1.0;
|
||||
while (true) {
|
||||
for (PDPage page : doc.getPages()) {
|
||||
PDResources res = page.getResources();
|
||||
|
||||
for (COSName name : res.getXObjectNames()) {
|
||||
PDXObject xobj = res.getXObject(name);
|
||||
if (xobj instanceof PDImageXObject) {
|
||||
PDImageXObject image = (PDImageXObject) xobj;
|
||||
|
||||
// Get the image in BufferedImage format
|
||||
BufferedImage bufferedImage = image.getImage();
|
||||
|
||||
// Calculate the new dimensions
|
||||
int newWidth = (int)(bufferedImage.getWidth() * scaleFactor);
|
||||
int newHeight = (int)(bufferedImage.getHeight() * scaleFactor);
|
||||
|
||||
// If the new dimensions are zero, skip this iteration
|
||||
if (newWidth == 0 || newHeight == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Otherwise, proceed with the scaling
|
||||
Image scaledImage = bufferedImage.getScaledInstance(newWidth, newHeight, Image.SCALE_SMOOTH);
|
||||
|
||||
// Convert the scaled image back to a BufferedImage
|
||||
BufferedImage scaledBufferedImage = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_INT_RGB);
|
||||
scaledBufferedImage.getGraphics().drawImage(scaledImage, 0, 0, null);
|
||||
|
||||
// Compress the scaled image
|
||||
ByteArrayOutputStream compressedImageStream = new ByteArrayOutputStream();
|
||||
ImageIO.write(scaledBufferedImage, "jpeg", compressedImageStream);
|
||||
byte[] imageBytes = compressedImageStream.toByteArray();
|
||||
compressedImageStream.close();
|
||||
|
||||
// Convert compressed image back to PDImageXObject
|
||||
ByteArrayInputStream bais = new ByteArrayInputStream(imageBytes);
|
||||
PDImageXObject compressedImage = PDImageXObject.createFromByteArray(doc, imageBytes, image.getCOSObject().toString());
|
||||
|
||||
// Replace the image in the resources with the compressed version
|
||||
res.put(name, compressedImage);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// save the document to tempOutputFile again
|
||||
doc.save(tempOutputFile.toString());
|
||||
|
||||
long currentSize = Files.size(tempOutputFile);
|
||||
// Check if the overall PDF size is still larger than expectedOutputSize
|
||||
if (currentSize > expectedOutputSize) {
|
||||
// Log the current file size and scaleFactor
|
||||
|
||||
System.out.println("Current file size: " + FileUtils.byteCountToDisplaySize(currentSize));
|
||||
System.out.println("Current scale factor: " + scaleFactor);
|
||||
|
||||
// The file is still too large, reduce scaleFactor and try again
|
||||
scaleFactor *= 0.9; // reduce scaleFactor by 10%
|
||||
// Avoid scaleFactor being too small, causing the image to shrink to 0
|
||||
if(scaleFactor < 0.2 || previousFileSize == currentSize){
|
||||
throw new RuntimeException("Could not reach the desired size without excessively degrading image quality, lowest size recommended is " + FileUtils.byteCountToDisplaySize(currentSize) + ", " + currentSize + " bytes");
|
||||
}
|
||||
previousFileSize = currentSize;
|
||||
} else {
|
||||
// The file is small enough, break the loop
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// Read the optimized PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
// Check if optimized file is larger than the original
|
||||
if(pdfBytes.length > inputFileSize) {
|
||||
// Log the occurrence
|
||||
logger.warn("Optimized file is larger than the original. Returning the original file instead.");
|
||||
|
||||
// Read the original file again
|
||||
pdfBytes = Files.readAllBytes(tempInputFile);
|
||||
}
|
||||
|
||||
// Clean up the temporary files
|
||||
Files.delete(tempInputFile);
|
||||
Files.delete(tempOutputFile);
|
||||
|
||||
// Return the optimized PDF as a response
|
||||
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_Optimized.pdf";
|
||||
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,155 @@
|
||||
package stirling.software.SPDF.controller.api.misc;
|
||||
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.rendering.PDFRenderer;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RequestPart;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.parameters.RequestBody;
|
||||
import io.swagger.v3.oas.annotations.Parameter;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.model.api.misc.ExtractImageScansRequest;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
import io.swagger.v3.oas.annotations.media.Content;
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
@RestController
|
||||
@Tag(name = "Other", description = "Other APIs")
|
||||
public class ExtractImageScansController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(ExtractImageScansController.class);
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/extract-image-scans")
|
||||
@Operation(summary = "Extract image scans from an input file",
|
||||
description = "This endpoint extracts image scans from a given file based on certain parameters. Users can specify angle threshold, tolerance, minimum area, minimum contour area, and border size. Input:PDF Output:IMAGE/ZIP Type:SIMO")
|
||||
public ResponseEntity<byte[]> extractImageScans(
|
||||
@RequestBody(
|
||||
description = "Form data containing file and extraction parameters",
|
||||
required = true,
|
||||
content = @Content(
|
||||
mediaType = "multipart/form-data",
|
||||
schema = @Schema(implementation = ExtractImageScansRequest.class) // This should represent your form's structure
|
||||
)
|
||||
)
|
||||
ExtractImageScansRequest form) throws IOException, InterruptedException {
|
||||
String fileName = form.getFileInput().getOriginalFilename();
|
||||
String extension = fileName.substring(fileName.lastIndexOf(".") + 1);
|
||||
|
||||
List<String> images = new ArrayList<>();
|
||||
|
||||
// Check if input file is a PDF
|
||||
if (extension.equalsIgnoreCase("pdf")) {
|
||||
// Load PDF document
|
||||
try (PDDocument document = PDDocument.load(new ByteArrayInputStream(form.getFileInput().getBytes()))) {
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||
int pageCount = document.getNumberOfPages();
|
||||
images = new ArrayList<>();
|
||||
|
||||
// Create images of all pages
|
||||
for (int i = 0; i < pageCount; i++) {
|
||||
// Create temp file to save the image
|
||||
Path tempFile = Files.createTempFile("image_", ".png");
|
||||
|
||||
// Render image and save as temp file
|
||||
BufferedImage image = pdfRenderer.renderImageWithDPI(i, 300);
|
||||
ImageIO.write(image, "png", tempFile.toFile());
|
||||
|
||||
// Add temp file path to images list
|
||||
images.add(tempFile.toString());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Path tempInputFile = Files.createTempFile("input_", "." + extension);
|
||||
Files.copy(form.getFileInput().getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
|
||||
// Add input file path to images list
|
||||
images.add(tempInputFile.toString());
|
||||
}
|
||||
|
||||
List<byte[]> processedImageBytes = new ArrayList<>();
|
||||
|
||||
// Process each image
|
||||
for (int i = 0; i < images.size(); i++) {
|
||||
|
||||
Path tempDir = Files.createTempDirectory("openCV_output");
|
||||
List<String> command = new ArrayList<>(Arrays.asList(
|
||||
"python3",
|
||||
"./scripts/split_photos.py",
|
||||
images.get(i),
|
||||
tempDir.toString(),
|
||||
"--angle_threshold", String.valueOf(form.getAngleThreshold()),
|
||||
"--tolerance", String.valueOf(form.getTolerance()),
|
||||
"--min_area", String.valueOf(form.getMinArea()),
|
||||
"--min_contour_area", String.valueOf(form.getMinContourArea()),
|
||||
"--border_size", String.valueOf(form.getBorderSize())
|
||||
));
|
||||
|
||||
|
||||
// Run CLI command
|
||||
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the output photos in temp directory
|
||||
List<Path> tempOutputFiles = Files.list(tempDir).sorted().collect(Collectors.toList());
|
||||
for (Path tempOutputFile : tempOutputFiles) {
|
||||
byte[] imageBytes = Files.readAllBytes(tempOutputFile);
|
||||
processedImageBytes.add(imageBytes);
|
||||
}
|
||||
// Clean up the temporary directory
|
||||
FileUtils.deleteDirectory(tempDir.toFile());
|
||||
}
|
||||
|
||||
// Create zip file if multiple images
|
||||
if (processedImageBytes.size() > 1) {
|
||||
String outputZipFilename = fileName.replaceFirst("[.][^.]+$", "") + "_processed.zip";
|
||||
Path tempZipFile = Files.createTempFile("output_", ".zip");
|
||||
|
||||
try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
|
||||
// Add processed images to the zip
|
||||
for (int i = 0; i < processedImageBytes.size(); i++) {
|
||||
ZipEntry entry = new ZipEntry(fileName.replaceFirst("[.][^.]+$", "") + "_" + (i + 1) + ".png");
|
||||
zipOut.putNextEntry(entry);
|
||||
zipOut.write(processedImageBytes.get(i));
|
||||
zipOut.closeEntry();
|
||||
}
|
||||
}
|
||||
|
||||
byte[] zipBytes = Files.readAllBytes(tempZipFile);
|
||||
|
||||
// Clean up the temporary zip file
|
||||
Files.delete(tempZipFile);
|
||||
|
||||
return WebResponseUtils.bytesToWebResponse(zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
|
||||
} else {
|
||||
// Return the processed image as a response
|
||||
byte[] imageBytes = processedImageBytes.get(0);
|
||||
return WebResponseUtils.bytesToWebResponse(imageBytes, fileName.replaceFirst("[.][^.]+$", "") + ".png", MediaType.IMAGE_PNG);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,114 @@
|
||||
package stirling.software.SPDF.controller.api.misc;
|
||||
|
||||
import java.awt.Graphics2D;
|
||||
import java.awt.Image;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.awt.image.RenderedImage;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.zip.Deflater;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
|
||||
import org.apache.pdfbox.cos.COSName;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RequestPart;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.Parameter;
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
@RestController
|
||||
@Tag(name = "Other", description = "Other APIs")
|
||||
public class ExtractImagesController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(ExtractImagesController.class);
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/extract-images")
|
||||
@Operation(summary = "Extract images from a PDF file",
|
||||
description = "This endpoint extracts images from a given PDF file and returns them in a zip file. Users can specify the output image format. Input:PDF Output:IMAGE/ZIP Type:SIMO")
|
||||
public ResponseEntity<byte[]> extractImages(
|
||||
@RequestPart(required = true, value = "fileInput")
|
||||
@Parameter(description = "The input PDF file containing images")
|
||||
MultipartFile file,
|
||||
@RequestParam("format")
|
||||
@Parameter(description = "The output image format e.g., 'png', 'jpeg', or 'gif'", schema = @Schema(allowableValues = {"png", "jpeg", "gif"}))
|
||||
String format) throws IOException {
|
||||
|
||||
System.out.println(System.currentTimeMillis() + "file=" + file.getName() + ", format=" + format);
|
||||
PDDocument document = PDDocument.load(file.getBytes());
|
||||
|
||||
// Create ByteArrayOutputStream to write zip file to byte array
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
|
||||
// Create ZipOutputStream to create zip file
|
||||
ZipOutputStream zos = new ZipOutputStream(baos);
|
||||
|
||||
// Set compression level
|
||||
zos.setLevel(Deflater.BEST_COMPRESSION);
|
||||
|
||||
int imageIndex = 1;
|
||||
String filename = file.getOriginalFilename().replaceFirst("[.][^.]+$", "");
|
||||
int pageNum = 1;
|
||||
// Iterate over each page
|
||||
for (PDPage page : document.getPages()) {
|
||||
++pageNum;
|
||||
// Extract images from page
|
||||
for (COSName name : page.getResources().getXObjectNames()) {
|
||||
if (page.getResources().isImageXObject(name)) {
|
||||
PDImageXObject image = (PDImageXObject) page.getResources().getXObject(name);
|
||||
|
||||
// Convert image to desired format
|
||||
RenderedImage renderedImage = image.getImage();
|
||||
BufferedImage bufferedImage = null;
|
||||
if (format.equalsIgnoreCase("png")) {
|
||||
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), BufferedImage.TYPE_INT_ARGB);
|
||||
} else if (format.equalsIgnoreCase("jpeg") || format.equalsIgnoreCase("jpg")) {
|
||||
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), BufferedImage.TYPE_INT_RGB);
|
||||
} else if (format.equalsIgnoreCase("gif")) {
|
||||
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), BufferedImage.TYPE_BYTE_INDEXED);
|
||||
}
|
||||
|
||||
// Write image to zip file
|
||||
String imageName = filename + "_" + imageIndex + " (Page " + pageNum + ")." + format;
|
||||
ZipEntry zipEntry = new ZipEntry(imageName);
|
||||
zos.putNextEntry(zipEntry);
|
||||
|
||||
Graphics2D g = bufferedImage.createGraphics();
|
||||
g.drawImage((Image) renderedImage, 0, 0, null);
|
||||
g.dispose();
|
||||
// Write image bytes to zip file
|
||||
ByteArrayOutputStream imageBaos = new ByteArrayOutputStream();
|
||||
ImageIO.write(bufferedImage, format, imageBaos);
|
||||
zos.write(imageBaos.toByteArray());
|
||||
|
||||
zos.closeEntry();
|
||||
imageIndex++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Close ZipOutputStream and PDDocument
|
||||
zos.close();
|
||||
document.close();
|
||||
|
||||
// Create ByteArrayResource from byte array
|
||||
byte[] zipContents = baos.toByteArray();
|
||||
|
||||
return WebResponseUtils.boasToWebResponse(baos, filename + "_extracted-images.zip", MediaType.APPLICATION_OCTET_STREAM);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,150 @@
|
||||
package stirling.software.SPDF.controller.api.misc;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.awt.image.AffineTransformOp;
|
||||
//Required for image manipulation
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.awt.image.BufferedImageOp;
|
||||
import java.awt.image.ConvolveOp;
|
||||
import java.awt.image.Kernel;
|
||||
import java.awt.image.RescaleOp;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
//Required for file input/output
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
//Other required classes
|
||||
import java.util.Random;
|
||||
|
||||
//Required for image input/output
|
||||
import javax.imageio.ImageIO;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
||||
import org.apache.pdfbox.rendering.ImageType;
|
||||
import org.apache.pdfbox.rendering.PDFRenderer;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestPart;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Hidden;
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.Parameter;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@Tag(name = "Other", description = "Other APIs")
|
||||
public class FakeScanControllerWIP {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(FakeScanControllerWIP.class);
|
||||
|
||||
//TODO
|
||||
@Hidden
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/fakeScan")
|
||||
@Operation(
|
||||
summary = "Repair a PDF file",
|
||||
description = "This endpoint repairs a given PDF file by running Ghostscript command. The PDF is first saved to a temporary location, repaired, read back, and then returned as a response."
|
||||
)
|
||||
public ResponseEntity<byte[]> repairPdf(
|
||||
@RequestPart(required = true, value = "fileInput")
|
||||
@Parameter(description = "The input PDF file to be repaired", required = true)
|
||||
MultipartFile inputFile) throws IOException, InterruptedException {
|
||||
|
||||
PDDocument document = PDDocument.load(inputFile.getBytes());
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||
for (int page = 0; page < document.getNumberOfPages(); ++page)
|
||||
{
|
||||
BufferedImage image = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
|
||||
ImageIO.write(image, "png", new File("scanned-" + (page+1) + ".png"));
|
||||
}
|
||||
document.close();
|
||||
|
||||
// Constants
|
||||
int scannedness = 90; // Value between 0 and 100
|
||||
int dirtiness = 0; // Value between 0 and 100
|
||||
|
||||
// Load the source image
|
||||
BufferedImage sourceImage = ImageIO.read(new File("scanned-1.png"));
|
||||
|
||||
// Create the destination image
|
||||
BufferedImage destinationImage = new BufferedImage(sourceImage.getWidth(), sourceImage.getHeight(), sourceImage.getType());
|
||||
|
||||
// Apply a brightness and contrast effect based on the "scanned-ness"
|
||||
float scaleFactor = 1.0f + (scannedness / 100.0f) * 0.5f; // Between 1.0 and 1.5
|
||||
float offset = scannedness * 1.5f; // Between 0 and 150
|
||||
BufferedImageOp op = new RescaleOp(scaleFactor, offset, null);
|
||||
op.filter(sourceImage, destinationImage);
|
||||
|
||||
// Apply a rotation effect
|
||||
double rotationRequired = Math.toRadians((new Random().nextInt(3 - 1) + 1)); // Random angle between 1 and 3 degrees
|
||||
double locationX = destinationImage.getWidth() / 2;
|
||||
double locationY = destinationImage.getHeight() / 2;
|
||||
AffineTransform tx = AffineTransform.getRotateInstance(rotationRequired, locationX, locationY);
|
||||
AffineTransformOp rotateOp = new AffineTransformOp(tx, AffineTransformOp.TYPE_BILINEAR);
|
||||
destinationImage = rotateOp.filter(destinationImage, null);
|
||||
|
||||
// Apply a blur effect based on the "scanned-ness"
|
||||
float blurIntensity = scannedness / 100.0f * 0.2f; // Between 0.0 and 0.2
|
||||
float[] matrix = {
|
||||
blurIntensity, blurIntensity, blurIntensity,
|
||||
blurIntensity, blurIntensity, blurIntensity,
|
||||
blurIntensity, blurIntensity, blurIntensity
|
||||
};
|
||||
BufferedImageOp blurOp = new ConvolveOp(new Kernel(3, 3, matrix), ConvolveOp.EDGE_NO_OP, null);
|
||||
destinationImage = blurOp.filter(destinationImage, null);
|
||||
|
||||
// Add noise to the image based on the "dirtiness"
|
||||
Random random = new Random();
|
||||
for (int y = 0; y < destinationImage.getHeight(); y++) {
|
||||
for (int x = 0; x < destinationImage.getWidth(); x++) {
|
||||
if (random.nextInt(100) < dirtiness) {
|
||||
// Change the pixel color to black randomly based on the "dirtiness"
|
||||
destinationImage.setRGB(x, y, Color.BLACK.getRGB());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Save the image
|
||||
ImageIO.write(destinationImage, "PNG", new File("scanned-1.png"));
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
PDDocument documentOut = new PDDocument();
|
||||
for (int page = 1; page <= document.getNumberOfPages(); ++page)
|
||||
{
|
||||
BufferedImage bim = ImageIO.read(new File("scanned-" + page + ".png"));
|
||||
|
||||
// Adjust the dimensions of the page
|
||||
PDPage pdPage = new PDPage(new PDRectangle(bim.getWidth() - 1, bim.getHeight() - 1));
|
||||
documentOut.addPage(pdPage);
|
||||
|
||||
PDImageXObject pdImage = LosslessFactory.createFromImage(documentOut, bim);
|
||||
PDPageContentStream contentStream = new PDPageContentStream(documentOut, pdPage);
|
||||
|
||||
// Draw the image with a slight offset and enlarged dimensions
|
||||
contentStream.drawImage(pdImage, -1, -1, bim.getWidth() + 2, bim.getHeight() + 2);
|
||||
contentStream.close();
|
||||
}
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
documentOut.save(baos);
|
||||
documentOut.close();
|
||||
|
||||
// Return the optimized PDF as a response
|
||||
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_scanned.pdf";
|
||||
return WebResponseUtils.boasToWebResponse(baos, outputFilename);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,168 @@
|
||||
package stirling.software.SPDF.controller.api.misc;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Calendar;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.pdfbox.cos.COSName;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RequestPart;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.Parameter;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@Tag(name = "Other", description = "Other APIs")
|
||||
public class MetadataController {
|
||||
|
||||
|
||||
private String checkUndefined(String entry) {
|
||||
// Check if the string is "undefined"
|
||||
if ("undefined".equals(entry)) {
|
||||
// Return null if it is
|
||||
return null;
|
||||
}
|
||||
// Return the original string if it's not "undefined"
|
||||
return entry;
|
||||
|
||||
}
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/update-metadata")
|
||||
@Operation(summary = "Update metadata of a PDF file",
|
||||
description = "This endpoint allows you to update the metadata of a given PDF file. You can add, modify, or delete standard and custom metadata fields. Input:PDF Output:PDF Type:SISO")
|
||||
public ResponseEntity<byte[]> metadata(
|
||||
@RequestPart(required = true, value = "fileInput")
|
||||
@Parameter(description = "The input PDF file to update metadata")
|
||||
MultipartFile pdfFile,
|
||||
@RequestParam(value = "deleteAll", required = false, defaultValue = "false")
|
||||
@Parameter(description = "Delete all metadata if set to true")
|
||||
Boolean deleteAll,
|
||||
@RequestParam(value = "author", required = false)
|
||||
@Parameter(description = "The author of the document")
|
||||
String author,
|
||||
@RequestParam(value = "creationDate", required = false)
|
||||
@Parameter(description = "The creation date of the document (format: yyyy/MM/dd HH:mm:ss)")
|
||||
String creationDate,
|
||||
@RequestParam(value = "creator", required = false)
|
||||
@Parameter(description = "The creator of the document")
|
||||
String creator,
|
||||
@RequestParam(value = "keywords", required = false)
|
||||
@Parameter(description = "The keywords for the document")
|
||||
String keywords,
|
||||
@RequestParam(value = "modificationDate", required = false)
|
||||
@Parameter(description = "The modification date of the document (format: yyyy/MM/dd HH:mm:ss)")
|
||||
String modificationDate,
|
||||
@RequestParam(value = "producer", required = false)
|
||||
@Parameter(description = "The producer of the document")
|
||||
String producer,
|
||||
@RequestParam(value = "subject", required = false)
|
||||
@Parameter(description = "The subject of the document")
|
||||
String subject,
|
||||
@RequestParam(value = "title", required = false)
|
||||
@Parameter(description = "The title of the document")
|
||||
String title,
|
||||
@RequestParam(value = "trapped", required = false)
|
||||
@Parameter(description = "The trapped status of the document")
|
||||
String trapped,
|
||||
@Parameter(description = "Map list of key and value of custom parameters, note these must start with customKey and customValue if they are non standard")
|
||||
@RequestParam Map<String, String> allRequestParams)
|
||||
throws IOException {
|
||||
|
||||
// Load the PDF file into a PDDocument
|
||||
PDDocument document = PDDocument.load(pdfFile.getBytes());
|
||||
|
||||
// Get the document information from the PDF
|
||||
PDDocumentInformation info = document.getDocumentInformation();
|
||||
|
||||
// Check if each metadata value is "undefined" and set it to null if it is
|
||||
author = checkUndefined(author);
|
||||
creationDate = checkUndefined(creationDate);
|
||||
creator = checkUndefined(creator);
|
||||
keywords = checkUndefined(keywords);
|
||||
modificationDate = checkUndefined(modificationDate);
|
||||
producer = checkUndefined(producer);
|
||||
subject = checkUndefined(subject);
|
||||
title = checkUndefined(title);
|
||||
trapped = checkUndefined(trapped);
|
||||
|
||||
// If the "deleteAll" flag is set, remove all metadata from the document
|
||||
// information
|
||||
if (deleteAll) {
|
||||
for (String key : info.getMetadataKeys()) {
|
||||
info.setCustomMetadataValue(key, null);
|
||||
}
|
||||
// Remove metadata from the PDF history
|
||||
document.getDocumentCatalog().getCOSObject().removeItem(COSName.getPDFName("Metadata"));
|
||||
document.getDocumentCatalog().getCOSObject().removeItem(COSName.getPDFName("PieceInfo"));
|
||||
author = null;
|
||||
creationDate = null;
|
||||
creator = null;
|
||||
keywords = null;
|
||||
modificationDate = null;
|
||||
producer = null;
|
||||
subject = null;
|
||||
title = null;
|
||||
trapped = null;
|
||||
} else {
|
||||
// Iterate through the request parameters and set the metadata values
|
||||
for (Entry<String, String> entry : allRequestParams.entrySet()) {
|
||||
String key = entry.getKey();
|
||||
// Check if the key is a standard metadata key
|
||||
if (!key.equalsIgnoreCase("Author") && !key.equalsIgnoreCase("CreationDate") && !key.equalsIgnoreCase("Creator") && !key.equalsIgnoreCase("Keywords")
|
||||
&& !key.equalsIgnoreCase("modificationDate") && !key.equalsIgnoreCase("Producer") && !key.equalsIgnoreCase("Subject") && !key.equalsIgnoreCase("Title")
|
||||
&& !key.equalsIgnoreCase("Trapped") && !key.contains("customKey") && !key.contains("customValue")) {
|
||||
info.setCustomMetadataValue(key, entry.getValue());
|
||||
} else if (key.contains("customKey")) {
|
||||
int number = Integer.parseInt(key.replaceAll("\\D", ""));
|
||||
String customKey = entry.getValue();
|
||||
String customValue = allRequestParams.get("customValue" + number);
|
||||
info.setCustomMetadataValue(customKey, customValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (creationDate != null && creationDate.length() > 0) {
|
||||
Calendar creationDateCal = Calendar.getInstance();
|
||||
try {
|
||||
creationDateCal.setTime(new SimpleDateFormat("yyyy/MM/dd HH:mm:ss").parse(creationDate));
|
||||
} catch (ParseException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
info.setCreationDate(creationDateCal);
|
||||
} else {
|
||||
info.setCreationDate(null);
|
||||
}
|
||||
if (modificationDate != null && modificationDate.length() > 0) {
|
||||
Calendar modificationDateCal = Calendar.getInstance();
|
||||
try {
|
||||
modificationDateCal.setTime(new SimpleDateFormat("yyyy/MM/dd HH:mm:ss").parse(modificationDate));
|
||||
} catch (ParseException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
info.setModificationDate(modificationDateCal);
|
||||
} else {
|
||||
info.setModificationDate(null);
|
||||
}
|
||||
info.setCreator(creator);
|
||||
info.setKeywords(keywords);
|
||||
info.setAuthor(author);
|
||||
info.setProducer(producer);
|
||||
info.setSubject(subject);
|
||||
info.setTitle(title);
|
||||
info.setTrapped(trapped);
|
||||
|
||||
document.setDocumentInformation(info);
|
||||
return WebResponseUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_metadata.pdf");
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,208 @@
|
||||
package stirling.software.SPDF.controller.api.misc;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RequestPart;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.Parameter;
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@Tag(name = "Other", description = "Other APIs")
|
||||
public class OCRController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(OCRController.class);
|
||||
|
||||
public List<String> getAvailableTesseractLanguages() {
|
||||
String tessdataDir = "/usr/share/tesseract-ocr/4.00/tessdata";
|
||||
File[] files = new File(tessdataDir).listFiles();
|
||||
if (files == null) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
return Arrays.stream(files).filter(file -> file.getName().endsWith(".traineddata")).map(file -> file.getName().replace(".traineddata", ""))
|
||||
.filter(lang -> !lang.equalsIgnoreCase("osd")).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/ocr-pdf")
|
||||
@Operation(summary = "Process a PDF file with OCR",
|
||||
description = "This endpoint processes a PDF file using OCR (Optical Character Recognition). Users can specify languages, sidecar, deskew, clean, cleanFinal, ocrType, ocrRenderType, and removeImagesAfter options. Input:PDF Output:PDF Type:SI-Conditional")
|
||||
public ResponseEntity<byte[]> processPdfWithOCR(
|
||||
@RequestPart(required = true, value = "fileInput")
|
||||
@Parameter(description = "The input PDF file to be processed with OCR")
|
||||
MultipartFile inputFile,
|
||||
@RequestParam("languages")
|
||||
@Parameter(description = "List of languages to use in OCR processing")
|
||||
List<String> selectedLanguages,
|
||||
@RequestParam(name = "sidecar", required = false)
|
||||
@Parameter(description = "Include OCR text in a sidecar text file if set to true")
|
||||
Boolean sidecar,
|
||||
@RequestParam(name = "deskew", required = false)
|
||||
@Parameter(description = "Deskew the input file if set to true")
|
||||
Boolean deskew,
|
||||
@RequestParam(name = "clean", required = false)
|
||||
@Parameter(description = "Clean the input file if set to true")
|
||||
Boolean clean,
|
||||
@RequestParam(name = "clean-final", required = false)
|
||||
@Parameter(description = "Clean the final output if set to true")
|
||||
Boolean cleanFinal,
|
||||
@RequestParam(name = "ocrType", required = false)
|
||||
@Parameter(description = "Specify the OCR type, e.g., 'skip-text', 'force-ocr', or 'Normal'", schema = @Schema(allowableValues = {"skip-text", "force-ocr", "Normal"}))
|
||||
String ocrType,
|
||||
@RequestParam(name = "ocrRenderType", required = false, defaultValue = "hocr")
|
||||
@Parameter(description = "Specify the OCR render type, either 'hocr' or 'sandwich'", schema = @Schema(allowableValues = {"hocr", "sandwich"}))
|
||||
String ocrRenderType,
|
||||
@RequestParam(name = "removeImagesAfter", required = false)
|
||||
@Parameter(description = "Remove images from the output PDF if set to true")
|
||||
Boolean removeImagesAfter) throws IOException, InterruptedException {
|
||||
|
||||
// --output-type pdfa
|
||||
if (selectedLanguages == null || selectedLanguages.isEmpty()) {
|
||||
throw new IOException("Please select at least one language.");
|
||||
}
|
||||
|
||||
if(!ocrRenderType.equals("hocr") && !ocrRenderType.equals("sandwich")) {
|
||||
throw new IOException("ocrRenderType wrong");
|
||||
}
|
||||
|
||||
// Get available Tesseract languages
|
||||
List<String> availableLanguages = getAvailableTesseractLanguages();
|
||||
|
||||
// Validate selected languages
|
||||
selectedLanguages = selectedLanguages.stream().filter(availableLanguages::contains).toList();
|
||||
|
||||
if (selectedLanguages.isEmpty()) {
|
||||
throw new IOException("None of the selected languages are valid.");
|
||||
}
|
||||
// Save the uploaded file to a temporary location
|
||||
Path tempInputFile = Files.createTempFile("input_", ".pdf");
|
||||
Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
|
||||
|
||||
// Prepare the output file path
|
||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
|
||||
// Prepare the output file path
|
||||
Path sidecarTextPath = null;
|
||||
|
||||
// Run OCR Command
|
||||
String languageOption = String.join("+", selectedLanguages);
|
||||
|
||||
|
||||
List<String> command = new ArrayList<>(Arrays.asList("ocrmypdf", "--verbose", "2", "--output-type", "pdf", "--pdf-renderer" , ocrRenderType));
|
||||
|
||||
if (sidecar != null && sidecar) {
|
||||
sidecarTextPath = Files.createTempFile("sidecar", ".txt");
|
||||
command.add("--sidecar");
|
||||
command.add(sidecarTextPath.toString());
|
||||
}
|
||||
|
||||
if (deskew != null && deskew) {
|
||||
command.add("--deskew");
|
||||
}
|
||||
if (clean != null && clean) {
|
||||
command.add("--clean");
|
||||
}
|
||||
if (cleanFinal != null && cleanFinal) {
|
||||
command.add("--clean-final");
|
||||
}
|
||||
if (ocrType != null && !ocrType.equals("")) {
|
||||
if ("skip-text".equals(ocrType)) {
|
||||
command.add("--skip-text");
|
||||
} else if ("force-ocr".equals(ocrType)) {
|
||||
command.add("--force-ocr");
|
||||
} else if ("Normal".equals(ocrType)) {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
command.addAll(Arrays.asList("--language", languageOption, tempInputFile.toString(), tempOutputFile.toString()));
|
||||
|
||||
// Run CLI command
|
||||
ProcessExecutorResult result = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
|
||||
if(result.getRc() != 0 && result.getMessages().contains("multiprocessing/synchronize.py") && result.getMessages().contains("OSError: [Errno 38] Function not implemented")) {
|
||||
command.add("--jobs");
|
||||
command.add("1");
|
||||
result = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Remove images from the OCR processed PDF if the flag is set to true
|
||||
if (removeImagesAfter != null && removeImagesAfter) {
|
||||
Path tempPdfWithoutImages = Files.createTempFile("output_", "_no_images.pdf");
|
||||
|
||||
List<String> gsCommand = Arrays.asList("gs", "-sDEVICE=pdfwrite", "-dFILTERIMAGE", "-o", tempPdfWithoutImages.toString(), tempOutputFile.toString());
|
||||
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(gsCommand);
|
||||
tempOutputFile = tempPdfWithoutImages;
|
||||
}
|
||||
// Read the OCR processed PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
// Clean up the temporary files
|
||||
Files.delete(tempInputFile);
|
||||
|
||||
// Return the OCR processed PDF as a response
|
||||
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf";
|
||||
|
||||
if (sidecar != null && sidecar) {
|
||||
// Create a zip file containing both the PDF and the text file
|
||||
String outputZipFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.zip";
|
||||
Path tempZipFile = Files.createTempFile("output_", ".zip");
|
||||
|
||||
try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
|
||||
// Add PDF file to the zip
|
||||
ZipEntry pdfEntry = new ZipEntry(outputFilename);
|
||||
zipOut.putNextEntry(pdfEntry);
|
||||
Files.copy(tempOutputFile, zipOut);
|
||||
zipOut.closeEntry();
|
||||
|
||||
// Add text file to the zip
|
||||
ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt"));
|
||||
zipOut.putNextEntry(txtEntry);
|
||||
Files.copy(sidecarTextPath, zipOut);
|
||||
zipOut.closeEntry();
|
||||
}
|
||||
|
||||
byte[] zipBytes = Files.readAllBytes(tempZipFile);
|
||||
|
||||
// Clean up the temporary zip file
|
||||
Files.delete(tempZipFile);
|
||||
Files.delete(tempOutputFile);
|
||||
Files.delete(sidecarTextPath);
|
||||
|
||||
// Return the zip file containing both the PDF and the text file
|
||||
return WebResponseUtils.bytesToWebResponse(zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
|
||||
} else {
|
||||
// Return the OCR processed PDF as a response
|
||||
Files.delete(tempOutputFile);
|
||||
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,59 @@
|
||||
package stirling.software.SPDF.controller.api.misc;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RequestPart;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.Parameter;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@Tag(name = "Other", description = "Other APIs")
|
||||
public class OverlayImageController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(OverlayImageController.class);
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/add-image")
|
||||
@Operation(
|
||||
summary = "Overlay image onto a PDF file",
|
||||
description = "This endpoint overlays an image onto a PDF file at the specified coordinates. The image can be overlaid on every page of the PDF if specified. Input:PDF/IMAGE Output:PDF Type:MF-SISO"
|
||||
)
|
||||
public ResponseEntity<byte[]> overlayImage(
|
||||
@RequestPart(required = true, value = "fileInput")
|
||||
@Parameter(description = "The input PDF file to overlay the image onto.", required = true)
|
||||
MultipartFile pdfFile,
|
||||
@RequestParam("fileInput2")
|
||||
@Parameter(description = "The image file to be overlaid onto the PDF.", required = true)
|
||||
MultipartFile imageFile,
|
||||
@RequestParam("x")
|
||||
@Parameter(description = "The x-coordinate at which to place the top-left corner of the image.", example = "0")
|
||||
float x,
|
||||
@RequestParam("y")
|
||||
@Parameter(description = "The y-coordinate at which to place the top-left corner of the image.", example = "0")
|
||||
float y,
|
||||
@RequestParam("everyPage")
|
||||
@Parameter(description = "Whether to overlay the image onto every page of the PDF.", example = "false")
|
||||
boolean everyPage) {
|
||||
try {
|
||||
byte[] pdfBytes = pdfFile.getBytes();
|
||||
byte[] imageBytes = imageFile.getBytes();
|
||||
byte[] result = PdfUtils.overlayImage(pdfBytes, imageBytes, x, y, everyPage);
|
||||
|
||||
return WebResponseUtils.bytesToWebResponse(result, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_overlayed.pdf");
|
||||
} catch (IOException e) {
|
||||
logger.error("Failed to add image to PDF", e);
|
||||
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,135 @@
|
||||
package stirling.software.SPDF.controller.api.misc;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.Parameter;
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.utils.GeneralUtils;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@Tag(name = "Other", description = "Other APIs")
|
||||
public class PageNumbersController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(PageNumbersController.class);
|
||||
|
||||
@PostMapping(value = "/add-page-numbers", consumes = "multipart/form-data")
|
||||
@Operation(summary = "Add page numbers to a PDF document", description = "This operation takes an input PDF file and adds page numbers to it. Input:PDF Output:PDF Type:SISO")
|
||||
public ResponseEntity<byte[]> addPageNumbers(
|
||||
@Parameter(description = "The input PDF file", required = true) @RequestParam("fileInput") MultipartFile file,
|
||||
@Parameter(description = "Custom margin: small/medium/large", required = true, schema = @Schema(type = "string", allowableValues = {"small", "medium", "large"})) @RequestParam("customMargin") String customMargin,
|
||||
@Parameter(description = "Position: 1 of 9 positions", required = true, schema = @Schema(type = "integer", minimum = "1", maximum = "9")) @RequestParam("position") int position,
|
||||
@Parameter(description = "Starting number", required = true, schema = @Schema(type = "integer", minimum = "1")) @RequestParam("startingNumber") int startingNumber,
|
||||
@Parameter(description = "Which pages to number, default all", required = false, schema = @Schema(type = "string")) @RequestParam(value = "pagesToNumber", required = false) String pagesToNumber,
|
||||
@Parameter(description = "Custom text: defaults to just number but can have things like \"Page {n} of {p}\"", required = false, schema = @Schema(type = "string")) @RequestParam(value = "customText", required = false) String customText)
|
||||
throws IOException {
|
||||
int pageNumber = startingNumber;
|
||||
byte[] fileBytes = file.getBytes();
|
||||
PDDocument document = PDDocument.load(fileBytes);
|
||||
|
||||
float marginFactor;
|
||||
switch (customMargin.toLowerCase()) {
|
||||
case "small":
|
||||
marginFactor = 0.02f;
|
||||
break;
|
||||
case "medium":
|
||||
marginFactor = 0.035f;
|
||||
break;
|
||||
case "large":
|
||||
marginFactor = 0.05f;
|
||||
break;
|
||||
case "x-large":
|
||||
marginFactor = 0.075f;
|
||||
break;
|
||||
|
||||
|
||||
default:
|
||||
marginFactor = 0.035f;
|
||||
break;
|
||||
}
|
||||
|
||||
float fontSize = 12.0f;
|
||||
PDType1Font font = PDType1Font.HELVETICA;
|
||||
if(pagesToNumber == null || pagesToNumber.length() == 0) {
|
||||
pagesToNumber = "all";
|
||||
}
|
||||
if(customText == null || customText.length() == 0) {
|
||||
customText = "{n}";
|
||||
}
|
||||
List<Integer> pagesToNumberList = GeneralUtils.parsePageList(pagesToNumber.split(","), document.getNumberOfPages());
|
||||
|
||||
for (int i : pagesToNumberList) {
|
||||
PDPage page = document.getPage(i);
|
||||
PDRectangle pageSize = page.getMediaBox();
|
||||
|
||||
String text = customText != null ? customText.replace("{n}", String.valueOf(pageNumber)).replace("{total}", String.valueOf(document.getNumberOfPages())).replace("{filename}", file.getOriginalFilename().replaceFirst("[.][^.]+$", "")) : String.valueOf(pageNumber);
|
||||
|
||||
float x, y;
|
||||
|
||||
int xGroup = (position - 1) % 3;
|
||||
int yGroup = 2 - (position - 1) / 3;
|
||||
|
||||
switch (xGroup) {
|
||||
case 0: // left
|
||||
x = pageSize.getLowerLeftX() + marginFactor * pageSize.getWidth();
|
||||
break;
|
||||
case 1: // center
|
||||
x = pageSize.getLowerLeftX() + (pageSize.getWidth() / 2);
|
||||
break;
|
||||
default: // right
|
||||
x = pageSize.getUpperRightX() - marginFactor * pageSize.getWidth();
|
||||
break;
|
||||
}
|
||||
|
||||
switch (yGroup) {
|
||||
case 0: // bottom
|
||||
y = pageSize.getLowerLeftY() + marginFactor * pageSize.getHeight();
|
||||
break;
|
||||
case 1: // middle
|
||||
y = pageSize.getLowerLeftY() + (pageSize.getHeight() / 2);
|
||||
break;
|
||||
default: // top
|
||||
y = pageSize.getUpperRightY() - marginFactor * pageSize.getHeight();
|
||||
break;
|
||||
}
|
||||
|
||||
PDPageContentStream contentStream = new PDPageContentStream(document, page, PDPageContentStream.AppendMode.APPEND, true);
|
||||
contentStream.beginText();
|
||||
contentStream.setFont(font, fontSize);
|
||||
contentStream.newLineAtOffset(x, y);
|
||||
contentStream.showText(text);
|
||||
contentStream.endText();
|
||||
contentStream.close();
|
||||
|
||||
pageNumber++;
|
||||
}
|
||||
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
document.save(baos);
|
||||
document.close();
|
||||
|
||||
return WebResponseUtils.bytesToWebResponse(baos.toByteArray(), file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_numbersAdded.pdf", MediaType.APPLICATION_PDF);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,69 @@
|
||||
package stirling.software.SPDF.controller.api.misc;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestPart;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.Parameter;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@Tag(name = "Other", description = "Other APIs")
|
||||
public class RepairController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(RepairController.class);
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/repair")
|
||||
@Operation(
|
||||
summary = "Repair a PDF file",
|
||||
description = "This endpoint repairs a given PDF file by running Ghostscript command. The PDF is first saved to a temporary location, repaired, read back, and then returned as a response. Input:PDF Output:PDF Type:SISO"
|
||||
)
|
||||
public ResponseEntity<byte[]> repairPdf(
|
||||
@RequestPart(required = true, value = "fileInput")
|
||||
@Parameter(description = "The input PDF file to be repaired", required = true)
|
||||
MultipartFile inputFile) throws IOException, InterruptedException {
|
||||
|
||||
// Save the uploaded file to a temporary location
|
||||
Path tempInputFile = Files.createTempFile("input_", ".pdf");
|
||||
inputFile.transferTo(tempInputFile.toFile());
|
||||
|
||||
// Prepare the output file path
|
||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
|
||||
List<String> command = new ArrayList<>();
|
||||
command.add("gs");
|
||||
command.add("-o");
|
||||
command.add(tempOutputFile.toString());
|
||||
command.add("-sDEVICE=pdfwrite");
|
||||
command.add(tempInputFile.toString());
|
||||
|
||||
|
||||
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the optimized PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
// Clean up the temporary files
|
||||
Files.delete(tempInputFile);
|
||||
Files.delete(tempOutputFile);
|
||||
|
||||
// Return the optimized PDF as a response
|
||||
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_repaired.pdf";
|
||||
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,59 @@
|
||||
package stirling.software.SPDF.controller.api.misc;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.common.PDNameTreeNode;
|
||||
import org.apache.pdfbox.pdmodel.interactive.action.PDActionJavaScript;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestPart;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
@RestController
|
||||
@Tag(name = "Other", description = "Other APIs")
|
||||
public class ShowJavascript {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(ShowJavascript.class);
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/show-javascript")
|
||||
public ResponseEntity<byte[]> extractHeader(
|
||||
@RequestPart(value = "fileInput") MultipartFile inputFile) throws Exception {
|
||||
|
||||
String script = "";
|
||||
|
||||
try (PDDocument document = PDDocument.load(inputFile.getInputStream())) {
|
||||
|
||||
if(document.getDocumentCatalog() != null && document.getDocumentCatalog().getNames() != null) {
|
||||
PDNameTreeNode<PDActionJavaScript> jsTree = document.getDocumentCatalog().getNames().getJavaScript();
|
||||
|
||||
if (jsTree != null) {
|
||||
Map<String, PDActionJavaScript> jsEntries = jsTree.getNames();
|
||||
|
||||
for (Map.Entry<String, PDActionJavaScript> entry : jsEntries.entrySet()) {
|
||||
String name = entry.getKey();
|
||||
PDActionJavaScript jsAction = entry.getValue();
|
||||
String jsCodeStr = jsAction.getAction();
|
||||
|
||||
script += "// File: " + inputFile.getOriginalFilename() + ", Script: " + name + "\n" + jsCodeStr + "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (script.isEmpty()) {
|
||||
script = "PDF '" + inputFile.getOriginalFilename() + "' does not contain Javascript";
|
||||
}
|
||||
|
||||
return WebResponseUtils.bytesToWebResponse(script.getBytes(StandardCharsets.UTF_8), inputFile.getOriginalFilename() + ".js");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user