formatting

This commit is contained in:
Anthony Stirling
2023-12-30 19:11:27 +00:00
parent 7b43fca6fc
commit 5f771b7851
155 changed files with 5539 additions and 4767 deletions

View File

@@ -19,8 +19,10 @@ import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.misc.ExtractHeaderRequest;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@RequestMapping("/api/v1/misc")
@Tag(name = "Misc", description = "Miscellaneous APIs")
@@ -32,97 +34,105 @@ public class AutoRenameController {
private static final int LINE_LIMIT = 11;
@PostMapping(consumes = "multipart/form-data", value = "/auto-rename")
@Operation(summary = "Extract header from PDF file", description = "This endpoint accepts a PDF file and attempts to extract its title or header based on heuristics. Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> extractHeader(@ModelAttribute ExtractHeaderRequest request) throws Exception {
@Operation(
summary = "Extract header from PDF file",
description =
"This endpoint accepts a PDF file and attempts to extract its title or header based on heuristics. Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> extractHeader(@ModelAttribute ExtractHeaderRequest request)
throws Exception {
MultipartFile file = request.getFileInput();
Boolean useFirstTextAsFallback = request.isUseFirstTextAsFallback();
PDDocument document = PDDocument.load(file.getInputStream());
PDFTextStripper reader = new PDFTextStripper() {
class LineInfo {
String text;
float fontSize;
PDDocument document = PDDocument.load(file.getInputStream());
PDFTextStripper reader =
new PDFTextStripper() {
class LineInfo {
String text;
float fontSize;
LineInfo(String text, float fontSize) {
this.text = text;
this.fontSize = fontSize;
}
}
LineInfo(String text, float fontSize) {
this.text = text;
this.fontSize = fontSize;
}
}
List<LineInfo> lineInfos = new ArrayList<>();
StringBuilder lineBuilder = new StringBuilder();
float lastY = -1;
float maxFontSizeInLine = 0.0f;
int lineCount = 0;
List<LineInfo> lineInfos = new ArrayList<>();
StringBuilder lineBuilder = new StringBuilder();
float lastY = -1;
float maxFontSizeInLine = 0.0f;
int lineCount = 0;
@Override
protected void processTextPosition(TextPosition text) {
if (lastY != text.getY() && lineCount < LINE_LIMIT) {
processLine();
lineBuilder = new StringBuilder(text.getUnicode());
maxFontSizeInLine = text.getFontSizeInPt();
lastY = text.getY();
lineCount++;
} else if (lineCount < LINE_LIMIT) {
lineBuilder.append(text.getUnicode());
if (text.getFontSizeInPt() > maxFontSizeInLine) {
maxFontSizeInLine = text.getFontSizeInPt();
}
}
}
@Override
protected void processTextPosition(TextPosition text) {
if (lastY != text.getY() && lineCount < LINE_LIMIT) {
processLine();
lineBuilder = new StringBuilder(text.getUnicode());
maxFontSizeInLine = text.getFontSizeInPt();
lastY = text.getY();
lineCount++;
} else if (lineCount < LINE_LIMIT) {
lineBuilder.append(text.getUnicode());
if (text.getFontSizeInPt() > maxFontSizeInLine) {
maxFontSizeInLine = text.getFontSizeInPt();
}
}
}
private void processLine() {
if (lineBuilder.length() > 0 && lineCount < LINE_LIMIT) {
lineInfos.add(new LineInfo(lineBuilder.toString(), maxFontSizeInLine));
}
}
private void processLine() {
if (lineBuilder.length() > 0 && lineCount < LINE_LIMIT) {
lineInfos.add(new LineInfo(lineBuilder.toString(), maxFontSizeInLine));
}
}
@Override
public String getText(PDDocument doc) throws IOException {
this.lineInfos.clear();
this.lineBuilder = new StringBuilder();
this.lastY = -1;
this.maxFontSizeInLine = 0.0f;
this.lineCount = 0;
super.getText(doc);
processLine(); // Process the last line
@Override
public String getText(PDDocument doc) throws IOException {
this.lineInfos.clear();
this.lineBuilder = new StringBuilder();
this.lastY = -1;
this.maxFontSizeInLine = 0.0f;
this.lineCount = 0;
super.getText(doc);
processLine(); // Process the last line
// Merge lines with same font size
List<LineInfo> mergedLineInfos = new ArrayList<>();
for (int i = 0; i < lineInfos.size(); i++) {
String mergedText = lineInfos.get(i).text;
float fontSize = lineInfos.get(i).fontSize;
while (i + 1 < lineInfos.size() && lineInfos.get(i + 1).fontSize == fontSize) {
mergedText += " " + lineInfos.get(i + 1).text;
i++;
}
mergedLineInfos.add(new LineInfo(mergedText, fontSize));
}
// Merge lines with same font size
List<LineInfo> mergedLineInfos = new ArrayList<>();
for (int i = 0; i < lineInfos.size(); i++) {
String mergedText = lineInfos.get(i).text;
float fontSize = lineInfos.get(i).fontSize;
while (i + 1 < lineInfos.size()
&& lineInfos.get(i + 1).fontSize == fontSize) {
mergedText += " " + lineInfos.get(i + 1).text;
i++;
}
mergedLineInfos.add(new LineInfo(mergedText, fontSize));
}
// Sort lines by font size in descending order and get the first one
mergedLineInfos.sort(Comparator.comparing((LineInfo li) -> li.fontSize).reversed());
String title = mergedLineInfos.isEmpty() ? null : mergedLineInfos.get(0).text;
// Sort lines by font size in descending order and get the first one
mergedLineInfos.sort(
Comparator.comparing((LineInfo li) -> li.fontSize).reversed());
String title =
mergedLineInfos.isEmpty() ? null : mergedLineInfos.get(0).text;
return title != null ? title : (useFirstTextAsFallback ? (mergedLineInfos.isEmpty() ? null : mergedLineInfos.get(mergedLineInfos.size() - 1).text) : null);
}
return title != null
? title
: (useFirstTextAsFallback
? (mergedLineInfos.isEmpty()
? null
: mergedLineInfos.get(mergedLineInfos.size() - 1)
.text)
: null);
}
};
};
String header = reader.getText(document);
String header = reader.getText(document);
// Sanitize the header string by removing characters not allowed in a filename.
if (header != null && header.length() < 255) {
header = header.replaceAll("[/\\\\?%*:|\"<>]", "");
return WebResponseUtils.pdfDocToWebResponse(document, header + ".pdf");
} else {
logger.info("File has no good title to be found");
return WebResponseUtils.pdfDocToWebResponse(document, file.getOriginalFilename());
logger.info("File has no good title to be found");
return WebResponseUtils.pdfDocToWebResponse(document, file.getOriginalFilename());
}
}
}

View File

@@ -1,4 +1,5 @@
package stirling.software.SPDF.controller.api.misc;
import java.awt.image.BufferedImage;
import java.awt.image.DataBufferByte;
import java.awt.image.DataBufferInt;
@@ -32,6 +33,7 @@ import com.google.zxing.common.HybridBinarizer;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.misc.AutoSplitPdfRequest;
import stirling.software.SPDF.utils.WebResponseUtils;
@@ -43,8 +45,12 @@ public class AutoSplitPdfController {
private static final String QR_CONTENT = "https://github.com/Frooodle/Stirling-PDF";
@PostMapping(value = "/auto-split-pdf", consumes = "multipart/form-data")
@Operation(summary = "Auto split PDF pages into separate documents", description = "This endpoint accepts a PDF file, scans each page for a specific QR code, and splits the document at the QR code boundaries. The output is a zip file containing each separate PDF document. Input:PDF Output:ZIP-PDF Type:SISO")
public ResponseEntity<byte[]> autoSplitPdf(@ModelAttribute AutoSplitPdfRequest request) throws IOException {
@Operation(
summary = "Auto split PDF pages into separate documents",
description =
"This endpoint accepts a PDF file, scans each page for a specific QR code, and splits the document at the QR code boundaries. The output is a zip file containing each separate PDF document. Input:PDF Output:ZIP-PDF Type:SISO")
public ResponseEntity<byte[]> autoSplitPdf(@ModelAttribute AutoSplitPdfRequest request)
throws IOException {
MultipartFile file = request.getFileInput();
boolean duplexMode = request.isDuplexMode();
@@ -107,29 +113,48 @@ public class AutoSplitPdfController {
} catch (Exception e) {
e.printStackTrace();
} finally {
data = Files.readAllBytes(zipFile);
data = Files.readAllBytes(zipFile);
Files.delete(zipFile);
}
return WebResponseUtils.bytesToWebResponse(data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM);
return WebResponseUtils.bytesToWebResponse(
data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM);
}
private static String decodeQRCode(BufferedImage bufferedImage) {
LuminanceSource source;
if (bufferedImage.getRaster().getDataBuffer() instanceof DataBufferByte) {
byte[] pixels = ((DataBufferByte) bufferedImage.getRaster().getDataBuffer()).getData();
source = new PlanarYUVLuminanceSource(pixels, bufferedImage.getWidth(), bufferedImage.getHeight(), 0, 0, bufferedImage.getWidth(), bufferedImage.getHeight(), false);
source =
new PlanarYUVLuminanceSource(
pixels,
bufferedImage.getWidth(),
bufferedImage.getHeight(),
0,
0,
bufferedImage.getWidth(),
bufferedImage.getHeight(),
false);
} else if (bufferedImage.getRaster().getDataBuffer() instanceof DataBufferInt) {
int[] pixels = ((DataBufferInt) bufferedImage.getRaster().getDataBuffer()).getData();
byte[] newPixels = new byte[pixels.length];
for (int i = 0; i < pixels.length; i++) {
newPixels[i] = (byte) (pixels[i] & 0xff);
}
source = new PlanarYUVLuminanceSource(newPixels, bufferedImage.getWidth(), bufferedImage.getHeight(), 0, 0, bufferedImage.getWidth(), bufferedImage.getHeight(), false);
source =
new PlanarYUVLuminanceSource(
newPixels,
bufferedImage.getWidth(),
bufferedImage.getHeight(),
0,
0,
bufferedImage.getWidth(),
bufferedImage.getHeight(),
false);
} else {
throw new IllegalArgumentException("BufferedImage must have 8-bit gray scale, 24-bit RGB, 32-bit ARGB (packed int), byte gray, or 3-byte/4-byte RGB image data");
throw new IllegalArgumentException(
"BufferedImage must have 8-bit gray scale, 24-bit RGB, 32-bit ARGB (packed int), byte gray, or 3-byte/4-byte RGB image data");
}
BinaryBitmap bitmap = new BinaryBitmap(new HybridBinarizer(source));

View File

@@ -28,6 +28,7 @@ import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.misc.RemoveBlankPagesRequest;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
@@ -39,17 +40,18 @@ import stirling.software.SPDF.utils.WebResponseUtils;
@Tag(name = "Misc", description = "Miscellaneous APIs")
public class BlankPageController {
@PostMapping(consumes = "multipart/form-data", value = "/remove-blanks")
@Operation(
summary = "Remove blank pages from a PDF file",
description = "This endpoint removes blank pages from a given PDF file. Users can specify the threshold and white percentage to tune the detection of blank pages. Input:PDF Output:PDF Type:SISO"
)
public ResponseEntity<byte[]> removeBlankPages(@ModelAttribute RemoveBlankPagesRequest request) throws IOException, InterruptedException {
MultipartFile inputFile = request.getFileInput();
int threshold = request.getThreshold();
float whitePercent = request.getWhitePercent();
PDDocument document = null;
@PostMapping(consumes = "multipart/form-data", value = "/remove-blanks")
@Operation(
summary = "Remove blank pages from a PDF file",
description =
"This endpoint removes blank pages from a given PDF file. Users can specify the threshold and white percentage to tune the detection of blank pages. Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> removeBlankPages(@ModelAttribute RemoveBlankPagesRequest request)
throws IOException, InterruptedException {
MultipartFile inputFile = request.getFileInput();
int threshold = request.getThreshold();
float whitePercent = request.getWhitePercent();
PDDocument document = null;
try {
document = PDDocument.load(inputFile.getInputStream());
PDPageTree pages = document.getDocumentCatalog().getPages();
@@ -72,21 +74,34 @@ public class BlankPageController {
boolean hasImages = PdfUtils.hasImagesOnPage(page);
if (hasImages) {
System.out.println("page " + pageIndex + " has image");
Path tempFile = Files.createTempFile("image_", ".png");
// Render image and save as temp file
BufferedImage image = pdfRenderer.renderImageWithDPI(pageIndex, 300);
ImageIO.write(image, "png", tempFile.toFile());
List<String> command = new ArrayList<>(Arrays.asList("python3", System.getProperty("user.dir") + "/scripts/detect-blank-pages.py", tempFile.toString() ,"--threshold", String.valueOf(threshold), "--white_percent", String.valueOf(whitePercent)));
List<String> command =
new ArrayList<>(
Arrays.asList(
"python3",
System.getProperty("user.dir")
+ "/scripts/detect-blank-pages.py",
tempFile.toString(),
"--threshold",
String.valueOf(threshold),
"--white_percent",
String.valueOf(whitePercent)));
// Run CLI command
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command);
ProcessExecutorResult returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV)
.runCommandWithOutputHandling(command);
// does contain data
if (returnCode.getRc() == 0) {
System.out.println("page " + pageIndex + " has image which is not blank");
System.out.println(
"page " + pageIndex + " has image which is not blank");
pagesToKeepIndex.add(pageIndex);
} else {
System.out.println("Skipping, Image was blank for page #" + pageIndex);
@@ -94,12 +109,12 @@ public class BlankPageController {
}
}
pageIndex++;
}
System.out.print("pagesToKeep=" + pagesToKeepIndex.size());
// Remove pages not present in pagesToKeepIndex
List<Integer> pageIndices = IntStream.range(0, pages.getCount()).boxed().collect(Collectors.toList());
List<Integer> pageIndices =
IntStream.range(0, pages.getCount()).boxed().collect(Collectors.toList());
Collections.reverse(pageIndices); // Reverse to prevent index shifting during removal
for (Integer i : pageIndices) {
if (!pagesToKeepIndex.contains(i)) {
@@ -107,16 +122,15 @@ public class BlankPageController {
}
}
return WebResponseUtils.pdfDocToWebResponse(document, inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_blanksRemoved.pdf");
return WebResponseUtils.pdfDocToWebResponse(
document,
inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "")
+ "_blanksRemoved.pdf");
} catch (IOException e) {
e.printStackTrace();
return new ResponseEntity<>(HttpStatus.INTERNAL_SERVER_ERROR);
} finally {
if (document != null)
document.close();
if (document != null) document.close();
}
}
}

View File

@@ -30,6 +30,7 @@ import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.misc.OptimizePdfRequest;
import stirling.software.SPDF.utils.GeneralUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
@@ -44,20 +45,23 @@ public class CompressController {
private static final Logger logger = LoggerFactory.getLogger(CompressController.class);
@PostMapping(consumes = "multipart/form-data", value = "/compress-pdf")
@Operation(summary = "Optimize PDF file", description = "This endpoint accepts a PDF file and optimizes it based on the provided parameters. Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> optimizePdf(@ModelAttribute OptimizePdfRequest request) throws Exception {
@Operation(
summary = "Optimize PDF file",
description =
"This endpoint accepts a PDF file and optimizes it based on the provided parameters. Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> optimizePdf(@ModelAttribute OptimizePdfRequest request)
throws Exception {
MultipartFile inputFile = request.getFileInput();
Integer optimizeLevel = request.getOptimizeLevel();
String expectedOutputSizeString = request.getExpectedOutputSize();
if(expectedOutputSizeString == null && optimizeLevel == null) {
if (expectedOutputSizeString == null && optimizeLevel == null) {
throw new Exception("Both expected output size and optimize level are not specified");
}
Long expectedOutputSize = 0L;
boolean autoMode = false;
if (expectedOutputSizeString != null && expectedOutputSizeString.length() > 1 ) {
if (expectedOutputSizeString != null && expectedOutputSizeString.length() > 1) {
expectedOutputSize = GeneralUtils.convertSizeToBytes(expectedOutputSizeString);
autoMode = true;
}
@@ -71,8 +75,9 @@ public class CompressController {
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
// Determine initial optimization level based on expected size reduction, only if in autoMode
if(autoMode) {
// Determine initial optimization level based on expected size reduction, only if in
// autoMode
if (autoMode) {
double sizeReductionRatio = expectedOutputSize / (double) inputFileSize;
if (sizeReductionRatio > 0.7) {
optimizeLevel = 1;
@@ -94,20 +99,20 @@ public class CompressController {
command.add("-dCompatibilityLevel=1.4");
switch (optimizeLevel) {
case 1:
command.add("-dPDFSETTINGS=/prepress");
break;
case 2:
command.add("-dPDFSETTINGS=/printer");
break;
case 3:
command.add("-dPDFSETTINGS=/ebook");
break;
case 4:
command.add("-dPDFSETTINGS=/screen");
break;
default:
command.add("-dPDFSETTINGS=/default");
case 1:
command.add("-dPDFSETTINGS=/prepress");
break;
case 2:
command.add("-dPDFSETTINGS=/printer");
break;
case 3:
command.add("-dPDFSETTINGS=/ebook");
break;
case 4:
command.add("-dPDFSETTINGS=/screen");
break;
default:
command.add("-dPDFSETTINGS=/default");
}
command.add("-dNOPAUSE");
@@ -116,7 +121,9 @@ public class CompressController {
command.add("-sOutputFile=" + tempOutputFile.toString());
command.add(tempInputFile.toString());
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command);
ProcessExecutorResult returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
.runCommandWithOutputHandling(command);
// Check if file size is within expected size or not auto mode so instantly finish
long outputFileSize = Files.size(tempOutputFile);
@@ -125,19 +132,18 @@ public class CompressController {
} else {
// Increase optimization level for next iteration
optimizeLevel++;
if(autoMode && optimizeLevel > 3) {
if (autoMode && optimizeLevel > 3) {
System.out.println("Skipping level 4 due to bad results in auto mode");
sizeMet = true;
} else if(optimizeLevel == 5) {
} else if (optimizeLevel == 5) {
} else {
System.out.println("Increasing ghostscript optimisation level to " + optimizeLevel);
System.out.println(
"Increasing ghostscript optimisation level to " + optimizeLevel);
}
}
}
if (expectedOutputSize != null && autoMode) {
long outputFileSize = Files.size(tempOutputFile);
if (outputFileSize > expectedOutputSize) {
@@ -157,8 +163,8 @@ public class CompressController {
BufferedImage bufferedImage = image.getImage();
// Calculate the new dimensions
int newWidth = (int)(bufferedImage.getWidth() * scaleFactor);
int newHeight = (int)(bufferedImage.getHeight() * scaleFactor);
int newWidth = (int) (bufferedImage.getWidth() * scaleFactor);
int newHeight = (int) (bufferedImage.getHeight() * scaleFactor);
// If the new dimensions are zero, skip this iteration
if (newWidth == 0 || newHeight == 0) {
@@ -166,23 +172,39 @@ public class CompressController {
}
// Otherwise, proceed with the scaling
Image scaledImage = bufferedImage.getScaledInstance(newWidth, newHeight, Image.SCALE_SMOOTH);
Image scaledImage =
bufferedImage.getScaledInstance(
newWidth, newHeight, Image.SCALE_SMOOTH);
// Convert the scaled image back to a BufferedImage
BufferedImage scaledBufferedImage = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_INT_RGB);
scaledBufferedImage.getGraphics().drawImage(scaledImage, 0, 0, null);
BufferedImage scaledBufferedImage =
new BufferedImage(
newWidth,
newHeight,
BufferedImage.TYPE_INT_RGB);
scaledBufferedImage
.getGraphics()
.drawImage(scaledImage, 0, 0, null);
// Compress the scaled image
ByteArrayOutputStream compressedImageStream = new ByteArrayOutputStream();
ImageIO.write(scaledBufferedImage, "jpeg", compressedImageStream);
ByteArrayOutputStream compressedImageStream =
new ByteArrayOutputStream();
ImageIO.write(
scaledBufferedImage, "jpeg", compressedImageStream);
byte[] imageBytes = compressedImageStream.toByteArray();
compressedImageStream.close();
// Convert compressed image back to PDImageXObject
ByteArrayInputStream bais = new ByteArrayInputStream(imageBytes);
PDImageXObject compressedImage = PDImageXObject.createFromByteArray(doc, imageBytes, image.getCOSObject().toString());
ByteArrayInputStream bais =
new ByteArrayInputStream(imageBytes);
PDImageXObject compressedImage =
PDImageXObject.createFromByteArray(
doc,
imageBytes,
image.getCOSObject().toString());
// Replace the image in the resources with the compressed version
// Replace the image in the resources with the compressed
// version
res.put(name, compressedImage);
}
}
@@ -194,16 +216,23 @@ public class CompressController {
long currentSize = Files.size(tempOutputFile);
// Check if the overall PDF size is still larger than expectedOutputSize
if (currentSize > expectedOutputSize) {
// Log the current file size and scaleFactor
System.out.println("Current file size: " + FileUtils.byteCountToDisplaySize(currentSize));
// Log the current file size and scaleFactor
System.out.println(
"Current file size: "
+ FileUtils.byteCountToDisplaySize(currentSize));
System.out.println("Current scale factor: " + scaleFactor);
// The file is still too large, reduce scaleFactor and try again
scaleFactor *= 0.9; // reduce scaleFactor by 10%
// Avoid scaleFactor being too small, causing the image to shrink to 0
if(scaleFactor < 0.2 || previousFileSize == currentSize){
throw new RuntimeException("Could not reach the desired size without excessively degrading image quality, lowest size recommended is " + FileUtils.byteCountToDisplaySize(currentSize) + ", " + currentSize + " bytes");
if (scaleFactor < 0.2 || previousFileSize == currentSize) {
throw new RuntimeException(
"Could not reach the desired size without excessively degrading image quality, lowest size recommended is "
+ FileUtils.byteCountToDisplaySize(currentSize)
+ ", "
+ currentSize
+ " bytes");
}
previousFileSize = currentSize;
} else {
@@ -211,10 +240,7 @@ public class CompressController {
break;
}
}
}
}
}
@@ -222,9 +248,10 @@ public class CompressController {
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Check if optimized file is larger than the original
if(pdfBytes.length > inputFileSize) {
if (pdfBytes.length > inputFileSize) {
// Log the occurrence
logger.warn("Optimized file is larger than the original. Returning the original file instead.");
logger.warn(
"Optimized file is larger than the original. Returning the original file instead.");
// Read the original file again
pdfBytes = Files.readAllBytes(tempInputFile);
@@ -235,8 +262,8 @@ public class CompressController {
Files.delete(tempOutputFile);
// Return the optimized PDF as a response
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_Optimized.pdf";
String outputFilename =
inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_Optimized.pdf";
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
}
}

View File

@@ -32,10 +32,12 @@ import io.swagger.v3.oas.annotations.media.Content;
import io.swagger.v3.oas.annotations.media.Schema;
import io.swagger.v3.oas.annotations.parameters.RequestBody;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.misc.ExtractImageScansRequest;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@RequestMapping("/api/v1/misc")
@Tag(name = "Misc", description = "Miscellaneous APIs")
@@ -44,18 +46,28 @@ public class ExtractImageScansController {
private static final Logger logger = LoggerFactory.getLogger(ExtractImageScansController.class);
@PostMapping(consumes = "multipart/form-data", value = "/extract-image-scans")
@Operation(summary = "Extract image scans from an input file",
description = "This endpoint extracts image scans from a given file based on certain parameters. Users can specify angle threshold, tolerance, minimum area, minimum contour area, and border size. Input:PDF Output:IMAGE/ZIP Type:SIMO")
@Operation(
summary = "Extract image scans from an input file",
description =
"This endpoint extracts image scans from a given file based on certain parameters. Users can specify angle threshold, tolerance, minimum area, minimum contour area, and border size. Input:PDF Output:IMAGE/ZIP Type:SIMO")
public ResponseEntity<byte[]> extractImageScans(
@RequestBody(
description = "Form data containing file and extraction parameters",
required = true,
content = @Content(
mediaType = "multipart/form-data",
schema = @Schema(implementation = ExtractImageScansRequest.class) // This should represent your form's structure
)
)
ExtractImageScansRequest form) throws IOException, InterruptedException {
@RequestBody(
description = "Form data containing file and extraction parameters",
required = true,
content =
@Content(
mediaType = "multipart/form-data",
schema =
@Schema(
implementation =
ExtractImageScansRequest
.class) // This should
// represent
// your form's
// structure
))
ExtractImageScansRequest form)
throws IOException, InterruptedException {
String fileName = form.getFileInput().getOriginalFilename();
String extension = fileName.substring(fileName.lastIndexOf(".") + 1);
@@ -64,7 +76,8 @@ public class ExtractImageScansController {
// Check if input file is a PDF
if (extension.equalsIgnoreCase("pdf")) {
// Load PDF document
try (PDDocument document = PDDocument.load(new ByteArrayInputStream(form.getFileInput().getBytes()))) {
try (PDDocument document =
PDDocument.load(new ByteArrayInputStream(form.getFileInput().getBytes()))) {
PDFRenderer pdfRenderer = new PDFRenderer(document);
int pageCount = document.getNumberOfPages();
images = new ArrayList<>();
@@ -84,7 +97,10 @@ public class ExtractImageScansController {
}
} else {
Path tempInputFile = Files.createTempFile("input_", "." + extension);
Files.copy(form.getFileInput().getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
Files.copy(
form.getFileInput().getInputStream(),
tempInputFile,
StandardCopyOption.REPLACE_EXISTING);
// Add input file path to images list
images.add(tempInputFile.toString());
}
@@ -95,21 +111,28 @@ public class ExtractImageScansController {
for (int i = 0; i < images.size(); i++) {
Path tempDir = Files.createTempDirectory("openCV_output");
List<String> command = new ArrayList<>(Arrays.asList(
"python3",
"./scripts/split_photos.py",
images.get(i),
tempDir.toString(),
"--angle_threshold", String.valueOf(form.getAngleThreshold()),
"--tolerance", String.valueOf(form.getTolerance()),
"--min_area", String.valueOf(form.getMinArea()),
"--min_contour_area", String.valueOf(form.getMinContourArea()),
"--border_size", String.valueOf(form.getBorderSize())
));
List<String> command =
new ArrayList<>(
Arrays.asList(
"python3",
"./scripts/split_photos.py",
images.get(i),
tempDir.toString(),
"--angle_threshold",
String.valueOf(form.getAngleThreshold()),
"--tolerance",
String.valueOf(form.getTolerance()),
"--min_area",
String.valueOf(form.getMinArea()),
"--min_contour_area",
String.valueOf(form.getMinContourArea()),
"--border_size",
String.valueOf(form.getBorderSize())));
// Run CLI command
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command);
ProcessExecutorResult returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV)
.runCommandWithOutputHandling(command);
// Read the output photos in temp directory
List<Path> tempOutputFiles = Files.list(tempDir).sorted().collect(Collectors.toList());
@@ -126,10 +149,16 @@ public class ExtractImageScansController {
String outputZipFilename = fileName.replaceFirst("[.][^.]+$", "") + "_processed.zip";
Path tempZipFile = Files.createTempFile("output_", ".zip");
try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
try (ZipOutputStream zipOut =
new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
// Add processed images to the zip
for (int i = 0; i < processedImageBytes.size(); i++) {
ZipEntry entry = new ZipEntry(fileName.replaceFirst("[.][^.]+$", "") + "_" + (i + 1) + ".png");
ZipEntry entry =
new ZipEntry(
fileName.replaceFirst("[.][^.]+$", "")
+ "_"
+ (i + 1)
+ ".png");
zipOut.putNextEntry(entry);
zipOut.write(processedImageBytes.get(i));
zipOut.closeEntry();
@@ -141,13 +170,15 @@ public class ExtractImageScansController {
// Clean up the temporary zip file
Files.delete(tempZipFile);
return WebResponseUtils.bytesToWebResponse(zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
return WebResponseUtils.bytesToWebResponse(
zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
} else {
// Return the processed image as a response
byte[] imageBytes = processedImageBytes.get(0);
return WebResponseUtils.bytesToWebResponse(imageBytes, fileName.replaceFirst("[.][^.]+$", "") + ".png", MediaType.IMAGE_PNG);
return WebResponseUtils.bytesToWebResponse(
imageBytes,
fileName.replaceFirst("[.][^.]+$", "") + ".png",
MediaType.IMAGE_PNG);
}
}
}

View File

@@ -1,4 +1,5 @@
package stirling.software.SPDF.controller.api.misc;
import java.awt.Graphics2D;
import java.awt.Image;
import java.awt.image.BufferedImage;
@@ -29,8 +30,10 @@ import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.PDFWithImageFormatRequest;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@RequestMapping("/api/v1/misc")
@Tag(name = "Misc", description = "Miscellaneous APIs")
@@ -39,13 +42,17 @@ public class ExtractImagesController {
private static final Logger logger = LoggerFactory.getLogger(ExtractImagesController.class);
@PostMapping(consumes = "multipart/form-data", value = "/extract-images")
@Operation(summary = "Extract images from a PDF file",
description = "This endpoint extracts images from a given PDF file and returns them in a zip file. Users can specify the output image format. Input:PDF Output:IMAGE/ZIP Type:SIMO")
public ResponseEntity<byte[]> extractImages(@ModelAttribute PDFWithImageFormatRequest request) throws IOException {
@Operation(
summary = "Extract images from a PDF file",
description =
"This endpoint extracts images from a given PDF file and returns them in a zip file. Users can specify the output image format. Input:PDF Output:IMAGE/ZIP Type:SIMO")
public ResponseEntity<byte[]> extractImages(@ModelAttribute PDFWithImageFormatRequest request)
throws IOException {
MultipartFile file = request.getFileInput();
String format = request.getFormat();
System.out.println(System.currentTimeMillis() + "file=" + file.getName() + ", format=" + format);
System.out.println(
System.currentTimeMillis() + "file=" + file.getName() + ", format=" + format);
PDDocument document = PDDocument.load(file.getBytes());
// Create ByteArrayOutputStream to write zip file to byte array
@@ -69,24 +76,37 @@ public class ExtractImagesController {
if (page.getResources().isImageXObject(name)) {
PDImageXObject image = (PDImageXObject) page.getResources().getXObject(name);
int imageHash = image.hashCode();
if(processedImages.contains(imageHash)) {
if (processedImages.contains(imageHash)) {
continue; // Skip already processed images
}
processedImages.add(imageHash);
// Convert image to desired format
RenderedImage renderedImage = image.getImage();
BufferedImage bufferedImage = null;
if (format.equalsIgnoreCase("png")) {
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), BufferedImage.TYPE_INT_ARGB);
bufferedImage =
new BufferedImage(
renderedImage.getWidth(),
renderedImage.getHeight(),
BufferedImage.TYPE_INT_ARGB);
} else if (format.equalsIgnoreCase("jpeg") || format.equalsIgnoreCase("jpg")) {
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), BufferedImage.TYPE_INT_RGB);
bufferedImage =
new BufferedImage(
renderedImage.getWidth(),
renderedImage.getHeight(),
BufferedImage.TYPE_INT_RGB);
} else if (format.equalsIgnoreCase("gif")) {
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), BufferedImage.TYPE_BYTE_INDEXED);
bufferedImage =
new BufferedImage(
renderedImage.getWidth(),
renderedImage.getHeight(),
BufferedImage.TYPE_BYTE_INDEXED);
}
// Write image to zip file
String imageName = filename + "_" + imageIndex + " (Page " + pageNum + ")." + format;
String imageName =
filename + "_" + imageIndex + " (Page " + pageNum + ")." + format;
ZipEntry zipEntry = new ZipEntry(imageName);
zos.putNextEntry(zipEntry);
@@ -111,7 +131,7 @@ public class ExtractImagesController {
// Create ByteArrayResource from byte array
byte[] zipContents = baos.toByteArray();
return WebResponseUtils.boasToWebResponse(baos, filename + "_extracted-images.zip", MediaType.APPLICATION_OCTET_STREAM);
return WebResponseUtils.boasToWebResponse(
baos, filename + "_extracted-images.zip", MediaType.APPLICATION_OCTET_STREAM);
}
}

View File

@@ -3,21 +3,17 @@ package stirling.software.SPDF.controller.api.misc;
import java.awt.Color;
import java.awt.geom.AffineTransform;
import java.awt.image.AffineTransformOp;
import java.awt.image.BufferedImage;
import java.awt.image.BufferedImageOp;
import java.awt.image.ConvolveOp;
import java.awt.image.Kernel;
import java.awt.image.RescaleOp;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.security.SecureRandom;
import java.util.Random;
import javax.imageio.ImageIO;
import org.apache.pdfbox.pdmodel.PDDocument;
@@ -40,6 +36,7 @@ import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Hidden;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.PDFFile;
import stirling.software.SPDF.utils.WebResponseUtils;
@@ -50,102 +47,101 @@ public class FakeScanControllerWIP {
private static final Logger logger = LoggerFactory.getLogger(FakeScanControllerWIP.class);
//TODO
// TODO
@Hidden
@PostMapping(consumes = "multipart/form-data", value = "/fakeScan")
@Operation(
summary = "Repair a PDF file",
description = "This endpoint repairs a given PDF file by running Ghostscript command. The PDF is first saved to a temporary location, repaired, read back, and then returned as a response."
)
summary = "Repair a PDF file",
description =
"This endpoint repairs a given PDF file by running Ghostscript command. The PDF is first saved to a temporary location, repaired, read back, and then returned as a response.")
public ResponseEntity<byte[]> repairPdf(@ModelAttribute PDFFile request) throws IOException {
MultipartFile inputFile = request.getFileInput();
PDDocument document = PDDocument.load(inputFile.getBytes());
PDFRenderer pdfRenderer = new PDFRenderer(document);
for (int page = 0; page < document.getNumberOfPages(); ++page)
{
BufferedImage image = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
ImageIO.write(image, "png", new File("scanned-" + (page+1) + ".png"));
}
document.close();
PDDocument document = PDDocument.load(inputFile.getBytes());
PDFRenderer pdfRenderer = new PDFRenderer(document);
for (int page = 0; page < document.getNumberOfPages(); ++page) {
BufferedImage image = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
ImageIO.write(image, "png", new File("scanned-" + (page + 1) + ".png"));
}
document.close();
// Constants
int scannedness = 90; // Value between 0 and 100
int dirtiness = 0; // Value between 0 and 100
// Constants
int scannedness = 90; // Value between 0 and 100
int dirtiness = 0; // Value between 0 and 100
// Load the source image
BufferedImage sourceImage = ImageIO.read(new File("scanned-1.png"));
// Load the source image
BufferedImage sourceImage = ImageIO.read(new File("scanned-1.png"));
// Create the destination image
BufferedImage destinationImage = new BufferedImage(sourceImage.getWidth(), sourceImage.getHeight(), sourceImage.getType());
// Create the destination image
BufferedImage destinationImage =
new BufferedImage(
sourceImage.getWidth(), sourceImage.getHeight(), sourceImage.getType());
// Apply a brightness and contrast effect based on the "scanned-ness"
float scaleFactor = 1.0f + (scannedness / 100.0f) * 0.5f; // Between 1.0 and 1.5
float offset = scannedness * 1.5f; // Between 0 and 150
BufferedImageOp op = new RescaleOp(scaleFactor, offset, null);
op.filter(sourceImage, destinationImage);
// Apply a brightness and contrast effect based on the "scanned-ness"
float scaleFactor = 1.0f + (scannedness / 100.0f) * 0.5f; // Between 1.0 and 1.5
float offset = scannedness * 1.5f; // Between 0 and 150
BufferedImageOp op = new RescaleOp(scaleFactor, offset, null);
op.filter(sourceImage, destinationImage);
// Apply a rotation effect
double rotationRequired = Math.toRadians((new SecureRandom().nextInt(3 - 1) + 1)); // Random angle between 1 and 3 degrees
double locationX = destinationImage.getWidth() / 2;
double locationY = destinationImage.getHeight() / 2;
AffineTransform tx = AffineTransform.getRotateInstance(rotationRequired, locationX, locationY);
AffineTransformOp rotateOp = new AffineTransformOp(tx, AffineTransformOp.TYPE_BILINEAR);
destinationImage = rotateOp.filter(destinationImage, null);
// Apply a rotation effect
double rotationRequired =
Math.toRadians(
(new SecureRandom().nextInt(3 - 1)
+ 1)); // Random angle between 1 and 3 degrees
double locationX = destinationImage.getWidth() / 2;
double locationY = destinationImage.getHeight() / 2;
AffineTransform tx =
AffineTransform.getRotateInstance(rotationRequired, locationX, locationY);
AffineTransformOp rotateOp = new AffineTransformOp(tx, AffineTransformOp.TYPE_BILINEAR);
destinationImage = rotateOp.filter(destinationImage, null);
// Apply a blur effect based on the "scanned-ness"
float blurIntensity = scannedness / 100.0f * 0.2f; // Between 0.0 and 0.2
float[] matrix = {
blurIntensity, blurIntensity, blurIntensity,
blurIntensity, blurIntensity, blurIntensity,
blurIntensity, blurIntensity, blurIntensity
};
BufferedImageOp blurOp = new ConvolveOp(new Kernel(3, 3, matrix), ConvolveOp.EDGE_NO_OP, null);
destinationImage = blurOp.filter(destinationImage, null);
// Apply a blur effect based on the "scanned-ness"
float blurIntensity = scannedness / 100.0f * 0.2f; // Between 0.0 and 0.2
float[] matrix = {
blurIntensity, blurIntensity, blurIntensity,
blurIntensity, blurIntensity, blurIntensity,
blurIntensity, blurIntensity, blurIntensity
};
BufferedImageOp blurOp =
new ConvolveOp(new Kernel(3, 3, matrix), ConvolveOp.EDGE_NO_OP, null);
destinationImage = blurOp.filter(destinationImage, null);
// Add noise to the image based on the "dirtiness"
Random random = new SecureRandom();
for (int y = 0; y < destinationImage.getHeight(); y++) {
for (int x = 0; x < destinationImage.getWidth(); x++) {
if (random.nextInt(100) < dirtiness) {
// Change the pixel color to black randomly based on the "dirtiness"
destinationImage.setRGB(x, y, Color.BLACK.getRGB());
}
}
}
// Add noise to the image based on the "dirtiness"
Random random = new SecureRandom();
for (int y = 0; y < destinationImage.getHeight(); y++) {
for (int x = 0; x < destinationImage.getWidth(); x++) {
if (random.nextInt(100) < dirtiness) {
// Change the pixel color to black randomly based on the "dirtiness"
destinationImage.setRGB(x, y, Color.BLACK.getRGB());
}
}
}
// Save the image
ImageIO.write(destinationImage, "PNG", new File("scanned-1.png"));
// Save the image
ImageIO.write(destinationImage, "PNG", new File("scanned-1.png"));
PDDocument documentOut = new PDDocument();
for (int page = 1; page <= document.getNumberOfPages(); ++page) {
BufferedImage bim = ImageIO.read(new File("scanned-" + page + ".png"));
// Adjust the dimensions of the page
PDPage pdPage = new PDPage(new PDRectangle(bim.getWidth() - 1, bim.getHeight() - 1));
documentOut.addPage(pdPage);
PDDocument documentOut = new PDDocument();
for (int page = 1; page <= document.getNumberOfPages(); ++page)
{
BufferedImage bim = ImageIO.read(new File("scanned-" + page + ".png"));
// Adjust the dimensions of the page
PDPage pdPage = new PDPage(new PDRectangle(bim.getWidth() - 1, bim.getHeight() - 1));
documentOut.addPage(pdPage);
PDImageXObject pdImage = LosslessFactory.createFromImage(documentOut, bim);
PDPageContentStream contentStream = new PDPageContentStream(documentOut, pdPage);
// Draw the image with a slight offset and enlarged dimensions
contentStream.drawImage(pdImage, -1, -1, bim.getWidth() + 2, bim.getHeight() + 2);
contentStream.close();
}
ByteArrayOutputStream baos = new ByteArrayOutputStream();
documentOut.save(baos);
documentOut.close();
PDImageXObject pdImage = LosslessFactory.createFromImage(documentOut, bim);
PDPageContentStream contentStream = new PDPageContentStream(documentOut, pdPage);
// Draw the image with a slight offset and enlarged dimensions
contentStream.drawImage(pdImage, -1, -1, bim.getWidth() + 2, bim.getHeight() + 2);
contentStream.close();
}
ByteArrayOutputStream baos = new ByteArrayOutputStream();
documentOut.save(baos);
documentOut.close();
// Return the optimized PDF as a response
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_scanned.pdf";
String outputFilename =
inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_scanned.pdf";
return WebResponseUtils.boasToWebResponse(baos, outputFilename);
}
}

View File

@@ -19,6 +19,7 @@ import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.misc.MetadataRequest;
import stirling.software.SPDF.utils.WebResponseUtils;
@@ -27,7 +28,6 @@ import stirling.software.SPDF.utils.WebResponseUtils;
@Tag(name = "Misc", description = "Miscellaneous APIs")
public class MetadataController {
private String checkUndefined(String entry) {
// Check if the string is "undefined"
if ("undefined".equals(entry)) {
@@ -36,14 +36,16 @@ public class MetadataController {
}
// Return the original string if it's not "undefined"
return entry;
}
@PostMapping(consumes = "multipart/form-data", value = "/update-metadata")
@Operation(summary = "Update metadata of a PDF file",
description = "This endpoint allows you to update the metadata of a given PDF file. You can add, modify, or delete standard and custom metadata fields. Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> metadata(@ModelAttribute MetadataRequest request) throws IOException {
@Operation(
summary = "Update metadata of a PDF file",
description =
"This endpoint allows you to update the metadata of a given PDF file. You can add, modify, or delete standard and custom metadata fields. Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> metadata(@ModelAttribute MetadataRequest request)
throws IOException {
// Extract PDF file from the request object
MultipartFile pdfFile = request.getFileInput();
@@ -61,8 +63,8 @@ public class MetadataController {
// Extract additional custom parameters
Map<String, String> allRequestParams = request.getAllRequestParams();
if(allRequestParams == null) {
allRequestParams = new java.util.HashMap<String, String>();
if (allRequestParams == null) {
allRequestParams = new java.util.HashMap<String, String>();
}
// Load the PDF file into a PDDocument
PDDocument document = PDDocument.load(pdfFile.getBytes());
@@ -89,7 +91,9 @@ public class MetadataController {
}
// Remove metadata from the PDF history
document.getDocumentCatalog().getCOSObject().removeItem(COSName.getPDFName("Metadata"));
document.getDocumentCatalog().getCOSObject().removeItem(COSName.getPDFName("PieceInfo"));
document.getDocumentCatalog()
.getCOSObject()
.removeItem(COSName.getPDFName("PieceInfo"));
author = null;
creationDate = null;
creator = null;
@@ -104,9 +108,17 @@ public class MetadataController {
for (Entry<String, String> entry : allRequestParams.entrySet()) {
String key = entry.getKey();
// Check if the key is a standard metadata key
if (!key.equalsIgnoreCase("Author") && !key.equalsIgnoreCase("CreationDate") && !key.equalsIgnoreCase("Creator") && !key.equalsIgnoreCase("Keywords")
&& !key.equalsIgnoreCase("modificationDate") && !key.equalsIgnoreCase("Producer") && !key.equalsIgnoreCase("Subject") && !key.equalsIgnoreCase("Title")
&& !key.equalsIgnoreCase("Trapped") && !key.contains("customKey") && !key.contains("customValue")) {
if (!key.equalsIgnoreCase("Author")
&& !key.equalsIgnoreCase("CreationDate")
&& !key.equalsIgnoreCase("Creator")
&& !key.equalsIgnoreCase("Keywords")
&& !key.equalsIgnoreCase("modificationDate")
&& !key.equalsIgnoreCase("Producer")
&& !key.equalsIgnoreCase("Subject")
&& !key.equalsIgnoreCase("Title")
&& !key.equalsIgnoreCase("Trapped")
&& !key.contains("customKey")
&& !key.contains("customValue")) {
info.setCustomMetadataValue(key, entry.getValue());
} else if (key.contains("customKey")) {
int number = Integer.parseInt(key.replaceAll("\\D", ""));
@@ -119,7 +131,8 @@ public class MetadataController {
if (creationDate != null && creationDate.length() > 0) {
Calendar creationDateCal = Calendar.getInstance();
try {
creationDateCal.setTime(new SimpleDateFormat("yyyy/MM/dd HH:mm:ss").parse(creationDate));
creationDateCal.setTime(
new SimpleDateFormat("yyyy/MM/dd HH:mm:ss").parse(creationDate));
} catch (ParseException e) {
e.printStackTrace();
}
@@ -130,7 +143,8 @@ public class MetadataController {
if (modificationDate != null && modificationDate.length() > 0) {
Calendar modificationDateCal = Calendar.getInstance();
try {
modificationDateCal.setTime(new SimpleDateFormat("yyyy/MM/dd HH:mm:ss").parse(modificationDate));
modificationDateCal.setTime(
new SimpleDateFormat("yyyy/MM/dd HH:mm:ss").parse(modificationDate));
} catch (ParseException e) {
e.printStackTrace();
}
@@ -147,7 +161,8 @@ public class MetadataController {
info.setTrapped(trapped);
document.setDocumentInformation(info);
return WebResponseUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_metadata.pdf");
return WebResponseUtils.pdfDocToWebResponse(
document,
pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_metadata.pdf");
}
}

View File

@@ -26,6 +26,7 @@ import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.misc.ProcessPdfWithOcrRequest;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
@@ -44,14 +45,21 @@ public class OCRController {
if (files == null) {
return Collections.emptyList();
}
return Arrays.stream(files).filter(file -> file.getName().endsWith(".traineddata")).map(file -> file.getName().replace(".traineddata", ""))
.filter(lang -> !lang.equalsIgnoreCase("osd")).collect(Collectors.toList());
return Arrays.stream(files)
.filter(file -> file.getName().endsWith(".traineddata"))
.map(file -> file.getName().replace(".traineddata", ""))
.filter(lang -> !lang.equalsIgnoreCase("osd"))
.collect(Collectors.toList());
}
@PostMapping(consumes = "multipart/form-data", value = "/ocr-pdf")
@Operation(summary = "Process a PDF file with OCR",
description = "This endpoint processes a PDF file using OCR (Optical Character Recognition). Users can specify languages, sidecar, deskew, clean, cleanFinal, ocrType, ocrRenderType, and removeImagesAfter options. Input:PDF Output:PDF Type:SI-Conditional")
public ResponseEntity<byte[]> processPdfWithOCR(@ModelAttribute ProcessPdfWithOcrRequest request) throws IOException, InterruptedException {
@Operation(
summary = "Process a PDF file with OCR",
description =
"This endpoint processes a PDF file using OCR (Optical Character Recognition). Users can specify languages, sidecar, deskew, clean, cleanFinal, ocrType, ocrRenderType, and removeImagesAfter options. Input:PDF Output:PDF Type:SI-Conditional")
public ResponseEntity<byte[]> processPdfWithOCR(
@ModelAttribute ProcessPdfWithOcrRequest request)
throws IOException, InterruptedException {
MultipartFile inputFile = request.getFileInput();
List<String> selectedLanguages = request.getLanguages();
Boolean sidecar = request.isSidecar();
@@ -65,16 +73,17 @@ public class OCRController {
if (selectedLanguages == null || selectedLanguages.isEmpty()) {
throw new IOException("Please select at least one language.");
}
if(!ocrRenderType.equals("hocr") && !ocrRenderType.equals("sandwich")) {
if (!ocrRenderType.equals("hocr") && !ocrRenderType.equals("sandwich")) {
throw new IOException("ocrRenderType wrong");
}
// Get available Tesseract languages
List<String> availableLanguages = getAvailableTesseractLanguages();
// Validate selected languages
selectedLanguages = selectedLanguages.stream().filter(availableLanguages::contains).toList();
selectedLanguages =
selectedLanguages.stream().filter(availableLanguages::contains).toList();
if (selectedLanguages.isEmpty()) {
throw new IOException("None of the selected languages are valid.");
@@ -92,8 +101,16 @@ public class OCRController {
// Run OCR Command
String languageOption = String.join("+", selectedLanguages);
List<String> command = new ArrayList<>(Arrays.asList("ocrmypdf", "--verbose", "2", "--output-type", "pdf", "--pdf-renderer" , ocrRenderType));
List<String> command =
new ArrayList<>(
Arrays.asList(
"ocrmypdf",
"--verbose",
"2",
"--output-type",
"pdf",
"--pdf-renderer",
ocrRenderType));
if (sidecar != null && sidecar) {
sidecarTextPath = Files.createTempFile("sidecar", ".txt");
@@ -120,42 +137,61 @@ public class OCRController {
}
}
command.addAll(Arrays.asList("--language", languageOption, tempInputFile.toString(), tempOutputFile.toString()));
command.addAll(
Arrays.asList(
"--language",
languageOption,
tempInputFile.toString(),
tempOutputFile.toString()));
// Run CLI command
ProcessExecutorResult result = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
if(result.getRc() != 0 && result.getMessages().contains("multiprocessing/synchronize.py") && result.getMessages().contains("OSError: [Errno 38] Function not implemented")) {
command.add("--jobs");
command.add("1");
result = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
ProcessExecutorResult result =
ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF)
.runCommandWithOutputHandling(command);
if (result.getRc() != 0
&& result.getMessages().contains("multiprocessing/synchronize.py")
&& result.getMessages().contains("OSError: [Errno 38] Function not implemented")) {
command.add("--jobs");
command.add("1");
result =
ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF)
.runCommandWithOutputHandling(command);
}
// Remove images from the OCR processed PDF if the flag is set to true
if (removeImagesAfter != null && removeImagesAfter) {
Path tempPdfWithoutImages = Files.createTempFile("output_", "_no_images.pdf");
List<String> gsCommand = Arrays.asList("gs", "-sDEVICE=pdfwrite", "-dFILTERIMAGE", "-o", tempPdfWithoutImages.toString(), tempOutputFile.toString());
List<String> gsCommand =
Arrays.asList(
"gs",
"-sDEVICE=pdfwrite",
"-dFILTERIMAGE",
"-o",
tempPdfWithoutImages.toString(),
tempOutputFile.toString());
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(gsCommand);
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
.runCommandWithOutputHandling(gsCommand);
tempOutputFile = tempPdfWithoutImages;
}
// Read the OCR processed PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Clean up the temporary files
Files.delete(tempInputFile);
// Return the OCR processed PDF as a response
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf";
String outputFilename =
inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf";
if (sidecar != null && sidecar) {
// Create a zip file containing both the PDF and the text file
String outputZipFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.zip";
String outputZipFilename =
inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.zip";
Path tempZipFile = Files.createTempFile("output_", ".zip");
try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
try (ZipOutputStream zipOut =
new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
// Add PDF file to the zip
ZipEntry pdfEntry = new ZipEntry(outputFilename);
zipOut.putNextEntry(pdfEntry);
@@ -177,13 +213,12 @@ public class OCRController {
Files.delete(sidecarTextPath);
// Return the zip file containing both the PDF and the text file
return WebResponseUtils.bytesToWebResponse(zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
return WebResponseUtils.bytesToWebResponse(
zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
} else {
// Return the OCR processed PDF as a response
Files.delete(tempOutputFile);
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
}
}
}

View File

@@ -14,6 +14,7 @@ import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.misc.OverlayImageRequest;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.WebResponseUtils;
@@ -27,9 +28,9 @@ public class OverlayImageController {
@PostMapping(consumes = "multipart/form-data", value = "/add-image")
@Operation(
summary = "Overlay image onto a PDF file",
description = "This endpoint overlays an image onto a PDF file at the specified coordinates. The image can be overlaid on every page of the PDF if specified. Input:PDF/IMAGE Output:PDF Type:MF-SISO"
)
summary = "Overlay image onto a PDF file",
description =
"This endpoint overlays an image onto a PDF file at the specified coordinates. The image can be overlaid on every page of the PDF if specified. Input:PDF/IMAGE Output:PDF Type:MF-SISO")
public ResponseEntity<byte[]> overlayImage(@ModelAttribute OverlayImageRequest request) {
MultipartFile pdfFile = request.getFileInput();
MultipartFile imageFile = request.getImageFile();
@@ -41,7 +42,9 @@ public class OverlayImageController {
byte[] imageBytes = imageFile.getBytes();
byte[] result = PdfUtils.overlayImage(pdfBytes, imageBytes, x, y, everyPage);
return WebResponseUtils.bytesToWebResponse(result, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_overlayed.pdf");
return WebResponseUtils.bytesToWebResponse(
result,
pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_overlayed.pdf");
} catch (IOException e) {
logger.error("Failed to add image to PDF", e);
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);

View File

@@ -21,6 +21,7 @@ import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.misc.AddPageNumbersRequest;
import stirling.software.SPDF.utils.GeneralUtils;
import stirling.software.SPDF.utils.WebResponseUtils;
@@ -33,16 +34,20 @@ public class PageNumbersController {
private static final Logger logger = LoggerFactory.getLogger(PageNumbersController.class);
@PostMapping(value = "/add-page-numbers", consumes = "multipart/form-data")
@Operation(summary = "Add page numbers to a PDF document", description = "This operation takes an input PDF file and adds page numbers to it. Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> addPageNumbers(@ModelAttribute AddPageNumbersRequest request) throws IOException {
@Operation(
summary = "Add page numbers to a PDF document",
description =
"This operation takes an input PDF file and adds page numbers to it. Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> addPageNumbers(@ModelAttribute AddPageNumbersRequest request)
throws IOException {
MultipartFile file = request.getFileInput();
String customMargin = request.getCustomMargin();
int position = request.getPosition();
int startingNumber = request.getStartingNumber();
String pagesToNumber = request.getPagesToNumber();
String customText = request.getCustomText();
int pageNumber = startingNumber;
byte[] fileBytes = file.getBytes();
int pageNumber = startingNumber;
byte[] fileBytes = file.getBytes();
PDDocument document = PDDocument.load(fileBytes);
float marginFactor;
@@ -58,9 +63,8 @@ public class PageNumbersController {
break;
case "x-large":
marginFactor = 0.075f;
break;
break;
default:
marginFactor = 0.035f;
break;
@@ -68,19 +72,29 @@ public class PageNumbersController {
float fontSize = 12.0f;
PDType1Font font = PDType1Font.HELVETICA;
if(pagesToNumber == null || pagesToNumber.length() == 0) {
pagesToNumber = "all";
if (pagesToNumber == null || pagesToNumber.length() == 0) {
pagesToNumber = "all";
}
if(customText == null || customText.length() == 0) {
customText = "{n}";
if (customText == null || customText.length() == 0) {
customText = "{n}";
}
List<Integer> pagesToNumberList = GeneralUtils.parsePageList(pagesToNumber.split(","), document.getNumberOfPages());
List<Integer> pagesToNumberList =
GeneralUtils.parsePageList(pagesToNumber.split(","), document.getNumberOfPages());
for (int i : pagesToNumberList) {
PDPage page = document.getPage(i);
PDRectangle pageSize = page.getMediaBox();
String text = customText != null ? customText.replace("{n}", String.valueOf(pageNumber)).replace("{total}", String.valueOf(document.getNumberOfPages())).replace("{filename}", file.getOriginalFilename().replaceFirst("[.][^.]+$", "")) : String.valueOf(pageNumber);
String text =
customText != null
? customText
.replace("{n}", String.valueOf(pageNumber))
.replace("{total}", String.valueOf(document.getNumberOfPages()))
.replace(
"{filename}",
file.getOriginalFilename()
.replaceFirst("[.][^.]+$", ""))
: String.valueOf(pageNumber);
float x, y;
@@ -88,10 +102,10 @@ public class PageNumbersController {
int yGroup = 2 - (position - 1) / 3;
switch (xGroup) {
case 0: // left
case 0: // left
x = pageSize.getLowerLeftX() + marginFactor * pageSize.getWidth();
break;
case 1: // center
case 1: // center
x = pageSize.getLowerLeftX() + (pageSize.getWidth() / 2);
break;
default: // right
@@ -100,10 +114,10 @@ public class PageNumbersController {
}
switch (yGroup) {
case 0: // bottom
case 0: // bottom
y = pageSize.getLowerLeftY() + marginFactor * pageSize.getHeight();
break;
case 1: // middle
case 1: // middle
y = pageSize.getLowerLeftY() + (pageSize.getHeight() / 2);
break;
default: // top
@@ -111,7 +125,9 @@ public class PageNumbersController {
break;
}
PDPageContentStream contentStream = new PDPageContentStream(document, page, PDPageContentStream.AppendMode.APPEND, true);
PDPageContentStream contentStream =
new PDPageContentStream(
document, page, PDPageContentStream.AppendMode.APPEND, true);
contentStream.beginText();
contentStream.setFont(font, fontSize);
contentStream.newLineAtOffset(x, y);
@@ -126,10 +142,9 @@ public class PageNumbersController {
document.save(baos);
document.close();
return WebResponseUtils.bytesToWebResponse(baos.toByteArray(), file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_numbersAdded.pdf", MediaType.APPLICATION_PDF);
return WebResponseUtils.bytesToWebResponse(
baos.toByteArray(),
file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_numbersAdded.pdf",
MediaType.APPLICATION_PDF);
}
}

View File

@@ -17,6 +17,7 @@ import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.PDFFile;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
@@ -31,11 +32,12 @@ public class RepairController {
@PostMapping(consumes = "multipart/form-data", value = "/repair")
@Operation(
summary = "Repair a PDF file",
description = "This endpoint repairs a given PDF file by running Ghostscript command. The PDF is first saved to a temporary location, repaired, read back, and then returned as a response. Input:PDF Output:PDF Type:SISO"
)
public ResponseEntity<byte[]> repairPdf(@ModelAttribute PDFFile request) throws IOException, InterruptedException {
MultipartFile inputFile = request.getFileInput();
summary = "Repair a PDF file",
description =
"This endpoint repairs a given PDF file by running Ghostscript command. The PDF is first saved to a temporary location, repaired, read back, and then returned as a response. Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> repairPdf(@ModelAttribute PDFFile request)
throws IOException, InterruptedException {
MultipartFile inputFile = request.getFileInput();
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", ".pdf");
inputFile.transferTo(tempInputFile.toFile());
@@ -50,8 +52,9 @@ public class RepairController {
command.add("-sDEVICE=pdfwrite");
command.add(tempInputFile.toString());
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command);
ProcessExecutorResult returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
.runCommandWithOutputHandling(command);
// Read the optimized PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
@@ -61,8 +64,8 @@ public class RepairController {
Files.delete(tempOutputFile);
// Return the optimized PDF as a response
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_repaired.pdf";
String outputFilename =
inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_repaired.pdf";
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
}
}

View File

@@ -17,47 +17,60 @@ import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.PDFFile;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@RequestMapping("/api/v1/misc")
@Tag(name = "Misc", description = "Miscellaneous APIs")
public class ShowJavascript {
private static final Logger logger = LoggerFactory.getLogger(ShowJavascript.class);
@PostMapping(consumes = "multipart/form-data", value = "/show-javascript")
@Operation(summary = "Grabs all JS from a PDF and returns a single JS file with all code", description = "desc. Input:PDF Output:JS Type:SISO")
@Operation(
summary = "Grabs all JS from a PDF and returns a single JS file with all code",
description = "desc. Input:PDF Output:JS Type:SISO")
public ResponseEntity<byte[]> extractHeader(@ModelAttribute PDFFile request) throws Exception {
MultipartFile inputFile = request.getFileInput();
MultipartFile inputFile = request.getFileInput();
String script = "";
try (PDDocument document = PDDocument.load(inputFile.getInputStream())) {
if(document.getDocumentCatalog() != null && document.getDocumentCatalog().getNames() != null) {
PDNameTreeNode<PDActionJavaScript> jsTree = document.getDocumentCatalog().getNames().getJavaScript();
if (jsTree != null) {
Map<String, PDActionJavaScript> jsEntries = jsTree.getNames();
for (Map.Entry<String, PDActionJavaScript> entry : jsEntries.entrySet()) {
String name = entry.getKey();
PDActionJavaScript jsAction = entry.getValue();
String jsCodeStr = jsAction.getAction();
script += "// File: " + inputFile.getOriginalFilename() + ", Script: " + name + "\n" + jsCodeStr + "\n";
}
}
}
if (script.isEmpty()) {
script = "PDF '" + inputFile.getOriginalFilename() + "' does not contain Javascript";
if (document.getDocumentCatalog() != null
&& document.getDocumentCatalog().getNames() != null) {
PDNameTreeNode<PDActionJavaScript> jsTree =
document.getDocumentCatalog().getNames().getJavaScript();
if (jsTree != null) {
Map<String, PDActionJavaScript> jsEntries = jsTree.getNames();
for (Map.Entry<String, PDActionJavaScript> entry : jsEntries.entrySet()) {
String name = entry.getKey();
PDActionJavaScript jsAction = entry.getValue();
String jsCodeStr = jsAction.getAction();
script +=
"// File: "
+ inputFile.getOriginalFilename()
+ ", Script: "
+ name
+ "\n"
+ jsCodeStr
+ "\n";
}
}
}
return WebResponseUtils.bytesToWebResponse(script.getBytes(StandardCharsets.UTF_8), inputFile.getOriginalFilename() + ".js");
if (script.isEmpty()) {
script =
"PDF '" + inputFile.getOriginalFilename() + "' does not contain Javascript";
}
return WebResponseUtils.bytesToWebResponse(
script.getBytes(StandardCharsets.UTF_8),
inputFile.getOriginalFilename() + ".js");
}
}
}