formatting
This commit is contained in:
@@ -19,8 +19,10 @@ import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
|
||||
import stirling.software.SPDF.model.api.misc.ExtractHeaderRequest;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/misc")
|
||||
@Tag(name = "Misc", description = "Miscellaneous APIs")
|
||||
@@ -32,97 +34,105 @@ public class AutoRenameController {
|
||||
private static final int LINE_LIMIT = 11;
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/auto-rename")
|
||||
@Operation(summary = "Extract header from PDF file", description = "This endpoint accepts a PDF file and attempts to extract its title or header based on heuristics. Input:PDF Output:PDF Type:SISO")
|
||||
public ResponseEntity<byte[]> extractHeader(@ModelAttribute ExtractHeaderRequest request) throws Exception {
|
||||
@Operation(
|
||||
summary = "Extract header from PDF file",
|
||||
description =
|
||||
"This endpoint accepts a PDF file and attempts to extract its title or header based on heuristics. Input:PDF Output:PDF Type:SISO")
|
||||
public ResponseEntity<byte[]> extractHeader(@ModelAttribute ExtractHeaderRequest request)
|
||||
throws Exception {
|
||||
MultipartFile file = request.getFileInput();
|
||||
Boolean useFirstTextAsFallback = request.isUseFirstTextAsFallback();
|
||||
|
||||
PDDocument document = PDDocument.load(file.getInputStream());
|
||||
PDFTextStripper reader = new PDFTextStripper() {
|
||||
class LineInfo {
|
||||
String text;
|
||||
float fontSize;
|
||||
PDDocument document = PDDocument.load(file.getInputStream());
|
||||
PDFTextStripper reader =
|
||||
new PDFTextStripper() {
|
||||
class LineInfo {
|
||||
String text;
|
||||
float fontSize;
|
||||
|
||||
LineInfo(String text, float fontSize) {
|
||||
this.text = text;
|
||||
this.fontSize = fontSize;
|
||||
}
|
||||
}
|
||||
LineInfo(String text, float fontSize) {
|
||||
this.text = text;
|
||||
this.fontSize = fontSize;
|
||||
}
|
||||
}
|
||||
|
||||
List<LineInfo> lineInfos = new ArrayList<>();
|
||||
StringBuilder lineBuilder = new StringBuilder();
|
||||
float lastY = -1;
|
||||
float maxFontSizeInLine = 0.0f;
|
||||
int lineCount = 0;
|
||||
List<LineInfo> lineInfos = new ArrayList<>();
|
||||
StringBuilder lineBuilder = new StringBuilder();
|
||||
float lastY = -1;
|
||||
float maxFontSizeInLine = 0.0f;
|
||||
int lineCount = 0;
|
||||
|
||||
@Override
|
||||
protected void processTextPosition(TextPosition text) {
|
||||
if (lastY != text.getY() && lineCount < LINE_LIMIT) {
|
||||
processLine();
|
||||
lineBuilder = new StringBuilder(text.getUnicode());
|
||||
maxFontSizeInLine = text.getFontSizeInPt();
|
||||
lastY = text.getY();
|
||||
lineCount++;
|
||||
} else if (lineCount < LINE_LIMIT) {
|
||||
lineBuilder.append(text.getUnicode());
|
||||
if (text.getFontSizeInPt() > maxFontSizeInLine) {
|
||||
maxFontSizeInLine = text.getFontSizeInPt();
|
||||
}
|
||||
}
|
||||
}
|
||||
@Override
|
||||
protected void processTextPosition(TextPosition text) {
|
||||
if (lastY != text.getY() && lineCount < LINE_LIMIT) {
|
||||
processLine();
|
||||
lineBuilder = new StringBuilder(text.getUnicode());
|
||||
maxFontSizeInLine = text.getFontSizeInPt();
|
||||
lastY = text.getY();
|
||||
lineCount++;
|
||||
} else if (lineCount < LINE_LIMIT) {
|
||||
lineBuilder.append(text.getUnicode());
|
||||
if (text.getFontSizeInPt() > maxFontSizeInLine) {
|
||||
maxFontSizeInLine = text.getFontSizeInPt();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void processLine() {
|
||||
if (lineBuilder.length() > 0 && lineCount < LINE_LIMIT) {
|
||||
lineInfos.add(new LineInfo(lineBuilder.toString(), maxFontSizeInLine));
|
||||
}
|
||||
}
|
||||
private void processLine() {
|
||||
if (lineBuilder.length() > 0 && lineCount < LINE_LIMIT) {
|
||||
lineInfos.add(new LineInfo(lineBuilder.toString(), maxFontSizeInLine));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getText(PDDocument doc) throws IOException {
|
||||
this.lineInfos.clear();
|
||||
this.lineBuilder = new StringBuilder();
|
||||
this.lastY = -1;
|
||||
this.maxFontSizeInLine = 0.0f;
|
||||
this.lineCount = 0;
|
||||
super.getText(doc);
|
||||
processLine(); // Process the last line
|
||||
@Override
|
||||
public String getText(PDDocument doc) throws IOException {
|
||||
this.lineInfos.clear();
|
||||
this.lineBuilder = new StringBuilder();
|
||||
this.lastY = -1;
|
||||
this.maxFontSizeInLine = 0.0f;
|
||||
this.lineCount = 0;
|
||||
super.getText(doc);
|
||||
processLine(); // Process the last line
|
||||
|
||||
// Merge lines with same font size
|
||||
List<LineInfo> mergedLineInfos = new ArrayList<>();
|
||||
for (int i = 0; i < lineInfos.size(); i++) {
|
||||
String mergedText = lineInfos.get(i).text;
|
||||
float fontSize = lineInfos.get(i).fontSize;
|
||||
while (i + 1 < lineInfos.size() && lineInfos.get(i + 1).fontSize == fontSize) {
|
||||
mergedText += " " + lineInfos.get(i + 1).text;
|
||||
i++;
|
||||
}
|
||||
mergedLineInfos.add(new LineInfo(mergedText, fontSize));
|
||||
}
|
||||
// Merge lines with same font size
|
||||
List<LineInfo> mergedLineInfos = new ArrayList<>();
|
||||
for (int i = 0; i < lineInfos.size(); i++) {
|
||||
String mergedText = lineInfos.get(i).text;
|
||||
float fontSize = lineInfos.get(i).fontSize;
|
||||
while (i + 1 < lineInfos.size()
|
||||
&& lineInfos.get(i + 1).fontSize == fontSize) {
|
||||
mergedText += " " + lineInfos.get(i + 1).text;
|
||||
i++;
|
||||
}
|
||||
mergedLineInfos.add(new LineInfo(mergedText, fontSize));
|
||||
}
|
||||
|
||||
// Sort lines by font size in descending order and get the first one
|
||||
mergedLineInfos.sort(Comparator.comparing((LineInfo li) -> li.fontSize).reversed());
|
||||
String title = mergedLineInfos.isEmpty() ? null : mergedLineInfos.get(0).text;
|
||||
// Sort lines by font size in descending order and get the first one
|
||||
mergedLineInfos.sort(
|
||||
Comparator.comparing((LineInfo li) -> li.fontSize).reversed());
|
||||
String title =
|
||||
mergedLineInfos.isEmpty() ? null : mergedLineInfos.get(0).text;
|
||||
|
||||
return title != null ? title : (useFirstTextAsFallback ? (mergedLineInfos.isEmpty() ? null : mergedLineInfos.get(mergedLineInfos.size() - 1).text) : null);
|
||||
}
|
||||
return title != null
|
||||
? title
|
||||
: (useFirstTextAsFallback
|
||||
? (mergedLineInfos.isEmpty()
|
||||
? null
|
||||
: mergedLineInfos.get(mergedLineInfos.size() - 1)
|
||||
.text)
|
||||
: null);
|
||||
}
|
||||
};
|
||||
|
||||
};
|
||||
String header = reader.getText(document);
|
||||
|
||||
String header = reader.getText(document);
|
||||
|
||||
|
||||
|
||||
// Sanitize the header string by removing characters not allowed in a filename.
|
||||
if (header != null && header.length() < 255) {
|
||||
header = header.replaceAll("[/\\\\?%*:|\"<>]", "");
|
||||
return WebResponseUtils.pdfDocToWebResponse(document, header + ".pdf");
|
||||
} else {
|
||||
logger.info("File has no good title to be found");
|
||||
return WebResponseUtils.pdfDocToWebResponse(document, file.getOriginalFilename());
|
||||
logger.info("File has no good title to be found");
|
||||
return WebResponseUtils.pdfDocToWebResponse(document, file.getOriginalFilename());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
package stirling.software.SPDF.controller.api.misc;
|
||||
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.awt.image.DataBufferByte;
|
||||
import java.awt.image.DataBufferInt;
|
||||
@@ -32,6 +33,7 @@ import com.google.zxing.common.HybridBinarizer;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
|
||||
import stirling.software.SPDF.model.api.misc.AutoSplitPdfRequest;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@@ -43,8 +45,12 @@ public class AutoSplitPdfController {
|
||||
private static final String QR_CONTENT = "https://github.com/Frooodle/Stirling-PDF";
|
||||
|
||||
@PostMapping(value = "/auto-split-pdf", consumes = "multipart/form-data")
|
||||
@Operation(summary = "Auto split PDF pages into separate documents", description = "This endpoint accepts a PDF file, scans each page for a specific QR code, and splits the document at the QR code boundaries. The output is a zip file containing each separate PDF document. Input:PDF Output:ZIP-PDF Type:SISO")
|
||||
public ResponseEntity<byte[]> autoSplitPdf(@ModelAttribute AutoSplitPdfRequest request) throws IOException {
|
||||
@Operation(
|
||||
summary = "Auto split PDF pages into separate documents",
|
||||
description =
|
||||
"This endpoint accepts a PDF file, scans each page for a specific QR code, and splits the document at the QR code boundaries. The output is a zip file containing each separate PDF document. Input:PDF Output:ZIP-PDF Type:SISO")
|
||||
public ResponseEntity<byte[]> autoSplitPdf(@ModelAttribute AutoSplitPdfRequest request)
|
||||
throws IOException {
|
||||
MultipartFile file = request.getFileInput();
|
||||
boolean duplexMode = request.isDuplexMode();
|
||||
|
||||
@@ -107,29 +113,48 @@ public class AutoSplitPdfController {
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
data = Files.readAllBytes(zipFile);
|
||||
data = Files.readAllBytes(zipFile);
|
||||
Files.delete(zipFile);
|
||||
}
|
||||
|
||||
return WebResponseUtils.bytesToWebResponse(data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM);
|
||||
return WebResponseUtils.bytesToWebResponse(
|
||||
data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM);
|
||||
}
|
||||
|
||||
|
||||
private static String decodeQRCode(BufferedImage bufferedImage) {
|
||||
LuminanceSource source;
|
||||
|
||||
if (bufferedImage.getRaster().getDataBuffer() instanceof DataBufferByte) {
|
||||
byte[] pixels = ((DataBufferByte) bufferedImage.getRaster().getDataBuffer()).getData();
|
||||
source = new PlanarYUVLuminanceSource(pixels, bufferedImage.getWidth(), bufferedImage.getHeight(), 0, 0, bufferedImage.getWidth(), bufferedImage.getHeight(), false);
|
||||
source =
|
||||
new PlanarYUVLuminanceSource(
|
||||
pixels,
|
||||
bufferedImage.getWidth(),
|
||||
bufferedImage.getHeight(),
|
||||
0,
|
||||
0,
|
||||
bufferedImage.getWidth(),
|
||||
bufferedImage.getHeight(),
|
||||
false);
|
||||
} else if (bufferedImage.getRaster().getDataBuffer() instanceof DataBufferInt) {
|
||||
int[] pixels = ((DataBufferInt) bufferedImage.getRaster().getDataBuffer()).getData();
|
||||
byte[] newPixels = new byte[pixels.length];
|
||||
for (int i = 0; i < pixels.length; i++) {
|
||||
newPixels[i] = (byte) (pixels[i] & 0xff);
|
||||
}
|
||||
source = new PlanarYUVLuminanceSource(newPixels, bufferedImage.getWidth(), bufferedImage.getHeight(), 0, 0, bufferedImage.getWidth(), bufferedImage.getHeight(), false);
|
||||
source =
|
||||
new PlanarYUVLuminanceSource(
|
||||
newPixels,
|
||||
bufferedImage.getWidth(),
|
||||
bufferedImage.getHeight(),
|
||||
0,
|
||||
0,
|
||||
bufferedImage.getWidth(),
|
||||
bufferedImage.getHeight(),
|
||||
false);
|
||||
} else {
|
||||
throw new IllegalArgumentException("BufferedImage must have 8-bit gray scale, 24-bit RGB, 32-bit ARGB (packed int), byte gray, or 3-byte/4-byte RGB image data");
|
||||
throw new IllegalArgumentException(
|
||||
"BufferedImage must have 8-bit gray scale, 24-bit RGB, 32-bit ARGB (packed int), byte gray, or 3-byte/4-byte RGB image data");
|
||||
}
|
||||
|
||||
BinaryBitmap bitmap = new BinaryBitmap(new HybridBinarizer(source));
|
||||
|
||||
@@ -28,6 +28,7 @@ import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
|
||||
import stirling.software.SPDF.model.api.misc.RemoveBlankPagesRequest;
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
@@ -39,17 +40,18 @@ import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
@Tag(name = "Misc", description = "Miscellaneous APIs")
|
||||
public class BlankPageController {
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/remove-blanks")
|
||||
@Operation(
|
||||
summary = "Remove blank pages from a PDF file",
|
||||
description = "This endpoint removes blank pages from a given PDF file. Users can specify the threshold and white percentage to tune the detection of blank pages. Input:PDF Output:PDF Type:SISO"
|
||||
)
|
||||
public ResponseEntity<byte[]> removeBlankPages(@ModelAttribute RemoveBlankPagesRequest request) throws IOException, InterruptedException {
|
||||
MultipartFile inputFile = request.getFileInput();
|
||||
int threshold = request.getThreshold();
|
||||
float whitePercent = request.getWhitePercent();
|
||||
|
||||
PDDocument document = null;
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/remove-blanks")
|
||||
@Operation(
|
||||
summary = "Remove blank pages from a PDF file",
|
||||
description =
|
||||
"This endpoint removes blank pages from a given PDF file. Users can specify the threshold and white percentage to tune the detection of blank pages. Input:PDF Output:PDF Type:SISO")
|
||||
public ResponseEntity<byte[]> removeBlankPages(@ModelAttribute RemoveBlankPagesRequest request)
|
||||
throws IOException, InterruptedException {
|
||||
MultipartFile inputFile = request.getFileInput();
|
||||
int threshold = request.getThreshold();
|
||||
float whitePercent = request.getWhitePercent();
|
||||
|
||||
PDDocument document = null;
|
||||
try {
|
||||
document = PDDocument.load(inputFile.getInputStream());
|
||||
PDPageTree pages = document.getDocumentCatalog().getPages();
|
||||
@@ -72,21 +74,34 @@ public class BlankPageController {
|
||||
boolean hasImages = PdfUtils.hasImagesOnPage(page);
|
||||
if (hasImages) {
|
||||
System.out.println("page " + pageIndex + " has image");
|
||||
|
||||
|
||||
Path tempFile = Files.createTempFile("image_", ".png");
|
||||
|
||||
|
||||
// Render image and save as temp file
|
||||
BufferedImage image = pdfRenderer.renderImageWithDPI(pageIndex, 300);
|
||||
ImageIO.write(image, "png", tempFile.toFile());
|
||||
|
||||
List<String> command = new ArrayList<>(Arrays.asList("python3", System.getProperty("user.dir") + "/scripts/detect-blank-pages.py", tempFile.toString() ,"--threshold", String.valueOf(threshold), "--white_percent", String.valueOf(whitePercent)));
|
||||
|
||||
|
||||
List<String> command =
|
||||
new ArrayList<>(
|
||||
Arrays.asList(
|
||||
"python3",
|
||||
System.getProperty("user.dir")
|
||||
+ "/scripts/detect-blank-pages.py",
|
||||
tempFile.toString(),
|
||||
"--threshold",
|
||||
String.valueOf(threshold),
|
||||
"--white_percent",
|
||||
String.valueOf(whitePercent)));
|
||||
|
||||
// Run CLI command
|
||||
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command);
|
||||
|
||||
ProcessExecutorResult returnCode =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV)
|
||||
.runCommandWithOutputHandling(command);
|
||||
|
||||
// does contain data
|
||||
if (returnCode.getRc() == 0) {
|
||||
System.out.println("page " + pageIndex + " has image which is not blank");
|
||||
System.out.println(
|
||||
"page " + pageIndex + " has image which is not blank");
|
||||
pagesToKeepIndex.add(pageIndex);
|
||||
} else {
|
||||
System.out.println("Skipping, Image was blank for page #" + pageIndex);
|
||||
@@ -94,12 +109,12 @@ public class BlankPageController {
|
||||
}
|
||||
}
|
||||
pageIndex++;
|
||||
|
||||
}
|
||||
System.out.print("pagesToKeep=" + pagesToKeepIndex.size());
|
||||
|
||||
// Remove pages not present in pagesToKeepIndex
|
||||
List<Integer> pageIndices = IntStream.range(0, pages.getCount()).boxed().collect(Collectors.toList());
|
||||
List<Integer> pageIndices =
|
||||
IntStream.range(0, pages.getCount()).boxed().collect(Collectors.toList());
|
||||
Collections.reverse(pageIndices); // Reverse to prevent index shifting during removal
|
||||
for (Integer i : pageIndices) {
|
||||
if (!pagesToKeepIndex.contains(i)) {
|
||||
@@ -107,16 +122,15 @@ public class BlankPageController {
|
||||
}
|
||||
}
|
||||
|
||||
return WebResponseUtils.pdfDocToWebResponse(document, inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_blanksRemoved.pdf");
|
||||
return WebResponseUtils.pdfDocToWebResponse(
|
||||
document,
|
||||
inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "")
|
||||
+ "_blanksRemoved.pdf");
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
return new ResponseEntity<>(HttpStatus.INTERNAL_SERVER_ERROR);
|
||||
} finally {
|
||||
if (document != null)
|
||||
document.close();
|
||||
if (document != null) document.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@ import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
|
||||
import stirling.software.SPDF.model.api.misc.OptimizePdfRequest;
|
||||
import stirling.software.SPDF.utils.GeneralUtils;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
@@ -44,20 +45,23 @@ public class CompressController {
|
||||
private static final Logger logger = LoggerFactory.getLogger(CompressController.class);
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/compress-pdf")
|
||||
@Operation(summary = "Optimize PDF file", description = "This endpoint accepts a PDF file and optimizes it based on the provided parameters. Input:PDF Output:PDF Type:SISO")
|
||||
public ResponseEntity<byte[]> optimizePdf(@ModelAttribute OptimizePdfRequest request) throws Exception {
|
||||
@Operation(
|
||||
summary = "Optimize PDF file",
|
||||
description =
|
||||
"This endpoint accepts a PDF file and optimizes it based on the provided parameters. Input:PDF Output:PDF Type:SISO")
|
||||
public ResponseEntity<byte[]> optimizePdf(@ModelAttribute OptimizePdfRequest request)
|
||||
throws Exception {
|
||||
MultipartFile inputFile = request.getFileInput();
|
||||
Integer optimizeLevel = request.getOptimizeLevel();
|
||||
String expectedOutputSizeString = request.getExpectedOutputSize();
|
||||
|
||||
|
||||
if(expectedOutputSizeString == null && optimizeLevel == null) {
|
||||
if (expectedOutputSizeString == null && optimizeLevel == null) {
|
||||
throw new Exception("Both expected output size and optimize level are not specified");
|
||||
}
|
||||
|
||||
Long expectedOutputSize = 0L;
|
||||
boolean autoMode = false;
|
||||
if (expectedOutputSizeString != null && expectedOutputSizeString.length() > 1 ) {
|
||||
if (expectedOutputSizeString != null && expectedOutputSizeString.length() > 1) {
|
||||
expectedOutputSize = GeneralUtils.convertSizeToBytes(expectedOutputSizeString);
|
||||
autoMode = true;
|
||||
}
|
||||
@@ -71,8 +75,9 @@ public class CompressController {
|
||||
// Prepare the output file path
|
||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
|
||||
// Determine initial optimization level based on expected size reduction, only if in autoMode
|
||||
if(autoMode) {
|
||||
// Determine initial optimization level based on expected size reduction, only if in
|
||||
// autoMode
|
||||
if (autoMode) {
|
||||
double sizeReductionRatio = expectedOutputSize / (double) inputFileSize;
|
||||
if (sizeReductionRatio > 0.7) {
|
||||
optimizeLevel = 1;
|
||||
@@ -94,20 +99,20 @@ public class CompressController {
|
||||
command.add("-dCompatibilityLevel=1.4");
|
||||
|
||||
switch (optimizeLevel) {
|
||||
case 1:
|
||||
command.add("-dPDFSETTINGS=/prepress");
|
||||
break;
|
||||
case 2:
|
||||
command.add("-dPDFSETTINGS=/printer");
|
||||
break;
|
||||
case 3:
|
||||
command.add("-dPDFSETTINGS=/ebook");
|
||||
break;
|
||||
case 4:
|
||||
command.add("-dPDFSETTINGS=/screen");
|
||||
break;
|
||||
default:
|
||||
command.add("-dPDFSETTINGS=/default");
|
||||
case 1:
|
||||
command.add("-dPDFSETTINGS=/prepress");
|
||||
break;
|
||||
case 2:
|
||||
command.add("-dPDFSETTINGS=/printer");
|
||||
break;
|
||||
case 3:
|
||||
command.add("-dPDFSETTINGS=/ebook");
|
||||
break;
|
||||
case 4:
|
||||
command.add("-dPDFSETTINGS=/screen");
|
||||
break;
|
||||
default:
|
||||
command.add("-dPDFSETTINGS=/default");
|
||||
}
|
||||
|
||||
command.add("-dNOPAUSE");
|
||||
@@ -116,7 +121,9 @@ public class CompressController {
|
||||
command.add("-sOutputFile=" + tempOutputFile.toString());
|
||||
command.add(tempInputFile.toString());
|
||||
|
||||
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command);
|
||||
ProcessExecutorResult returnCode =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
|
||||
.runCommandWithOutputHandling(command);
|
||||
|
||||
// Check if file size is within expected size or not auto mode so instantly finish
|
||||
long outputFileSize = Files.size(tempOutputFile);
|
||||
@@ -125,19 +132,18 @@ public class CompressController {
|
||||
} else {
|
||||
// Increase optimization level for next iteration
|
||||
optimizeLevel++;
|
||||
if(autoMode && optimizeLevel > 3) {
|
||||
if (autoMode && optimizeLevel > 3) {
|
||||
System.out.println("Skipping level 4 due to bad results in auto mode");
|
||||
sizeMet = true;
|
||||
} else if(optimizeLevel == 5) {
|
||||
|
||||
} else if (optimizeLevel == 5) {
|
||||
|
||||
} else {
|
||||
System.out.println("Increasing ghostscript optimisation level to " + optimizeLevel);
|
||||
System.out.println(
|
||||
"Increasing ghostscript optimisation level to " + optimizeLevel);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (expectedOutputSize != null && autoMode) {
|
||||
long outputFileSize = Files.size(tempOutputFile);
|
||||
if (outputFileSize > expectedOutputSize) {
|
||||
@@ -157,8 +163,8 @@ public class CompressController {
|
||||
BufferedImage bufferedImage = image.getImage();
|
||||
|
||||
// Calculate the new dimensions
|
||||
int newWidth = (int)(bufferedImage.getWidth() * scaleFactor);
|
||||
int newHeight = (int)(bufferedImage.getHeight() * scaleFactor);
|
||||
int newWidth = (int) (bufferedImage.getWidth() * scaleFactor);
|
||||
int newHeight = (int) (bufferedImage.getHeight() * scaleFactor);
|
||||
|
||||
// If the new dimensions are zero, skip this iteration
|
||||
if (newWidth == 0 || newHeight == 0) {
|
||||
@@ -166,23 +172,39 @@ public class CompressController {
|
||||
}
|
||||
|
||||
// Otherwise, proceed with the scaling
|
||||
Image scaledImage = bufferedImage.getScaledInstance(newWidth, newHeight, Image.SCALE_SMOOTH);
|
||||
Image scaledImage =
|
||||
bufferedImage.getScaledInstance(
|
||||
newWidth, newHeight, Image.SCALE_SMOOTH);
|
||||
|
||||
// Convert the scaled image back to a BufferedImage
|
||||
BufferedImage scaledBufferedImage = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_INT_RGB);
|
||||
scaledBufferedImage.getGraphics().drawImage(scaledImage, 0, 0, null);
|
||||
BufferedImage scaledBufferedImage =
|
||||
new BufferedImage(
|
||||
newWidth,
|
||||
newHeight,
|
||||
BufferedImage.TYPE_INT_RGB);
|
||||
scaledBufferedImage
|
||||
.getGraphics()
|
||||
.drawImage(scaledImage, 0, 0, null);
|
||||
|
||||
// Compress the scaled image
|
||||
ByteArrayOutputStream compressedImageStream = new ByteArrayOutputStream();
|
||||
ImageIO.write(scaledBufferedImage, "jpeg", compressedImageStream);
|
||||
ByteArrayOutputStream compressedImageStream =
|
||||
new ByteArrayOutputStream();
|
||||
ImageIO.write(
|
||||
scaledBufferedImage, "jpeg", compressedImageStream);
|
||||
byte[] imageBytes = compressedImageStream.toByteArray();
|
||||
compressedImageStream.close();
|
||||
|
||||
// Convert compressed image back to PDImageXObject
|
||||
ByteArrayInputStream bais = new ByteArrayInputStream(imageBytes);
|
||||
PDImageXObject compressedImage = PDImageXObject.createFromByteArray(doc, imageBytes, image.getCOSObject().toString());
|
||||
ByteArrayInputStream bais =
|
||||
new ByteArrayInputStream(imageBytes);
|
||||
PDImageXObject compressedImage =
|
||||
PDImageXObject.createFromByteArray(
|
||||
doc,
|
||||
imageBytes,
|
||||
image.getCOSObject().toString());
|
||||
|
||||
// Replace the image in the resources with the compressed version
|
||||
// Replace the image in the resources with the compressed
|
||||
// version
|
||||
res.put(name, compressedImage);
|
||||
}
|
||||
}
|
||||
@@ -194,16 +216,23 @@ public class CompressController {
|
||||
long currentSize = Files.size(tempOutputFile);
|
||||
// Check if the overall PDF size is still larger than expectedOutputSize
|
||||
if (currentSize > expectedOutputSize) {
|
||||
// Log the current file size and scaleFactor
|
||||
|
||||
System.out.println("Current file size: " + FileUtils.byteCountToDisplaySize(currentSize));
|
||||
// Log the current file size and scaleFactor
|
||||
|
||||
System.out.println(
|
||||
"Current file size: "
|
||||
+ FileUtils.byteCountToDisplaySize(currentSize));
|
||||
System.out.println("Current scale factor: " + scaleFactor);
|
||||
|
||||
// The file is still too large, reduce scaleFactor and try again
|
||||
scaleFactor *= 0.9; // reduce scaleFactor by 10%
|
||||
// Avoid scaleFactor being too small, causing the image to shrink to 0
|
||||
if(scaleFactor < 0.2 || previousFileSize == currentSize){
|
||||
throw new RuntimeException("Could not reach the desired size without excessively degrading image quality, lowest size recommended is " + FileUtils.byteCountToDisplaySize(currentSize) + ", " + currentSize + " bytes");
|
||||
if (scaleFactor < 0.2 || previousFileSize == currentSize) {
|
||||
throw new RuntimeException(
|
||||
"Could not reach the desired size without excessively degrading image quality, lowest size recommended is "
|
||||
+ FileUtils.byteCountToDisplaySize(currentSize)
|
||||
+ ", "
|
||||
+ currentSize
|
||||
+ " bytes");
|
||||
}
|
||||
previousFileSize = currentSize;
|
||||
} else {
|
||||
@@ -211,10 +240,7 @@ public class CompressController {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -222,9 +248,10 @@ public class CompressController {
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
// Check if optimized file is larger than the original
|
||||
if(pdfBytes.length > inputFileSize) {
|
||||
if (pdfBytes.length > inputFileSize) {
|
||||
// Log the occurrence
|
||||
logger.warn("Optimized file is larger than the original. Returning the original file instead.");
|
||||
logger.warn(
|
||||
"Optimized file is larger than the original. Returning the original file instead.");
|
||||
|
||||
// Read the original file again
|
||||
pdfBytes = Files.readAllBytes(tempInputFile);
|
||||
@@ -235,8 +262,8 @@ public class CompressController {
|
||||
Files.delete(tempOutputFile);
|
||||
|
||||
// Return the optimized PDF as a response
|
||||
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_Optimized.pdf";
|
||||
String outputFilename =
|
||||
inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_Optimized.pdf";
|
||||
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -32,10 +32,12 @@ import io.swagger.v3.oas.annotations.media.Content;
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import io.swagger.v3.oas.annotations.parameters.RequestBody;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
|
||||
import stirling.software.SPDF.model.api.misc.ExtractImageScansRequest;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/misc")
|
||||
@Tag(name = "Misc", description = "Miscellaneous APIs")
|
||||
@@ -44,18 +46,28 @@ public class ExtractImageScansController {
|
||||
private static final Logger logger = LoggerFactory.getLogger(ExtractImageScansController.class);
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/extract-image-scans")
|
||||
@Operation(summary = "Extract image scans from an input file",
|
||||
description = "This endpoint extracts image scans from a given file based on certain parameters. Users can specify angle threshold, tolerance, minimum area, minimum contour area, and border size. Input:PDF Output:IMAGE/ZIP Type:SIMO")
|
||||
@Operation(
|
||||
summary = "Extract image scans from an input file",
|
||||
description =
|
||||
"This endpoint extracts image scans from a given file based on certain parameters. Users can specify angle threshold, tolerance, minimum area, minimum contour area, and border size. Input:PDF Output:IMAGE/ZIP Type:SIMO")
|
||||
public ResponseEntity<byte[]> extractImageScans(
|
||||
@RequestBody(
|
||||
description = "Form data containing file and extraction parameters",
|
||||
required = true,
|
||||
content = @Content(
|
||||
mediaType = "multipart/form-data",
|
||||
schema = @Schema(implementation = ExtractImageScansRequest.class) // This should represent your form's structure
|
||||
)
|
||||
)
|
||||
ExtractImageScansRequest form) throws IOException, InterruptedException {
|
||||
@RequestBody(
|
||||
description = "Form data containing file and extraction parameters",
|
||||
required = true,
|
||||
content =
|
||||
@Content(
|
||||
mediaType = "multipart/form-data",
|
||||
schema =
|
||||
@Schema(
|
||||
implementation =
|
||||
ExtractImageScansRequest
|
||||
.class) // This should
|
||||
// represent
|
||||
// your form's
|
||||
// structure
|
||||
))
|
||||
ExtractImageScansRequest form)
|
||||
throws IOException, InterruptedException {
|
||||
String fileName = form.getFileInput().getOriginalFilename();
|
||||
String extension = fileName.substring(fileName.lastIndexOf(".") + 1);
|
||||
|
||||
@@ -64,7 +76,8 @@ public class ExtractImageScansController {
|
||||
// Check if input file is a PDF
|
||||
if (extension.equalsIgnoreCase("pdf")) {
|
||||
// Load PDF document
|
||||
try (PDDocument document = PDDocument.load(new ByteArrayInputStream(form.getFileInput().getBytes()))) {
|
||||
try (PDDocument document =
|
||||
PDDocument.load(new ByteArrayInputStream(form.getFileInput().getBytes()))) {
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||
int pageCount = document.getNumberOfPages();
|
||||
images = new ArrayList<>();
|
||||
@@ -84,7 +97,10 @@ public class ExtractImageScansController {
|
||||
}
|
||||
} else {
|
||||
Path tempInputFile = Files.createTempFile("input_", "." + extension);
|
||||
Files.copy(form.getFileInput().getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
|
||||
Files.copy(
|
||||
form.getFileInput().getInputStream(),
|
||||
tempInputFile,
|
||||
StandardCopyOption.REPLACE_EXISTING);
|
||||
// Add input file path to images list
|
||||
images.add(tempInputFile.toString());
|
||||
}
|
||||
@@ -95,21 +111,28 @@ public class ExtractImageScansController {
|
||||
for (int i = 0; i < images.size(); i++) {
|
||||
|
||||
Path tempDir = Files.createTempDirectory("openCV_output");
|
||||
List<String> command = new ArrayList<>(Arrays.asList(
|
||||
"python3",
|
||||
"./scripts/split_photos.py",
|
||||
images.get(i),
|
||||
tempDir.toString(),
|
||||
"--angle_threshold", String.valueOf(form.getAngleThreshold()),
|
||||
"--tolerance", String.valueOf(form.getTolerance()),
|
||||
"--min_area", String.valueOf(form.getMinArea()),
|
||||
"--min_contour_area", String.valueOf(form.getMinContourArea()),
|
||||
"--border_size", String.valueOf(form.getBorderSize())
|
||||
));
|
||||
|
||||
List<String> command =
|
||||
new ArrayList<>(
|
||||
Arrays.asList(
|
||||
"python3",
|
||||
"./scripts/split_photos.py",
|
||||
images.get(i),
|
||||
tempDir.toString(),
|
||||
"--angle_threshold",
|
||||
String.valueOf(form.getAngleThreshold()),
|
||||
"--tolerance",
|
||||
String.valueOf(form.getTolerance()),
|
||||
"--min_area",
|
||||
String.valueOf(form.getMinArea()),
|
||||
"--min_contour_area",
|
||||
String.valueOf(form.getMinContourArea()),
|
||||
"--border_size",
|
||||
String.valueOf(form.getBorderSize())));
|
||||
|
||||
// Run CLI command
|
||||
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command);
|
||||
ProcessExecutorResult returnCode =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV)
|
||||
.runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the output photos in temp directory
|
||||
List<Path> tempOutputFiles = Files.list(tempDir).sorted().collect(Collectors.toList());
|
||||
@@ -126,10 +149,16 @@ public class ExtractImageScansController {
|
||||
String outputZipFilename = fileName.replaceFirst("[.][^.]+$", "") + "_processed.zip";
|
||||
Path tempZipFile = Files.createTempFile("output_", ".zip");
|
||||
|
||||
try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
|
||||
try (ZipOutputStream zipOut =
|
||||
new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
|
||||
// Add processed images to the zip
|
||||
for (int i = 0; i < processedImageBytes.size(); i++) {
|
||||
ZipEntry entry = new ZipEntry(fileName.replaceFirst("[.][^.]+$", "") + "_" + (i + 1) + ".png");
|
||||
ZipEntry entry =
|
||||
new ZipEntry(
|
||||
fileName.replaceFirst("[.][^.]+$", "")
|
||||
+ "_"
|
||||
+ (i + 1)
|
||||
+ ".png");
|
||||
zipOut.putNextEntry(entry);
|
||||
zipOut.write(processedImageBytes.get(i));
|
||||
zipOut.closeEntry();
|
||||
@@ -141,13 +170,15 @@ public class ExtractImageScansController {
|
||||
// Clean up the temporary zip file
|
||||
Files.delete(tempZipFile);
|
||||
|
||||
return WebResponseUtils.bytesToWebResponse(zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
|
||||
return WebResponseUtils.bytesToWebResponse(
|
||||
zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
|
||||
} else {
|
||||
// Return the processed image as a response
|
||||
byte[] imageBytes = processedImageBytes.get(0);
|
||||
return WebResponseUtils.bytesToWebResponse(imageBytes, fileName.replaceFirst("[.][^.]+$", "") + ".png", MediaType.IMAGE_PNG);
|
||||
return WebResponseUtils.bytesToWebResponse(
|
||||
imageBytes,
|
||||
fileName.replaceFirst("[.][^.]+$", "") + ".png",
|
||||
MediaType.IMAGE_PNG);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
package stirling.software.SPDF.controller.api.misc;
|
||||
|
||||
import java.awt.Graphics2D;
|
||||
import java.awt.Image;
|
||||
import java.awt.image.BufferedImage;
|
||||
@@ -29,8 +30,10 @@ import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
|
||||
import stirling.software.SPDF.model.api.PDFWithImageFormatRequest;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/misc")
|
||||
@Tag(name = "Misc", description = "Miscellaneous APIs")
|
||||
@@ -39,13 +42,17 @@ public class ExtractImagesController {
|
||||
private static final Logger logger = LoggerFactory.getLogger(ExtractImagesController.class);
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/extract-images")
|
||||
@Operation(summary = "Extract images from a PDF file",
|
||||
description = "This endpoint extracts images from a given PDF file and returns them in a zip file. Users can specify the output image format. Input:PDF Output:IMAGE/ZIP Type:SIMO")
|
||||
public ResponseEntity<byte[]> extractImages(@ModelAttribute PDFWithImageFormatRequest request) throws IOException {
|
||||
@Operation(
|
||||
summary = "Extract images from a PDF file",
|
||||
description =
|
||||
"This endpoint extracts images from a given PDF file and returns them in a zip file. Users can specify the output image format. Input:PDF Output:IMAGE/ZIP Type:SIMO")
|
||||
public ResponseEntity<byte[]> extractImages(@ModelAttribute PDFWithImageFormatRequest request)
|
||||
throws IOException {
|
||||
MultipartFile file = request.getFileInput();
|
||||
String format = request.getFormat();
|
||||
|
||||
System.out.println(System.currentTimeMillis() + "file=" + file.getName() + ", format=" + format);
|
||||
System.out.println(
|
||||
System.currentTimeMillis() + "file=" + file.getName() + ", format=" + format);
|
||||
PDDocument document = PDDocument.load(file.getBytes());
|
||||
|
||||
// Create ByteArrayOutputStream to write zip file to byte array
|
||||
@@ -69,24 +76,37 @@ public class ExtractImagesController {
|
||||
if (page.getResources().isImageXObject(name)) {
|
||||
PDImageXObject image = (PDImageXObject) page.getResources().getXObject(name);
|
||||
int imageHash = image.hashCode();
|
||||
if(processedImages.contains(imageHash)) {
|
||||
if (processedImages.contains(imageHash)) {
|
||||
continue; // Skip already processed images
|
||||
}
|
||||
processedImages.add(imageHash);
|
||||
|
||||
|
||||
// Convert image to desired format
|
||||
RenderedImage renderedImage = image.getImage();
|
||||
BufferedImage bufferedImage = null;
|
||||
if (format.equalsIgnoreCase("png")) {
|
||||
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), BufferedImage.TYPE_INT_ARGB);
|
||||
bufferedImage =
|
||||
new BufferedImage(
|
||||
renderedImage.getWidth(),
|
||||
renderedImage.getHeight(),
|
||||
BufferedImage.TYPE_INT_ARGB);
|
||||
} else if (format.equalsIgnoreCase("jpeg") || format.equalsIgnoreCase("jpg")) {
|
||||
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), BufferedImage.TYPE_INT_RGB);
|
||||
bufferedImage =
|
||||
new BufferedImage(
|
||||
renderedImage.getWidth(),
|
||||
renderedImage.getHeight(),
|
||||
BufferedImage.TYPE_INT_RGB);
|
||||
} else if (format.equalsIgnoreCase("gif")) {
|
||||
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), BufferedImage.TYPE_BYTE_INDEXED);
|
||||
bufferedImage =
|
||||
new BufferedImage(
|
||||
renderedImage.getWidth(),
|
||||
renderedImage.getHeight(),
|
||||
BufferedImage.TYPE_BYTE_INDEXED);
|
||||
}
|
||||
|
||||
// Write image to zip file
|
||||
String imageName = filename + "_" + imageIndex + " (Page " + pageNum + ")." + format;
|
||||
String imageName =
|
||||
filename + "_" + imageIndex + " (Page " + pageNum + ")." + format;
|
||||
ZipEntry zipEntry = new ZipEntry(imageName);
|
||||
zos.putNextEntry(zipEntry);
|
||||
|
||||
@@ -111,7 +131,7 @@ public class ExtractImagesController {
|
||||
// Create ByteArrayResource from byte array
|
||||
byte[] zipContents = baos.toByteArray();
|
||||
|
||||
return WebResponseUtils.boasToWebResponse(baos, filename + "_extracted-images.zip", MediaType.APPLICATION_OCTET_STREAM);
|
||||
return WebResponseUtils.boasToWebResponse(
|
||||
baos, filename + "_extracted-images.zip", MediaType.APPLICATION_OCTET_STREAM);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -3,21 +3,17 @@ package stirling.software.SPDF.controller.api.misc;
|
||||
import java.awt.Color;
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.awt.image.AffineTransformOp;
|
||||
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.awt.image.BufferedImageOp;
|
||||
import java.awt.image.ConvolveOp;
|
||||
import java.awt.image.Kernel;
|
||||
import java.awt.image.RescaleOp;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.security.SecureRandom;
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
@@ -40,6 +36,7 @@ import org.springframework.web.multipart.MultipartFile;
|
||||
import io.swagger.v3.oas.annotations.Hidden;
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
|
||||
import stirling.software.SPDF.model.api.PDFFile;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@@ -50,102 +47,101 @@ public class FakeScanControllerWIP {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(FakeScanControllerWIP.class);
|
||||
|
||||
//TODO
|
||||
// TODO
|
||||
@Hidden
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/fakeScan")
|
||||
@Operation(
|
||||
summary = "Repair a PDF file",
|
||||
description = "This endpoint repairs a given PDF file by running Ghostscript command. The PDF is first saved to a temporary location, repaired, read back, and then returned as a response."
|
||||
)
|
||||
summary = "Repair a PDF file",
|
||||
description =
|
||||
"This endpoint repairs a given PDF file by running Ghostscript command. The PDF is first saved to a temporary location, repaired, read back, and then returned as a response.")
|
||||
public ResponseEntity<byte[]> repairPdf(@ModelAttribute PDFFile request) throws IOException {
|
||||
MultipartFile inputFile = request.getFileInput();
|
||||
|
||||
PDDocument document = PDDocument.load(inputFile.getBytes());
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||
for (int page = 0; page < document.getNumberOfPages(); ++page)
|
||||
{
|
||||
BufferedImage image = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
|
||||
ImageIO.write(image, "png", new File("scanned-" + (page+1) + ".png"));
|
||||
}
|
||||
document.close();
|
||||
PDDocument document = PDDocument.load(inputFile.getBytes());
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||
for (int page = 0; page < document.getNumberOfPages(); ++page) {
|
||||
BufferedImage image = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
|
||||
ImageIO.write(image, "png", new File("scanned-" + (page + 1) + ".png"));
|
||||
}
|
||||
document.close();
|
||||
|
||||
// Constants
|
||||
int scannedness = 90; // Value between 0 and 100
|
||||
int dirtiness = 0; // Value between 0 and 100
|
||||
// Constants
|
||||
int scannedness = 90; // Value between 0 and 100
|
||||
int dirtiness = 0; // Value between 0 and 100
|
||||
|
||||
// Load the source image
|
||||
BufferedImage sourceImage = ImageIO.read(new File("scanned-1.png"));
|
||||
// Load the source image
|
||||
BufferedImage sourceImage = ImageIO.read(new File("scanned-1.png"));
|
||||
|
||||
// Create the destination image
|
||||
BufferedImage destinationImage = new BufferedImage(sourceImage.getWidth(), sourceImage.getHeight(), sourceImage.getType());
|
||||
// Create the destination image
|
||||
BufferedImage destinationImage =
|
||||
new BufferedImage(
|
||||
sourceImage.getWidth(), sourceImage.getHeight(), sourceImage.getType());
|
||||
|
||||
// Apply a brightness and contrast effect based on the "scanned-ness"
|
||||
float scaleFactor = 1.0f + (scannedness / 100.0f) * 0.5f; // Between 1.0 and 1.5
|
||||
float offset = scannedness * 1.5f; // Between 0 and 150
|
||||
BufferedImageOp op = new RescaleOp(scaleFactor, offset, null);
|
||||
op.filter(sourceImage, destinationImage);
|
||||
// Apply a brightness and contrast effect based on the "scanned-ness"
|
||||
float scaleFactor = 1.0f + (scannedness / 100.0f) * 0.5f; // Between 1.0 and 1.5
|
||||
float offset = scannedness * 1.5f; // Between 0 and 150
|
||||
BufferedImageOp op = new RescaleOp(scaleFactor, offset, null);
|
||||
op.filter(sourceImage, destinationImage);
|
||||
|
||||
// Apply a rotation effect
|
||||
double rotationRequired = Math.toRadians((new SecureRandom().nextInt(3 - 1) + 1)); // Random angle between 1 and 3 degrees
|
||||
double locationX = destinationImage.getWidth() / 2;
|
||||
double locationY = destinationImage.getHeight() / 2;
|
||||
AffineTransform tx = AffineTransform.getRotateInstance(rotationRequired, locationX, locationY);
|
||||
AffineTransformOp rotateOp = new AffineTransformOp(tx, AffineTransformOp.TYPE_BILINEAR);
|
||||
destinationImage = rotateOp.filter(destinationImage, null);
|
||||
// Apply a rotation effect
|
||||
double rotationRequired =
|
||||
Math.toRadians(
|
||||
(new SecureRandom().nextInt(3 - 1)
|
||||
+ 1)); // Random angle between 1 and 3 degrees
|
||||
double locationX = destinationImage.getWidth() / 2;
|
||||
double locationY = destinationImage.getHeight() / 2;
|
||||
AffineTransform tx =
|
||||
AffineTransform.getRotateInstance(rotationRequired, locationX, locationY);
|
||||
AffineTransformOp rotateOp = new AffineTransformOp(tx, AffineTransformOp.TYPE_BILINEAR);
|
||||
destinationImage = rotateOp.filter(destinationImage, null);
|
||||
|
||||
// Apply a blur effect based on the "scanned-ness"
|
||||
float blurIntensity = scannedness / 100.0f * 0.2f; // Between 0.0 and 0.2
|
||||
float[] matrix = {
|
||||
blurIntensity, blurIntensity, blurIntensity,
|
||||
blurIntensity, blurIntensity, blurIntensity,
|
||||
blurIntensity, blurIntensity, blurIntensity
|
||||
};
|
||||
BufferedImageOp blurOp = new ConvolveOp(new Kernel(3, 3, matrix), ConvolveOp.EDGE_NO_OP, null);
|
||||
destinationImage = blurOp.filter(destinationImage, null);
|
||||
// Apply a blur effect based on the "scanned-ness"
|
||||
float blurIntensity = scannedness / 100.0f * 0.2f; // Between 0.0 and 0.2
|
||||
float[] matrix = {
|
||||
blurIntensity, blurIntensity, blurIntensity,
|
||||
blurIntensity, blurIntensity, blurIntensity,
|
||||
blurIntensity, blurIntensity, blurIntensity
|
||||
};
|
||||
BufferedImageOp blurOp =
|
||||
new ConvolveOp(new Kernel(3, 3, matrix), ConvolveOp.EDGE_NO_OP, null);
|
||||
destinationImage = blurOp.filter(destinationImage, null);
|
||||
|
||||
// Add noise to the image based on the "dirtiness"
|
||||
Random random = new SecureRandom();
|
||||
for (int y = 0; y < destinationImage.getHeight(); y++) {
|
||||
for (int x = 0; x < destinationImage.getWidth(); x++) {
|
||||
if (random.nextInt(100) < dirtiness) {
|
||||
// Change the pixel color to black randomly based on the "dirtiness"
|
||||
destinationImage.setRGB(x, y, Color.BLACK.getRGB());
|
||||
}
|
||||
}
|
||||
}
|
||||
// Add noise to the image based on the "dirtiness"
|
||||
Random random = new SecureRandom();
|
||||
for (int y = 0; y < destinationImage.getHeight(); y++) {
|
||||
for (int x = 0; x < destinationImage.getWidth(); x++) {
|
||||
if (random.nextInt(100) < dirtiness) {
|
||||
// Change the pixel color to black randomly based on the "dirtiness"
|
||||
destinationImage.setRGB(x, y, Color.BLACK.getRGB());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Save the image
|
||||
ImageIO.write(destinationImage, "PNG", new File("scanned-1.png"));
|
||||
// Save the image
|
||||
ImageIO.write(destinationImage, "PNG", new File("scanned-1.png"));
|
||||
|
||||
PDDocument documentOut = new PDDocument();
|
||||
for (int page = 1; page <= document.getNumberOfPages(); ++page) {
|
||||
BufferedImage bim = ImageIO.read(new File("scanned-" + page + ".png"));
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Adjust the dimensions of the page
|
||||
PDPage pdPage = new PDPage(new PDRectangle(bim.getWidth() - 1, bim.getHeight() - 1));
|
||||
documentOut.addPage(pdPage);
|
||||
|
||||
PDDocument documentOut = new PDDocument();
|
||||
for (int page = 1; page <= document.getNumberOfPages(); ++page)
|
||||
{
|
||||
BufferedImage bim = ImageIO.read(new File("scanned-" + page + ".png"));
|
||||
|
||||
// Adjust the dimensions of the page
|
||||
PDPage pdPage = new PDPage(new PDRectangle(bim.getWidth() - 1, bim.getHeight() - 1));
|
||||
documentOut.addPage(pdPage);
|
||||
|
||||
PDImageXObject pdImage = LosslessFactory.createFromImage(documentOut, bim);
|
||||
PDPageContentStream contentStream = new PDPageContentStream(documentOut, pdPage);
|
||||
|
||||
// Draw the image with a slight offset and enlarged dimensions
|
||||
contentStream.drawImage(pdImage, -1, -1, bim.getWidth() + 2, bim.getHeight() + 2);
|
||||
contentStream.close();
|
||||
}
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
documentOut.save(baos);
|
||||
documentOut.close();
|
||||
PDImageXObject pdImage = LosslessFactory.createFromImage(documentOut, bim);
|
||||
PDPageContentStream contentStream = new PDPageContentStream(documentOut, pdPage);
|
||||
|
||||
// Draw the image with a slight offset and enlarged dimensions
|
||||
contentStream.drawImage(pdImage, -1, -1, bim.getWidth() + 2, bim.getHeight() + 2);
|
||||
contentStream.close();
|
||||
}
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
documentOut.save(baos);
|
||||
documentOut.close();
|
||||
|
||||
// Return the optimized PDF as a response
|
||||
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_scanned.pdf";
|
||||
String outputFilename =
|
||||
inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_scanned.pdf";
|
||||
return WebResponseUtils.boasToWebResponse(baos, outputFilename);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@ import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
|
||||
import stirling.software.SPDF.model.api.misc.MetadataRequest;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@@ -27,7 +28,6 @@ import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
@Tag(name = "Misc", description = "Miscellaneous APIs")
|
||||
public class MetadataController {
|
||||
|
||||
|
||||
private String checkUndefined(String entry) {
|
||||
// Check if the string is "undefined"
|
||||
if ("undefined".equals(entry)) {
|
||||
@@ -36,14 +36,16 @@ public class MetadataController {
|
||||
}
|
||||
// Return the original string if it's not "undefined"
|
||||
return entry;
|
||||
|
||||
}
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/update-metadata")
|
||||
@Operation(summary = "Update metadata of a PDF file",
|
||||
description = "This endpoint allows you to update the metadata of a given PDF file. You can add, modify, or delete standard and custom metadata fields. Input:PDF Output:PDF Type:SISO")
|
||||
public ResponseEntity<byte[]> metadata(@ModelAttribute MetadataRequest request) throws IOException {
|
||||
|
||||
@Operation(
|
||||
summary = "Update metadata of a PDF file",
|
||||
description =
|
||||
"This endpoint allows you to update the metadata of a given PDF file. You can add, modify, or delete standard and custom metadata fields. Input:PDF Output:PDF Type:SISO")
|
||||
public ResponseEntity<byte[]> metadata(@ModelAttribute MetadataRequest request)
|
||||
throws IOException {
|
||||
|
||||
// Extract PDF file from the request object
|
||||
MultipartFile pdfFile = request.getFileInput();
|
||||
|
||||
@@ -61,8 +63,8 @@ public class MetadataController {
|
||||
|
||||
// Extract additional custom parameters
|
||||
Map<String, String> allRequestParams = request.getAllRequestParams();
|
||||
if(allRequestParams == null) {
|
||||
allRequestParams = new java.util.HashMap<String, String>();
|
||||
if (allRequestParams == null) {
|
||||
allRequestParams = new java.util.HashMap<String, String>();
|
||||
}
|
||||
// Load the PDF file into a PDDocument
|
||||
PDDocument document = PDDocument.load(pdfFile.getBytes());
|
||||
@@ -89,7 +91,9 @@ public class MetadataController {
|
||||
}
|
||||
// Remove metadata from the PDF history
|
||||
document.getDocumentCatalog().getCOSObject().removeItem(COSName.getPDFName("Metadata"));
|
||||
document.getDocumentCatalog().getCOSObject().removeItem(COSName.getPDFName("PieceInfo"));
|
||||
document.getDocumentCatalog()
|
||||
.getCOSObject()
|
||||
.removeItem(COSName.getPDFName("PieceInfo"));
|
||||
author = null;
|
||||
creationDate = null;
|
||||
creator = null;
|
||||
@@ -104,9 +108,17 @@ public class MetadataController {
|
||||
for (Entry<String, String> entry : allRequestParams.entrySet()) {
|
||||
String key = entry.getKey();
|
||||
// Check if the key is a standard metadata key
|
||||
if (!key.equalsIgnoreCase("Author") && !key.equalsIgnoreCase("CreationDate") && !key.equalsIgnoreCase("Creator") && !key.equalsIgnoreCase("Keywords")
|
||||
&& !key.equalsIgnoreCase("modificationDate") && !key.equalsIgnoreCase("Producer") && !key.equalsIgnoreCase("Subject") && !key.equalsIgnoreCase("Title")
|
||||
&& !key.equalsIgnoreCase("Trapped") && !key.contains("customKey") && !key.contains("customValue")) {
|
||||
if (!key.equalsIgnoreCase("Author")
|
||||
&& !key.equalsIgnoreCase("CreationDate")
|
||||
&& !key.equalsIgnoreCase("Creator")
|
||||
&& !key.equalsIgnoreCase("Keywords")
|
||||
&& !key.equalsIgnoreCase("modificationDate")
|
||||
&& !key.equalsIgnoreCase("Producer")
|
||||
&& !key.equalsIgnoreCase("Subject")
|
||||
&& !key.equalsIgnoreCase("Title")
|
||||
&& !key.equalsIgnoreCase("Trapped")
|
||||
&& !key.contains("customKey")
|
||||
&& !key.contains("customValue")) {
|
||||
info.setCustomMetadataValue(key, entry.getValue());
|
||||
} else if (key.contains("customKey")) {
|
||||
int number = Integer.parseInt(key.replaceAll("\\D", ""));
|
||||
@@ -119,7 +131,8 @@ public class MetadataController {
|
||||
if (creationDate != null && creationDate.length() > 0) {
|
||||
Calendar creationDateCal = Calendar.getInstance();
|
||||
try {
|
||||
creationDateCal.setTime(new SimpleDateFormat("yyyy/MM/dd HH:mm:ss").parse(creationDate));
|
||||
creationDateCal.setTime(
|
||||
new SimpleDateFormat("yyyy/MM/dd HH:mm:ss").parse(creationDate));
|
||||
} catch (ParseException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
@@ -130,7 +143,8 @@ public class MetadataController {
|
||||
if (modificationDate != null && modificationDate.length() > 0) {
|
||||
Calendar modificationDateCal = Calendar.getInstance();
|
||||
try {
|
||||
modificationDateCal.setTime(new SimpleDateFormat("yyyy/MM/dd HH:mm:ss").parse(modificationDate));
|
||||
modificationDateCal.setTime(
|
||||
new SimpleDateFormat("yyyy/MM/dd HH:mm:ss").parse(modificationDate));
|
||||
} catch (ParseException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
@@ -147,7 +161,8 @@ public class MetadataController {
|
||||
info.setTrapped(trapped);
|
||||
|
||||
document.setDocumentInformation(info);
|
||||
return WebResponseUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_metadata.pdf");
|
||||
return WebResponseUtils.pdfDocToWebResponse(
|
||||
document,
|
||||
pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_metadata.pdf");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -26,6 +26,7 @@ import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
|
||||
import stirling.software.SPDF.model.api.misc.ProcessPdfWithOcrRequest;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
||||
@@ -44,14 +45,21 @@ public class OCRController {
|
||||
if (files == null) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
return Arrays.stream(files).filter(file -> file.getName().endsWith(".traineddata")).map(file -> file.getName().replace(".traineddata", ""))
|
||||
.filter(lang -> !lang.equalsIgnoreCase("osd")).collect(Collectors.toList());
|
||||
return Arrays.stream(files)
|
||||
.filter(file -> file.getName().endsWith(".traineddata"))
|
||||
.map(file -> file.getName().replace(".traineddata", ""))
|
||||
.filter(lang -> !lang.equalsIgnoreCase("osd"))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/ocr-pdf")
|
||||
@Operation(summary = "Process a PDF file with OCR",
|
||||
description = "This endpoint processes a PDF file using OCR (Optical Character Recognition). Users can specify languages, sidecar, deskew, clean, cleanFinal, ocrType, ocrRenderType, and removeImagesAfter options. Input:PDF Output:PDF Type:SI-Conditional")
|
||||
public ResponseEntity<byte[]> processPdfWithOCR(@ModelAttribute ProcessPdfWithOcrRequest request) throws IOException, InterruptedException {
|
||||
@Operation(
|
||||
summary = "Process a PDF file with OCR",
|
||||
description =
|
||||
"This endpoint processes a PDF file using OCR (Optical Character Recognition). Users can specify languages, sidecar, deskew, clean, cleanFinal, ocrType, ocrRenderType, and removeImagesAfter options. Input:PDF Output:PDF Type:SI-Conditional")
|
||||
public ResponseEntity<byte[]> processPdfWithOCR(
|
||||
@ModelAttribute ProcessPdfWithOcrRequest request)
|
||||
throws IOException, InterruptedException {
|
||||
MultipartFile inputFile = request.getFileInput();
|
||||
List<String> selectedLanguages = request.getLanguages();
|
||||
Boolean sidecar = request.isSidecar();
|
||||
@@ -65,16 +73,17 @@ public class OCRController {
|
||||
if (selectedLanguages == null || selectedLanguages.isEmpty()) {
|
||||
throw new IOException("Please select at least one language.");
|
||||
}
|
||||
|
||||
if(!ocrRenderType.equals("hocr") && !ocrRenderType.equals("sandwich")) {
|
||||
|
||||
if (!ocrRenderType.equals("hocr") && !ocrRenderType.equals("sandwich")) {
|
||||
throw new IOException("ocrRenderType wrong");
|
||||
}
|
||||
|
||||
|
||||
// Get available Tesseract languages
|
||||
List<String> availableLanguages = getAvailableTesseractLanguages();
|
||||
|
||||
// Validate selected languages
|
||||
selectedLanguages = selectedLanguages.stream().filter(availableLanguages::contains).toList();
|
||||
selectedLanguages =
|
||||
selectedLanguages.stream().filter(availableLanguages::contains).toList();
|
||||
|
||||
if (selectedLanguages.isEmpty()) {
|
||||
throw new IOException("None of the selected languages are valid.");
|
||||
@@ -92,8 +101,16 @@ public class OCRController {
|
||||
// Run OCR Command
|
||||
String languageOption = String.join("+", selectedLanguages);
|
||||
|
||||
|
||||
List<String> command = new ArrayList<>(Arrays.asList("ocrmypdf", "--verbose", "2", "--output-type", "pdf", "--pdf-renderer" , ocrRenderType));
|
||||
List<String> command =
|
||||
new ArrayList<>(
|
||||
Arrays.asList(
|
||||
"ocrmypdf",
|
||||
"--verbose",
|
||||
"2",
|
||||
"--output-type",
|
||||
"pdf",
|
||||
"--pdf-renderer",
|
||||
ocrRenderType));
|
||||
|
||||
if (sidecar != null && sidecar) {
|
||||
sidecarTextPath = Files.createTempFile("sidecar", ".txt");
|
||||
@@ -120,42 +137,61 @@ public class OCRController {
|
||||
}
|
||||
}
|
||||
|
||||
command.addAll(Arrays.asList("--language", languageOption, tempInputFile.toString(), tempOutputFile.toString()));
|
||||
command.addAll(
|
||||
Arrays.asList(
|
||||
"--language",
|
||||
languageOption,
|
||||
tempInputFile.toString(),
|
||||
tempOutputFile.toString()));
|
||||
|
||||
// Run CLI command
|
||||
ProcessExecutorResult result = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
|
||||
if(result.getRc() != 0 && result.getMessages().contains("multiprocessing/synchronize.py") && result.getMessages().contains("OSError: [Errno 38] Function not implemented")) {
|
||||
command.add("--jobs");
|
||||
command.add("1");
|
||||
result = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
|
||||
ProcessExecutorResult result =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF)
|
||||
.runCommandWithOutputHandling(command);
|
||||
if (result.getRc() != 0
|
||||
&& result.getMessages().contains("multiprocessing/synchronize.py")
|
||||
&& result.getMessages().contains("OSError: [Errno 38] Function not implemented")) {
|
||||
command.add("--jobs");
|
||||
command.add("1");
|
||||
result =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF)
|
||||
.runCommandWithOutputHandling(command);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Remove images from the OCR processed PDF if the flag is set to true
|
||||
if (removeImagesAfter != null && removeImagesAfter) {
|
||||
Path tempPdfWithoutImages = Files.createTempFile("output_", "_no_images.pdf");
|
||||
|
||||
List<String> gsCommand = Arrays.asList("gs", "-sDEVICE=pdfwrite", "-dFILTERIMAGE", "-o", tempPdfWithoutImages.toString(), tempOutputFile.toString());
|
||||
List<String> gsCommand =
|
||||
Arrays.asList(
|
||||
"gs",
|
||||
"-sDEVICE=pdfwrite",
|
||||
"-dFILTERIMAGE",
|
||||
"-o",
|
||||
tempPdfWithoutImages.toString(),
|
||||
tempOutputFile.toString());
|
||||
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(gsCommand);
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
|
||||
.runCommandWithOutputHandling(gsCommand);
|
||||
tempOutputFile = tempPdfWithoutImages;
|
||||
}
|
||||
// Read the OCR processed PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
// Clean up the temporary files
|
||||
Files.delete(tempInputFile);
|
||||
|
||||
|
||||
// Return the OCR processed PDF as a response
|
||||
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf";
|
||||
String outputFilename =
|
||||
inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf";
|
||||
|
||||
if (sidecar != null && sidecar) {
|
||||
// Create a zip file containing both the PDF and the text file
|
||||
String outputZipFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.zip";
|
||||
String outputZipFilename =
|
||||
inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.zip";
|
||||
Path tempZipFile = Files.createTempFile("output_", ".zip");
|
||||
|
||||
try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
|
||||
try (ZipOutputStream zipOut =
|
||||
new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
|
||||
// Add PDF file to the zip
|
||||
ZipEntry pdfEntry = new ZipEntry(outputFilename);
|
||||
zipOut.putNextEntry(pdfEntry);
|
||||
@@ -177,13 +213,12 @@ public class OCRController {
|
||||
Files.delete(sidecarTextPath);
|
||||
|
||||
// Return the zip file containing both the PDF and the text file
|
||||
return WebResponseUtils.bytesToWebResponse(zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
|
||||
return WebResponseUtils.bytesToWebResponse(
|
||||
zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
|
||||
} else {
|
||||
// Return the OCR processed PDF as a response
|
||||
Files.delete(tempOutputFile);
|
||||
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@ import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
|
||||
import stirling.software.SPDF.model.api.misc.OverlayImageRequest;
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
@@ -27,9 +28,9 @@ public class OverlayImageController {
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/add-image")
|
||||
@Operation(
|
||||
summary = "Overlay image onto a PDF file",
|
||||
description = "This endpoint overlays an image onto a PDF file at the specified coordinates. The image can be overlaid on every page of the PDF if specified. Input:PDF/IMAGE Output:PDF Type:MF-SISO"
|
||||
)
|
||||
summary = "Overlay image onto a PDF file",
|
||||
description =
|
||||
"This endpoint overlays an image onto a PDF file at the specified coordinates. The image can be overlaid on every page of the PDF if specified. Input:PDF/IMAGE Output:PDF Type:MF-SISO")
|
||||
public ResponseEntity<byte[]> overlayImage(@ModelAttribute OverlayImageRequest request) {
|
||||
MultipartFile pdfFile = request.getFileInput();
|
||||
MultipartFile imageFile = request.getImageFile();
|
||||
@@ -41,7 +42,9 @@ public class OverlayImageController {
|
||||
byte[] imageBytes = imageFile.getBytes();
|
||||
byte[] result = PdfUtils.overlayImage(pdfBytes, imageBytes, x, y, everyPage);
|
||||
|
||||
return WebResponseUtils.bytesToWebResponse(result, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_overlayed.pdf");
|
||||
return WebResponseUtils.bytesToWebResponse(
|
||||
result,
|
||||
pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_overlayed.pdf");
|
||||
} catch (IOException e) {
|
||||
logger.error("Failed to add image to PDF", e);
|
||||
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
|
||||
|
||||
@@ -21,6 +21,7 @@ import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
|
||||
import stirling.software.SPDF.model.api.misc.AddPageNumbersRequest;
|
||||
import stirling.software.SPDF.utils.GeneralUtils;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
@@ -33,16 +34,20 @@ public class PageNumbersController {
|
||||
private static final Logger logger = LoggerFactory.getLogger(PageNumbersController.class);
|
||||
|
||||
@PostMapping(value = "/add-page-numbers", consumes = "multipart/form-data")
|
||||
@Operation(summary = "Add page numbers to a PDF document", description = "This operation takes an input PDF file and adds page numbers to it. Input:PDF Output:PDF Type:SISO")
|
||||
public ResponseEntity<byte[]> addPageNumbers(@ModelAttribute AddPageNumbersRequest request) throws IOException {
|
||||
@Operation(
|
||||
summary = "Add page numbers to a PDF document",
|
||||
description =
|
||||
"This operation takes an input PDF file and adds page numbers to it. Input:PDF Output:PDF Type:SISO")
|
||||
public ResponseEntity<byte[]> addPageNumbers(@ModelAttribute AddPageNumbersRequest request)
|
||||
throws IOException {
|
||||
MultipartFile file = request.getFileInput();
|
||||
String customMargin = request.getCustomMargin();
|
||||
int position = request.getPosition();
|
||||
int startingNumber = request.getStartingNumber();
|
||||
String pagesToNumber = request.getPagesToNumber();
|
||||
String customText = request.getCustomText();
|
||||
int pageNumber = startingNumber;
|
||||
byte[] fileBytes = file.getBytes();
|
||||
int pageNumber = startingNumber;
|
||||
byte[] fileBytes = file.getBytes();
|
||||
PDDocument document = PDDocument.load(fileBytes);
|
||||
|
||||
float marginFactor;
|
||||
@@ -58,9 +63,8 @@ public class PageNumbersController {
|
||||
break;
|
||||
case "x-large":
|
||||
marginFactor = 0.075f;
|
||||
break;
|
||||
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
marginFactor = 0.035f;
|
||||
break;
|
||||
@@ -68,19 +72,29 @@ public class PageNumbersController {
|
||||
|
||||
float fontSize = 12.0f;
|
||||
PDType1Font font = PDType1Font.HELVETICA;
|
||||
if(pagesToNumber == null || pagesToNumber.length() == 0) {
|
||||
pagesToNumber = "all";
|
||||
if (pagesToNumber == null || pagesToNumber.length() == 0) {
|
||||
pagesToNumber = "all";
|
||||
}
|
||||
if(customText == null || customText.length() == 0) {
|
||||
customText = "{n}";
|
||||
if (customText == null || customText.length() == 0) {
|
||||
customText = "{n}";
|
||||
}
|
||||
List<Integer> pagesToNumberList = GeneralUtils.parsePageList(pagesToNumber.split(","), document.getNumberOfPages());
|
||||
List<Integer> pagesToNumberList =
|
||||
GeneralUtils.parsePageList(pagesToNumber.split(","), document.getNumberOfPages());
|
||||
|
||||
for (int i : pagesToNumberList) {
|
||||
PDPage page = document.getPage(i);
|
||||
PDRectangle pageSize = page.getMediaBox();
|
||||
|
||||
String text = customText != null ? customText.replace("{n}", String.valueOf(pageNumber)).replace("{total}", String.valueOf(document.getNumberOfPages())).replace("{filename}", file.getOriginalFilename().replaceFirst("[.][^.]+$", "")) : String.valueOf(pageNumber);
|
||||
String text =
|
||||
customText != null
|
||||
? customText
|
||||
.replace("{n}", String.valueOf(pageNumber))
|
||||
.replace("{total}", String.valueOf(document.getNumberOfPages()))
|
||||
.replace(
|
||||
"{filename}",
|
||||
file.getOriginalFilename()
|
||||
.replaceFirst("[.][^.]+$", ""))
|
||||
: String.valueOf(pageNumber);
|
||||
|
||||
float x, y;
|
||||
|
||||
@@ -88,10 +102,10 @@ public class PageNumbersController {
|
||||
int yGroup = 2 - (position - 1) / 3;
|
||||
|
||||
switch (xGroup) {
|
||||
case 0: // left
|
||||
case 0: // left
|
||||
x = pageSize.getLowerLeftX() + marginFactor * pageSize.getWidth();
|
||||
break;
|
||||
case 1: // center
|
||||
case 1: // center
|
||||
x = pageSize.getLowerLeftX() + (pageSize.getWidth() / 2);
|
||||
break;
|
||||
default: // right
|
||||
@@ -100,10 +114,10 @@ public class PageNumbersController {
|
||||
}
|
||||
|
||||
switch (yGroup) {
|
||||
case 0: // bottom
|
||||
case 0: // bottom
|
||||
y = pageSize.getLowerLeftY() + marginFactor * pageSize.getHeight();
|
||||
break;
|
||||
case 1: // middle
|
||||
case 1: // middle
|
||||
y = pageSize.getLowerLeftY() + (pageSize.getHeight() / 2);
|
||||
break;
|
||||
default: // top
|
||||
@@ -111,7 +125,9 @@ public class PageNumbersController {
|
||||
break;
|
||||
}
|
||||
|
||||
PDPageContentStream contentStream = new PDPageContentStream(document, page, PDPageContentStream.AppendMode.APPEND, true);
|
||||
PDPageContentStream contentStream =
|
||||
new PDPageContentStream(
|
||||
document, page, PDPageContentStream.AppendMode.APPEND, true);
|
||||
contentStream.beginText();
|
||||
contentStream.setFont(font, fontSize);
|
||||
contentStream.newLineAtOffset(x, y);
|
||||
@@ -126,10 +142,9 @@ public class PageNumbersController {
|
||||
document.save(baos);
|
||||
document.close();
|
||||
|
||||
return WebResponseUtils.bytesToWebResponse(baos.toByteArray(), file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_numbersAdded.pdf", MediaType.APPLICATION_PDF);
|
||||
|
||||
return WebResponseUtils.bytesToWebResponse(
|
||||
baos.toByteArray(),
|
||||
file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_numbersAdded.pdf",
|
||||
MediaType.APPLICATION_PDF);
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
|
||||
import stirling.software.SPDF.model.api.PDFFile;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
||||
@@ -31,11 +32,12 @@ public class RepairController {
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/repair")
|
||||
@Operation(
|
||||
summary = "Repair a PDF file",
|
||||
description = "This endpoint repairs a given PDF file by running Ghostscript command. The PDF is first saved to a temporary location, repaired, read back, and then returned as a response. Input:PDF Output:PDF Type:SISO"
|
||||
)
|
||||
public ResponseEntity<byte[]> repairPdf(@ModelAttribute PDFFile request) throws IOException, InterruptedException {
|
||||
MultipartFile inputFile = request.getFileInput();
|
||||
summary = "Repair a PDF file",
|
||||
description =
|
||||
"This endpoint repairs a given PDF file by running Ghostscript command. The PDF is first saved to a temporary location, repaired, read back, and then returned as a response. Input:PDF Output:PDF Type:SISO")
|
||||
public ResponseEntity<byte[]> repairPdf(@ModelAttribute PDFFile request)
|
||||
throws IOException, InterruptedException {
|
||||
MultipartFile inputFile = request.getFileInput();
|
||||
// Save the uploaded file to a temporary location
|
||||
Path tempInputFile = Files.createTempFile("input_", ".pdf");
|
||||
inputFile.transferTo(tempInputFile.toFile());
|
||||
@@ -50,8 +52,9 @@ public class RepairController {
|
||||
command.add("-sDEVICE=pdfwrite");
|
||||
command.add(tempInputFile.toString());
|
||||
|
||||
|
||||
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command);
|
||||
ProcessExecutorResult returnCode =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
|
||||
.runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the optimized PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
@@ -61,8 +64,8 @@ public class RepairController {
|
||||
Files.delete(tempOutputFile);
|
||||
|
||||
// Return the optimized PDF as a response
|
||||
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_repaired.pdf";
|
||||
String outputFilename =
|
||||
inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_repaired.pdf";
|
||||
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -17,47 +17,60 @@ import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
|
||||
import stirling.software.SPDF.model.api.PDFFile;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/misc")
|
||||
@Tag(name = "Misc", description = "Miscellaneous APIs")
|
||||
public class ShowJavascript {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(ShowJavascript.class);
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/show-javascript")
|
||||
@Operation(summary = "Grabs all JS from a PDF and returns a single JS file with all code", description = "desc. Input:PDF Output:JS Type:SISO")
|
||||
@Operation(
|
||||
summary = "Grabs all JS from a PDF and returns a single JS file with all code",
|
||||
description = "desc. Input:PDF Output:JS Type:SISO")
|
||||
public ResponseEntity<byte[]> extractHeader(@ModelAttribute PDFFile request) throws Exception {
|
||||
MultipartFile inputFile = request.getFileInput();
|
||||
MultipartFile inputFile = request.getFileInput();
|
||||
String script = "";
|
||||
|
||||
try (PDDocument document = PDDocument.load(inputFile.getInputStream())) {
|
||||
|
||||
if(document.getDocumentCatalog() != null && document.getDocumentCatalog().getNames() != null) {
|
||||
PDNameTreeNode<PDActionJavaScript> jsTree = document.getDocumentCatalog().getNames().getJavaScript();
|
||||
|
||||
if (jsTree != null) {
|
||||
Map<String, PDActionJavaScript> jsEntries = jsTree.getNames();
|
||||
|
||||
for (Map.Entry<String, PDActionJavaScript> entry : jsEntries.entrySet()) {
|
||||
String name = entry.getKey();
|
||||
PDActionJavaScript jsAction = entry.getValue();
|
||||
String jsCodeStr = jsAction.getAction();
|
||||
|
||||
script += "// File: " + inputFile.getOriginalFilename() + ", Script: " + name + "\n" + jsCodeStr + "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (script.isEmpty()) {
|
||||
script = "PDF '" + inputFile.getOriginalFilename() + "' does not contain Javascript";
|
||||
if (document.getDocumentCatalog() != null
|
||||
&& document.getDocumentCatalog().getNames() != null) {
|
||||
PDNameTreeNode<PDActionJavaScript> jsTree =
|
||||
document.getDocumentCatalog().getNames().getJavaScript();
|
||||
|
||||
if (jsTree != null) {
|
||||
Map<String, PDActionJavaScript> jsEntries = jsTree.getNames();
|
||||
|
||||
for (Map.Entry<String, PDActionJavaScript> entry : jsEntries.entrySet()) {
|
||||
String name = entry.getKey();
|
||||
PDActionJavaScript jsAction = entry.getValue();
|
||||
String jsCodeStr = jsAction.getAction();
|
||||
|
||||
script +=
|
||||
"// File: "
|
||||
+ inputFile.getOriginalFilename()
|
||||
+ ", Script: "
|
||||
+ name
|
||||
+ "\n"
|
||||
+ jsCodeStr
|
||||
+ "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return WebResponseUtils.bytesToWebResponse(script.getBytes(StandardCharsets.UTF_8), inputFile.getOriginalFilename() + ".js");
|
||||
if (script.isEmpty()) {
|
||||
script =
|
||||
"PDF '" + inputFile.getOriginalFilename() + "' does not contain Javascript";
|
||||
}
|
||||
|
||||
return WebResponseUtils.bytesToWebResponse(
|
||||
script.getBytes(StandardCharsets.UTF_8),
|
||||
inputFile.getOriginalFilename() + ".js");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user