Number of fixes and making pipline LIVE ! (#907)
Closes #889 and #332 #710 #901 #885
This commit is contained in:
@@ -130,7 +130,7 @@ public class AutoRenameController {
|
||||
|
||||
// Sanitize the header string by removing characters not allowed in a filename.
|
||||
if (header != null && header.length() < 255) {
|
||||
header = header.replaceAll("[/\\\\?%*:|\"<>]", "");
|
||||
header = header.replaceAll("[/\\\\?%*:|\"<>]", "").trim();
|
||||
return WebResponseUtils.pdfDocToWebResponse(document, header + ".pdf");
|
||||
} else {
|
||||
logger.info("File has no good title to be found");
|
||||
|
||||
@@ -58,7 +58,7 @@ public class AutoSplitPdfController {
|
||||
|
||||
PDDocument document = Loader.loadPDF(file.getBytes());
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||
|
||||
pdfRenderer.setSubsamplingAllowed(true);
|
||||
List<PDDocument> splitDocuments = new ArrayList<>();
|
||||
List<ByteArrayOutputStream> splitDocumentsBoas = new ArrayList<>();
|
||||
|
||||
|
||||
@@ -59,7 +59,7 @@ public class BlankPageController {
|
||||
List<Integer> pagesToKeepIndex = new ArrayList<>();
|
||||
int pageIndex = 0;
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||
|
||||
pdfRenderer.setSubsamplingAllowed(true);
|
||||
for (PDPage page : pages) {
|
||||
logger.info("checking page " + pageIndex);
|
||||
textStripper.setStartPage(pageIndex + 1);
|
||||
|
||||
@@ -2,9 +2,7 @@ package stirling.software.SPDF.controller.api.misc;
|
||||
|
||||
import java.awt.Image;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
@@ -75,194 +73,208 @@ public class CompressController {
|
||||
long inputFileSize = Files.size(tempInputFile);
|
||||
|
||||
// Prepare the output file path
|
||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
|
||||
// Determine initial optimization level based on expected size reduction, only if in
|
||||
// autoMode
|
||||
if (autoMode) {
|
||||
double sizeReductionRatio = expectedOutputSize / (double) inputFileSize;
|
||||
if (sizeReductionRatio > 0.7) {
|
||||
optimizeLevel = 1;
|
||||
} else if (sizeReductionRatio > 0.5) {
|
||||
optimizeLevel = 2;
|
||||
} else if (sizeReductionRatio > 0.35) {
|
||||
optimizeLevel = 3;
|
||||
} else {
|
||||
optimizeLevel = 3;
|
||||
}
|
||||
}
|
||||
|
||||
boolean sizeMet = false;
|
||||
while (!sizeMet && optimizeLevel <= 4) {
|
||||
// Prepare the Ghostscript command
|
||||
List<String> command = new ArrayList<>();
|
||||
command.add("gs");
|
||||
command.add("-sDEVICE=pdfwrite");
|
||||
command.add("-dCompatibilityLevel=1.4");
|
||||
|
||||
switch (optimizeLevel) {
|
||||
case 1:
|
||||
command.add("-dPDFSETTINGS=/prepress");
|
||||
break;
|
||||
case 2:
|
||||
command.add("-dPDFSETTINGS=/printer");
|
||||
break;
|
||||
case 3:
|
||||
command.add("-dPDFSETTINGS=/ebook");
|
||||
break;
|
||||
case 4:
|
||||
command.add("-dPDFSETTINGS=/screen");
|
||||
break;
|
||||
default:
|
||||
command.add("-dPDFSETTINGS=/default");
|
||||
}
|
||||
|
||||
command.add("-dNOPAUSE");
|
||||
command.add("-dQUIET");
|
||||
command.add("-dBATCH");
|
||||
command.add("-sOutputFile=" + tempOutputFile.toString());
|
||||
command.add(tempInputFile.toString());
|
||||
|
||||
ProcessExecutorResult returnCode =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
|
||||
.runCommandWithOutputHandling(command);
|
||||
|
||||
// Check if file size is within expected size or not auto mode so instantly finish
|
||||
long outputFileSize = Files.size(tempOutputFile);
|
||||
if (outputFileSize <= expectedOutputSize || !autoMode) {
|
||||
sizeMet = true;
|
||||
} else {
|
||||
// Increase optimization level for next iteration
|
||||
optimizeLevel++;
|
||||
if (autoMode && optimizeLevel > 3) {
|
||||
System.out.println("Skipping level 4 due to bad results in auto mode");
|
||||
sizeMet = true;
|
||||
} else if (optimizeLevel == 5) {
|
||||
|
||||
Path tempOutputFile = null;
|
||||
byte[] pdfBytes;
|
||||
try {
|
||||
tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
// Determine initial optimization level based on expected size reduction, only if in
|
||||
// autoMode
|
||||
if (autoMode) {
|
||||
double sizeReductionRatio = expectedOutputSize / (double) inputFileSize;
|
||||
if (sizeReductionRatio > 0.7) {
|
||||
optimizeLevel = 1;
|
||||
} else if (sizeReductionRatio > 0.5) {
|
||||
optimizeLevel = 2;
|
||||
} else if (sizeReductionRatio > 0.35) {
|
||||
optimizeLevel = 3;
|
||||
} else {
|
||||
System.out.println(
|
||||
"Increasing ghostscript optimisation level to " + optimizeLevel);
|
||||
optimizeLevel = 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (expectedOutputSize != null && autoMode) {
|
||||
long outputFileSize = Files.size(tempOutputFile);
|
||||
if (outputFileSize > expectedOutputSize) {
|
||||
try (PDDocument doc = Loader.loadPDF(new File(tempOutputFile.toString()))) {
|
||||
long previousFileSize = 0;
|
||||
double scaleFactor = 1.0;
|
||||
while (true) {
|
||||
for (PDPage page : doc.getPages()) {
|
||||
PDResources res = page.getResources();
|
||||
boolean sizeMet = false;
|
||||
while (!sizeMet && optimizeLevel <= 4) {
|
||||
// Prepare the Ghostscript command
|
||||
List<String> command = new ArrayList<>();
|
||||
command.add("gs");
|
||||
command.add("-sDEVICE=pdfwrite");
|
||||
command.add("-dCompatibilityLevel=1.4");
|
||||
|
||||
for (COSName name : res.getXObjectNames()) {
|
||||
PDXObject xobj = res.getXObject(name);
|
||||
if (xobj instanceof PDImageXObject) {
|
||||
PDImageXObject image = (PDImageXObject) xobj;
|
||||
switch (optimizeLevel) {
|
||||
case 1:
|
||||
command.add("-dPDFSETTINGS=/prepress");
|
||||
break;
|
||||
case 2:
|
||||
command.add("-dPDFSETTINGS=/printer");
|
||||
break;
|
||||
case 3:
|
||||
command.add("-dPDFSETTINGS=/ebook");
|
||||
break;
|
||||
case 4:
|
||||
command.add("-dPDFSETTINGS=/screen");
|
||||
break;
|
||||
default:
|
||||
command.add("-dPDFSETTINGS=/default");
|
||||
}
|
||||
|
||||
// Get the image in BufferedImage format
|
||||
BufferedImage bufferedImage = image.getImage();
|
||||
command.add("-dNOPAUSE");
|
||||
command.add("-dQUIET");
|
||||
command.add("-dBATCH");
|
||||
command.add("-sOutputFile=" + tempOutputFile.toString());
|
||||
command.add(tempInputFile.toString());
|
||||
|
||||
// Calculate the new dimensions
|
||||
int newWidth = (int) (bufferedImage.getWidth() * scaleFactor);
|
||||
int newHeight = (int) (bufferedImage.getHeight() * scaleFactor);
|
||||
ProcessExecutorResult returnCode =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
|
||||
.runCommandWithOutputHandling(command);
|
||||
|
||||
// If the new dimensions are zero, skip this iteration
|
||||
if (newWidth == 0 || newHeight == 0) {
|
||||
continue;
|
||||
// Check if file size is within expected size or not auto mode so instantly finish
|
||||
long outputFileSize = Files.size(tempOutputFile);
|
||||
if (outputFileSize <= expectedOutputSize || !autoMode) {
|
||||
sizeMet = true;
|
||||
} else {
|
||||
// Increase optimization level for next iteration
|
||||
optimizeLevel++;
|
||||
if (autoMode && optimizeLevel > 4) {
|
||||
System.out.println("Skipping level 5 due to bad results in auto mode");
|
||||
sizeMet = true;
|
||||
} else {
|
||||
System.out.println(
|
||||
"Increasing ghostscript optimisation level to " + optimizeLevel);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (expectedOutputSize != null && autoMode) {
|
||||
long outputFileSize = Files.size(tempOutputFile);
|
||||
byte[] fileBytes = Files.readAllBytes(tempOutputFile);
|
||||
if (outputFileSize > expectedOutputSize) {
|
||||
try (PDDocument doc = Loader.loadPDF(fileBytes)) {
|
||||
long previousFileSize = 0;
|
||||
double scaleFactorConst = 0.9f;
|
||||
double scaleFactor = 0.9f;
|
||||
while (true) {
|
||||
for (PDPage page : doc.getPages()) {
|
||||
PDResources res = page.getResources();
|
||||
if (res != null && res.getXObjectNames() != null) {
|
||||
for (COSName name : res.getXObjectNames()) {
|
||||
PDXObject xobj = res.getXObject(name);
|
||||
if (xobj != null && xobj instanceof PDImageXObject) {
|
||||
PDImageXObject image = (PDImageXObject) xobj;
|
||||
|
||||
// Get the image in BufferedImage format
|
||||
BufferedImage bufferedImage = image.getImage();
|
||||
|
||||
// Calculate the new dimensions
|
||||
int newWidth =
|
||||
(int)
|
||||
(bufferedImage.getWidth()
|
||||
* scaleFactorConst);
|
||||
int newHeight =
|
||||
(int)
|
||||
(bufferedImage.getHeight()
|
||||
* scaleFactorConst);
|
||||
|
||||
// If the new dimensions are zero, skip this iteration
|
||||
if (newWidth == 0 || newHeight == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Otherwise, proceed with the scaling
|
||||
Image scaledImage =
|
||||
bufferedImage.getScaledInstance(
|
||||
newWidth,
|
||||
newHeight,
|
||||
Image.SCALE_SMOOTH);
|
||||
|
||||
// Convert the scaled image back to a BufferedImage
|
||||
BufferedImage scaledBufferedImage =
|
||||
new BufferedImage(
|
||||
newWidth,
|
||||
newHeight,
|
||||
BufferedImage.TYPE_INT_RGB);
|
||||
scaledBufferedImage
|
||||
.getGraphics()
|
||||
.drawImage(scaledImage, 0, 0, null);
|
||||
|
||||
// Compress the scaled image
|
||||
ByteArrayOutputStream compressedImageStream =
|
||||
new ByteArrayOutputStream();
|
||||
ImageIO.write(
|
||||
scaledBufferedImage,
|
||||
"jpeg",
|
||||
compressedImageStream);
|
||||
byte[] imageBytes = compressedImageStream.toByteArray();
|
||||
compressedImageStream.close();
|
||||
|
||||
PDImageXObject compressedImage =
|
||||
PDImageXObject.createFromByteArray(
|
||||
doc,
|
||||
imageBytes,
|
||||
image.getCOSObject().toString());
|
||||
|
||||
// Replace the image in the resources with the
|
||||
// compressed
|
||||
// version
|
||||
res.put(name, compressedImage);
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise, proceed with the scaling
|
||||
Image scaledImage =
|
||||
bufferedImage.getScaledInstance(
|
||||
newWidth, newHeight, Image.SCALE_SMOOTH);
|
||||
|
||||
// Convert the scaled image back to a BufferedImage
|
||||
BufferedImage scaledBufferedImage =
|
||||
new BufferedImage(
|
||||
newWidth,
|
||||
newHeight,
|
||||
BufferedImage.TYPE_INT_RGB);
|
||||
scaledBufferedImage
|
||||
.getGraphics()
|
||||
.drawImage(scaledImage, 0, 0, null);
|
||||
|
||||
// Compress the scaled image
|
||||
ByteArrayOutputStream compressedImageStream =
|
||||
new ByteArrayOutputStream();
|
||||
ImageIO.write(
|
||||
scaledBufferedImage, "jpeg", compressedImageStream);
|
||||
byte[] imageBytes = compressedImageStream.toByteArray();
|
||||
compressedImageStream.close();
|
||||
|
||||
// Convert compressed image back to PDImageXObject
|
||||
ByteArrayInputStream bais =
|
||||
new ByteArrayInputStream(imageBytes);
|
||||
PDImageXObject compressedImage =
|
||||
PDImageXObject.createFromByteArray(
|
||||
doc,
|
||||
imageBytes,
|
||||
image.getCOSObject().toString());
|
||||
|
||||
// Replace the image in the resources with the compressed
|
||||
// version
|
||||
res.put(name, compressedImage);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// save the document to tempOutputFile again
|
||||
doc.save(tempOutputFile.toString());
|
||||
// save the document to tempOutputFile again
|
||||
doc.save(tempOutputFile.toString());
|
||||
|
||||
long currentSize = Files.size(tempOutputFile);
|
||||
// Check if the overall PDF size is still larger than expectedOutputSize
|
||||
if (currentSize > expectedOutputSize) {
|
||||
// Log the current file size and scaleFactor
|
||||
long currentSize = Files.size(tempOutputFile);
|
||||
// Check if the overall PDF size is still larger than expectedOutputSize
|
||||
if (currentSize > expectedOutputSize) {
|
||||
// Log the current file size and scaleFactor
|
||||
|
||||
System.out.println(
|
||||
"Current file size: "
|
||||
+ FileUtils.byteCountToDisplaySize(currentSize));
|
||||
System.out.println("Current scale factor: " + scaleFactor);
|
||||
System.out.println(
|
||||
"Current file size: "
|
||||
+ FileUtils.byteCountToDisplaySize(currentSize));
|
||||
System.out.println("Current scale factor: " + scaleFactor);
|
||||
|
||||
// The file is still too large, reduce scaleFactor and try again
|
||||
scaleFactor *= 0.9; // reduce scaleFactor by 10%
|
||||
// Avoid scaleFactor being too small, causing the image to shrink to 0
|
||||
if (scaleFactor < 0.2 || previousFileSize == currentSize) {
|
||||
throw new RuntimeException(
|
||||
"Could not reach the desired size without excessively degrading image quality, lowest size recommended is "
|
||||
+ FileUtils.byteCountToDisplaySize(currentSize)
|
||||
+ ", "
|
||||
+ currentSize
|
||||
+ " bytes");
|
||||
// The file is still too large, reduce scaleFactor and try again
|
||||
scaleFactor *= 0.9f; // reduce scaleFactor by 10%
|
||||
// Avoid scaleFactor being too small, causing the image to shrink to
|
||||
// 0
|
||||
if (scaleFactor < 0.2f || previousFileSize == currentSize) {
|
||||
throw new RuntimeException(
|
||||
"Could not reach the desired size without excessively degrading image quality, lowest size recommended is "
|
||||
+ FileUtils.byteCountToDisplaySize(currentSize)
|
||||
+ ", "
|
||||
+ currentSize
|
||||
+ " bytes");
|
||||
}
|
||||
previousFileSize = currentSize;
|
||||
} else {
|
||||
// The file is small enough, break the loop
|
||||
break;
|
||||
}
|
||||
previousFileSize = currentSize;
|
||||
} else {
|
||||
// The file is small enough, break the loop
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Read the optimized PDF file
|
||||
pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
// Check if optimized file is larger than the original
|
||||
if (pdfBytes.length > inputFileSize) {
|
||||
// Log the occurrence
|
||||
logger.warn(
|
||||
"Optimized file is larger than the original. Returning the original file instead.");
|
||||
|
||||
// Read the original file again
|
||||
pdfBytes = Files.readAllBytes(tempInputFile);
|
||||
}
|
||||
} finally {
|
||||
// Clean up the temporary files
|
||||
Files.delete(tempInputFile);
|
||||
Files.delete(tempOutputFile);
|
||||
}
|
||||
|
||||
// Read the optimized PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
// Check if optimized file is larger than the original
|
||||
if (pdfBytes.length > inputFileSize) {
|
||||
// Log the occurrence
|
||||
logger.warn(
|
||||
"Optimized file is larger than the original. Returning the original file instead.");
|
||||
|
||||
// Read the original file again
|
||||
pdfBytes = Files.readAllBytes(tempInputFile);
|
||||
}
|
||||
|
||||
// Clean up the temporary files
|
||||
Files.delete(tempInputFile);
|
||||
Files.delete(tempOutputFile);
|
||||
|
||||
// Return the optimized PDF as a response
|
||||
String outputFilename =
|
||||
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
|
||||
|
||||
@@ -84,6 +84,7 @@ public class ExtractImageScansController {
|
||||
// Load PDF document
|
||||
try (PDDocument document = Loader.loadPDF(form.getFileInput().getBytes())) {
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||
pdfRenderer.setSubsamplingAllowed(true);
|
||||
int pageCount = document.getNumberOfPages();
|
||||
images = new ArrayList<>();
|
||||
|
||||
|
||||
@@ -60,6 +60,7 @@ public class FakeScanControllerWIP {
|
||||
|
||||
PDDocument document = Loader.loadPDF(inputFile.getBytes());
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||
pdfRenderer.setSubsamplingAllowed(true);
|
||||
for (int page = 0; page < document.getNumberOfPages(); ++page) {
|
||||
BufferedImage image = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
|
||||
ImageIO.write(image, "png", new File("scanned-" + (page + 1) + ".png"));
|
||||
|
||||
@@ -12,7 +12,6 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.io.Resource;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.ModelAttribute;
|
||||
@@ -50,9 +49,6 @@ public class PipelineController {
|
||||
@PostMapping("/handleData")
|
||||
public ResponseEntity<byte[]> handleData(@ModelAttribute HandleDataRequest request)
|
||||
throws JsonMappingException, JsonProcessingException {
|
||||
if (!Boolean.TRUE.equals(applicationProperties.getSystem().getEnableAlphaFunctionality())) {
|
||||
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
|
||||
}
|
||||
|
||||
MultipartFile[] files = request.getFileInput();
|
||||
String jsonString = request.getJson();
|
||||
|
||||
@@ -83,6 +83,7 @@ public class RedactController {
|
||||
if (convertPDFToImage) {
|
||||
PDDocument imageDocument = new PDDocument();
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||
pdfRenderer.setSubsamplingAllowed(true);
|
||||
for (int page = 0; page < document.getNumberOfPages(); ++page) {
|
||||
BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
|
||||
PDPage newPage = new PDPage(new PDRectangle(bim.getWidth(), bim.getHeight()));
|
||||
|
||||
@@ -54,33 +54,32 @@ public class SanitizeController {
|
||||
boolean removeLinks = request.isRemoveLinks();
|
||||
boolean removeFonts = request.isRemoveFonts();
|
||||
|
||||
try (PDDocument document = Loader.loadPDF(inputFile.getBytes())) {
|
||||
if (removeJavaScript) {
|
||||
sanitizeJavaScript(document);
|
||||
}
|
||||
|
||||
if (removeEmbeddedFiles) {
|
||||
sanitizeEmbeddedFiles(document);
|
||||
}
|
||||
|
||||
if (removeMetadata) {
|
||||
sanitizeMetadata(document);
|
||||
}
|
||||
|
||||
if (removeLinks) {
|
||||
sanitizeLinks(document);
|
||||
}
|
||||
|
||||
if (removeFonts) {
|
||||
sanitizeFonts(document);
|
||||
}
|
||||
|
||||
return WebResponseUtils.pdfDocToWebResponse(
|
||||
document,
|
||||
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
|
||||
.replaceFirst("[.][^.]+$", "")
|
||||
+ "_sanitized.pdf");
|
||||
PDDocument document = Loader.loadPDF(inputFile.getBytes());
|
||||
if (removeJavaScript) {
|
||||
sanitizeJavaScript(document);
|
||||
}
|
||||
|
||||
if (removeEmbeddedFiles) {
|
||||
sanitizeEmbeddedFiles(document);
|
||||
}
|
||||
|
||||
if (removeMetadata) {
|
||||
sanitizeMetadata(document);
|
||||
}
|
||||
|
||||
if (removeLinks) {
|
||||
sanitizeLinks(document);
|
||||
}
|
||||
|
||||
if (removeFonts) {
|
||||
sanitizeFonts(document);
|
||||
}
|
||||
|
||||
return WebResponseUtils.pdfDocToWebResponse(
|
||||
document,
|
||||
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
|
||||
.replaceFirst("[.][^.]+$", "")
|
||||
+ "_sanitized.pdf");
|
||||
}
|
||||
|
||||
private void sanitizeJavaScript(PDDocument document) throws IOException {
|
||||
|
||||
@@ -136,6 +136,7 @@ public class GeneralUtils {
|
||||
int offset = oneBased ? 1 : 0;
|
||||
for (String page : pages) {
|
||||
if ("all".equalsIgnoreCase(page)) {
|
||||
|
||||
for (int i = 0; i < totalPages; i++) {
|
||||
result.add(i + offset);
|
||||
}
|
||||
|
||||
@@ -214,6 +214,7 @@ public class PdfUtils {
|
||||
throws IOException, Exception {
|
||||
try (PDDocument document = Loader.loadPDF(inputStream)) {
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||
pdfRenderer.setSubsamplingAllowed(true);
|
||||
int pageCount = document.getNumberOfPages();
|
||||
|
||||
// Create a ByteArrayOutputStream to save the image(s) to
|
||||
|
||||
@@ -189,7 +189,11 @@ public class ProcessExecutor {
|
||||
}
|
||||
}
|
||||
} else if (exitCode != 0) {
|
||||
throw new IOException("Command process failed with exit code " + exitCode);
|
||||
throw new IOException(
|
||||
"Command process failed with exit code "
|
||||
+ exitCode
|
||||
+ "\nLogs: "
|
||||
+ messages);
|
||||
}
|
||||
} finally {
|
||||
semaphore.release();
|
||||
|
||||
Reference in New Issue
Block a user