Number of fixes and making pipline LIVE ! (#907)

Closes #889 and #332
#710
#901
#885
This commit is contained in:
Anthony Stirling
2024-03-13 19:15:10 +00:00
committed by GitHub
parent ac620082ec
commit ae73595335
19 changed files with 359 additions and 220 deletions

View File

@@ -130,7 +130,7 @@ public class AutoRenameController {
// Sanitize the header string by removing characters not allowed in a filename.
if (header != null && header.length() < 255) {
header = header.replaceAll("[/\\\\?%*:|\"<>]", "");
header = header.replaceAll("[/\\\\?%*:|\"<>]", "").trim();
return WebResponseUtils.pdfDocToWebResponse(document, header + ".pdf");
} else {
logger.info("File has no good title to be found");

View File

@@ -58,7 +58,7 @@ public class AutoSplitPdfController {
PDDocument document = Loader.loadPDF(file.getBytes());
PDFRenderer pdfRenderer = new PDFRenderer(document);
pdfRenderer.setSubsamplingAllowed(true);
List<PDDocument> splitDocuments = new ArrayList<>();
List<ByteArrayOutputStream> splitDocumentsBoas = new ArrayList<>();

View File

@@ -59,7 +59,7 @@ public class BlankPageController {
List<Integer> pagesToKeepIndex = new ArrayList<>();
int pageIndex = 0;
PDFRenderer pdfRenderer = new PDFRenderer(document);
pdfRenderer.setSubsamplingAllowed(true);
for (PDPage page : pages) {
logger.info("checking page " + pageIndex);
textStripper.setStartPage(pageIndex + 1);

View File

@@ -2,9 +2,7 @@ package stirling.software.SPDF.controller.api.misc;
import java.awt.Image;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
@@ -75,194 +73,208 @@ public class CompressController {
long inputFileSize = Files.size(tempInputFile);
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
// Determine initial optimization level based on expected size reduction, only if in
// autoMode
if (autoMode) {
double sizeReductionRatio = expectedOutputSize / (double) inputFileSize;
if (sizeReductionRatio > 0.7) {
optimizeLevel = 1;
} else if (sizeReductionRatio > 0.5) {
optimizeLevel = 2;
} else if (sizeReductionRatio > 0.35) {
optimizeLevel = 3;
} else {
optimizeLevel = 3;
}
}
boolean sizeMet = false;
while (!sizeMet && optimizeLevel <= 4) {
// Prepare the Ghostscript command
List<String> command = new ArrayList<>();
command.add("gs");
command.add("-sDEVICE=pdfwrite");
command.add("-dCompatibilityLevel=1.4");
switch (optimizeLevel) {
case 1:
command.add("-dPDFSETTINGS=/prepress");
break;
case 2:
command.add("-dPDFSETTINGS=/printer");
break;
case 3:
command.add("-dPDFSETTINGS=/ebook");
break;
case 4:
command.add("-dPDFSETTINGS=/screen");
break;
default:
command.add("-dPDFSETTINGS=/default");
}
command.add("-dNOPAUSE");
command.add("-dQUIET");
command.add("-dBATCH");
command.add("-sOutputFile=" + tempOutputFile.toString());
command.add(tempInputFile.toString());
ProcessExecutorResult returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
.runCommandWithOutputHandling(command);
// Check if file size is within expected size or not auto mode so instantly finish
long outputFileSize = Files.size(tempOutputFile);
if (outputFileSize <= expectedOutputSize || !autoMode) {
sizeMet = true;
} else {
// Increase optimization level for next iteration
optimizeLevel++;
if (autoMode && optimizeLevel > 3) {
System.out.println("Skipping level 4 due to bad results in auto mode");
sizeMet = true;
} else if (optimizeLevel == 5) {
Path tempOutputFile = null;
byte[] pdfBytes;
try {
tempOutputFile = Files.createTempFile("output_", ".pdf");
// Determine initial optimization level based on expected size reduction, only if in
// autoMode
if (autoMode) {
double sizeReductionRatio = expectedOutputSize / (double) inputFileSize;
if (sizeReductionRatio > 0.7) {
optimizeLevel = 1;
} else if (sizeReductionRatio > 0.5) {
optimizeLevel = 2;
} else if (sizeReductionRatio > 0.35) {
optimizeLevel = 3;
} else {
System.out.println(
"Increasing ghostscript optimisation level to " + optimizeLevel);
optimizeLevel = 3;
}
}
}
if (expectedOutputSize != null && autoMode) {
long outputFileSize = Files.size(tempOutputFile);
if (outputFileSize > expectedOutputSize) {
try (PDDocument doc = Loader.loadPDF(new File(tempOutputFile.toString()))) {
long previousFileSize = 0;
double scaleFactor = 1.0;
while (true) {
for (PDPage page : doc.getPages()) {
PDResources res = page.getResources();
boolean sizeMet = false;
while (!sizeMet && optimizeLevel <= 4) {
// Prepare the Ghostscript command
List<String> command = new ArrayList<>();
command.add("gs");
command.add("-sDEVICE=pdfwrite");
command.add("-dCompatibilityLevel=1.4");
for (COSName name : res.getXObjectNames()) {
PDXObject xobj = res.getXObject(name);
if (xobj instanceof PDImageXObject) {
PDImageXObject image = (PDImageXObject) xobj;
switch (optimizeLevel) {
case 1:
command.add("-dPDFSETTINGS=/prepress");
break;
case 2:
command.add("-dPDFSETTINGS=/printer");
break;
case 3:
command.add("-dPDFSETTINGS=/ebook");
break;
case 4:
command.add("-dPDFSETTINGS=/screen");
break;
default:
command.add("-dPDFSETTINGS=/default");
}
// Get the image in BufferedImage format
BufferedImage bufferedImage = image.getImage();
command.add("-dNOPAUSE");
command.add("-dQUIET");
command.add("-dBATCH");
command.add("-sOutputFile=" + tempOutputFile.toString());
command.add(tempInputFile.toString());
// Calculate the new dimensions
int newWidth = (int) (bufferedImage.getWidth() * scaleFactor);
int newHeight = (int) (bufferedImage.getHeight() * scaleFactor);
ProcessExecutorResult returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
.runCommandWithOutputHandling(command);
// If the new dimensions are zero, skip this iteration
if (newWidth == 0 || newHeight == 0) {
continue;
// Check if file size is within expected size or not auto mode so instantly finish
long outputFileSize = Files.size(tempOutputFile);
if (outputFileSize <= expectedOutputSize || !autoMode) {
sizeMet = true;
} else {
// Increase optimization level for next iteration
optimizeLevel++;
if (autoMode && optimizeLevel > 4) {
System.out.println("Skipping level 5 due to bad results in auto mode");
sizeMet = true;
} else {
System.out.println(
"Increasing ghostscript optimisation level to " + optimizeLevel);
}
}
}
if (expectedOutputSize != null && autoMode) {
long outputFileSize = Files.size(tempOutputFile);
byte[] fileBytes = Files.readAllBytes(tempOutputFile);
if (outputFileSize > expectedOutputSize) {
try (PDDocument doc = Loader.loadPDF(fileBytes)) {
long previousFileSize = 0;
double scaleFactorConst = 0.9f;
double scaleFactor = 0.9f;
while (true) {
for (PDPage page : doc.getPages()) {
PDResources res = page.getResources();
if (res != null && res.getXObjectNames() != null) {
for (COSName name : res.getXObjectNames()) {
PDXObject xobj = res.getXObject(name);
if (xobj != null && xobj instanceof PDImageXObject) {
PDImageXObject image = (PDImageXObject) xobj;
// Get the image in BufferedImage format
BufferedImage bufferedImage = image.getImage();
// Calculate the new dimensions
int newWidth =
(int)
(bufferedImage.getWidth()
* scaleFactorConst);
int newHeight =
(int)
(bufferedImage.getHeight()
* scaleFactorConst);
// If the new dimensions are zero, skip this iteration
if (newWidth == 0 || newHeight == 0) {
continue;
}
// Otherwise, proceed with the scaling
Image scaledImage =
bufferedImage.getScaledInstance(
newWidth,
newHeight,
Image.SCALE_SMOOTH);
// Convert the scaled image back to a BufferedImage
BufferedImage scaledBufferedImage =
new BufferedImage(
newWidth,
newHeight,
BufferedImage.TYPE_INT_RGB);
scaledBufferedImage
.getGraphics()
.drawImage(scaledImage, 0, 0, null);
// Compress the scaled image
ByteArrayOutputStream compressedImageStream =
new ByteArrayOutputStream();
ImageIO.write(
scaledBufferedImage,
"jpeg",
compressedImageStream);
byte[] imageBytes = compressedImageStream.toByteArray();
compressedImageStream.close();
PDImageXObject compressedImage =
PDImageXObject.createFromByteArray(
doc,
imageBytes,
image.getCOSObject().toString());
// Replace the image in the resources with the
// compressed
// version
res.put(name, compressedImage);
}
}
// Otherwise, proceed with the scaling
Image scaledImage =
bufferedImage.getScaledInstance(
newWidth, newHeight, Image.SCALE_SMOOTH);
// Convert the scaled image back to a BufferedImage
BufferedImage scaledBufferedImage =
new BufferedImage(
newWidth,
newHeight,
BufferedImage.TYPE_INT_RGB);
scaledBufferedImage
.getGraphics()
.drawImage(scaledImage, 0, 0, null);
// Compress the scaled image
ByteArrayOutputStream compressedImageStream =
new ByteArrayOutputStream();
ImageIO.write(
scaledBufferedImage, "jpeg", compressedImageStream);
byte[] imageBytes = compressedImageStream.toByteArray();
compressedImageStream.close();
// Convert compressed image back to PDImageXObject
ByteArrayInputStream bais =
new ByteArrayInputStream(imageBytes);
PDImageXObject compressedImage =
PDImageXObject.createFromByteArray(
doc,
imageBytes,
image.getCOSObject().toString());
// Replace the image in the resources with the compressed
// version
res.put(name, compressedImage);
}
}
}
// save the document to tempOutputFile again
doc.save(tempOutputFile.toString());
// save the document to tempOutputFile again
doc.save(tempOutputFile.toString());
long currentSize = Files.size(tempOutputFile);
// Check if the overall PDF size is still larger than expectedOutputSize
if (currentSize > expectedOutputSize) {
// Log the current file size and scaleFactor
long currentSize = Files.size(tempOutputFile);
// Check if the overall PDF size is still larger than expectedOutputSize
if (currentSize > expectedOutputSize) {
// Log the current file size and scaleFactor
System.out.println(
"Current file size: "
+ FileUtils.byteCountToDisplaySize(currentSize));
System.out.println("Current scale factor: " + scaleFactor);
System.out.println(
"Current file size: "
+ FileUtils.byteCountToDisplaySize(currentSize));
System.out.println("Current scale factor: " + scaleFactor);
// The file is still too large, reduce scaleFactor and try again
scaleFactor *= 0.9; // reduce scaleFactor by 10%
// Avoid scaleFactor being too small, causing the image to shrink to 0
if (scaleFactor < 0.2 || previousFileSize == currentSize) {
throw new RuntimeException(
"Could not reach the desired size without excessively degrading image quality, lowest size recommended is "
+ FileUtils.byteCountToDisplaySize(currentSize)
+ ", "
+ currentSize
+ " bytes");
// The file is still too large, reduce scaleFactor and try again
scaleFactor *= 0.9f; // reduce scaleFactor by 10%
// Avoid scaleFactor being too small, causing the image to shrink to
// 0
if (scaleFactor < 0.2f || previousFileSize == currentSize) {
throw new RuntimeException(
"Could not reach the desired size without excessively degrading image quality, lowest size recommended is "
+ FileUtils.byteCountToDisplaySize(currentSize)
+ ", "
+ currentSize
+ " bytes");
}
previousFileSize = currentSize;
} else {
// The file is small enough, break the loop
break;
}
previousFileSize = currentSize;
} else {
// The file is small enough, break the loop
break;
}
}
}
}
// Read the optimized PDF file
pdfBytes = Files.readAllBytes(tempOutputFile);
// Check if optimized file is larger than the original
if (pdfBytes.length > inputFileSize) {
// Log the occurrence
logger.warn(
"Optimized file is larger than the original. Returning the original file instead.");
// Read the original file again
pdfBytes = Files.readAllBytes(tempInputFile);
}
} finally {
// Clean up the temporary files
Files.delete(tempInputFile);
Files.delete(tempOutputFile);
}
// Read the optimized PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Check if optimized file is larger than the original
if (pdfBytes.length > inputFileSize) {
// Log the occurrence
logger.warn(
"Optimized file is larger than the original. Returning the original file instead.");
// Read the original file again
pdfBytes = Files.readAllBytes(tempInputFile);
}
// Clean up the temporary files
Files.delete(tempInputFile);
Files.delete(tempOutputFile);
// Return the optimized PDF as a response
String outputFilename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())

View File

@@ -84,6 +84,7 @@ public class ExtractImageScansController {
// Load PDF document
try (PDDocument document = Loader.loadPDF(form.getFileInput().getBytes())) {
PDFRenderer pdfRenderer = new PDFRenderer(document);
pdfRenderer.setSubsamplingAllowed(true);
int pageCount = document.getNumberOfPages();
images = new ArrayList<>();

View File

@@ -60,6 +60,7 @@ public class FakeScanControllerWIP {
PDDocument document = Loader.loadPDF(inputFile.getBytes());
PDFRenderer pdfRenderer = new PDFRenderer(document);
pdfRenderer.setSubsamplingAllowed(true);
for (int page = 0; page < document.getNumberOfPages(); ++page) {
BufferedImage image = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
ImageIO.write(image, "png", new File("scanned-" + (page + 1) + ".png"));

View File

@@ -12,7 +12,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.Resource;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
@@ -50,9 +49,6 @@ public class PipelineController {
@PostMapping("/handleData")
public ResponseEntity<byte[]> handleData(@ModelAttribute HandleDataRequest request)
throws JsonMappingException, JsonProcessingException {
if (!Boolean.TRUE.equals(applicationProperties.getSystem().getEnableAlphaFunctionality())) {
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
}
MultipartFile[] files = request.getFileInput();
String jsonString = request.getJson();

View File

@@ -83,6 +83,7 @@ public class RedactController {
if (convertPDFToImage) {
PDDocument imageDocument = new PDDocument();
PDFRenderer pdfRenderer = new PDFRenderer(document);
pdfRenderer.setSubsamplingAllowed(true);
for (int page = 0; page < document.getNumberOfPages(); ++page) {
BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
PDPage newPage = new PDPage(new PDRectangle(bim.getWidth(), bim.getHeight()));

View File

@@ -54,33 +54,32 @@ public class SanitizeController {
boolean removeLinks = request.isRemoveLinks();
boolean removeFonts = request.isRemoveFonts();
try (PDDocument document = Loader.loadPDF(inputFile.getBytes())) {
if (removeJavaScript) {
sanitizeJavaScript(document);
}
if (removeEmbeddedFiles) {
sanitizeEmbeddedFiles(document);
}
if (removeMetadata) {
sanitizeMetadata(document);
}
if (removeLinks) {
sanitizeLinks(document);
}
if (removeFonts) {
sanitizeFonts(document);
}
return WebResponseUtils.pdfDocToWebResponse(
document,
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_sanitized.pdf");
PDDocument document = Loader.loadPDF(inputFile.getBytes());
if (removeJavaScript) {
sanitizeJavaScript(document);
}
if (removeEmbeddedFiles) {
sanitizeEmbeddedFiles(document);
}
if (removeMetadata) {
sanitizeMetadata(document);
}
if (removeLinks) {
sanitizeLinks(document);
}
if (removeFonts) {
sanitizeFonts(document);
}
return WebResponseUtils.pdfDocToWebResponse(
document,
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_sanitized.pdf");
}
private void sanitizeJavaScript(PDDocument document) throws IOException {

View File

@@ -136,6 +136,7 @@ public class GeneralUtils {
int offset = oneBased ? 1 : 0;
for (String page : pages) {
if ("all".equalsIgnoreCase(page)) {
for (int i = 0; i < totalPages; i++) {
result.add(i + offset);
}

View File

@@ -214,6 +214,7 @@ public class PdfUtils {
throws IOException, Exception {
try (PDDocument document = Loader.loadPDF(inputStream)) {
PDFRenderer pdfRenderer = new PDFRenderer(document);
pdfRenderer.setSubsamplingAllowed(true);
int pageCount = document.getNumberOfPages();
// Create a ByteArrayOutputStream to save the image(s) to

View File

@@ -189,7 +189,11 @@ public class ProcessExecutor {
}
}
} else if (exitCode != 0) {
throw new IOException("Command process failed with exit code " + exitCode);
throw new IOException(
"Command process failed with exit code "
+ exitCode
+ "\nLogs: "
+ messages);
}
} finally {
semaphore.release();

View File

@@ -1,11 +0,0 @@
fileToPDF.fileTypesList=Microsoft Word: (DOC, DOCX, DOT, DOTX) <br> \
Microsoft Excel: (CSV, XLS, XLSX, XLT, XLTX, SLK, DIF) <br> \
Microsoft PowerPoint: (PPT, PPTX) <br> \
OpenDocument Formats: (ODT, OTT, ODS, OTS, ODP, OTP, ODG, OTG) <br> \
Plain Text: (TXT, TEXT, XML) <br> \
Rich Text Format: (RTF) <br> \
Images: (BMP, GIF, JPEG, PNG, TIF, PBM, PGM, PPM, RAS, XBM, XPM, SVG, SVM, WMF) <br> \
HTML: (HTML) <br> \
Lotus Word Pro: (LWP) <br> \
StarOffice formats: (SDA, SDC, SDD, SDW, STC, STD, STI, STW, SXD, SXG, SXI, SXW) <br> \
Other formats: (DBF, FODS, VSD, VOR, VOR3, VOR4, UOP, PCT, PS, PDF)

View File

@@ -22,7 +22,17 @@
</form>
<p class="mt-3" th:text="#{fileToPDF.credit}"></p>
<p class="mt-3" th:text="#{fileToPDF.supportedFileTypes}"></p>
<p th:utext="#{fileToPDF.fileTypesList}"></p>
<p>Microsoft Word: (DOC, DOCX, DOT, DOTX)</p>
<p>Microsoft Excel: (CSV, XLS, XLSX, XLT, XLTX, SLK, DIF)</p>
<p>Microsoft PowerPoint: (PPT, PPTX)</p>
<p>OpenDocument Formats: (ODT, OTT, ODS, OTS, ODP, OTP, ODG, OTG)</p>
<p>Plain Text: (TXT, TEXT, XML)</p>
<p>Rich Text Format: (RTF)</p>
<p>Images: (BMP, GIF, JPEG, PNG, TIF, PBM, PGM, PPM, RAS, XBM, XPM, SVG, SVM, WMF)</p>
<p>HTML: (HTML)</p>
<p>Lotus Word Pro: (LWP)</p>
<p>StarOffice: (SDA, SDC, SDD, SDW, STC, STD, STI, STW, SXD, SXG, SXI, SXW)</p>
<p>Other: (DBF, FODS, VSD, VOR, VOR3, VOR4, UOP, PCT, PS, PDF)</p>
<a href="https://help.libreoffice.org/latest/en-US/text/shared/guide/supported_formats.html">https://help.libreoffice.org/latest/en-US/text/shared/guide/supported_formats.html</a>
</div>
</div>

View File

@@ -22,8 +22,8 @@
<span class="icon-text" th:text="#{home.multiTool.title}"></span>
</a>
</li>
<li th:if="${@enableAlphaFunctionality}" class="nav-item nav-item-separator"></li>
<li th:if="${@enableAlphaFunctionality}" class="nav-item">
<li class="nav-item nav-item-separator"></li>
<li class="nav-item">
<a class="nav-link" href="#" th:href="@{pipeline}" th:classappend="${currentPage}=='pipeline' ? 'active' : ''" th:title="#{home.pipeline.desc}">
<img class="icon" src="images/pipeline.svg" alt="icon">
<span class="icon-text" th:text="#{home.pipeline.title}"></span>

View File

@@ -23,9 +23,8 @@
<br>
<input type="text" id="searchBar" onkeyup="filterCards()" th:placeholder="#{home.searchBar}">
<div class="features-container">
<th:block th:if="${@enableAlphaFunctionality}">
<div th:replace="~{fragments/card :: card(id='pipeline', cardTitle=#{home.pipeline.title}, cardText=#{home.pipeline.desc}, cardLink='pipeline', svgPath='images/pipeline.svg', tags=#{pipeline.tags})}"></div>
</th:block>
<div th:replace="~{fragments/card :: card(id='view-pdf', cardTitle=#{home.viewPdf.title}, cardText=#{home.viewPdf.desc}, cardLink='view-pdf', svgPath='images/book-opened.svg', tags=#{viewPdf.tags})}"></div>
<div th:replace="~{fragments/card :: card(id='multi-tool', cardTitle=#{home.multiTool.title}, cardText=#{home.multiTool.desc}, cardLink='multi-tool', svgPath='images/tools.svg', tags=#{multiTool.tags})}"></div>
<div th:replace="~{fragments/card :: card(id='merge-pdfs', cardTitle=#{home.merge.title}, cardText=#{home.merge.desc}, cardLink='merge-pdfs', svgPath='images/union.svg', tags=#{merge.tags})}"></div>