format and move everything, other in own folder

This commit is contained in:
Anthony Stirling
2023-04-22 12:51:01 +01:00
parent af6cd2e38b
commit 78d3fd3768
33 changed files with 702 additions and 763 deletions

View File

@@ -28,5 +28,5 @@ public class ErrorUtils {
modelAndView.addObject("stackTrace", stackTrace);
return modelAndView;
}
}

View File

@@ -1,4 +1,5 @@
package stirling.software.SPDF.utils;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
@@ -19,9 +20,9 @@ import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.multipart.MultipartFile;
public class PDFToFile {
public ResponseEntity<byte[]> processPdfToOfficeFormat(MultipartFile inputFile, String outputFormat, String libreOfficeFilter)
throws IOException, InterruptedException {
public ResponseEntity<byte[]> processPdfToOfficeFormat(MultipartFile inputFile, String outputFormat, String libreOfficeFilter) throws IOException, InterruptedException {
if (!"application/pdf".equals(inputFile.getContentType())) {
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
@@ -32,11 +33,11 @@ public class PDFToFile {
String pdfBaseName = originalPdfFileName.substring(0, originalPdfFileName.lastIndexOf('.'));
// Validate output format
List<String> allowedFormats = Arrays.asList("doc", "docx", "odt", "ppt", "pptx", "odp", "rtf", "html","xml","txt:Text");
List<String> allowedFormats = Arrays.asList("doc", "docx", "odt", "ppt", "pptx", "odp", "rtf", "html", "xml", "txt:Text");
if (!allowedFormats.contains(outputFormat)) {
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
}
Path tempInputFile = null;
Path tempOutputDir = null;
byte[] fileBytes;
@@ -52,9 +53,8 @@ public class PDFToFile {
tempOutputDir = Files.createTempDirectory("output_");
// Run the LibreOffice command
List<String> command = new ArrayList<>(Arrays.asList(
"soffice", "--infilter=" + libreOfficeFilter, "--convert-to", outputFormat, "--outdir", tempOutputDir.toString(), tempInputFile.toString()
));
List<String> command = new ArrayList<>(
Arrays.asList("soffice", "--infilter=" + libreOfficeFilter, "--convert-to", outputFormat, "--outdir", tempOutputDir.toString(), tempInputFile.toString()));
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
// Get output files
@@ -64,8 +64,8 @@ public class PDFToFile {
// Return single output file
File outputFile = outputFiles.get(0);
headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
if(outputFormat.equals("txt:Text")) {
outputFormat="txt";
if (outputFormat.equals("txt:Text")) {
outputFormat = "txt";
}
headers.setContentDispositionFormData("attachment", pdfBaseName + "." + outputFormat);
fileBytes = FileUtils.readFileToByteArray(outputFile);

View File

@@ -9,6 +9,12 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.security.KeyPair;
import java.security.KeyStore;
import java.security.PrivateKey;
import java.security.PublicKey;
import java.security.cert.Certificate;
import java.security.cert.X509Certificate;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -31,18 +37,79 @@ import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.multipart.MultipartFile;
import java.io.InputStream;
import java.security.KeyPair;
import java.security.KeyStore;
import java.security.PrivateKey;
import java.security.PublicKey;
import java.security.cert.Certificate;
import java.security.cert.X509Certificate;
public class PdfUtils {
private static final Logger logger = LoggerFactory.getLogger(PdfUtils.class);
/**
 * Wraps the bytes accumulated in an in-memory output stream in a downloadable HTTP response.
 *
 * @param baos    stream whose current contents become the response body
 * @param docName file name passed on to {@code bytesToWebResponse} for the attachment header
 * @return the response built by {@code bytesToWebResponse}
 * @throws IOException declared for callers; propagated from the delegate
 */
public static ResponseEntity<byte[]> boasToWebResponse(ByteArrayOutputStream baos, String docName) throws IOException {
    final byte[] payload = baos.toByteArray();
    return PdfUtils.bytesToWebResponse(payload, docName);
}
/**
 * Builds an HTTP 200 response that delivers the given bytes as a PDF attachment.
 *
 * @param bytes   response body; its length is reported as the content length
 * @param docName file name used in the "attachment" content disposition
 * @return a response carrying the bytes, PDF content type, content length and disposition headers
 * @throws IOException declared for callers of this utility
 */
public static ResponseEntity<byte[]> bytesToWebResponse(byte[] bytes, String docName) throws IOException {
    final HttpHeaders responseHeaders = new HttpHeaders();
    // Describe the payload: a PDF of known size, offered to the client as a download
    responseHeaders.setContentType(MediaType.APPLICATION_PDF);
    responseHeaders.setContentLength(bytes.length);
    responseHeaders.setContentDispositionFormData("attachment", docName);
    final ResponseEntity<byte[]> response = new ResponseEntity<>(bytes, responseHeaders, HttpStatus.OK);
    return response;
}
/**
 * Renders every page of a PDF to an image and returns the encoded result.
 *
 * <p>When {@code singleImage} is true, all pages are stacked vertically into one image that is
 * encoded as {@code imageType}. Otherwise each page is encoded separately and the images are
 * returned as a zip archive whose entries are named {@code page_<n>.<imageType in lower case>}.
 *
 * @param inputStream the raw bytes of the PDF document
 * @param imageType   format name handed to {@link ImageIO#write}; also used as the zip entry extension
 * @param colorType   image type passed to the renderer for each page
 * @param singleImage whether to combine all pages into one image instead of zipping them
 * @param DPI         resolution at which each page is rendered
 * @return the encoded combined image, or a zip archive of the per-page images
 * @throws IOException if the PDF cannot be loaded, has no pages to combine, or an image cannot be written
 * @throws Exception   kept in the signature for backward compatibility with existing callers
 */
public static byte[] convertFromPdf(byte[] inputStream, String imageType, ImageType colorType, boolean singleImage, int DPI) throws IOException, Exception {
    try (PDDocument document = PDDocument.load(new ByteArrayInputStream(inputStream))) {
        PDFRenderer pdfRenderer = new PDFRenderer(document);
        int pageCount = document.getNumberOfPages();
        List<BufferedImage> images = new ArrayList<>(pageCount);

        // Render all pages at the caller-supplied resolution (this used to be hard-coded to 300)
        for (int i = 0; i < pageCount; i++) {
            images.add(pdfRenderer.renderImageWithDPI(i, DPI, colorType));
        }

        // Create a ByteArrayOutputStream to save the image(s) to
        ByteArrayOutputStream baos = new ByteArrayOutputStream();

        if (singleImage) {
            if (images.isEmpty()) {
                throw new IOException("Cannot combine pages into a single image: the PDF has no pages");
            }

            // Size the canvas for the widest page and the sum of all page heights so that
            // documents with mixed page sizes are neither clipped nor overlapped
            int combinedWidth = 0;
            int combinedHeight = 0;
            for (BufferedImage image : images) {
                combinedWidth = Math.max(combinedWidth, image.getWidth());
                combinedHeight += image.getHeight();
            }

            BufferedImage combined = new BufferedImage(combinedWidth, combinedHeight, BufferedImage.TYPE_INT_RGB);
            Graphics g = combined.getGraphics();
            try {
                int offsetY = 0;
                for (BufferedImage image : images) {
                    g.drawImage(image, 0, offsetY, null);
                    offsetY += image.getHeight();
                }
            } finally {
                // Release the native resources held by the graphics context
                g.dispose();
            }

            // Write the combined image to the output stream
            ImageIO.write(combined, imageType, baos);
            logger.info("Image successfully written to byte array");
        } else {
            // Zip the images and return as byte array
            try (ZipOutputStream zos = new ZipOutputStream(baos)) {
                for (int i = 0; i < images.size(); i++) {
                    BufferedImage image = images.get(i);
                    try (ByteArrayOutputStream baosImage = new ByteArrayOutputStream()) {
                        ImageIO.write(image, imageType, baosImage);

                        // Add the image to the zip file
                        zos.putNextEntry(new ZipEntry(String.format("page_%d.%s", i + 1, imageType.toLowerCase())));
                        zos.write(baosImage.toByteArray());
                        zos.closeEntry();
                    }
                }
                logger.info("Images successfully written to byte array as a zip");
            }
        }
        return baos.toByteArray();
    } catch (IOException e) {
        // Log an error message if there is an issue converting the PDF to an image
        logger.error("Error converting PDF to image", e);
        throw e;
    }
}
public static byte[] imageToPdf(MultipartFile[] files, boolean stretchToFit, boolean autoRotate) throws IOException {
try (PDDocument doc = new PDDocument()) {
for (MultipartFile file : files) {
@@ -73,7 +140,8 @@ public class PdfUtils {
float pageHeight = page.getMediaBox().getHeight();
if (autoRotate && ((image.getWidth() > image.getHeight() && pageHeight > pageWidth) || (image.getWidth() < image.getHeight() && pageWidth > pageHeight))) {
// Rotate the page 90 degrees if the image better fits the page in landscape orientation
// Rotate the page 90 degrees if the image better fits the page in landscape
// orientation
page.setRotation(90);
pageWidth = page.getMediaBox().getHeight();
pageHeight = page.getMediaBox().getWidth();
@@ -136,123 +204,21 @@ public class PdfUtils {
}
public static X509Certificate[] loadCertificateChainFromKeystore(InputStream keystoreInputStream, String keystorePassword) throws Exception {
KeyStore keystore = KeyStore.getInstance(KeyStore.getDefaultType());
keystore.load(keystoreInputStream, keystorePassword.toCharArray());
public static byte[] convertFromPdf(byte[] inputStream, String imageType, ImageType colorType, boolean singleImage, int DPI)
throws IOException, Exception {
try (PDDocument document = PDDocument.load(new ByteArrayInputStream(inputStream))) {
PDFRenderer pdfRenderer = new PDFRenderer(document);
int pageCount = document.getNumberOfPages();
List<BufferedImage> images = new ArrayList<>();
String alias = keystore.aliases().nextElement();
Certificate[] certChain = keystore.getCertificateChain(alias);
X509Certificate[] x509CertChain = new X509Certificate[certChain.length];
// Create images of all pages
for (int i = 0; i < pageCount; i++) {
images.add(pdfRenderer.renderImageWithDPI(i, 300, colorType));
}
if (singleImage) {
// Combine all images into a single big image
BufferedImage combined = new BufferedImage(images.get(0).getWidth(), images.get(0).getHeight() * pageCount, BufferedImage.TYPE_INT_RGB);
Graphics g = combined.getGraphics();
for (int i = 0; i < images.size(); i++) {
g.drawImage(images.get(i), 0, i * images.get(0).getHeight(), null);
}
images = Arrays.asList(combined);
}
// Create a ByteArrayOutputStream to save the image(s) to
ByteArrayOutputStream baos = new ByteArrayOutputStream();
if (singleImage) {
// Write the image to the output stream
ImageIO.write(images.get(0), imageType, baos);
// Log that the image was successfully written to the byte array
logger.info("Image successfully written to byte array");
} else {
// Zip the images and return as byte array
try (ZipOutputStream zos = new ZipOutputStream(baos)) {
for (int i = 0; i < images.size(); i++) {
BufferedImage image = images.get(i);
try (ByteArrayOutputStream baosImage = new ByteArrayOutputStream()) {
ImageIO.write(image, imageType, baosImage);
// Add the image to the zip file
zos.putNextEntry(new ZipEntry(String.format("page_%d.%s", i + 1, imageType.toLowerCase())));
zos.write(baosImage.toByteArray());
}
}
// Log that the images were successfully written to the byte array
logger.info("Images successfully written to byte array as a zip");
}
}
return baos.toByteArray();
} catch (IOException e) {
// Log an error message if there is an issue converting the PDF to an image
logger.error("Error converting PDF to image", e);
throw e;
}
}
public static byte[] overlayImage(byte[] pdfBytes, byte[] imageBytes, float x, float y, boolean everyPage) throws IOException {
PDDocument document = PDDocument.load(new ByteArrayInputStream(pdfBytes));
// Get the first page of the PDF
int pages = document.getNumberOfPages();
for (int i = 0; i < pages; i++) {
PDPage page = document.getPage(i);
try (PDPageContentStream contentStream = new PDPageContentStream(document, page, PDPageContentStream.AppendMode.APPEND, true)) {
// Create an image object from the image bytes
PDImageXObject image = PDImageXObject.createFromByteArray(document, imageBytes, "");
// Draw the image onto the page at the specified x and y coordinates
contentStream.drawImage(image, x, y);
logger.info("Image successfully overlayed onto PDF");
if (everyPage == false && i == 0) {
break;
}
} catch (IOException e) {
// Log an error message if there is an issue overlaying the image onto the PDF
logger.error("Error overlaying image onto PDF", e);
throw e;
}
}
// Create a ByteArrayOutputStream to save the PDF to
ByteArrayOutputStream baos = new ByteArrayOutputStream();
document.save(baos);
logger.info("PDF successfully saved to byte array");
return baos.toByteArray();
for (int i = 0; i < certChain.length; i++) {
x509CertChain[i] = (X509Certificate) certChain[i];
}
public static ResponseEntity<byte[]> pdfDocToWebResponse(PDDocument document, String docName) throws IOException {
// Open Byte Array and save document to it
ByteArrayOutputStream baos = new ByteArrayOutputStream();
document.save(baos);
// Close the document
document.close();
return PdfUtils.boasToWebResponse(baos, docName);
return x509CertChain;
}
/**
 * Wraps the bytes accumulated in an in-memory output stream in a downloadable HTTP response.
 *
 * @param baos    stream whose current contents become the response body
 * @param docName file name passed on to {@code bytesToWebResponse} for the attachment header
 * @return the response built by {@code bytesToWebResponse}
 * @throws IOException declared for callers; propagated from the delegate
 */
public static ResponseEntity<byte[]> boasToWebResponse(ByteArrayOutputStream baos, String docName) throws IOException {
    final byte[] payload = baos.toByteArray();
    return PdfUtils.bytesToWebResponse(payload, docName);
}
/**
 * Builds an HTTP 200 response that delivers the given bytes as a PDF attachment.
 *
 * @param bytes   response body; its length is reported as the content length
 * @param docName file name used in the "attachment" content disposition
 * @return a response carrying the bytes, PDF content type, content length and disposition headers
 * @throws IOException declared for callers of this utility
 */
public static ResponseEntity<byte[]> bytesToWebResponse(byte[] bytes, String docName) throws IOException {
    final HttpHeaders responseHeaders = new HttpHeaders();
    // Describe the payload: a PDF of known size, offered to the client as a download
    responseHeaders.setContentType(MediaType.APPLICATION_PDF);
    responseHeaders.setContentLength(bytes.length);
    responseHeaders.setContentDispositionFormData("attachment", docName);
    final ResponseEntity<byte[]> response = new ResponseEntity<>(bytes, responseHeaders, HttpStatus.OK);
    return response;
}
public static KeyPair loadKeyPairFromKeystore(InputStream keystoreInputStream, String keystorePassword) throws Exception {
KeyStore keystore = KeyStore.getInstance(KeyStore.getDefaultType());
keystore.load(keystoreInputStream, keystorePassword.toCharArray());
@@ -265,18 +231,45 @@ public class PdfUtils {
return new KeyPair(publicKey, privateKey);
}
public static X509Certificate[] loadCertificateChainFromKeystore(InputStream keystoreInputStream, String keystorePassword) throws Exception {
KeyStore keystore = KeyStore.getInstance(KeyStore.getDefaultType());
keystore.load(keystoreInputStream, keystorePassword.toCharArray());
public static byte[] overlayImage(byte[] pdfBytes, byte[] imageBytes, float x, float y, boolean everyPage) throws IOException {
PDDocument document = PDDocument.load(new ByteArrayInputStream(pdfBytes));
// Get the first page of the PDF
int pages = document.getNumberOfPages();
for (int i = 0; i < pages; i++) {
PDPage page = document.getPage(i);
try (PDPageContentStream contentStream = new PDPageContentStream(document, page, PDPageContentStream.AppendMode.APPEND, true)) {
// Create an image object from the image bytes
PDImageXObject image = PDImageXObject.createFromByteArray(document, imageBytes, "");
// Draw the image onto the page at the specified x and y coordinates
contentStream.drawImage(image, x, y);
logger.info("Image successfully overlayed onto PDF");
if (!everyPage && i == 0) {
break;
}
} catch (IOException e) {
// Log an error message if there is an issue overlaying the image onto the PDF
logger.error("Error overlaying image onto PDF", e);
throw e;
}
String alias = keystore.aliases().nextElement();
Certificate[] certChain = keystore.getCertificateChain(alias);
X509Certificate[] x509CertChain = new X509Certificate[certChain.length];
for (int i = 0; i < certChain.length; i++) {
x509CertChain[i] = (X509Certificate) certChain[i];
}
// Create a ByteArrayOutputStream to save the PDF to
ByteArrayOutputStream baos = new ByteArrayOutputStream();
document.save(baos);
logger.info("PDF successfully saved to byte array");
return baos.toByteArray();
}
return x509CertChain;
/**
 * Serializes a PDF document into memory, closes it, and returns it as a downloadable response.
 *
 * <p>The document is always closed by this method, including when saving fails, so callers
 * must not use it afterwards.
 *
 * @param document the document to save; closed before this method returns or throws
 * @param docName  file name forwarded to {@code boasToWebResponse}
 * @return the response built by {@code boasToWebResponse} from the saved bytes
 * @throws IOException if saving or closing the document fails
 */
public static ResponseEntity<byte[]> pdfDocToWebResponse(PDDocument document, String docName) throws IOException {
    // Open Byte Array and save document to it
    ByteArrayOutputStream baos = new ByteArrayOutputStream();

    // try-with-resources guarantees the document is released even if save() throws
    try (PDDocument closing = document) {
        closing.save(baos);
    }

    return PdfUtils.boasToWebResponse(baos, docName);
}
}

View File

@@ -9,15 +9,24 @@ import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Semaphore;
public class ProcessExecutor {
public enum Processes {
LIBRE_OFFICE,
OCR_MY_PDF
public enum Processes {
LIBRE_OFFICE, OCR_MY_PDF
}
private static final Map<Processes, ProcessExecutor> instances = new ConcurrentHashMap<>();
private static final Map<Processes, ProcessExecutor> instances = new ConcurrentHashMap<>();
/**
 * Returns the executor registered for the given process type, creating it on first request.
 *
 * <p>A newly created executor is constructed with a limit of 1 for {@code LIBRE_OFFICE}
 * and 2 for {@code OCR_MY_PDF}.
 *
 * @param processType the kind of external process the executor will run
 * @return the executor stored in the instance map for that process type
 */
public static ProcessExecutor getInstance(Processes processType) {
    return instances.computeIfAbsent(processType, type -> new ProcessExecutor(switch (type) {
        case LIBRE_OFFICE -> 1;
        case OCR_MY_PDF -> 2;
    }));
}
private final Semaphore semaphore;
@@ -25,78 +34,67 @@ public class ProcessExecutor {
this.semaphore = new Semaphore(semaphoreLimit);
}
public static ProcessExecutor getInstance(Processes processType) {
return instances.computeIfAbsent(processType, key -> {
int semaphoreLimit = switch (key) {
case LIBRE_OFFICE -> 1;
case OCR_MY_PDF -> 2;
};
return new ProcessExecutor(semaphoreLimit);
});
/**
 * Runs an external command while holding one permit of this executor's semaphore,
 * echoing the command's standard output and standard error to {@code System.out}.
 *
 * <p>Both streams are drained on separate threads while the process runs. A non-zero exit
 * code is only turned into an exception when the process also wrote to standard error;
 * otherwise the exit code is returned to the caller.
 *
 * @param command the program and its arguments, as accepted by {@link ProcessBuilder}
 * @return the exit code of the process
 * @throws IOException          if the process cannot be started, or it exits with a non-zero
 *                              code after writing to standard error
 * @throws InterruptedException if the calling thread is interrupted while waiting for a permit,
 *                              for the process, or for the reader threads; the process is
 *                              destroyed in the latter two cases
 */
public int runCommandWithOutputHandling(List<String> command) throws IOException, InterruptedException {
    int exitCode = 1;
    semaphore.acquire();
    try {
        // println (not print) so the banner does not run into the command's own output
        System.out.println("Running command: " + String.join(" ", command));
        ProcessBuilder processBuilder = new ProcessBuilder(command);
        Process process = processBuilder.start();

        // Read the error stream and standard output stream concurrently.
        // Each list is written by exactly one reader thread and only read after join().
        List<String> errorLines = new ArrayList<>();
        List<String> outputLines = new ArrayList<>();

        Thread errorReaderThread = new Thread(() -> {
            try (BufferedReader errorReader = new BufferedReader(new InputStreamReader(process.getErrorStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = errorReader.readLine()) != null) {
                    errorLines.add(line);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        });

        Thread outputReaderThread = new Thread(() -> {
            try (BufferedReader outputReader = new BufferedReader(new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = outputReader.readLine()) != null) {
                    outputLines.add(line);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        });

        errorReaderThread.start();
        outputReaderThread.start();

        try {
            // Wait for the conversion process to complete
            exitCode = process.waitFor();

            // Wait for the reader threads to finish
            errorReaderThread.join();
            outputReaderThread.join();
        } catch (InterruptedException e) {
            // Do not leave an orphaned child process behind when the caller gives up waiting
            process.destroy();
            throw e;
        }

        if (!outputLines.isEmpty()) {
            String outputMessage = String.join("\n", outputLines);
            System.out.println("Command output:\n" + outputMessage);
        }

        if (!errorLines.isEmpty()) {
            String errorMessage = String.join("\n", errorLines);
            System.out.println("Command error output:\n" + errorMessage);
            if (exitCode != 0) {
                throw new IOException("Command process failed with exit code " + exitCode + ". Error message: " + errorMessage);
            }
        }
    } finally {
        semaphore.release();
    }
    return exitCode;
}
/**
 * Runs an external command while holding one permit of this executor's semaphore,
 * echoing the command's standard output and standard error to {@code System.out}.
 *
 * <p>Both streams are drained on separate threads while the process runs. A non-zero exit
 * code is only turned into an exception when the process also wrote to standard error;
 * otherwise the exit code is returned to the caller.
 *
 * @param command the program and its arguments, as accepted by {@link ProcessBuilder}
 * @return the exit code of the process
 * @throws IOException          if the process cannot be started, or it exits with a non-zero
 *                              code after writing to standard error
 * @throws InterruptedException if the calling thread is interrupted while waiting for a permit,
 *                              for the process, or for the reader threads; the process is
 *                              destroyed in the latter two cases
 */
public int runCommandWithOutputHandling(List<String> command) throws IOException, InterruptedException {
    int exitCode = 1;
    semaphore.acquire();
    try {
        // println (not print) so the banner does not run into the command's own output
        System.out.println("Running command: " + String.join(" ", command));
        ProcessBuilder processBuilder = new ProcessBuilder(command);
        Process process = processBuilder.start();

        // Read the error stream and standard output stream concurrently.
        // Each list is written by exactly one reader thread and only read after join().
        List<String> errorLines = new ArrayList<>();
        List<String> outputLines = new ArrayList<>();

        Thread errorReaderThread = new Thread(() -> {
            try (BufferedReader errorReader = new BufferedReader(new InputStreamReader(process.getErrorStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = errorReader.readLine()) != null) {
                    errorLines.add(line);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        });

        Thread outputReaderThread = new Thread(() -> {
            try (BufferedReader outputReader = new BufferedReader(new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = outputReader.readLine()) != null) {
                    outputLines.add(line);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        });

        errorReaderThread.start();
        outputReaderThread.start();

        try {
            // Wait for the conversion process to complete
            exitCode = process.waitFor();

            // Wait for the reader threads to finish
            errorReaderThread.join();
            outputReaderThread.join();
        } catch (InterruptedException e) {
            // Do not leave an orphaned child process behind when the caller gives up waiting
            process.destroy();
            throw e;
        }

        if (!outputLines.isEmpty()) {
            String outputMessage = String.join("\n", outputLines);
            System.out.println("Command output:\n" + outputMessage);
        }

        if (!errorLines.isEmpty()) {
            String errorMessage = String.join("\n", errorLines);
            System.out.println("Command error output:\n" + errorMessage);
            if (exitCode != 0) {
                throw new IOException("Command process failed with exit code " + exitCode + ". Error message: " + errorMessage);
            }
        }
    } finally {
        semaphore.release();
    }
    return exitCode;
}
}

View File

@@ -1,4 +1,5 @@
package stirling.software.SPDF.utils;
import java.io.IOException;
import java.util.List;
import java.util.regex.Matcher;
@@ -12,8 +13,8 @@ import org.apache.pdfbox.cos.COSString;
public class WatermarkRemover extends PDFStreamEngine {
private final String watermarkText;
private final Pattern pattern;
private final String watermarkText;
public WatermarkRemover(String watermarkText) {
this.watermarkText = watermarkText;
@@ -30,7 +31,7 @@ public class WatermarkRemover extends PDFStreamEngine {
}
if (processText) {
for(int j = 0 ; j < operands.size(); ++j) {
for (int j = 0; j < operands.size(); ++j) {
COSBase operand = operands.get(j);
if (operand instanceof COSString) {
COSString cosString = (COSString) operand;
@@ -56,11 +57,10 @@ public class WatermarkRemover extends PDFStreamEngine {
array.set(i, cosString);
operands.set(j, array);
}
}
}
}
}
}