Lots of changes (#70)
Image extraction and conversion to formats Multi parallel file execution for all forms so you can input multiple files quickly Any file at all pdf using libreoffice, super powerful Sadly makes docker image larger but worth it OCR PDF using ocr my pdf Works awesomely for adding text to a image Improved compression using ocr my pdf app Settings page with custom download options such as - open in same window - open in new window - download - download as zip Update detection in settings page it should show notification if there is a update (very hidden) UI cleanups Add other image formats to PDF to Image Various fies to icons, and pdf.js usage
This commit is contained in:
@@ -0,0 +1,94 @@
|
||||
package stirling.software.SPDF;
|
||||
import java.io.IOException;
|
||||
import java.net.InetSocketAddress;
|
||||
import java.net.Socket;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
|
||||
public class LibreOfficeListener {
|
||||
|
||||
private static final LibreOfficeListener INSTANCE = new LibreOfficeListener();
|
||||
|
||||
private static final long ACTIVITY_TIMEOUT = 20 * 60 * 1000; // 20 minutes
|
||||
private static final int LISTENER_PORT = 2002;
|
||||
|
||||
private ExecutorService executorService;
|
||||
private Process process;
|
||||
private long lastActivityTime;
|
||||
|
||||
private LibreOfficeListener() {}
|
||||
|
||||
public static LibreOfficeListener getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
public void start() throws IOException {
|
||||
// Check if the listener is already running
|
||||
if (process != null && process.isAlive()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Start the listener process
|
||||
process = Runtime.getRuntime().exec("unoconv --listener");
|
||||
lastActivityTime = System.currentTimeMillis();
|
||||
|
||||
// Start a background thread to monitor the activity timeout
|
||||
executorService = Executors.newSingleThreadExecutor();
|
||||
executorService.submit(() -> {
|
||||
while (true) {
|
||||
long idleTime = System.currentTimeMillis() - lastActivityTime;
|
||||
if (idleTime >= ACTIVITY_TIMEOUT) {
|
||||
// If there has been no activity for too long, tear down the listener
|
||||
process.destroy();
|
||||
break;
|
||||
}
|
||||
try {
|
||||
Thread.sleep(5000); // Check for inactivity every 5 seconds
|
||||
} catch (InterruptedException e) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
// Wait for the listener to start up
|
||||
long startTime = System.currentTimeMillis();
|
||||
long timeout = 30000; // Timeout after 30 seconds
|
||||
while (System.currentTimeMillis() - startTime < timeout) {
|
||||
if (isListenerRunning()) {
|
||||
|
||||
lastActivityTime = System.currentTimeMillis();
|
||||
return;
|
||||
}
|
||||
try {
|
||||
Thread.sleep(1000);
|
||||
} catch (InterruptedException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
} // Check every 1 second
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isListenerRunning() {
|
||||
try {
|
||||
System.out.println("waiting for listener to start");
|
||||
Socket socket = new Socket();
|
||||
socket.connect(new InetSocketAddress("localhost", 2002), 1000); // Timeout after 1 second
|
||||
socket.close();
|
||||
return true;
|
||||
} catch (IOException e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public synchronized void stop() {
|
||||
// Stop the activity timeout monitor thread
|
||||
executorService.shutdownNow();
|
||||
|
||||
// Stop the listener process
|
||||
if (process != null && process.isAlive()) {
|
||||
process.destroy();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
14
src/main/java/stirling/software/SPDF/config/AppConfig.java
Normal file
14
src/main/java/stirling/software/SPDF/config/AppConfig.java
Normal file
@@ -0,0 +1,14 @@
|
||||
package stirling.software.SPDF.config;
|
||||
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
|
||||
|
||||
@Configuration
|
||||
public class AppConfig {
|
||||
@Bean(name = "appVersion")
|
||||
public String appVersion() {
|
||||
String version = getClass().getPackage().getImplementationVersion();
|
||||
return (version != null) ? version : "0.3.3";
|
||||
}
|
||||
}
|
||||
@@ -1,9 +1,23 @@
|
||||
package stirling.software.SPDF.controller;
|
||||
|
||||
import java.awt.Graphics2D;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
|
||||
import org.apache.pdfbox.cos.COSName;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDResources;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.ui.Model;
|
||||
@@ -12,16 +26,31 @@ import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import com.spire.pdf.PdfCompressionLevel;
|
||||
import com.spire.pdf.PdfDocument;
|
||||
import com.spire.pdf.PdfPageBase;
|
||||
import com.spire.pdf.exporting.PdfImageInfo;
|
||||
import com.spire.pdf.graphics.PdfBitmap;
|
||||
import com.itextpdf.text.DocumentException;
|
||||
import com.itextpdf.text.pdf.PdfReader;
|
||||
import com.itextpdf.text.pdf.PdfStamper;
|
||||
|
||||
import stirling.software.SPDF.utils.ErrorUtils;
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
//import com.spire.pdf.*;
|
||||
@Controller
|
||||
public class CompressController {
|
||||
|
||||
@@ -33,36 +62,56 @@ public class CompressController {
|
||||
return "compress-pdf";
|
||||
}
|
||||
|
||||
|
||||
@PostMapping("/compress-pdf")
|
||||
public ResponseEntity<byte[]> compressPDF(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("imageCompressionLevel") String imageCompressionLevel)
|
||||
throws IOException {
|
||||
// Load a sample PDF document
|
||||
PdfDocument document = new PdfDocument();
|
||||
document.loadFromBytes(pdfFile.getBytes());
|
||||
public ResponseEntity<byte[]> optimizePdf(
|
||||
@RequestParam("fileInput") MultipartFile inputFile,
|
||||
@RequestParam("optimizeLevel") int optimizeLevel,
|
||||
@RequestParam(name = "fastWebView", required = false) Boolean fastWebView,
|
||||
@RequestParam(name = "jbig2Lossy", required = false) Boolean jbig2Lossy) throws IOException, InterruptedException {
|
||||
|
||||
// Compress PDF
|
||||
document.getFileInfo().setIncrementalUpdate(false);
|
||||
document.setCompressionLevel(PdfCompressionLevel.Best);
|
||||
// Save the uploaded file to a temporary location
|
||||
Path tempInputFile = Files.createTempFile("input_", ".pdf");
|
||||
inputFile.transferTo(tempInputFile.toFile());
|
||||
|
||||
// compress PDF Images
|
||||
for (int i = 0; i < document.getPages().getCount(); i++) {
|
||||
// Prepare the output file path
|
||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
|
||||
PdfPageBase page = document.getPages().get(i);
|
||||
PdfImageInfo[] images = page.getImagesInfo();
|
||||
if (images != null && images.length > 0)
|
||||
for (int j = 0; j < images.length; j++) {
|
||||
PdfImageInfo image = images[j];
|
||||
PdfBitmap bp = new PdfBitmap(image.getImage());
|
||||
// bp.setPngDirectToJpeg(true);
|
||||
bp.setQuality(Integer.valueOf(imageCompressionLevel));
|
||||
// Prepare the OCRmyPDF command
|
||||
List<String> command = new ArrayList<>();
|
||||
command.add("ocrmypdf");
|
||||
command.add("--optimize");
|
||||
command.add(String.valueOf(optimizeLevel));
|
||||
|
||||
page.replaceImage(j, bp);
|
||||
|
||||
}
|
||||
if (fastWebView != null && fastWebView) {
|
||||
long fileSize = inputFile.getSize();
|
||||
long fastWebViewSize = (long) (fileSize * 1.25); // 25% higher than file size
|
||||
command.add("--fast-web-view");
|
||||
command.add(String.valueOf(fastWebViewSize));
|
||||
}
|
||||
|
||||
return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_compressed.pdf");
|
||||
if (jbig2Lossy != null && jbig2Lossy) {
|
||||
command.add("--jbig2-lossy");
|
||||
}
|
||||
|
||||
}
|
||||
command.add(tempInputFile.toString());
|
||||
command.add(tempOutputFile.toString());
|
||||
|
||||
int returnCode = ProcessExecutor.runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the optimized PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
// Clean up the temporary files
|
||||
Files.delete(tempInputFile);
|
||||
Files.delete(tempOutputFile);
|
||||
|
||||
// Return the optimized PDF as a response
|
||||
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_Optimized.pdf";
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentType(MediaType.APPLICATION_PDF);
|
||||
headers.setContentDispositionFormData("attachment", outputFilename);
|
||||
return ResponseEntity.ok().headers(headers).body(pdfBytes);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,129 @@
|
||||
package stirling.software.SPDF.controller;
|
||||
|
||||
import java.awt.Graphics2D;
|
||||
import java.awt.Image;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.awt.image.RenderedImage;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.zip.Deflater;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
|
||||
import org.apache.pdfbox.cos.COSName;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.core.io.ByteArrayResource;
|
||||
import org.springframework.core.io.Resource;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.ui.Model;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
@Controller
|
||||
public class ExtractImagesController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(ExtractImagesController.class);
|
||||
|
||||
@GetMapping("/extract-images")
|
||||
public String extractImagesForm(Model model) {
|
||||
model.addAttribute("currentPage", "extract-images");
|
||||
return "extract-images";
|
||||
}
|
||||
|
||||
@PostMapping("/extract-images")
|
||||
public ResponseEntity<Resource> extractImages(@RequestParam("fileInput") MultipartFile file, @RequestParam("format") String format) throws IOException {
|
||||
|
||||
System.out.println(System.currentTimeMillis() + "file=" + file.getName() + ", format=" + format);
|
||||
PDDocument document = PDDocument.load(file.getBytes());
|
||||
|
||||
// Create ByteArrayOutputStream to write zip file to byte array
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
|
||||
// Create ZipOutputStream to create zip file
|
||||
ZipOutputStream zos = new ZipOutputStream(baos);
|
||||
|
||||
// Set compression level
|
||||
zos.setLevel(Deflater.BEST_COMPRESSION);
|
||||
|
||||
int imageIndex = 1;
|
||||
|
||||
int pageNum = 1;
|
||||
// Iterate over each page
|
||||
for (PDPage page : document.getPages()) {
|
||||
++pageNum;
|
||||
// Extract images from page
|
||||
for (COSName name : page.getResources().getXObjectNames()) {
|
||||
if (page.getResources().isImageXObject(name)) {
|
||||
PDImageXObject image = (PDImageXObject) page.getResources().getXObject(name);
|
||||
|
||||
// Convert image to desired format
|
||||
RenderedImage renderedImage = image.getImage();
|
||||
BufferedImage bufferedImage = null;
|
||||
if (format.equalsIgnoreCase("png")) {
|
||||
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(),
|
||||
BufferedImage.TYPE_INT_ARGB);
|
||||
} else if (format.equalsIgnoreCase("jpeg") || format.equalsIgnoreCase("jpg")) {
|
||||
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(),
|
||||
BufferedImage.TYPE_INT_RGB);
|
||||
} else if (format.equalsIgnoreCase("gif")) {
|
||||
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(),
|
||||
BufferedImage.TYPE_BYTE_INDEXED);
|
||||
}
|
||||
|
||||
// Write image to zip file
|
||||
String imageName = "Image " + imageIndex + " (Page " + pageNum + ")." + format;
|
||||
ZipEntry zipEntry = new ZipEntry(imageName);
|
||||
zos.putNextEntry(zipEntry);
|
||||
|
||||
Graphics2D g = bufferedImage.createGraphics();
|
||||
g.drawImage((Image) renderedImage, 0, 0, null);
|
||||
g.dispose();
|
||||
// Write image bytes to zip file
|
||||
ByteArrayOutputStream imageBaos = new ByteArrayOutputStream();
|
||||
ImageIO.write(bufferedImage, format, imageBaos);
|
||||
zos.write(imageBaos.toByteArray());
|
||||
|
||||
|
||||
zos.closeEntry();
|
||||
imageIndex++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Close ZipOutputStream and PDDocument
|
||||
zos.close();
|
||||
document.close();
|
||||
|
||||
// Create ByteArrayResource from byte array
|
||||
byte[] zipContents = baos.toByteArray();
|
||||
ByteArrayResource resource = new ByteArrayResource(zipContents);
|
||||
|
||||
// Set content disposition header to indicate that the response should be downloaded as a file
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentLength(zipContents.length);
|
||||
headers.add(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_extracted-images.zip");
|
||||
|
||||
// Return ResponseEntity with ByteArrayResource and headers
|
||||
return ResponseEntity
|
||||
.status(HttpStatus.OK)
|
||||
.headers(headers)
|
||||
|
||||
.header("Cache-Control", "no-cache")
|
||||
.contentType(MediaType.APPLICATION_OCTET_STREAM)
|
||||
.body(resource);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,143 @@
|
||||
package stirling.software.SPDF.controller;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.web.servlet.ModelAndView;
|
||||
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
|
||||
import java.io.FileOutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
//import com.spire.pdf.*;
|
||||
@Controller
|
||||
public class OCRController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(OCRController.class);
|
||||
|
||||
@GetMapping("/ocr-pdf")
|
||||
public ModelAndView ocrPdfPage() {
|
||||
ModelAndView modelAndView = new ModelAndView("ocr-pdf");
|
||||
modelAndView.addObject("languages", getAvailableTesseractLanguages());
|
||||
modelAndView.addObject("currentPage", "ocr-pdf");
|
||||
return modelAndView;
|
||||
}
|
||||
|
||||
@PostMapping("/ocr-pdf")
|
||||
public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile,
|
||||
@RequestParam("languages") List<String> selectedLanguages,
|
||||
@RequestParam(name = "sidecar", required = false) Boolean sidecar) throws IOException, InterruptedException {
|
||||
|
||||
//--output-type pdfa
|
||||
if (selectedLanguages == null || selectedLanguages.size() < 1) {
|
||||
throw new IOException("Please select at least one language.");
|
||||
}
|
||||
|
||||
// Save the uploaded file to a temporary location
|
||||
Path tempInputFile = Files.createTempFile("input_", ".pdf");
|
||||
inputFile.transferTo(tempInputFile.toFile());
|
||||
|
||||
// Prepare the output file path
|
||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
|
||||
// Run OCR Command
|
||||
String languageOption = String.join("+", selectedLanguages);
|
||||
List<String> command = new ArrayList<>(Arrays.asList("ocrmypdf","--verbose", "2", "--language", languageOption,
|
||||
tempInputFile.toString(), tempOutputFile.toString()));
|
||||
String sidecarFile = tempOutputFile.toString().replace(".pdf", ".txt");
|
||||
if (sidecar != null && sidecar) {
|
||||
command.add("--sidecar");
|
||||
command.add(sidecarFile);
|
||||
}
|
||||
int returnCode = ProcessExecutor.runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the OCR processed PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
// Clean up the temporary files
|
||||
Files.delete(tempInputFile);
|
||||
// Return the OCR processed PDF as a response
|
||||
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf";
|
||||
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
|
||||
if (sidecar != null && sidecar) {
|
||||
// Create a zip file containing both the PDF and the text file
|
||||
String outputZipFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.zip";
|
||||
Path tempZipFile = Files.createTempFile("output_", ".zip");
|
||||
|
||||
try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
|
||||
// Add PDF file to the zip
|
||||
ZipEntry pdfEntry = new ZipEntry(outputFilename);
|
||||
zipOut.putNextEntry(pdfEntry);
|
||||
Files.copy(tempOutputFile, zipOut);
|
||||
zipOut.closeEntry();
|
||||
|
||||
// Add text file to the zip
|
||||
ZipEntry txtEntry = new ZipEntry(sidecarFile);
|
||||
zipOut.putNextEntry(txtEntry);
|
||||
Files.copy(Paths.get(sidecarFile), zipOut);
|
||||
zipOut.closeEntry();
|
||||
}
|
||||
|
||||
byte[] zipBytes = Files.readAllBytes(tempZipFile);
|
||||
|
||||
// Clean up the temporary zip file
|
||||
Files.delete(tempZipFile);
|
||||
Files.delete(tempOutputFile);
|
||||
Files.delete(Paths.get(sidecarFile));
|
||||
|
||||
// Return the zip file containing both the PDF and the text file
|
||||
headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
|
||||
headers.setContentDispositionFormData("attachment", outputZipFilename);
|
||||
return ResponseEntity.ok().headers(headers).body(zipBytes);
|
||||
} else {
|
||||
// Return the OCR processed PDF as a response
|
||||
Files.delete(tempOutputFile);
|
||||
headers.setContentType(MediaType.APPLICATION_PDF);
|
||||
headers.setContentDispositionFormData("attachment", outputFilename);
|
||||
return ResponseEntity.ok().headers(headers).body(pdfBytes);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public List<String> getAvailableTesseractLanguages() {
|
||||
String tessdataDir = "/usr/share/tesseract-ocr/4.00/tessdata";
|
||||
File[] files = new File(tessdataDir).listFiles();
|
||||
if (files == null) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
return Arrays.stream(files)
|
||||
.filter(file -> file.getName().endsWith(".traineddata"))
|
||||
.map(file -> file.getName().replace(".traineddata", ""))
|
||||
.filter(lang -> !lang.equalsIgnoreCase("osd"))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
}
|
||||
@@ -10,7 +10,7 @@ import org.springframework.web.bind.annotation.GetMapping;
|
||||
public class PdfController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(PdfController.class);
|
||||
|
||||
|
||||
@GetMapping("/home")
|
||||
public String root(Model model) {
|
||||
return "redirect:/";
|
||||
@@ -22,4 +22,6 @@ public class PdfController {
|
||||
return "home";
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
@@ -49,7 +49,6 @@ public class RearrangePagesPDFController {
|
||||
int pageIndex = pagesToRemove.get(i);
|
||||
document.removePage(pageIndex);
|
||||
}
|
||||
|
||||
return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_removed_pages.pdf");
|
||||
|
||||
}
|
||||
|
||||
@@ -134,7 +134,7 @@ public class SplitPDFController {
|
||||
ByteArrayResource resource = new ByteArrayResource(data);
|
||||
new File("split_documents.zip").delete();
|
||||
// return the Resource in the response
|
||||
return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=split_documents.zip").contentType(MediaType.APPLICATION_OCTET_STREAM)
|
||||
return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_split.zip").contentType(MediaType.APPLICATION_OCTET_STREAM)
|
||||
.contentLength(resource.contentLength()).body(resource);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,7 +78,7 @@ public class ConvertImgPDFController {
|
||||
} else {
|
||||
ByteArrayResource resource = new ByteArrayResource(result);
|
||||
// return the Resource in the response
|
||||
return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=converted_documents.zip").contentType(MediaType.APPLICATION_OCTET_STREAM)
|
||||
return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename="+ file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToImages.zip").contentType(MediaType.APPLICATION_OCTET_STREAM)
|
||||
.contentLength(resource.contentLength()).body(resource);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,82 @@
|
||||
package stirling.software.SPDF.controller.converters;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.ui.Model;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.web.servlet.ModelAndView;
|
||||
|
||||
import stirling.software.SPDF.LibreOfficeListener;
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
@Controller
|
||||
public class ConvertOfficeController {
|
||||
|
||||
|
||||
@GetMapping("/file-to-pdf")
|
||||
public String convertToPdfForm(Model model) {
|
||||
model.addAttribute("currentPage", "file-to-pdf");
|
||||
return "convert/file-to-pdf";
|
||||
}
|
||||
|
||||
@PostMapping("/file-to-pdf")
|
||||
public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
|
||||
|
||||
//unused but can start server instance if startup time is to long
|
||||
//LibreOfficeListener.getInstance().start();
|
||||
|
||||
byte[] pdfByteArray = convertToPdf(inputFile);
|
||||
return PdfUtils.bytesToWebResponse(pdfByteArray, inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
|
||||
}
|
||||
|
||||
|
||||
public byte[] convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException {
|
||||
// Save the uploaded file to a temporary location
|
||||
Path tempInputFile = Files.createTempFile("input_", "." + getFileExtension(inputFile.getOriginalFilename()));
|
||||
inputFile.transferTo(tempInputFile.toFile());
|
||||
|
||||
// Prepare the output file path
|
||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
|
||||
// Run the LibreOffice command
|
||||
List<String> command = new ArrayList<>(Arrays.asList("unoconv", "-vvv",
|
||||
"-f",
|
||||
"pdf",
|
||||
"-o",
|
||||
tempOutputFile.toString(),
|
||||
tempInputFile.toString()));
|
||||
int returnCode = ProcessExecutor.runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the converted PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
// Clean up the temporary files
|
||||
Files.delete(tempInputFile);
|
||||
Files.delete(tempOutputFile);
|
||||
|
||||
return pdfBytes;
|
||||
}
|
||||
|
||||
|
||||
|
||||
private String getFileExtension(String fileName) {
|
||||
int dotIndex = fileName.lastIndexOf('.');
|
||||
if (dotIndex == -1) {
|
||||
return "";
|
||||
}
|
||||
return fileName.substring(dotIndex + 1);
|
||||
}
|
||||
}
|
||||
@@ -1,81 +0,0 @@
|
||||
package stirling.software.SPDF.controller.converters;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.poi.ss.usermodel.Color;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.apache.poi.ss.usermodel.WorkbookFactory;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.ui.Model;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import com.itextpdf.text.BaseColor;
|
||||
import com.itextpdf.text.Document;
|
||||
import com.itextpdf.text.DocumentException;
|
||||
import com.itextpdf.text.Element;
|
||||
import com.itextpdf.text.pdf.PdfPCell;
|
||||
import com.itextpdf.text.pdf.PdfPTable;
|
||||
import com.itextpdf.text.pdf.PdfWriter;
|
||||
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
|
||||
@Controller
|
||||
public class ConvertXlsxController {
|
||||
|
||||
|
||||
@GetMapping("/xlsx-to-pdf")
|
||||
public String cinvertToPDF(Model model) {
|
||||
model.addAttribute("currentPage", "xlsx-to-pdf");
|
||||
return "convert/xlsx-to-pdf";
|
||||
}
|
||||
|
||||
@PostMapping("/xlsx-to-pdf")
|
||||
public ResponseEntity<byte[]> convertToPDF(@RequestParam("fileInput") MultipartFile xlsx) throws IOException, DocumentException{
|
||||
// Load Excel file
|
||||
|
||||
Workbook workbook = WorkbookFactory.create(xlsx.getInputStream());
|
||||
|
||||
// Create PDF document
|
||||
Document document = new Document();
|
||||
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
|
||||
PdfWriter.getInstance(document, outputStream);
|
||||
document.open();
|
||||
|
||||
// Convert each sheet in Excel to a separate page in PDF
|
||||
for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
|
||||
PdfPTable table = new PdfPTable(workbook.getSheetAt(i).getRow(0).getPhysicalNumberOfCells());
|
||||
for (int row = 0; row < workbook.getSheetAt(i).getPhysicalNumberOfRows(); row++) {
|
||||
for (int cell = 0; cell < workbook.getSheetAt(i).getRow(row).getPhysicalNumberOfCells(); cell++) {
|
||||
PdfPCell pdfCell = new PdfPCell();
|
||||
pdfCell.addElement(new com.itextpdf.text.Paragraph(workbook.getSheetAt(i).getRow(row).getCell(cell).toString()));
|
||||
|
||||
// Copy cell style, borders, and background color
|
||||
pdfCell.setBorderColor(new BaseColor(workbook.getSheetAt(i).getRow(row).getCell(cell).getCellStyle().getBottomBorderColor()));
|
||||
pdfCell.setBorderColor(new BaseColor(workbook.getSheetAt(i).getRow(row).getCell(cell).getCellStyle().getTopBorderColor()));
|
||||
pdfCell.setBorderColor(new BaseColor(workbook.getSheetAt(i).getRow(row).getCell(cell).getCellStyle().getLeftBorderColor()));
|
||||
pdfCell.setBorderColor(new BaseColor(workbook.getSheetAt(i).getRow(row).getCell(cell).getCellStyle().getRightBorderColor()));
|
||||
Short bc = workbook.getSheetAt(i).getRow(row).getCell(cell).getCellStyle().getFillBackgroundColor();
|
||||
pdfCell.setBackgroundColor(new BaseColor(bc));
|
||||
|
||||
table.addCell(pdfCell);
|
||||
}
|
||||
}
|
||||
document.add(table);
|
||||
}
|
||||
// Close document and output stream
|
||||
document.close();
|
||||
outputStream.flush();
|
||||
outputStream.close();
|
||||
return PdfUtils.boasToWebResponse(outputStream, xlsx.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
|
||||
// Close document and input stream
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
@@ -2,12 +2,19 @@ package stirling.software.SPDF.controller.security;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
|
||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationMarkup;
|
||||
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
|
||||
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
|
||||
import org.apache.pdfbox.util.Matrix;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
@@ -18,6 +25,7 @@ import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
import stirling.software.SPDF.utils.WatermarkRemover;
|
||||
|
||||
@Controller
|
||||
public class WatermarkController {
|
||||
@@ -28,6 +36,12 @@ public class WatermarkController {
|
||||
return "security/add-watermark";
|
||||
}
|
||||
|
||||
@GetMapping("/remove-watermark")
|
||||
public String removeWatermarkForm(Model model) {
|
||||
model.addAttribute("currentPage", "remove-watermark");
|
||||
return "security/remove-watermark";
|
||||
}
|
||||
|
||||
@PostMapping("/add-watermark")
|
||||
public ResponseEntity<byte[]> addWatermark(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("watermarkText") String watermarkText,
|
||||
@RequestParam(defaultValue = "30", name = "fontSize") float fontSize, @RequestParam(defaultValue = "0", name = "rotation") float rotation,
|
||||
@@ -71,4 +85,62 @@ public class WatermarkController {
|
||||
}
|
||||
return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_watermarked.pdf");
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
@PostMapping("/remove-watermark")
|
||||
public ResponseEntity<byte[]> removeWatermark(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("watermarkText") String watermarkText) throws Exception {
|
||||
|
||||
// Load the input PDF
|
||||
PDDocument document = PDDocument.load(pdfFile.getInputStream());
|
||||
|
||||
// Create a new PDF document for the output
|
||||
PDDocument outputDocument = new PDDocument();
|
||||
|
||||
// Loop through the pages
|
||||
int numPages = document.getNumberOfPages();
|
||||
for (int i = 0; i < numPages; i++) {
|
||||
PDPage page = document.getPage(i);
|
||||
|
||||
// Process the content stream to remove the watermark text
|
||||
WatermarkRemover editor = new WatermarkRemover(watermarkText) {};
|
||||
editor.processPage(page);
|
||||
editor.processPage(page);
|
||||
// Add the page to the output document
|
||||
outputDocument.addPage(page);
|
||||
}
|
||||
|
||||
for (PDPage page : outputDocument.getPages()) {
|
||||
List<PDAnnotation> annotations = page.getAnnotations();
|
||||
List<PDAnnotation> annotationsToRemove = new ArrayList<>();
|
||||
|
||||
for (PDAnnotation annotation : annotations) {
|
||||
if (annotation instanceof PDAnnotationMarkup) {
|
||||
PDAnnotationMarkup markup = (PDAnnotationMarkup) annotation;
|
||||
String contents = markup.getContents();
|
||||
if (contents != null && contents.contains(watermarkText)) {
|
||||
annotationsToRemove.add(markup);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
annotations.removeAll(annotationsToRemove);
|
||||
}
|
||||
PDDocumentCatalog catalog = outputDocument.getDocumentCatalog();
|
||||
PDAcroForm acroForm = catalog.getAcroForm();
|
||||
if (acroForm != null) {
|
||||
List<PDField> fields = acroForm.getFields();
|
||||
for (PDField field : fields) {
|
||||
String fieldValue = field.getValueAsString();
|
||||
if (fieldValue.contains(watermarkText)) {
|
||||
field.setValue(fieldValue.replace(watermarkText, ""));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return PdfUtils.pdfDocToWebResponse(outputDocument, "removed.pdf");
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -32,7 +32,6 @@ import org.springframework.http.ResponseEntity;
|
||||
import com.itextpdf.text.Document;
|
||||
import com.itextpdf.text.DocumentException;
|
||||
import com.itextpdf.text.pdf.PdfWriter;
|
||||
import com.spire.pdf.PdfDocument;
|
||||
|
||||
public class PdfUtils {
|
||||
|
||||
@@ -172,16 +171,7 @@ public class PdfUtils {
|
||||
return PdfUtils.boasToWebResponse(baos, docName);
|
||||
}
|
||||
|
||||
public static ResponseEntity<byte[]> pdfDocToWebResponse(PdfDocument document, String docName) throws IOException {
|
||||
|
||||
// Open Byte Array and save document to it
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
document.saveToStream(baos);
|
||||
// Close the document
|
||||
document.close();
|
||||
|
||||
return PdfUtils.boasToWebResponse(baos, docName);
|
||||
}
|
||||
|
||||
|
||||
public static ResponseEntity<byte[]> pdfDocToWebResponse(PDDocument document, String docName) throws IOException {
|
||||
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
package stirling.software.SPDF.utils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.io.BufferedReader;
|
||||
import java.util.ArrayList;
|
||||
public class ProcessExecutor {
|
||||
public static int runCommandWithOutputHandling(List<String> command) throws IOException, InterruptedException {
|
||||
ProcessBuilder processBuilder = new ProcessBuilder(command);
|
||||
Process process = processBuilder.start();
|
||||
|
||||
// Read the error stream and standard output stream concurrently
|
||||
List<String> errorLines = new ArrayList<>();
|
||||
List<String> outputLines = new ArrayList<>();
|
||||
|
||||
Thread errorReaderThread = new Thread(() -> {
|
||||
try (BufferedReader errorReader = new BufferedReader(new InputStreamReader(process.getErrorStream(), StandardCharsets.UTF_8))) {
|
||||
String line;
|
||||
while ((line = errorReader.readLine()) != null) {
|
||||
errorLines.add(line);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
});
|
||||
|
||||
Thread outputReaderThread = new Thread(() -> {
|
||||
try (BufferedReader outputReader = new BufferedReader(new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
|
||||
String line;
|
||||
while ((line = outputReader.readLine()) != null) {
|
||||
outputLines.add(line);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
});
|
||||
|
||||
errorReaderThread.start();
|
||||
outputReaderThread.start();
|
||||
|
||||
// Wait for the conversion process to complete
|
||||
int exitCode = process.waitFor();
|
||||
|
||||
// Wait for the reader threads to finish
|
||||
errorReaderThread.join();
|
||||
outputReaderThread.join();
|
||||
|
||||
if (outputLines.size() > 0) {
|
||||
String outputMessage = String.join("\n", outputLines);
|
||||
System.out.println("Command output:\n" + outputMessage);
|
||||
}
|
||||
|
||||
if (errorLines.size() > 0) {
|
||||
String errorMessage = String.join("\n", errorLines);
|
||||
System.out.println("Command error output:\n" + errorMessage);
|
||||
if (exitCode != 0) {
|
||||
throw new IOException("Command process failed with exit code " + exitCode + ". Error message: " + errorMessage);
|
||||
}
|
||||
}
|
||||
|
||||
return exitCode;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,69 @@
|
||||
package stirling.software.SPDF.utils;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.pdfbox.contentstream.PDFStreamEngine;
|
||||
import org.apache.pdfbox.contentstream.operator.Operator;
|
||||
import org.apache.pdfbox.cos.COSArray;
|
||||
import org.apache.pdfbox.cos.COSBase;
|
||||
import org.apache.pdfbox.cos.COSString;
|
||||
|
||||
public class WatermarkRemover extends PDFStreamEngine {
|
||||
|
||||
private final String watermarkText;
|
||||
private final Pattern pattern;
|
||||
|
||||
public WatermarkRemover(String watermarkText) {
|
||||
this.watermarkText = watermarkText;
|
||||
this.pattern = Pattern.compile(Pattern.quote(watermarkText));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
|
||||
String operation = operator.getName();
|
||||
|
||||
boolean processText = false;
|
||||
if ("Tj".equals(operation) || "TJ".equals(operation) || "'".equals(operation) || "\"".equals(operation)) {
|
||||
processText = true;
|
||||
}
|
||||
|
||||
if (processText) {
|
||||
for(int j = 0 ; j < operands.size(); ++j) {
|
||||
COSBase operand = operands.get(j);
|
||||
if (operand instanceof COSString) {
|
||||
COSString cosString = (COSString) operand;
|
||||
String string = cosString.getString();
|
||||
Matcher matcher = pattern.matcher(string);
|
||||
if (matcher.find()) {
|
||||
string = matcher.replaceAll("");
|
||||
cosString.setValue(string.getBytes());
|
||||
}
|
||||
} else if (operand instanceof COSArray) {
|
||||
COSArray array = (COSArray) operand;
|
||||
for (int i = 0; i < array.size(); i++) {
|
||||
COSBase item = array.get(i);
|
||||
if (item instanceof COSString) {
|
||||
COSString cosString = (COSString) item;
|
||||
String string = cosString.getString();
|
||||
Matcher matcher = pattern.matcher(string);
|
||||
if (matcher.find()) {
|
||||
System.out.println("operation =" + operation);
|
||||
System.out.println("1 =" + string);
|
||||
string = matcher.replaceAll("");
|
||||
cosString.setValue(string.getBytes());
|
||||
array.set(i, cosString);
|
||||
operands.set(j, array);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
super.processOperator(operator, operands);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user