Lots of changes (#70)

Image extraction and conversion to formats 

Multi parallel file execution for all forms so you can input multiple files quickly 

Convert any file at all to PDF using LibreOffice, super powerful
Sadly makes docker image larger but worth it 

OCR PDF using OCRmyPDF
Works awesomely for adding text to an image

Improved compression using the OCRmyPDF app

Settings page with custom download options such as 
- open in same window
- open in new window
- download
- download as zip

Update detection in the settings page: it should show a notification if there is an update (very hidden)

UI cleanups

Add other image formats to PDF to Image

Various fixes to icons and pdf.js usage
This commit is contained in:
Anthony Stirling
2023-03-20 21:55:11 +00:00
committed by GitHub
parent 54abb53842
commit a9145fe84c
54 changed files with 82327 additions and 8300 deletions

View File

@@ -0,0 +1,94 @@
package stirling.software.SPDF;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * Process-wide singleton that launches a background {@code unoconv --listener}
 * process and tears it down again after a period without activity.
 *
 * <p>{@link #start()} launches the process (if it is not already alive), starts a
 * monitor that destroys it once {@code ACTIVITY_TIMEOUT} has elapsed since the last
 * recorded activity, and then waits for the listener port to accept connections.
 * {@link #stop()} cancels the monitor and destroys the process.
 */
public class LibreOfficeListener {

    private static final LibreOfficeListener INSTANCE = new LibreOfficeListener();

    private static final long ACTIVITY_TIMEOUT = 20 * 60 * 1000; // 20 minutes
    private static final int LISTENER_PORT = 2002;
    private static final long STARTUP_TIMEOUT = 30_000; // give up waiting after 30 seconds
    private static final long STARTUP_POLL_INTERVAL = 1_000; // probe the port every second
    private static final long IDLE_CHECK_INTERVAL = 5_000; // check for inactivity every 5 seconds
    private static final int CONNECT_TIMEOUT = 1_000; // socket connect timeout in milliseconds

    private ExecutorService executorService;
    private Process process;
    // Written by the caller of start() and read by the monitor thread.
    private volatile long lastActivityTime;

    private LibreOfficeListener() {}

    /**
     * @return the single shared instance
     */
    public static LibreOfficeListener getInstance() {
        return INSTANCE;
    }

    /**
     * Starts the listener process unless it is already alive, then blocks until the
     * listener port accepts a connection or the startup timeout elapses.
     *
     * <p>If the port never becomes reachable the method returns normally, as before;
     * callers that need certainty must probe the port themselves. If the calling
     * thread is interrupted while waiting, the interrupt flag is restored and the
     * method returns early.
     *
     * @throws IOException if the process cannot be launched
     */
    public void start() throws IOException {
        if (!launchIfNotRunning()) {
            return;
        }
        // Wait for the listener to start up (outside the lock so stop() is not blocked).
        long deadline = System.currentTimeMillis() + STARTUP_TIMEOUT;
        while (System.currentTimeMillis() < deadline) {
            if (isListenerRunning()) {
                lastActivityTime = System.currentTimeMillis();
                return;
            }
            try {
                Thread.sleep(STARTUP_POLL_INTERVAL);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /**
     * Launches the process and its inactivity monitor if no live process exists.
     *
     * @return {@code true} if a new process was launched, {@code false} if one was already alive
     */
    private synchronized boolean launchIfNotRunning() throws IOException {
        if (process != null && process.isAlive()) {
            return false;
        }
        // Cancel a monitor left over from a previous run so threads do not pile up.
        if (executorService != null) {
            executorService.shutdownNow();
        }
        // Argument list instead of a single command string: no reliance on tokenisation.
        Process launched = new ProcessBuilder("unoconv", "--listener").start();
        process = launched;
        lastActivityTime = System.currentTimeMillis();

        executorService = Executors.newSingleThreadExecutor(task -> {
            Thread monitor = new Thread(task, "libreoffice-listener-monitor");
            monitor.setDaemon(true); // must never keep the JVM alive on its own
            return monitor;
        });
        executorService.submit(() -> watchForInactivity(launched));
        // No further tasks will be submitted; lets the worker thread exit when the monitor ends.
        executorService.shutdown();
        return true;
    }

    /** Destroys {@code watched} once no activity has been recorded for {@code ACTIVITY_TIMEOUT}. */
    private void watchForInactivity(Process watched) {
        while (watched.isAlive()) {
            long idleTime = System.currentTimeMillis() - lastActivityTime;
            if (idleTime >= ACTIVITY_TIMEOUT) {
                watched.destroy();
                return;
            }
            try {
                Thread.sleep(IDLE_CHECK_INTERVAL);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /** Probes the listener port on localhost; the socket is always closed again. */
    private boolean isListenerRunning() {
        System.out.println("waiting for listener to start");
        try (Socket socket = new Socket()) {
            socket.connect(new InetSocketAddress("localhost", LISTENER_PORT), CONNECT_TIMEOUT);
            return true;
        } catch (IOException e) {
            return false;
        }
    }

    /**
     * Stops the inactivity monitor and destroys the listener process.
     * Safe to call when the listener was never started.
     */
    public synchronized void stop() {
        if (executorService != null) {
            executorService.shutdownNow();
        }
        if (process != null && process.isAlive()) {
            process.destroy();
        }
    }
}

View File

@@ -0,0 +1,14 @@
package stirling.software.SPDF.config;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Spring configuration that exposes application-wide beans.
 */
@Configuration
public class AppConfig {

    /**
     * Provides the {@code appVersion} bean.
     *
     * @return the implementation version reported for this class's package, or
     *         {@code "0.3.3"} when none is available
     */
    @Bean(name = "appVersion")
    public String appVersion() {
        final String implementationVersion = getClass().getPackage().getImplementationVersion();
        if (implementationVersion == null) {
            return "0.3.3";
        }
        return implementationVersion;
    }
}

View File

@@ -1,9 +1,23 @@
package stirling.software.SPDF.controller;
import java.awt.Graphics2D;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import javax.imageio.ImageIO;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
@@ -12,16 +26,31 @@ import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import com.spire.pdf.PdfCompressionLevel;
import com.spire.pdf.PdfDocument;
import com.spire.pdf.PdfPageBase;
import com.spire.pdf.exporting.PdfImageInfo;
import com.spire.pdf.graphics.PdfBitmap;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;
import stirling.software.SPDF.utils.ErrorUtils;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
//import com.spire.pdf.*;
@Controller
public class CompressController {
@@ -33,36 +62,56 @@ public class CompressController {
return "compress-pdf";
}
@PostMapping("/compress-pdf")
public ResponseEntity<byte[]> compressPDF(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("imageCompressionLevel") String imageCompressionLevel)
throws IOException {
// Load a sample PDF document
PdfDocument document = new PdfDocument();
document.loadFromBytes(pdfFile.getBytes());
public ResponseEntity<byte[]> optimizePdf(
@RequestParam("fileInput") MultipartFile inputFile,
@RequestParam("optimizeLevel") int optimizeLevel,
@RequestParam(name = "fastWebView", required = false) Boolean fastWebView,
@RequestParam(name = "jbig2Lossy", required = false) Boolean jbig2Lossy) throws IOException, InterruptedException {
// Compress PDF
document.getFileInfo().setIncrementalUpdate(false);
document.setCompressionLevel(PdfCompressionLevel.Best);
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", ".pdf");
inputFile.transferTo(tempInputFile.toFile());
// compress PDF Images
for (int i = 0; i < document.getPages().getCount(); i++) {
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
PdfPageBase page = document.getPages().get(i);
PdfImageInfo[] images = page.getImagesInfo();
if (images != null && images.length > 0)
for (int j = 0; j < images.length; j++) {
PdfImageInfo image = images[j];
PdfBitmap bp = new PdfBitmap(image.getImage());
// bp.setPngDirectToJpeg(true);
bp.setQuality(Integer.valueOf(imageCompressionLevel));
// Prepare the OCRmyPDF command
List<String> command = new ArrayList<>();
command.add("ocrmypdf");
command.add("--optimize");
command.add(String.valueOf(optimizeLevel));
page.replaceImage(j, bp);
}
if (fastWebView != null && fastWebView) {
long fileSize = inputFile.getSize();
long fastWebViewSize = (long) (fileSize * 1.25); // 25% higher than file size
command.add("--fast-web-view");
command.add(String.valueOf(fastWebViewSize));
}
return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_compressed.pdf");
if (jbig2Lossy != null && jbig2Lossy) {
command.add("--jbig2-lossy");
}
}
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
int returnCode = ProcessExecutor.runCommandWithOutputHandling(command);
// Read the optimized PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Clean up the temporary files
Files.delete(tempInputFile);
Files.delete(tempOutputFile);
// Return the optimized PDF as a response
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_Optimized.pdf";
HttpHeaders headers = new HttpHeaders();
headers.setContentType(MediaType.APPLICATION_PDF);
headers.setContentDispositionFormData("attachment", outputFilename);
return ResponseEntity.ok().headers(headers).body(pdfBytes);
}
}

View File

@@ -0,0 +1,129 @@
package stirling.software.SPDF.controller;
import java.awt.Graphics2D;
import java.awt.Image;
import java.awt.image.BufferedImage;
import java.awt.image.RenderedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.Deflater;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import javax.imageio.ImageIO;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.core.io.ByteArrayResource;
import org.springframework.core.io.Resource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
/**
 * Serves the "extract images" form and returns every image embedded in an uploaded
 * PDF as a zip archive, re-encoded in the requested format.
 */
@Controller
public class ExtractImagesController {

    private static final Logger logger = LoggerFactory.getLogger(ExtractImagesController.class);

    /** Renders the extract-images form. */
    @GetMapping("/extract-images")
    public String extractImagesForm(Model model) {
        model.addAttribute("currentPage", "extract-images");
        return "extract-images";
    }

    /**
     * Extracts all image XObjects from the uploaded PDF and zips them.
     *
     * @param file   the uploaded PDF
     * @param format target image format: {@code png}, {@code jpeg}/{@code jpg} or {@code gif} (case-insensitive)
     * @return a zip download named after the uploaded file
     * @throws IOException              if the PDF cannot be read or an image cannot be written
     * @throws IllegalArgumentException if {@code format} is not one of the supported formats
     */
    @PostMapping("/extract-images")
    public ResponseEntity<Resource> extractImages(@RequestParam("fileInput") MultipartFile file, @RequestParam("format") String format) throws IOException {
        logger.info("Extracting images: file={}, format={}", file.getOriginalFilename(), format);

        // Fail before any work is done instead of hitting a null image half-way through the zip.
        int targetImageType = bufferedImageTypeFor(format);

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // try-with-resources closes both the document and the zip stream, also on failure.
        try (PDDocument document = PDDocument.load(file.getBytes());
                ZipOutputStream zos = new ZipOutputStream(baos)) {
            zos.setLevel(Deflater.BEST_COMPRESSION);

            int imageIndex = 1;
            int pageNum = 0; // incremented on entry to each page, so the first page is page 1
            for (PDPage page : document.getPages()) {
                ++pageNum;
                if (page.getResources() == null) {
                    continue; // page has no resource dictionary, hence no images
                }
                for (COSName name : page.getResources().getXObjectNames()) {
                    if (!page.getResources().isImageXObject(name)) {
                        continue;
                    }
                    PDImageXObject image = (PDImageXObject) page.getResources().getXObject(name);

                    // Redraw onto a canvas whose colour model suits the target format.
                    RenderedImage renderedImage = image.getImage();
                    BufferedImage bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), targetImageType);
                    Graphics2D g = bufferedImage.createGraphics();
                    g.drawImage((Image) renderedImage, 0, 0, null);
                    g.dispose();

                    // Encode first, then add the finished bytes as one zip entry.
                    ByteArrayOutputStream imageBaos = new ByteArrayOutputStream();
                    ImageIO.write(bufferedImage, format, imageBaos);

                    String imageName = "Image " + imageIndex + " (Page " + pageNum + ")." + format;
                    zos.putNextEntry(new ZipEntry(imageName));
                    zos.write(imageBaos.toByteArray());
                    zos.closeEntry();
                    imageIndex++;
                }
            }
        }

        // The zip stream is closed at this point, so the archive is complete.
        byte[] zipContents = baos.toByteArray();
        ByteArrayResource resource = new ByteArrayResource(zipContents);

        // Content-Disposition marks the response as a file download.
        HttpHeaders headers = new HttpHeaders();
        headers.setContentLength(zipContents.length);
        headers.add(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_extracted-images.zip");

        return ResponseEntity
                .status(HttpStatus.OK)
                .headers(headers)
                .header("Cache-Control", "no-cache")
                .contentType(MediaType.APPLICATION_OCTET_STREAM)
                .body(resource);
    }

    /** Maps a requested output format to the {@link BufferedImage} type used for the redraw. */
    private static int bufferedImageTypeFor(String format) {
        if ("png".equalsIgnoreCase(format)) {
            return BufferedImage.TYPE_INT_ARGB;
        }
        if ("jpeg".equalsIgnoreCase(format) || "jpg".equalsIgnoreCase(format)) {
            return BufferedImage.TYPE_INT_RGB;
        }
        if ("gif".equalsIgnoreCase(format)) {
            return BufferedImage.TYPE_BYTE_INDEXED;
        }
        throw new IllegalArgumentException("Unsupported image format: " + format);
    }
}

View File

@@ -0,0 +1,143 @@
package stirling.software.SPDF.controller;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.servlet.ModelAndView;
import stirling.software.SPDF.utils.ProcessExecutor;
import java.io.FileOutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
//import com.spire.pdf.*;
/**
 * Serves the OCR form and runs {@code ocrmypdf} on an uploaded PDF, optionally
 * bundling the extracted text ("sidecar") with the result in a zip archive.
 */
@Controller
public class OCRController {

    private static final Logger logger = LoggerFactory.getLogger(OCRController.class);

    /** Renders the OCR form together with the list of installed Tesseract languages. */
    @GetMapping("/ocr-pdf")
    public ModelAndView ocrPdfPage() {
        ModelAndView modelAndView = new ModelAndView("ocr-pdf");
        modelAndView.addObject("languages", getAvailableTesseractLanguages());
        modelAndView.addObject("currentPage", "ocr-pdf");
        return modelAndView;
    }

    /**
     * Runs OCR on the uploaded PDF.
     *
     * @param inputFile         the uploaded PDF
     * @param selectedLanguages languages passed to {@code ocrmypdf --language}, joined with {@code +}
     * @param sidecar           when {@code true}, also produce a text file and return both in a zip
     * @return the OCR-processed PDF, or a zip holding the PDF and the text file
     * @throws IOException          if no language is selected, the command fails, or file handling fails
     * @throws InterruptedException if the thread is interrupted while the command runs
     */
    @PostMapping("/ocr-pdf")
    public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile,
            @RequestParam("languages") List<String> selectedLanguages,
            @RequestParam(name = "sidecar", required = false) Boolean sidecar) throws IOException, InterruptedException {

        if (selectedLanguages == null || selectedLanguages.isEmpty()) {
            throw new IOException("Please select at least one language.");
        }
        boolean withSidecar = Boolean.TRUE.equals(sidecar);

        String baseName = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "");
        String outputFilename = baseName + "_OCR.pdf";

        Path tempInputFile = null;
        Path tempOutputFile = null;
        Path sidecarFile = null;
        Path tempZipFile = null;
        try {
            // Save the upload to a temporary location and reserve the output path.
            tempInputFile = Files.createTempFile("input_", ".pdf");
            inputFile.transferTo(tempInputFile.toFile());
            tempOutputFile = Files.createTempFile("output_", ".pdf");

            // Build the command as an argument list; options first, then input and output.
            String languageOption = String.join("+", selectedLanguages);
            List<String> command = new ArrayList<>(Arrays.asList("ocrmypdf", "--verbose", "2", "--language", languageOption));
            if (withSidecar) {
                // Same name as the output file with a .txt extension; only the file name
                // is rewritten, never a ".pdf" that happens to occur in the directory path.
                String outputName = tempOutputFile.getFileName().toString();
                String sidecarName = outputName.substring(0, outputName.length() - ".pdf".length()) + ".txt";
                sidecarFile = tempOutputFile.resolveSibling(sidecarName);
                command.add("--sidecar");
                command.add(sidecarFile.toString());
            }
            command.add(tempInputFile.toString());
            command.add(tempOutputFile.toString());

            ProcessExecutor.runCommandWithOutputHandling(command);

            HttpHeaders headers = new HttpHeaders();
            if (!withSidecar) {
                headers.setContentType(MediaType.APPLICATION_PDF);
                headers.setContentDispositionFormData("attachment", outputFilename);
                return ResponseEntity.ok().headers(headers).body(Files.readAllBytes(tempOutputFile));
            }

            // Bundle the PDF and the text file; entries carry download-friendly names,
            // not the server's temporary paths.
            tempZipFile = Files.createTempFile("output_", ".zip");
            try (ZipOutputStream zipOut = new ZipOutputStream(Files.newOutputStream(tempZipFile))) {
                zipOut.putNextEntry(new ZipEntry(outputFilename));
                Files.copy(tempOutputFile, zipOut);
                zipOut.closeEntry();

                zipOut.putNextEntry(new ZipEntry(baseName + "_OCR.txt"));
                Files.copy(sidecarFile, zipOut);
                zipOut.closeEntry();
            }

            headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
            headers.setContentDispositionFormData("attachment", baseName + "_OCR.zip");
            return ResponseEntity.ok().headers(headers).body(Files.readAllBytes(tempZipFile));
        } finally {
            // Runs on success and on failure, so temporary files never accumulate.
            deleteQuietly(tempInputFile);
            deleteQuietly(tempOutputFile);
            deleteQuietly(sidecarFile);
            deleteQuietly(tempZipFile);
        }
    }

    /**
     * Lists the Tesseract languages installed under
     * {@code /usr/share/tesseract-ocr/4.00/tessdata}, excluding {@code osd}.
     *
     * @return language codes derived from the {@code .traineddata} file names; empty if the directory cannot be listed
     */
    public List<String> getAvailableTesseractLanguages() {
        String tessdataDir = "/usr/share/tesseract-ocr/4.00/tessdata";
        File[] files = new File(tessdataDir).listFiles();
        if (files == null) {
            return Collections.emptyList();
        }
        return Arrays.stream(files)
                .filter(file -> file.getName().endsWith(".traineddata"))
                .map(file -> file.getName().replace(".traineddata", ""))
                .filter(lang -> !lang.equalsIgnoreCase("osd"))
                .collect(Collectors.toList());
    }

    /** Best-effort removal of a temporary file; a failure is logged, never propagated. */
    private static void deleteQuietly(Path path) {
        if (path == null) {
            return;
        }
        try {
            Files.deleteIfExists(path);
        } catch (IOException e) {
            logger.warn("Could not delete temporary file {}", path, e);
        }
    }
}

View File

@@ -10,7 +10,7 @@ import org.springframework.web.bind.annotation.GetMapping;
public class PdfController {
private static final Logger logger = LoggerFactory.getLogger(PdfController.class);
@GetMapping("/home")
public String root(Model model) {
return "redirect:/";
@@ -22,4 +22,6 @@ public class PdfController {
return "home";
}
}

View File

@@ -49,7 +49,6 @@ public class RearrangePagesPDFController {
int pageIndex = pagesToRemove.get(i);
document.removePage(pageIndex);
}
return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_removed_pages.pdf");
}

View File

@@ -134,7 +134,7 @@ public class SplitPDFController {
ByteArrayResource resource = new ByteArrayResource(data);
new File("split_documents.zip").delete();
// return the Resource in the response
return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=split_documents.zip").contentType(MediaType.APPLICATION_OCTET_STREAM)
return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_split.zip").contentType(MediaType.APPLICATION_OCTET_STREAM)
.contentLength(resource.contentLength()).body(resource);
}
}

View File

@@ -78,7 +78,7 @@ public class ConvertImgPDFController {
} else {
ByteArrayResource resource = new ByteArrayResource(result);
// return the Resource in the response
return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=converted_documents.zip").contentType(MediaType.APPLICATION_OCTET_STREAM)
return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename="+ file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToImages.zip").contentType(MediaType.APPLICATION_OCTET_STREAM)
.contentLength(resource.contentLength()).body(resource);
}
}

View File

@@ -0,0 +1,82 @@
package stirling.software.SPDF.controller.converters;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.servlet.ModelAndView;
import stirling.software.SPDF.LibreOfficeListener;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
/**
 * Serves the "file to PDF" form and converts an uploaded file to PDF by invoking
 * {@code unoconv}.
 */
@Controller
public class ConvertOfficeController {

    /** Renders the file-to-pdf form. */
    @GetMapping("/file-to-pdf")
    public String convertToPdfForm(Model model) {
        model.addAttribute("currentPage", "file-to-pdf");
        return "convert/file-to-pdf";
    }

    /**
     * Converts the uploaded file and returns the PDF as a download named after it.
     *
     * @throws IOException          if the conversion command fails or file handling fails
     * @throws InterruptedException if the thread is interrupted while the command runs
     */
    @PostMapping("/file-to-pdf")
    public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
        // Unused, but can start a server instance if startup time is too long:
        // LibreOfficeListener.getInstance().start();
        byte[] pdfByteArray = convertToPdf(inputFile);
        return PdfUtils.bytesToWebResponse(pdfByteArray, inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
    }

    /**
     * Writes the upload to a temporary file, runs {@code unoconv -f pdf} on it and
     * returns the bytes of the produced PDF. Temporary files are removed whether or
     * not the conversion succeeds.
     *
     * @param inputFile the uploaded file; its extension is kept on the temporary copy
     * @return the converted PDF
     * @throws IOException          if the command fails or a file cannot be read or written
     * @throws InterruptedException if the thread is interrupted while the command runs
     */
    public byte[] convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException {
        Path tempInputFile = null;
        Path tempOutputFile = null;
        try {
            // Save the uploaded file to a temporary location.
            tempInputFile = Files.createTempFile("input_", "." + getFileExtension(inputFile.getOriginalFilename()));
            inputFile.transferTo(tempInputFile.toFile());

            // Prepare the output file path.
            tempOutputFile = Files.createTempFile("output_", ".pdf");

            // Run the conversion; arguments are passed as a list, never through a shell.
            List<String> command = new ArrayList<>(Arrays.asList("unoconv", "-vvv",
                    "-f",
                    "pdf",
                    "-o",
                    tempOutputFile.toString(),
                    tempInputFile.toString()));
            ProcessExecutor.runCommandWithOutputHandling(command);

            return Files.readAllBytes(tempOutputFile);
        } finally {
            deleteQuietly(tempInputFile);
            deleteQuietly(tempOutputFile);
        }
    }

    /**
     * Returns the text after the last dot of {@code fileName}, reduced to ASCII
     * letters and digits so that a client-supplied name cannot inject path
     * separators or other special characters into the temporary file name.
     *
     * @return the sanitised extension, or an empty string when there is none
     */
    private String getFileExtension(String fileName) {
        if (fileName == null) {
            return "";
        }
        int dotIndex = fileName.lastIndexOf('.');
        if (dotIndex == -1) {
            return "";
        }
        return fileName.substring(dotIndex + 1).replaceAll("[^A-Za-z0-9]", "");
    }

    /** Best-effort removal of a temporary file. */
    private static void deleteQuietly(Path path) {
        if (path == null) {
            return;
        }
        try {
            Files.deleteIfExists(path);
        } catch (IOException ignored) {
            // Cleanup must not mask the outcome of the conversion itself.
        }
    }
}

View File

@@ -1,81 +0,0 @@
package stirling.software.SPDF.controller.converters;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.poi.ss.usermodel.Color;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Element;
import com.itextpdf.text.pdf.PdfPCell;
import com.itextpdf.text.pdf.PdfPTable;
import com.itextpdf.text.pdf.PdfWriter;
import stirling.software.SPDF.utils.PdfUtils;
@Controller
public class ConvertXlsxController {
@GetMapping("/xlsx-to-pdf")
public String cinvertToPDF(Model model) {
model.addAttribute("currentPage", "xlsx-to-pdf");
return "convert/xlsx-to-pdf";
}
@PostMapping("/xlsx-to-pdf")
public ResponseEntity<byte[]> convertToPDF(@RequestParam("fileInput") MultipartFile xlsx) throws IOException, DocumentException{
// Load Excel file
Workbook workbook = WorkbookFactory.create(xlsx.getInputStream());
// Create PDF document
Document document = new Document();
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
PdfWriter.getInstance(document, outputStream);
document.open();
// Convert each sheet in Excel to a separate page in PDF
for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
PdfPTable table = new PdfPTable(workbook.getSheetAt(i).getRow(0).getPhysicalNumberOfCells());
for (int row = 0; row < workbook.getSheetAt(i).getPhysicalNumberOfRows(); row++) {
for (int cell = 0; cell < workbook.getSheetAt(i).getRow(row).getPhysicalNumberOfCells(); cell++) {
PdfPCell pdfCell = new PdfPCell();
pdfCell.addElement(new com.itextpdf.text.Paragraph(workbook.getSheetAt(i).getRow(row).getCell(cell).toString()));
// Copy cell style, borders, and background color
pdfCell.setBorderColor(new BaseColor(workbook.getSheetAt(i).getRow(row).getCell(cell).getCellStyle().getBottomBorderColor()));
pdfCell.setBorderColor(new BaseColor(workbook.getSheetAt(i).getRow(row).getCell(cell).getCellStyle().getTopBorderColor()));
pdfCell.setBorderColor(new BaseColor(workbook.getSheetAt(i).getRow(row).getCell(cell).getCellStyle().getLeftBorderColor()));
pdfCell.setBorderColor(new BaseColor(workbook.getSheetAt(i).getRow(row).getCell(cell).getCellStyle().getRightBorderColor()));
Short bc = workbook.getSheetAt(i).getRow(row).getCell(cell).getCellStyle().getFillBackgroundColor();
pdfCell.setBackgroundColor(new BaseColor(bc));
table.addCell(pdfCell);
}
}
document.add(table);
}
// Close document and output stream
document.close();
outputStream.flush();
outputStream.close();
return PdfUtils.boasToWebResponse(outputStream, xlsx.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
// Close document and input stream
}
}

View File

@@ -2,12 +2,19 @@ package stirling.software.SPDF.controller.security;
import java.awt.Color;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationMarkup;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.util.Matrix;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
@@ -18,6 +25,7 @@ import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.WatermarkRemover;
@Controller
public class WatermarkController {
@@ -28,6 +36,12 @@ public class WatermarkController {
return "security/add-watermark";
}
/**
 * Renders the remove-watermark form.
 *
 * @param model view model; receives the {@code currentPage} attribute
 * @return the name of the remove-watermark view
 */
@GetMapping("/remove-watermark")
public String removeWatermarkForm(Model model) {
    final String viewName = "security/remove-watermark";
    model.addAttribute("currentPage", "remove-watermark");
    return viewName;
}
@PostMapping("/add-watermark")
public ResponseEntity<byte[]> addWatermark(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("watermarkText") String watermarkText,
@RequestParam(defaultValue = "30", name = "fontSize") float fontSize, @RequestParam(defaultValue = "0", name = "rotation") float rotation,
@@ -71,4 +85,62 @@ public class WatermarkController {
}
return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_watermarked.pdf");
}
/**
 * Removes occurrences of {@code watermarkText} from an uploaded PDF: from the text
 * operators of every page, from markup annotations whose contents contain the
 * text, and from form field values.
 *
 * @param pdfFile       the uploaded PDF
 * @param watermarkText the literal text to remove
 * @return the processed document as a download named {@code removed.pdf}
 * @throws Exception if the PDF cannot be read, processed or written
 */
@PostMapping("/remove-watermark")
public ResponseEntity<byte[]> removeWatermark(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("watermarkText") String watermarkText) throws Exception {

    // Both documents are closed on every path. The input document stays open until
    // the response bytes exist because the output document reuses its page objects.
    try (PDDocument document = PDDocument.load(pdfFile.getInputStream());
            PDDocument outputDocument = new PDDocument()) {

        WatermarkRemover editor = new WatermarkRemover(watermarkText);

        // Loop through the pages
        int numPages = document.getNumberOfPages();
        for (int i = 0; i < numPages; i++) {
            PDPage page = document.getPage(i);

            // NOTE(review): each page is processed twice, as in the original code -
            // presumably deliberate; confirm whether a single pass is sufficient.
            editor.processPage(page);
            editor.processPage(page);

            // Add the page to the output document
            outputDocument.addPage(page);
        }

        // Drop markup annotations that carry the watermark text.
        for (PDPage page : outputDocument.getPages()) {
            List<PDAnnotation> annotations = page.getAnnotations();
            List<PDAnnotation> annotationsToRemove = new ArrayList<>();
            for (PDAnnotation annotation : annotations) {
                if (annotation instanceof PDAnnotationMarkup) {
                    String contents = ((PDAnnotationMarkup) annotation).getContents();
                    if (contents != null && contents.contains(watermarkText)) {
                        annotationsToRemove.add(annotation);
                    }
                }
            }
            annotations.removeAll(annotationsToRemove);
        }

        // Strip the watermark text from form field values.
        PDDocumentCatalog catalog = outputDocument.getDocumentCatalog();
        PDAcroForm acroForm = catalog.getAcroForm();
        if (acroForm != null) {
            for (PDField field : acroForm.getFields()) {
                String fieldValue = field.getValueAsString();
                if (fieldValue != null && fieldValue.contains(watermarkText)) {
                    field.setValue(fieldValue.replace(watermarkText, ""));
                }
            }
        }

        return PdfUtils.pdfDocToWebResponse(outputDocument, "removed.pdf");
    }
}
}

View File

@@ -32,7 +32,6 @@ import org.springframework.http.ResponseEntity;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfWriter;
import com.spire.pdf.PdfDocument;
public class PdfUtils {
@@ -172,16 +171,7 @@ public class PdfUtils {
return PdfUtils.boasToWebResponse(baos, docName);
}
/**
 * Serialises {@code document} into memory, closes it and wraps the bytes in a
 * download response.
 *
 * @param document the document to send; always closed by this method
 * @param docName  the file name offered to the client
 * @return the response produced by {@code boasToWebResponse}
 * @throws IOException if the response cannot be built
 */
public static ResponseEntity<byte[]> pdfDocToWebResponse(PdfDocument document, String docName) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try {
        // Save the document into the in-memory buffer
        document.saveToStream(baos);
    } finally {
        // Close the document even when saving fails
        document.close();
    }
    return PdfUtils.boasToWebResponse(baos, docName);
}
public static ResponseEntity<byte[]> pdfDocToWebResponse(PDDocument document, String docName) throws IOException {

View File

@@ -0,0 +1,69 @@
package stirling.software.SPDF.utils;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.List;
import java.io.BufferedReader;
import java.util.ArrayList;
/**
 * Runs external commands and captures their output.
 */
public class ProcessExecutor {

    /**
     * Runs {@code command}, draining its standard output and standard error on
     * separate threads so the child can never block on a full pipe, and echoes
     * whatever was captured to {@code System.out}.
     *
     * @param command the program and its arguments
     * @return the exit code, which is always {@code 0} on normal return
     * @throws IOException          if the process cannot be started, or exits with a non-zero
     *                              code (whether or not it wrote anything to standard error)
     * @throws InterruptedException if the calling thread is interrupted while waiting; the
     *                              child process is destroyed before the exception propagates
     */
    public static int runCommandWithOutputHandling(List<String> command) throws IOException, InterruptedException {
        Process process = new ProcessBuilder(command).start();

        // Each list is written by exactly one reader thread and read only after join().
        List<String> errorLines = new ArrayList<>();
        List<String> outputLines = new ArrayList<>();
        Thread errorReaderThread = new Thread(() -> readLines(process.getErrorStream(), errorLines));
        Thread outputReaderThread = new Thread(() -> readLines(process.getInputStream(), outputLines));
        errorReaderThread.start();
        outputReaderThread.start();

        int exitCode;
        try {
            // Wait for the process, then for the readers to consume the remaining output.
            exitCode = process.waitFor();
            errorReaderThread.join();
            outputReaderThread.join();
        } catch (InterruptedException e) {
            process.destroy(); // do not leave an orphaned child behind
            throw e;
        }

        if (!outputLines.isEmpty()) {
            System.out.println("Command output:\n" + String.join("\n", outputLines));
        }
        String errorMessage = String.join("\n", errorLines);
        if (!errorLines.isEmpty()) {
            System.out.println("Command error output:\n" + errorMessage);
        }
        // A failing exit code is an error even when nothing was written to standard error.
        if (exitCode != 0) {
            throw new IOException("Command process failed with exit code " + exitCode + ". Error message: " + errorMessage);
        }
        return exitCode;
    }

    /** Reads {@code stream} line by line as UTF-8 into {@code sink} until end of stream. */
    private static void readLines(java.io.InputStream stream, List<String> sink) {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                sink.add(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

View File

@@ -0,0 +1,69 @@
package stirling.software.SPDF.utils;
import java.io.IOException;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.pdfbox.contentstream.PDFStreamEngine;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSString;
/**
 * Content-stream engine that deletes a literal watermark text from the string
 * operands of the text-showing operators ({@code Tj}, {@code TJ}, {@code '} and
 * {@code "}) as a page is processed.
 *
 * <p>Matching is literal: the watermark text is quoted before it is compiled, so
 * regular-expression metacharacters in it have no special meaning.
 */
public class WatermarkRemover extends PDFStreamEngine {

    private final Pattern pattern;

    /**
     * @param watermarkText the literal text to remove from text-showing operands
     */
    public WatermarkRemover(String watermarkText) {
        this.pattern = Pattern.compile(Pattern.quote(watermarkText));
    }

    /**
     * Strips the watermark text from the operands of text-showing operators, then
     * delegates to the default operator handling.
     */
    @Override
    protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
        String operation = operator.getName();
        boolean showsText = "Tj".equals(operation) || "TJ".equals(operation)
                || "'".equals(operation) || "\"".equals(operation);

        if (showsText) {
            for (COSBase operand : operands) {
                if (operand instanceof COSString) {
                    stripWatermark((COSString) operand);
                } else if (operand instanceof COSArray) {
                    // TJ passes an array that mixes strings with positioning numbers.
                    COSArray array = (COSArray) operand;
                    for (int i = 0; i < array.size(); i++) {
                        COSBase item = array.get(i);
                        if (item instanceof COSString) {
                            stripWatermark((COSString) item);
                        }
                    }
                }
            }
        }
        super.processOperator(operator, operands);
    }

    /**
     * Removes every occurrence of the watermark text from {@code cosString} in
     * place. The string is left untouched when it does not contain the text.
     */
    private void stripWatermark(COSString cosString) {
        Matcher matcher = pattern.matcher(cosString.getString());
        if (matcher.find()) {
            // Re-encode through COSString rather than String.getBytes(): the latter
            // uses the platform default charset and would corrupt non-ASCII text.
            cosString.setValue(new COSString(matcher.replaceAll("")).getBytes());
        }
    }
}