Lots of changes (#70)

Image extraction and conversion to formats 

Multi parallel file execution for all forms so you can input multiple files quickly 

Any file at all pdf using libreoffice, super powerful
Sadly makes docker image larger but worth it 

OCR PDF using ocr my pdf
Works awesomely for adding text to a image

Improved compression using ocr my pdf app

Settings page with custom download options such as 
- open in same window
- open in new window
- download
- download as zip

Update detection in settings page it should show notification if there is a update (very hidden)

UI cleanups

Add other image formats to PDF to Image

Various fies to icons, and pdf.js usage
This commit is contained in:
Anthony Stirling
2023-03-20 21:55:11 +00:00
committed by GitHub
parent 54abb53842
commit a9145fe84c
54 changed files with 82327 additions and 8300 deletions

View File

@@ -1,9 +1,23 @@
package stirling.software.SPDF.controller;
import java.awt.Graphics2D;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import javax.imageio.ImageIO;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
@@ -12,16 +26,31 @@ import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import com.spire.pdf.PdfCompressionLevel;
import com.spire.pdf.PdfDocument;
import com.spire.pdf.PdfPageBase;
import com.spire.pdf.exporting.PdfImageInfo;
import com.spire.pdf.graphics.PdfBitmap;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;
import stirling.software.SPDF.utils.ErrorUtils;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
//import com.spire.pdf.*;
@Controller
public class CompressController {
@@ -33,36 +62,56 @@ public class CompressController {
return "compress-pdf";
}
@PostMapping("/compress-pdf")
public ResponseEntity<byte[]> compressPDF(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("imageCompressionLevel") String imageCompressionLevel)
throws IOException {
// Load a sample PDF document
PdfDocument document = new PdfDocument();
document.loadFromBytes(pdfFile.getBytes());
public ResponseEntity<byte[]> optimizePdf(
@RequestParam("fileInput") MultipartFile inputFile,
@RequestParam("optimizeLevel") int optimizeLevel,
@RequestParam(name = "fastWebView", required = false) Boolean fastWebView,
@RequestParam(name = "jbig2Lossy", required = false) Boolean jbig2Lossy) throws IOException, InterruptedException {
// Compress PDF
document.getFileInfo().setIncrementalUpdate(false);
document.setCompressionLevel(PdfCompressionLevel.Best);
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", ".pdf");
inputFile.transferTo(tempInputFile.toFile());
// compress PDF Images
for (int i = 0; i < document.getPages().getCount(); i++) {
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
PdfPageBase page = document.getPages().get(i);
PdfImageInfo[] images = page.getImagesInfo();
if (images != null && images.length > 0)
for (int j = 0; j < images.length; j++) {
PdfImageInfo image = images[j];
PdfBitmap bp = new PdfBitmap(image.getImage());
// bp.setPngDirectToJpeg(true);
bp.setQuality(Integer.valueOf(imageCompressionLevel));
// Prepare the OCRmyPDF command
List<String> command = new ArrayList<>();
command.add("ocrmypdf");
command.add("--optimize");
command.add(String.valueOf(optimizeLevel));
page.replaceImage(j, bp);
}
if (fastWebView != null && fastWebView) {
long fileSize = inputFile.getSize();
long fastWebViewSize = (long) (fileSize * 1.25); // 25% higher than file size
command.add("--fast-web-view");
command.add(String.valueOf(fastWebViewSize));
}
return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_compressed.pdf");
if (jbig2Lossy != null && jbig2Lossy) {
command.add("--jbig2-lossy");
}
}
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
int returnCode = ProcessExecutor.runCommandWithOutputHandling(command);
// Read the optimized PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Clean up the temporary files
Files.delete(tempInputFile);
Files.delete(tempOutputFile);
// Return the optimized PDF as a response
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_Optimized.pdf";
HttpHeaders headers = new HttpHeaders();
headers.setContentType(MediaType.APPLICATION_PDF);
headers.setContentDispositionFormData("attachment", outputFilename);
return ResponseEntity.ok().headers(headers).body(pdfBytes);
}
}

View File

@@ -0,0 +1,129 @@
package stirling.software.SPDF.controller;
import java.awt.Graphics2D;
import java.awt.Image;
import java.awt.image.BufferedImage;
import java.awt.image.RenderedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.Deflater;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import javax.imageio.ImageIO;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.core.io.ByteArrayResource;
import org.springframework.core.io.Resource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
@Controller
public class ExtractImagesController {
private static final Logger logger = LoggerFactory.getLogger(ExtractImagesController.class);
@GetMapping("/extract-images")
public String extractImagesForm(Model model) {
model.addAttribute("currentPage", "extract-images");
return "extract-images";
}
@PostMapping("/extract-images")
public ResponseEntity<Resource> extractImages(@RequestParam("fileInput") MultipartFile file, @RequestParam("format") String format) throws IOException {
System.out.println(System.currentTimeMillis() + "file=" + file.getName() + ", format=" + format);
PDDocument document = PDDocument.load(file.getBytes());
// Create ByteArrayOutputStream to write zip file to byte array
ByteArrayOutputStream baos = new ByteArrayOutputStream();
// Create ZipOutputStream to create zip file
ZipOutputStream zos = new ZipOutputStream(baos);
// Set compression level
zos.setLevel(Deflater.BEST_COMPRESSION);
int imageIndex = 1;
int pageNum = 1;
// Iterate over each page
for (PDPage page : document.getPages()) {
++pageNum;
// Extract images from page
for (COSName name : page.getResources().getXObjectNames()) {
if (page.getResources().isImageXObject(name)) {
PDImageXObject image = (PDImageXObject) page.getResources().getXObject(name);
// Convert image to desired format
RenderedImage renderedImage = image.getImage();
BufferedImage bufferedImage = null;
if (format.equalsIgnoreCase("png")) {
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(),
BufferedImage.TYPE_INT_ARGB);
} else if (format.equalsIgnoreCase("jpeg") || format.equalsIgnoreCase("jpg")) {
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(),
BufferedImage.TYPE_INT_RGB);
} else if (format.equalsIgnoreCase("gif")) {
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(),
BufferedImage.TYPE_BYTE_INDEXED);
}
// Write image to zip file
String imageName = "Image " + imageIndex + " (Page " + pageNum + ")." + format;
ZipEntry zipEntry = new ZipEntry(imageName);
zos.putNextEntry(zipEntry);
Graphics2D g = bufferedImage.createGraphics();
g.drawImage((Image) renderedImage, 0, 0, null);
g.dispose();
// Write image bytes to zip file
ByteArrayOutputStream imageBaos = new ByteArrayOutputStream();
ImageIO.write(bufferedImage, format, imageBaos);
zos.write(imageBaos.toByteArray());
zos.closeEntry();
imageIndex++;
}
}
}
// Close ZipOutputStream and PDDocument
zos.close();
document.close();
// Create ByteArrayResource from byte array
byte[] zipContents = baos.toByteArray();
ByteArrayResource resource = new ByteArrayResource(zipContents);
// Set content disposition header to indicate that the response should be downloaded as a file
HttpHeaders headers = new HttpHeaders();
headers.setContentLength(zipContents.length);
headers.add(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_extracted-images.zip");
// Return ResponseEntity with ByteArrayResource and headers
return ResponseEntity
.status(HttpStatus.OK)
.headers(headers)
.header("Cache-Control", "no-cache")
.contentType(MediaType.APPLICATION_OCTET_STREAM)
.body(resource);
}
}

View File

@@ -0,0 +1,143 @@
package stirling.software.SPDF.controller;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.servlet.ModelAndView;
import stirling.software.SPDF.utils.ProcessExecutor;
import java.io.FileOutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
//import com.spire.pdf.*;
@Controller
public class OCRController {
private static final Logger logger = LoggerFactory.getLogger(OCRController.class);
@GetMapping("/ocr-pdf")
public ModelAndView ocrPdfPage() {
ModelAndView modelAndView = new ModelAndView("ocr-pdf");
modelAndView.addObject("languages", getAvailableTesseractLanguages());
modelAndView.addObject("currentPage", "ocr-pdf");
return modelAndView;
}
@PostMapping("/ocr-pdf")
public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile,
@RequestParam("languages") List<String> selectedLanguages,
@RequestParam(name = "sidecar", required = false) Boolean sidecar) throws IOException, InterruptedException {
//--output-type pdfa
if (selectedLanguages == null || selectedLanguages.size() < 1) {
throw new IOException("Please select at least one language.");
}
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", ".pdf");
inputFile.transferTo(tempInputFile.toFile());
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
// Run OCR Command
String languageOption = String.join("+", selectedLanguages);
List<String> command = new ArrayList<>(Arrays.asList("ocrmypdf","--verbose", "2", "--language", languageOption,
tempInputFile.toString(), tempOutputFile.toString()));
String sidecarFile = tempOutputFile.toString().replace(".pdf", ".txt");
if (sidecar != null && sidecar) {
command.add("--sidecar");
command.add(sidecarFile);
}
int returnCode = ProcessExecutor.runCommandWithOutputHandling(command);
// Read the OCR processed PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Clean up the temporary files
Files.delete(tempInputFile);
// Return the OCR processed PDF as a response
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf";
HttpHeaders headers = new HttpHeaders();
if (sidecar != null && sidecar) {
// Create a zip file containing both the PDF and the text file
String outputZipFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.zip";
Path tempZipFile = Files.createTempFile("output_", ".zip");
try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
// Add PDF file to the zip
ZipEntry pdfEntry = new ZipEntry(outputFilename);
zipOut.putNextEntry(pdfEntry);
Files.copy(tempOutputFile, zipOut);
zipOut.closeEntry();
// Add text file to the zip
ZipEntry txtEntry = new ZipEntry(sidecarFile);
zipOut.putNextEntry(txtEntry);
Files.copy(Paths.get(sidecarFile), zipOut);
zipOut.closeEntry();
}
byte[] zipBytes = Files.readAllBytes(tempZipFile);
// Clean up the temporary zip file
Files.delete(tempZipFile);
Files.delete(tempOutputFile);
Files.delete(Paths.get(sidecarFile));
// Return the zip file containing both the PDF and the text file
headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
headers.setContentDispositionFormData("attachment", outputZipFilename);
return ResponseEntity.ok().headers(headers).body(zipBytes);
} else {
// Return the OCR processed PDF as a response
Files.delete(tempOutputFile);
headers.setContentType(MediaType.APPLICATION_PDF);
headers.setContentDispositionFormData("attachment", outputFilename);
return ResponseEntity.ok().headers(headers).body(pdfBytes);
}
}
public List<String> getAvailableTesseractLanguages() {
String tessdataDir = "/usr/share/tesseract-ocr/4.00/tessdata";
File[] files = new File(tessdataDir).listFiles();
if (files == null) {
return Collections.emptyList();
}
return Arrays.stream(files)
.filter(file -> file.getName().endsWith(".traineddata"))
.map(file -> file.getName().replace(".traineddata", ""))
.filter(lang -> !lang.equalsIgnoreCase("osd"))
.collect(Collectors.toList());
}
}

View File

@@ -10,7 +10,7 @@ import org.springframework.web.bind.annotation.GetMapping;
public class PdfController {
private static final Logger logger = LoggerFactory.getLogger(PdfController.class);
@GetMapping("/home")
public String root(Model model) {
return "redirect:/";
@@ -22,4 +22,6 @@ public class PdfController {
return "home";
}
}

View File

@@ -49,7 +49,6 @@ public class RearrangePagesPDFController {
int pageIndex = pagesToRemove.get(i);
document.removePage(pageIndex);
}
return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_removed_pages.pdf");
}

View File

@@ -134,7 +134,7 @@ public class SplitPDFController {
ByteArrayResource resource = new ByteArrayResource(data);
new File("split_documents.zip").delete();
// return the Resource in the response
return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=split_documents.zip").contentType(MediaType.APPLICATION_OCTET_STREAM)
return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_split.zip").contentType(MediaType.APPLICATION_OCTET_STREAM)
.contentLength(resource.contentLength()).body(resource);
}
}

View File

@@ -78,7 +78,7 @@ public class ConvertImgPDFController {
} else {
ByteArrayResource resource = new ByteArrayResource(result);
// return the Resource in the response
return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=converted_documents.zip").contentType(MediaType.APPLICATION_OCTET_STREAM)
return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename="+ file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToImages.zip").contentType(MediaType.APPLICATION_OCTET_STREAM)
.contentLength(resource.contentLength()).body(resource);
}
}

View File

@@ -0,0 +1,82 @@
package stirling.software.SPDF.controller.converters;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.servlet.ModelAndView;
import stirling.software.SPDF.LibreOfficeListener;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
@Controller
public class ConvertOfficeController {
@GetMapping("/file-to-pdf")
public String convertToPdfForm(Model model) {
model.addAttribute("currentPage", "file-to-pdf");
return "convert/file-to-pdf";
}
@PostMapping("/file-to-pdf")
public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
//unused but can start server instance if startup time is to long
//LibreOfficeListener.getInstance().start();
byte[] pdfByteArray = convertToPdf(inputFile);
return PdfUtils.bytesToWebResponse(pdfByteArray, inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
}
public byte[] convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException {
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", "." + getFileExtension(inputFile.getOriginalFilename()));
inputFile.transferTo(tempInputFile.toFile());
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
// Run the LibreOffice command
List<String> command = new ArrayList<>(Arrays.asList("unoconv", "-vvv",
"-f",
"pdf",
"-o",
tempOutputFile.toString(),
tempInputFile.toString()));
int returnCode = ProcessExecutor.runCommandWithOutputHandling(command);
// Read the converted PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Clean up the temporary files
Files.delete(tempInputFile);
Files.delete(tempOutputFile);
return pdfBytes;
}
private String getFileExtension(String fileName) {
int dotIndex = fileName.lastIndexOf('.');
if (dotIndex == -1) {
return "";
}
return fileName.substring(dotIndex + 1);
}
}

View File

@@ -1,81 +0,0 @@
package stirling.software.SPDF.controller.converters;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.poi.ss.usermodel.Color;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Element;
import com.itextpdf.text.pdf.PdfPCell;
import com.itextpdf.text.pdf.PdfPTable;
import com.itextpdf.text.pdf.PdfWriter;
import stirling.software.SPDF.utils.PdfUtils;
@Controller
public class ConvertXlsxController {
@GetMapping("/xlsx-to-pdf")
public String cinvertToPDF(Model model) {
model.addAttribute("currentPage", "xlsx-to-pdf");
return "convert/xlsx-to-pdf";
}
@PostMapping("/xlsx-to-pdf")
public ResponseEntity<byte[]> convertToPDF(@RequestParam("fileInput") MultipartFile xlsx) throws IOException, DocumentException{
// Load Excel file
Workbook workbook = WorkbookFactory.create(xlsx.getInputStream());
// Create PDF document
Document document = new Document();
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
PdfWriter.getInstance(document, outputStream);
document.open();
// Convert each sheet in Excel to a separate page in PDF
for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
PdfPTable table = new PdfPTable(workbook.getSheetAt(i).getRow(0).getPhysicalNumberOfCells());
for (int row = 0; row < workbook.getSheetAt(i).getPhysicalNumberOfRows(); row++) {
for (int cell = 0; cell < workbook.getSheetAt(i).getRow(row).getPhysicalNumberOfCells(); cell++) {
PdfPCell pdfCell = new PdfPCell();
pdfCell.addElement(new com.itextpdf.text.Paragraph(workbook.getSheetAt(i).getRow(row).getCell(cell).toString()));
// Copy cell style, borders, and background color
pdfCell.setBorderColor(new BaseColor(workbook.getSheetAt(i).getRow(row).getCell(cell).getCellStyle().getBottomBorderColor()));
pdfCell.setBorderColor(new BaseColor(workbook.getSheetAt(i).getRow(row).getCell(cell).getCellStyle().getTopBorderColor()));
pdfCell.setBorderColor(new BaseColor(workbook.getSheetAt(i).getRow(row).getCell(cell).getCellStyle().getLeftBorderColor()));
pdfCell.setBorderColor(new BaseColor(workbook.getSheetAt(i).getRow(row).getCell(cell).getCellStyle().getRightBorderColor()));
Short bc = workbook.getSheetAt(i).getRow(row).getCell(cell).getCellStyle().getFillBackgroundColor();
pdfCell.setBackgroundColor(new BaseColor(bc));
table.addCell(pdfCell);
}
}
document.add(table);
}
// Close document and output stream
document.close();
outputStream.flush();
outputStream.close();
return PdfUtils.boasToWebResponse(outputStream, xlsx.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
// Close document and input stream
}
}

View File

@@ -2,12 +2,19 @@ package stirling.software.SPDF.controller.security;
import java.awt.Color;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationMarkup;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.util.Matrix;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
@@ -18,6 +25,7 @@ import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.WatermarkRemover;
@Controller
public class WatermarkController {
@@ -28,6 +36,12 @@ public class WatermarkController {
return "security/add-watermark";
}
@GetMapping("/remove-watermark")
public String removeWatermarkForm(Model model) {
model.addAttribute("currentPage", "remove-watermark");
return "security/remove-watermark";
}
@PostMapping("/add-watermark")
public ResponseEntity<byte[]> addWatermark(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("watermarkText") String watermarkText,
@RequestParam(defaultValue = "30", name = "fontSize") float fontSize, @RequestParam(defaultValue = "0", name = "rotation") float rotation,
@@ -71,4 +85,62 @@ public class WatermarkController {
}
return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_watermarked.pdf");
}
@PostMapping("/remove-watermark")
public ResponseEntity<byte[]> removeWatermark(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("watermarkText") String watermarkText) throws Exception {
// Load the input PDF
PDDocument document = PDDocument.load(pdfFile.getInputStream());
// Create a new PDF document for the output
PDDocument outputDocument = new PDDocument();
// Loop through the pages
int numPages = document.getNumberOfPages();
for (int i = 0; i < numPages; i++) {
PDPage page = document.getPage(i);
// Process the content stream to remove the watermark text
WatermarkRemover editor = new WatermarkRemover(watermarkText) {};
editor.processPage(page);
editor.processPage(page);
// Add the page to the output document
outputDocument.addPage(page);
}
for (PDPage page : outputDocument.getPages()) {
List<PDAnnotation> annotations = page.getAnnotations();
List<PDAnnotation> annotationsToRemove = new ArrayList<>();
for (PDAnnotation annotation : annotations) {
if (annotation instanceof PDAnnotationMarkup) {
PDAnnotationMarkup markup = (PDAnnotationMarkup) annotation;
String contents = markup.getContents();
if (contents != null && contents.contains(watermarkText)) {
annotationsToRemove.add(markup);
}
}
}
annotations.removeAll(annotationsToRemove);
}
PDDocumentCatalog catalog = outputDocument.getDocumentCatalog();
PDAcroForm acroForm = catalog.getAcroForm();
if (acroForm != null) {
List<PDField> fields = acroForm.getFields();
for (PDField field : fields) {
String fieldValue = field.getValueAsString();
if (fieldValue.contains(watermarkText)) {
field.setValue(fieldValue.replace(watermarkText, ""));
}
}
}
return PdfUtils.pdfDocToWebResponse(outputDocument, "removed.pdf");
}
}