rework for API, folder changes, easter eggs and fun
This commit is contained in:
@@ -0,0 +1,79 @@
|
||||
package stirling.software.SPDF.controller.api.other;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.ui.Model;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RequestPart;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Hidden;
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
|
||||
@RestController
|
||||
public class CompressController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(CompressController.class);
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/compress-pdf")
|
||||
public ResponseEntity<byte[]> optimizePdf(@RequestPart(required = true, value = "fileInput") MultipartFile inputFile, @RequestParam("optimizeLevel") int optimizeLevel,
|
||||
@RequestParam(name = "fastWebView", required = false) Boolean fastWebView, @RequestParam(name = "jbig2Lossy", required = false) Boolean jbig2Lossy)
|
||||
throws IOException, InterruptedException {
|
||||
|
||||
// Save the uploaded file to a temporary location
|
||||
Path tempInputFile = Files.createTempFile("input_", ".pdf");
|
||||
inputFile.transferTo(tempInputFile.toFile());
|
||||
|
||||
// Prepare the output file path
|
||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
|
||||
// Prepare the OCRmyPDF command
|
||||
List<String> command = new ArrayList<>();
|
||||
command.add("ocrmypdf");
|
||||
command.add("--skip-text");
|
||||
command.add("--tesseract-timeout=0");
|
||||
command.add("--optimize");
|
||||
command.add(String.valueOf(optimizeLevel));
|
||||
command.add("--output-type");
|
||||
command.add("pdf");
|
||||
|
||||
if (fastWebView != null && fastWebView) {
|
||||
long fileSize = inputFile.getSize();
|
||||
long fastWebViewSize = (long) (fileSize * 1.25); // 25% higher than file size
|
||||
command.add("--fast-web-view");
|
||||
command.add(String.valueOf(fastWebViewSize));
|
||||
}
|
||||
|
||||
if (jbig2Lossy != null && jbig2Lossy) {
|
||||
command.add("--jbig2-lossy");
|
||||
}
|
||||
|
||||
command.add(tempInputFile.toString());
|
||||
command.add(tempOutputFile.toString());
|
||||
|
||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the optimized PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
// Clean up the temporary files
|
||||
Files.delete(tempInputFile);
|
||||
Files.delete(tempOutputFile);
|
||||
|
||||
// Return the optimized PDF as a response
|
||||
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_Optimized.pdf";
|
||||
return PdfUtils.bytesToWebResponse(pdfBytes, outputFilename);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,133 @@
|
||||
package stirling.software.SPDF.controller.api.other;
|
||||
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.rendering.PDFRenderer;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RequestPart;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.web.servlet.ModelAndView;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Hidden;
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
|
||||
@RestController
|
||||
public class ExtractImageScansController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(ExtractImageScansController.class);
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/extract-image-scans")
|
||||
public ResponseEntity<byte[]> extractImageScans(@RequestPart(required = true, value = "fileInput") MultipartFile inputFile,
|
||||
@RequestParam(name = "angle_threshold", defaultValue = "5") int angleThreshold, @RequestParam(name = "tolerance", defaultValue = "20") int tolerance,
|
||||
@RequestParam(name = "min_area", defaultValue = "8000") int minArea, @RequestParam(name = "min_contour_area", defaultValue = "500") int minContourArea,
|
||||
@RequestParam(name = "border_size", defaultValue = "1") int borderSize) throws IOException, InterruptedException {
|
||||
|
||||
String fileName = inputFile.getOriginalFilename();
|
||||
String extension = fileName.substring(fileName.lastIndexOf(".") + 1);
|
||||
|
||||
List<String> images = new ArrayList<>();
|
||||
|
||||
// Check if input file is a PDF
|
||||
if (extension.equalsIgnoreCase("pdf")) {
|
||||
// Load PDF document
|
||||
try (PDDocument document = PDDocument.load(new ByteArrayInputStream(inputFile.getBytes()))) {
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||
int pageCount = document.getNumberOfPages();
|
||||
images = new ArrayList<>();
|
||||
|
||||
// Create images of all pages
|
||||
for (int i = 0; i < pageCount; i++) {
|
||||
// Create temp file to save the image
|
||||
Path tempFile = Files.createTempFile("image_", ".png");
|
||||
|
||||
// Render image and save as temp file
|
||||
BufferedImage image = pdfRenderer.renderImageWithDPI(i, 300);
|
||||
ImageIO.write(image, "png", tempFile.toFile());
|
||||
|
||||
// Add temp file path to images list
|
||||
images.add(tempFile.toString());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Path tempInputFile = Files.createTempFile("input_", "." + extension);
|
||||
Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
|
||||
// Add input file path to images list
|
||||
images.add(tempInputFile.toString());
|
||||
}
|
||||
|
||||
List<byte[]> processedImageBytes = new ArrayList<>();
|
||||
|
||||
// Process each image
|
||||
for (int i = 0; i < images.size(); i++) {
|
||||
|
||||
Path tempDir = Files.createTempDirectory("openCV_output");
|
||||
List<String> command = new ArrayList<>(Arrays.asList("python3", "/scripts/split_photos.py", images.get(i), tempDir.toString(), String.valueOf(angleThreshold),
|
||||
String.valueOf(tolerance), String.valueOf(minArea), String.valueOf(minContourArea), String.valueOf(borderSize)));
|
||||
|
||||
// Run CLI command
|
||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the output photos in temp directory
|
||||
List<Path> tempOutputFiles = Files.list(tempDir).sorted().collect(Collectors.toList());
|
||||
for (Path tempOutputFile : tempOutputFiles) {
|
||||
byte[] imageBytes = Files.readAllBytes(tempOutputFile);
|
||||
processedImageBytes.add(imageBytes);
|
||||
}
|
||||
// Clean up the temporary directory
|
||||
FileUtils.deleteDirectory(tempDir.toFile());
|
||||
}
|
||||
|
||||
// Create zip file if multiple images
|
||||
if (processedImageBytes.size() > 1) {
|
||||
String outputZipFilename = fileName.replaceFirst("[.][^.]+$", "") + "_processed.zip";
|
||||
Path tempZipFile = Files.createTempFile("output_", ".zip");
|
||||
|
||||
try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
|
||||
// Add processed images to the zip
|
||||
for (int i = 0; i < processedImageBytes.size(); i++) {
|
||||
ZipEntry entry = new ZipEntry(fileName.replaceFirst("[.][^.]+$", "") + "_" + (i + 1) + ".png");
|
||||
zipOut.putNextEntry(entry);
|
||||
zipOut.write(processedImageBytes.get(i));
|
||||
zipOut.closeEntry();
|
||||
}
|
||||
}
|
||||
|
||||
byte[] zipBytes = Files.readAllBytes(tempZipFile);
|
||||
|
||||
// Clean up the temporary zip file
|
||||
Files.delete(tempZipFile);
|
||||
|
||||
return PdfUtils.bytesToWebResponse(zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
|
||||
} else {
|
||||
// Return the processed image as a response
|
||||
byte[] imageBytes = processedImageBytes.get(0);
|
||||
return PdfUtils.bytesToWebResponse(imageBytes, fileName.replaceFirst("[.][^.]+$", "") + ".png", MediaType.IMAGE_PNG);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,105 @@
|
||||
package stirling.software.SPDF.controller.api.other;
|
||||
|
||||
import java.awt.Graphics2D;
|
||||
import java.awt.Image;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.awt.image.RenderedImage;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.zip.Deflater;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
|
||||
import org.apache.pdfbox.cos.COSName;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.ui.Model;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RequestPart;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Hidden;
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
|
||||
@RestController
|
||||
public class ExtractImagesController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(ExtractImagesController.class);
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/extract-images")
|
||||
public ResponseEntity<byte[]> extractImages(@RequestPart(required = true, value = "fileInput") MultipartFile file, @RequestParam("format") String format) throws IOException {
|
||||
|
||||
System.out.println(System.currentTimeMillis() + "file=" + file.getName() + ", format=" + format);
|
||||
PDDocument document = PDDocument.load(file.getBytes());
|
||||
|
||||
// Create ByteArrayOutputStream to write zip file to byte array
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
|
||||
// Create ZipOutputStream to create zip file
|
||||
ZipOutputStream zos = new ZipOutputStream(baos);
|
||||
|
||||
// Set compression level
|
||||
zos.setLevel(Deflater.BEST_COMPRESSION);
|
||||
|
||||
int imageIndex = 1;
|
||||
|
||||
int pageNum = 1;
|
||||
// Iterate over each page
|
||||
for (PDPage page : document.getPages()) {
|
||||
++pageNum;
|
||||
// Extract images from page
|
||||
for (COSName name : page.getResources().getXObjectNames()) {
|
||||
if (page.getResources().isImageXObject(name)) {
|
||||
PDImageXObject image = (PDImageXObject) page.getResources().getXObject(name);
|
||||
|
||||
// Convert image to desired format
|
||||
RenderedImage renderedImage = image.getImage();
|
||||
BufferedImage bufferedImage = null;
|
||||
if (format.equalsIgnoreCase("png")) {
|
||||
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), BufferedImage.TYPE_INT_ARGB);
|
||||
} else if (format.equalsIgnoreCase("jpeg") || format.equalsIgnoreCase("jpg")) {
|
||||
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), BufferedImage.TYPE_INT_RGB);
|
||||
} else if (format.equalsIgnoreCase("gif")) {
|
||||
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), BufferedImage.TYPE_BYTE_INDEXED);
|
||||
}
|
||||
|
||||
// Write image to zip file
|
||||
String imageName = "Image " + imageIndex + " (Page " + pageNum + ")." + format;
|
||||
ZipEntry zipEntry = new ZipEntry(imageName);
|
||||
zos.putNextEntry(zipEntry);
|
||||
|
||||
Graphics2D g = bufferedImage.createGraphics();
|
||||
g.drawImage((Image) renderedImage, 0, 0, null);
|
||||
g.dispose();
|
||||
// Write image bytes to zip file
|
||||
ByteArrayOutputStream imageBaos = new ByteArrayOutputStream();
|
||||
ImageIO.write(bufferedImage, format, imageBaos);
|
||||
zos.write(imageBaos.toByteArray());
|
||||
|
||||
zos.closeEntry();
|
||||
imageIndex++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Close ZipOutputStream and PDDocument
|
||||
zos.close();
|
||||
document.close();
|
||||
|
||||
// Create ByteArrayResource from byte array
|
||||
byte[] zipContents = baos.toByteArray();
|
||||
|
||||
return PdfUtils.boasToWebResponse(baos, file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_extracted-images.zip", MediaType.APPLICATION_OCTET_STREAM);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,135 @@
|
||||
package stirling.software.SPDF.controller.api.other;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Calendar;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.pdfbox.cos.COSName;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.ui.Model;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RequestPart;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Hidden;
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
|
||||
@RestController
|
||||
public class MetadataController {
|
||||
|
||||
|
||||
private String checkUndefined(String entry) {
|
||||
// Check if the string is "undefined"
|
||||
if ("undefined".equals(entry)) {
|
||||
// Return null if it is
|
||||
return null;
|
||||
}
|
||||
// Return the original string if it's not "undefined"
|
||||
return entry;
|
||||
|
||||
}
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/update-metadata")
|
||||
public ResponseEntity<byte[]> metadata(@RequestPart(required = true, value = "fileInput") MultipartFile pdfFile,
|
||||
@RequestParam(value = "deleteAll", required = false, defaultValue = "false") Boolean deleteAll, @RequestParam(value = "author", required = false) String author,
|
||||
@RequestParam(value = "creationDate", required = false) String creationDate, @RequestParam(value = "creator", required = false) String creator,
|
||||
@RequestParam(value = "keywords", required = false) String keywords, @RequestParam(value = "modificationDate", required = false) String modificationDate,
|
||||
@RequestParam(value = "producer", required = false) String producer, @RequestParam(value = "subject", required = false) String subject,
|
||||
@RequestParam(value = "title", required = false) String title, @RequestParam(value = "trapped", required = false) String trapped,
|
||||
@RequestParam Map<String, String> allRequestParams) throws IOException {
|
||||
|
||||
// Load the PDF file into a PDDocument
|
||||
PDDocument document = PDDocument.load(pdfFile.getBytes());
|
||||
|
||||
// Get the document information from the PDF
|
||||
PDDocumentInformation info = document.getDocumentInformation();
|
||||
|
||||
// Check if each metadata value is "undefined" and set it to null if it is
|
||||
author = checkUndefined(author);
|
||||
creationDate = checkUndefined(creationDate);
|
||||
creator = checkUndefined(creator);
|
||||
keywords = checkUndefined(keywords);
|
||||
modificationDate = checkUndefined(modificationDate);
|
||||
producer = checkUndefined(producer);
|
||||
subject = checkUndefined(subject);
|
||||
title = checkUndefined(title);
|
||||
trapped = checkUndefined(trapped);
|
||||
|
||||
// If the "deleteAll" flag is set, remove all metadata from the document
|
||||
// information
|
||||
if (deleteAll) {
|
||||
for (String key : info.getMetadataKeys()) {
|
||||
info.setCustomMetadataValue(key, null);
|
||||
}
|
||||
// Remove metadata from the PDF history
|
||||
document.getDocumentCatalog().getCOSObject().removeItem(COSName.getPDFName("Metadata"));
|
||||
document.getDocumentCatalog().getCOSObject().removeItem(COSName.getPDFName("PieceInfo"));
|
||||
author = null;
|
||||
creationDate = null;
|
||||
creator = null;
|
||||
keywords = null;
|
||||
modificationDate = null;
|
||||
producer = null;
|
||||
subject = null;
|
||||
title = null;
|
||||
trapped = null;
|
||||
} else {
|
||||
// Iterate through the request parameters and set the metadata values
|
||||
for (Entry<String, String> entry : allRequestParams.entrySet()) {
|
||||
String key = entry.getKey();
|
||||
// Check if the key is a standard metadata key
|
||||
if (!key.equalsIgnoreCase("Author") && !key.equalsIgnoreCase("CreationDate") && !key.equalsIgnoreCase("Creator") && !key.equalsIgnoreCase("Keywords")
|
||||
&& !key.equalsIgnoreCase("modificationDate") && !key.equalsIgnoreCase("Producer") && !key.equalsIgnoreCase("Subject") && !key.equalsIgnoreCase("Title")
|
||||
&& !key.equalsIgnoreCase("Trapped") && !key.contains("customKey") && !key.contains("customValue")) {
|
||||
info.setCustomMetadataValue(key, entry.getValue());
|
||||
} else if (key.contains("customKey")) {
|
||||
int number = Integer.parseInt(key.replaceAll("\\D", ""));
|
||||
String customKey = entry.getValue();
|
||||
String customValue = allRequestParams.get("customValue" + number);
|
||||
info.setCustomMetadataValue(customKey, customValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (creationDate != null && creationDate.length() > 0) {
|
||||
Calendar creationDateCal = Calendar.getInstance();
|
||||
try {
|
||||
creationDateCal.setTime(new SimpleDateFormat("yyyy/MM/dd HH:mm:ss").parse(creationDate));
|
||||
} catch (ParseException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
info.setCreationDate(creationDateCal);
|
||||
} else {
|
||||
info.setCreationDate(null);
|
||||
}
|
||||
if (modificationDate != null && modificationDate.length() > 0) {
|
||||
Calendar modificationDateCal = Calendar.getInstance();
|
||||
try {
|
||||
modificationDateCal.setTime(new SimpleDateFormat("yyyy/MM/dd HH:mm:ss").parse(modificationDate));
|
||||
} catch (ParseException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
info.setModificationDate(modificationDateCal);
|
||||
} else {
|
||||
info.setModificationDate(null);
|
||||
}
|
||||
info.setCreator(creator);
|
||||
info.setKeywords(keywords);
|
||||
info.setAuthor(author);
|
||||
info.setProducer(producer);
|
||||
info.setSubject(subject);
|
||||
info.setTitle(title);
|
||||
info.setTrapped(trapped);
|
||||
|
||||
document.setDocumentInformation(info);
|
||||
return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_metadata.pdf");
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,148 @@
|
||||
package stirling.software.SPDF.controller.api.other;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RequestPart;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.web.servlet.ModelAndView;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Hidden;
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
|
||||
@RestController
|
||||
public class OCRController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(OCRController.class);
|
||||
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/ocr-pdf")
|
||||
public ResponseEntity<byte[]> processPdfWithOCR(@RequestPart(required = true, value = "fileInput") MultipartFile inputFile,
|
||||
@RequestParam("languages") List<String> selectedLanguages, @RequestParam(name = "sidecar", required = false) Boolean sidecar,
|
||||
@RequestParam(name = "deskew", required = false) Boolean deskew, @RequestParam(name = "clean", required = false) Boolean clean,
|
||||
@RequestParam(name = "clean-final", required = false) Boolean cleanFinal, @RequestParam(name = "ocrType", required = false) String ocrType)
|
||||
throws IOException, InterruptedException {
|
||||
|
||||
// --output-type pdfa
|
||||
if (selectedLanguages == null || selectedLanguages.size() < 1) {
|
||||
throw new IOException("Please select at least one language.");
|
||||
}
|
||||
|
||||
// Validate and sanitize selected languages using regex
|
||||
String languagePattern = "^[a-zA-Z]{3}$"; // Regex pattern for three-letter language codes
|
||||
selectedLanguages = selectedLanguages.stream().filter(lang -> Pattern.matches(languagePattern, lang)).collect(Collectors.toList());
|
||||
|
||||
if (selectedLanguages.isEmpty()) {
|
||||
throw new IOException("None of the selected languages are valid.");
|
||||
}
|
||||
// Save the uploaded file to a temporary location
|
||||
Path tempInputFile = Files.createTempFile("input_", ".pdf");
|
||||
Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
|
||||
|
||||
// Prepare the output file path
|
||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
|
||||
// Prepare the output file path
|
||||
Path sidecarTextPath = null;
|
||||
|
||||
// Run OCR Command
|
||||
String languageOption = String.join("+", selectedLanguages);
|
||||
|
||||
List<String> command = new ArrayList<>(Arrays.asList("ocrmypdf", "--verbose", "2", "--output-type", "pdf"));
|
||||
|
||||
if (sidecar != null && sidecar) {
|
||||
sidecarTextPath = Files.createTempFile("sidecar", ".txt");
|
||||
command.add("--sidecar");
|
||||
command.add(sidecarTextPath.toString());
|
||||
}
|
||||
|
||||
if (deskew != null && deskew) {
|
||||
command.add("--deskew");
|
||||
}
|
||||
if (clean != null && clean) {
|
||||
command.add("--clean");
|
||||
}
|
||||
if (cleanFinal != null && cleanFinal) {
|
||||
command.add("--clean-final");
|
||||
}
|
||||
if (ocrType != null && !ocrType.equals("")) {
|
||||
if ("skip-text".equals(ocrType)) {
|
||||
command.add("--skip-text");
|
||||
} else if ("force-ocr".equals(ocrType)) {
|
||||
command.add("--force-ocr");
|
||||
} else if ("Normal".equals(ocrType)) {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
command.addAll(Arrays.asList("--language", languageOption, tempInputFile.toString(), tempOutputFile.toString()));
|
||||
|
||||
// Run CLI command
|
||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the OCR processed PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
// Clean up the temporary files
|
||||
Files.delete(tempInputFile);
|
||||
// Return the OCR processed PDF as a response
|
||||
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf";
|
||||
|
||||
if (sidecar != null && sidecar) {
|
||||
// Create a zip file containing both the PDF and the text file
|
||||
String outputZipFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.zip";
|
||||
Path tempZipFile = Files.createTempFile("output_", ".zip");
|
||||
|
||||
try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
|
||||
// Add PDF file to the zip
|
||||
ZipEntry pdfEntry = new ZipEntry(outputFilename);
|
||||
zipOut.putNextEntry(pdfEntry);
|
||||
Files.copy(tempOutputFile, zipOut);
|
||||
zipOut.closeEntry();
|
||||
|
||||
// Add text file to the zip
|
||||
ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt"));
|
||||
zipOut.putNextEntry(txtEntry);
|
||||
Files.copy(sidecarTextPath, zipOut);
|
||||
zipOut.closeEntry();
|
||||
}
|
||||
|
||||
byte[] zipBytes = Files.readAllBytes(tempZipFile);
|
||||
|
||||
// Clean up the temporary zip file
|
||||
Files.delete(tempZipFile);
|
||||
Files.delete(tempOutputFile);
|
||||
Files.delete(sidecarTextPath);
|
||||
|
||||
// Return the zip file containing both the PDF and the text file
|
||||
return PdfUtils.bytesToWebResponse(pdfBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
|
||||
} else {
|
||||
// Return the OCR processed PDF as a response
|
||||
Files.delete(tempOutputFile);
|
||||
return PdfUtils.bytesToWebResponse(pdfBytes, outputFilename);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,39 @@
|
||||
package stirling.software.SPDF.controller.api.other;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.ui.Model;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RequestPart;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Hidden;
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
|
||||
@RestController
|
||||
public class OverlayImageController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(OverlayImageController.class);
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/add-image")
|
||||
public ResponseEntity<byte[]> overlayImage(@RequestPart(required = true, value = "fileInput") MultipartFile pdfFile, @RequestParam("fileInput2") MultipartFile imageFile,
|
||||
@RequestParam("x") float x, @RequestParam("y") float y, @RequestParam("everyPage") boolean everyPage) {
|
||||
try {
|
||||
byte[] pdfBytes = pdfFile.getBytes();
|
||||
byte[] imageBytes = imageFile.getBytes();
|
||||
byte[] result = PdfUtils.overlayImage(pdfBytes, imageBytes, x, y, everyPage);
|
||||
|
||||
return PdfUtils.bytesToWebResponse(result, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_overlayed.pdf");
|
||||
} catch (IOException e) {
|
||||
logger.error("Failed to add image to PDF", e);
|
||||
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user