new features

This commit is contained in:
Anthony Stirling
2023-08-01 00:03:13 +01:00
parent 0da9c62ef8
commit 77411e94a4
18 changed files with 413 additions and 78 deletions

View File

@@ -0,0 +1,86 @@
package stirling.software.SPDF.controller.api;
import java.io.IOException;
import java.io.ByteArrayOutputStream;
import com.itextpdf.kernel.pdf.*;
import com.itextpdf.kernel.pdf.xobject.PdfFormXObject;
import com.itextpdf.kernel.geom.PageSize;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.layout.Document;
import com.itextpdf.layout.element.Image;
import java.util.ArrayList;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestPart;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.WebResponseUtils;
import org.apache.pdfbox.pdmodel.*;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
@RestController
@Tag(name = "General", description = "General APIs")
public class ToSinglePageController {
private static final Logger logger = LoggerFactory.getLogger(ToSinglePageController.class);
@PostMapping(consumes = "multipart/form-data", value = "/pdf-to-single-page")
@Operation(
summary = "Convert a multi-page PDF into a single long page PDF",
description = "This endpoint converts a multi-page PDF document into a single paged PDF document. The width of the single page will be same as the input's width, but the height will be the sum of all the pages' heights. Input:PDF Output:PDF Type:SISO"
)
public ResponseEntity<byte[]> pdfToSinglePage(
@RequestPart(required = true, value = "fileInput")
@Parameter(description = "The input multi-page PDF file to be converted into a single page", required = true)
MultipartFile file) throws IOException {
PdfReader reader = new PdfReader(file.getInputStream());
PdfDocument sourceDocument = new PdfDocument(reader);
float totalHeight = 0;
float width = 0;
for (int i = 1; i <= sourceDocument.getNumberOfPages(); i++) {
Rectangle pageSize = sourceDocument.getPage(i).getPageSize();
totalHeight += pageSize.getHeight();
if(width < pageSize.getWidth())
width = pageSize.getWidth();
}
ByteArrayOutputStream baos = new ByteArrayOutputStream();
PdfWriter writer = new PdfWriter(baos);
PdfDocument newDocument = new PdfDocument(writer);
PageSize newPageSize = new PageSize(width, totalHeight);
newDocument.addNewPage(newPageSize);
Document layoutDoc = new Document(newDocument);
float yOffset = totalHeight;
for (int i = 1; i <= sourceDocument.getNumberOfPages(); i++) {
PdfFormXObject pageCopy = sourceDocument.getPage(i).copyAsFormXObject(newDocument);
Image copiedPage = new Image(pageCopy);
copiedPage.setFixedPosition(0, yOffset - sourceDocument.getPage(i).getPageSize().getHeight());
yOffset -= sourceDocument.getPage(i).getPageSize().getHeight();
layoutDoc.add(copiedPage);
}
layoutDoc.close();
sourceDocument.close();
byte[] result = baos.toByteArray();
return WebResponseUtils.bytesToWebResponse(result, file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_singlePage.pdf");
}
}

View File

@@ -19,6 +19,7 @@ import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.FileToPdf;
import stirling.software.SPDF.utils.GeneralUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
@@ -44,87 +45,17 @@ public class ConvertHtmlToPDF {
String originalFilename = fileInput.getOriginalFilename();
if (originalFilename == null || (!originalFilename.endsWith(".html") && !originalFilename.endsWith(".zip"))) {
throw new IllegalArgumentException("File must be either .html or .zip format.");
}
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
Path tempInputFile = null;
byte[] pdfBytes;
try {
if (originalFilename.endsWith(".html")) {
tempInputFile = Files.createTempFile("input_", ".html");
Files.write(tempInputFile, fileInput.getBytes());
} else {
tempInputFile = unzipAndGetMainHtml(fileInput);
}
List<String> command = new ArrayList<>();
command.add("weasyprint");
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
ProcessExecutorResult returnCode;
if (originalFilename.endsWith(".zip")) {
returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
.runCommandWithOutputHandling(command, tempInputFile.getParent().toFile());
} else {
returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
.runCommandWithOutputHandling(command);
}
pdfBytes = Files.readAllBytes(tempOutputFile);
} finally {
// Clean up temporary files
Files.delete(tempOutputFile);
Files.delete(tempInputFile);
if (originalFilename.endsWith(".zip")) {
GeneralUtils.deleteDirectory(tempInputFile.getParent());
}
}
}byte[] pdfBytes = FileToPdf.convertHtmlToPdf( fileInput.getBytes(), originalFilename);
String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") + ".pdf"; // Remove file extension and append .pdf
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
}
private Path unzipAndGetMainHtml(MultipartFile zipFile) throws IOException {
Path tempDirectory = Files.createTempDirectory("unzipped_");
try (ZipInputStream zipIn = new ZipInputStream(new ByteArrayInputStream(zipFile.getBytes()))) {
ZipEntry entry = zipIn.getNextEntry();
while (entry != null) {
Path filePath = tempDirectory.resolve(entry.getName());
if (entry.isDirectory()) {
Files.createDirectories(filePath); // Explicitly create the directory structure
} else {
Files.createDirectories(filePath.getParent()); // Create parent directories if they don't exist
Files.copy(zipIn, filePath);
}
zipIn.closeEntry();
entry = zipIn.getNextEntry();
}
}
//search for the main HTML file.
try (Stream<Path> walk = Files.walk(tempDirectory)) {
List<Path> htmlFiles = walk.filter(file -> file.toString().endsWith(".html"))
.collect(Collectors.toList());
if (htmlFiles.isEmpty()) {
throw new IOException("No HTML files found in the unzipped directory.");
}
// Prioritize 'index.html' if it exists, otherwise use the first .html file
for (Path htmlFile : htmlFiles) {
if (htmlFile.getFileName().toString().equals("index.html")) {
return htmlFile;
}
}
return htmlFiles.get(0);
}
}
}

View File

@@ -0,0 +1,52 @@
package stirling.software.SPDF.controller.api.converters;
import java.io.IOException;
import org.commonmark.node.Node;
import org.commonmark.parser.Parser;
import org.commonmark.renderer.html.HtmlRenderer;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestPart;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.FileToPdf;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@Tag(name = "Convert", description = "Convert APIs")
public class ConvertMarkdownToPdf {
@PostMapping(consumes = "multipart/form-data", value = "/markdown-to-pdf")
@Operation(
summary = "Convert a Markdown file to PDF",
description = "This endpoint takes a Markdown file input, converts it to HTML, and then to PDF format."
)
public ResponseEntity<byte[]> markdownToPdf(
@RequestPart(required = true, value = "fileInput") MultipartFile fileInput)
throws IOException, InterruptedException {
if (fileInput == null) {
throw new IllegalArgumentException("Please provide a Markdown file for conversion.");
}
String originalFilename = fileInput.getOriginalFilename();
if (originalFilename == null || !originalFilename.endsWith(".md")) {
throw new IllegalArgumentException("File must be in .md format.");
}
// Convert Markdown to HTML using CommonMark
Parser parser = Parser.builder().build();
Node document = parser.parse(new String(fileInput.getBytes()));
HtmlRenderer renderer = HtmlRenderer.builder().build();
String htmlContent = renderer.render(document);
byte[] pdfBytes = FileToPdf.convertHtmlToPdf(htmlContent.getBytes(), "converted.html");
String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") + ".pdf"; // Remove file extension and append .pdf
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
}
}

View File

@@ -25,7 +25,14 @@ public class ConverterWebController {
model.addAttribute("currentPage", "html-to-pdf");
return "convert/html-to-pdf";
}
@GetMapping("/markdown-to-pdf")
@Hidden
public String convertMarkdownToPdfForm(Model model) {
model.addAttribute("currentPage", "markdown-to-pdf");
return "convert/markdown-to-pdf";
}
@GetMapping("/url-to-pdf")
@Hidden
public String convertURLToPdfForm(Model model) {

View File

@@ -97,6 +97,20 @@ public class GeneralWebController {
return "pdf-organizer";
}
@GetMapping("/extract-page")
@Hidden
public String extractPages(Model model) {
model.addAttribute("currentPage", "extract-page");
return "extract-page";
}
@GetMapping("/pdf-to-single-page")
@Hidden
public String pdfToSinglePage(Model model) {
model.addAttribute("currentPage", "pdf-to-single-page");
return "pdf-to-single-page";
}
@GetMapping("/rotate-pdf")
@Hidden
public String rotatePdfForm(Model model) {

View File

@@ -0,0 +1,95 @@
package stirling.software.SPDF.utils;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
public class FileToPdf {
public static byte[] convertHtmlToPdf(byte[] fileBytes, String fileName) throws IOException, InterruptedException {
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
Path tempInputFile = null;
byte[] pdfBytes;
try {
if (fileName.endsWith(".html")) {
tempInputFile = Files.createTempFile("input_", ".html");
Files.write(tempInputFile, fileBytes);
} else {
tempInputFile = unzipAndGetMainHtml(fileBytes);
}
List<String> command = new ArrayList<>();
command.add("weasyprint");
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
ProcessExecutorResult returnCode;
if (fileName.endsWith(".zip")) {
returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
.runCommandWithOutputHandling(command, tempInputFile.getParent().toFile());
} else {
returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
.runCommandWithOutputHandling(command);
}
pdfBytes = Files.readAllBytes(tempOutputFile);
} finally {
// Clean up temporary files
Files.delete(tempOutputFile);
Files.delete(tempInputFile);
if (fileName.endsWith(".zip")) {
GeneralUtils.deleteDirectory(tempInputFile.getParent());
}
}
return pdfBytes;
}
private static Path unzipAndGetMainHtml(byte[] fileBytes) throws IOException {
Path tempDirectory = Files.createTempDirectory("unzipped_");
try (ZipInputStream zipIn = new ZipInputStream(new ByteArrayInputStream(fileBytes))) {
ZipEntry entry = zipIn.getNextEntry();
while (entry != null) {
Path filePath = tempDirectory.resolve(entry.getName());
if (entry.isDirectory()) {
Files.createDirectories(filePath); // Explicitly create the directory structure
} else {
Files.createDirectories(filePath.getParent()); // Create parent directories if they don't exist
Files.copy(zipIn, filePath);
}
zipIn.closeEntry();
entry = zipIn.getNextEntry();
}
}
//search for the main HTML file.
try (Stream<Path> walk = Files.walk(tempDirectory)) {
List<Path> htmlFiles = walk.filter(file -> file.toString().endsWith(".html"))
.collect(Collectors.toList());
if (htmlFiles.isEmpty()) {
throw new IOException("No HTML files found in the unzipped directory.");
}
// Prioritize 'index.html' if it exists, otherwise use the first .html file
for (Path htmlFile : htmlFiles) {
if (htmlFile.getFileName().toString().equals("index.html")) {
return htmlFile;
}
}
return htmlFiles.get(0);
}
}
}

View File

@@ -12,6 +12,9 @@ import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.multipart.MultipartFile;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfWriter;
public class WebResponseUtils {
public static ResponseEntity<byte[]> boasToWebResponse(ByteArrayOutputStream baos, String docName) throws IOException {
@@ -57,5 +60,19 @@ public class WebResponseUtils {
return boasToWebResponse(baos, docName);
}
public static ResponseEntity<byte[]> pdfDocToWebResponse(PdfDocument document, String docName) throws IOException {
// Open Byte Array and save document to it
ByteArrayOutputStream baos = new ByteArrayOutputStream();
PdfWriter writer = new PdfWriter(baos);
PdfDocument newDocument = new PdfDocument(writer);
document.copyPagesTo(1, document.getNumberOfPages(), newDocument);
newDocument.close();
return boasToWebResponse(baos, docName);
}
}