More fixes for RequestPart mixing

This commit is contained in:
Anthony Stirling
2023-07-30 11:39:29 +01:00
55 changed files with 3789 additions and 653 deletions

View File

@@ -4,9 +4,13 @@ import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.*;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestPart;
@@ -17,6 +21,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.GeneralUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@@ -24,7 +29,7 @@ import stirling.software.SPDF.utils.WebResponseUtils;
public class ConvertHtmlToPDF {
@PostMapping(consumes = "multipart/form-data", value = "/convert-to-pdf")
@PostMapping(consumes = "multipart/form-data", value = "/html-to-pdf")
@Operation(
summary = "Convert an HTML or ZIP (containing HTML and CSS) to PDF",
description = "This endpoint takes an HTML or ZIP file input and converts it to a PDF format."
@@ -40,61 +45,83 @@ public class ConvertHtmlToPDF {
if (originalFilename == null || (!originalFilename.endsWith(".html") && !originalFilename.endsWith(".zip"))) {
throw new IllegalArgumentException("File must be either .html or .zip format.");
}
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
Path tempInputFile;
if (originalFilename.endsWith(".html")) {
tempInputFile = Files.createTempFile("input_", ".html");
Files.write(tempInputFile, fileInput.getBytes());
} else {
tempInputFile = unzipAndGetMainHtml(fileInput);
Path tempInputFile = null;
byte[] pdfBytes;
try {
if (originalFilename.endsWith(".html")) {
tempInputFile = Files.createTempFile("input_", ".html");
Files.write(tempInputFile, fileInput.getBytes());
} else {
tempInputFile = unzipAndGetMainHtml(fileInput);
}
List<String> command = new ArrayList<>();
command.add("weasyprint");
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
ProcessExecutorResult returnCode;
if (originalFilename.endsWith(".zip")) {
returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
.runCommandWithOutputHandling(command, tempInputFile.getParent().toFile());
} else {
returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
.runCommandWithOutputHandling(command);
}
pdfBytes = Files.readAllBytes(tempOutputFile);
} finally {
// Clean up temporary files
Files.delete(tempOutputFile);
Files.delete(tempInputFile);
if (originalFilename.endsWith(".zip")) {
GeneralUtils.deleteDirectory(tempInputFile.getParent());
}
}
List<String> command = new ArrayList<>();
command.add("weasyprint");
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
int returnCode = 0;
if (originalFilename.endsWith(".zip")) {
returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
.runCommandWithOutputHandling(command, tempInputFile.getParent().toFile());
} else {
returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
.runCommandWithOutputHandling(command);
}
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Clean up temporary files
Files.delete(tempOutputFile);
Files.delete(tempInputFile);
if (originalFilename.endsWith(".zip")) {
GeneralUtils.deleteDirectory(tempInputFile.getParent());
}
String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") + ".pdf"; // Remove file extension and append .pdf
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
}
private Path unzipAndGetMainHtml(MultipartFile zipFile) throws IOException {
Path tempDirectory = Files.createTempDirectory("unzipped_");
try (ZipInputStream zipIn = new ZipInputStream(new ByteArrayInputStream(zipFile.getBytes()))) {
ZipEntry entry = zipIn.getNextEntry();
while (entry != null) {
Path filePath = tempDirectory.resolve(entry.getName());
if (!entry.isDirectory()) {
Files.copy(zipIn, filePath);
}
zipIn.closeEntry();
entry = zipIn.getNextEntry();
}
}
return tempDirectory.resolve("index.html");
}
private Path unzipAndGetMainHtml(MultipartFile zipFile) throws IOException {
Path tempDirectory = Files.createTempDirectory("unzipped_");
try (ZipInputStream zipIn = new ZipInputStream(new ByteArrayInputStream(zipFile.getBytes()))) {
ZipEntry entry = zipIn.getNextEntry();
while (entry != null) {
Path filePath = tempDirectory.resolve(entry.getName());
if (entry.isDirectory()) {
Files.createDirectories(filePath); // Explicitly create the directory structure
} else {
Files.createDirectories(filePath.getParent()); // Create parent directories if they don't exist
Files.copy(zipIn, filePath);
}
zipIn.closeEntry();
entry = zipIn.getNextEntry();
}
}
//search for the main HTML file.
try (Stream<Path> walk = Files.walk(tempDirectory)) {
List<Path> htmlFiles = walk.filter(file -> file.toString().endsWith(".html"))
.collect(Collectors.toList());
if (htmlFiles.isEmpty()) {
throw new IOException("No HTML files found in the unzipped directory.");
}
// Prioritize 'index.html' if it exists, otherwise use the first .html file
for (Path htmlFile : htmlFiles) {
if (htmlFile.getFileName().toString().equals("index.html")) {
return htmlFile;
}
}
return htmlFiles.get(0);
}
}

View File

@@ -43,7 +43,7 @@ public class ConvertImgPDFController {
@Parameter(description = "Choose between a single image containing all pages or separate images for each page", schema = @Schema(allowableValues = {"single", "multiple"}))
String singleOrMultiple,
@RequestParam("colorType")
@Parameter(description = "The color type of the output image(s)", schema = @Schema(allowableValues = {"rgb", "greyscale", "blackwhite"}))
@Parameter(description = "The color type of the output image(s)", schema = @Schema(allowableValues = {"color", "greyscale", "blackwhite"}))
String colorType,
@RequestParam("dpi")
@Parameter(description = "The DPI (dots per inch) for the output image(s)")
@@ -94,7 +94,7 @@ public class ConvertImgPDFController {
@Parameter(description = "Whether to stretch the images to fit the PDF page or maintain the aspect ratio", example = "false")
boolean stretchToFit,
@RequestParam("colorType")
@Parameter(description = "The color type of the output image(s)", schema = @Schema(allowableValues = {"rgb", "greyscale", "blackwhite"}))
@Parameter(description = "The color type of the output image(s)", schema = @Schema(allowableValues = {"color", "greyscale", "blackwhite"}))
String colorType,
@RequestParam(defaultValue = "false", name = "autoRotate")
@Parameter(description = "Whether to automatically rotate the images to better fit the PDF page", example = "true")

View File

@@ -19,6 +19,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@@ -41,7 +42,7 @@ public class ConvertOfficeController {
// Run the LibreOffice command
List<String> command = new ArrayList<>(Arrays.asList("unoconv", "-vvv", "-f", "pdf", "-o", tempOutputFile.toString(), tempInputFile.toString()));
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
// Read the converted PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
@@ -62,10 +63,10 @@ public class ConvertOfficeController {
summary = "Convert a file to a PDF using LibreOffice",
description = "This endpoint converts a given file to a PDF using LibreOffice API Input:Any Output:PDF Type:SISO"
)
public ResponseEntity<byte[]> processPdfWithOCR(
public ResponseEntity<byte[]> processFileToPDF(
@RequestPart(required = true, value = "fileInput")
@Parameter(
description = "The input file to be converted to a PDF file using OCR",
description = "The input file to be converted to a PDF file using LibreOffice",
required = true
)
MultipartFile inputFile

View File

@@ -16,6 +16,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@@ -49,7 +50,7 @@ public class ConvertPDFToPDFA {
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
// Read the optimized PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);

View File

@@ -8,6 +8,7 @@ import java.util.List;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RequestPart;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
@@ -17,6 +18,7 @@ import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.GeneralUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@@ -29,32 +31,35 @@ public class ConvertWebsiteToPDF {
description = "This endpoint fetches content from a URL and converts it to a PDF format."
)
public ResponseEntity<byte[]> urlToPdf(
@RequestPart(required = true, value = "urlInput")
@RequestParam(required = true, value = "urlInput")
@Parameter(description = "The input URL to be converted to a PDF file", required = true)
String URL) throws IOException, InterruptedException {
// Validate the URL format
if(!URL.matches("^https?://.*") && GeneralUtils.isValidURL(URL)) {
if(!URL.matches("^https?://.*") || !GeneralUtils.isValidURL(URL)) {
throw new IllegalArgumentException("Invalid URL format provided.");
}
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
// Prepare the OCRmyPDF command
List<String> command = new ArrayList<>();
command.add("weasyprint");
command.add(URL);
command.add(tempOutputFile.toString());
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT).runCommandWithOutputHandling(command);
// Read the optimized PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Clean up the temporary files
Files.delete(tempOutputFile);
Path tempOutputFile = null;
byte[] pdfBytes;
try {
// Prepare the output file path
tempOutputFile = Files.createTempFile("output_", ".pdf");
// Prepare the OCRmyPDF command
List<String> command = new ArrayList<>();
command.add("weasyprint");
command.add(URL);
command.add(tempOutputFile.toString());
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT).runCommandWithOutputHandling(command);
// Read the optimized PDF file
pdfBytes = Files.readAllBytes(tempOutputFile);
}
finally {
// Clean up the temporary files
Files.delete(tempOutputFile);
}
// Convert URL to a safe filename
String outputFilename = convertURLToFileName(URL);