html to pdf
This commit is contained in:
@@ -83,7 +83,9 @@ public class EndpointConfiguration {
|
||||
addEndpointToGroup("Convert", "pdf-to-text");
|
||||
addEndpointToGroup("Convert", "pdf-to-html");
|
||||
addEndpointToGroup("Convert", "pdf-to-xml");
|
||||
|
||||
addEndpointToGroup("Convert", "html-to-pdf");
|
||||
addEndpointToGroup("Convert", "url-to-pdf");
|
||||
|
||||
// Adding endpoints to "Security" group
|
||||
addEndpointToGroup("Security", "add-password");
|
||||
addEndpointToGroup("Security", "remove-password");
|
||||
@@ -125,12 +127,15 @@ public class EndpointConfiguration {
|
||||
addEndpointToGroup("CLI", "pdf-to-html");
|
||||
addEndpointToGroup("CLI", "pdf-to-xml");
|
||||
addEndpointToGroup("CLI", "ocr-pdf");
|
||||
addEndpointToGroup("CLI", "html-to-pdf");
|
||||
addEndpointToGroup("CLI", "url-to-pdf");
|
||||
|
||||
|
||||
//python
|
||||
addEndpointToGroup("Python", "extract-image-scans");
|
||||
addEndpointToGroup("Python", "remove-blanks");
|
||||
|
||||
|
||||
addEndpointToGroup("Python", "html-to-pdf");
|
||||
addEndpointToGroup("Python", "url-to-pdf");
|
||||
|
||||
//openCV
|
||||
addEndpointToGroup("OpenCV", "extract-image-scans");
|
||||
|
||||
@@ -4,9 +4,13 @@ import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipInputStream;
|
||||
import java.util.*;
|
||||
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestPart;
|
||||
@@ -40,61 +44,83 @@ public class ConvertHtmlToPDF {
|
||||
if (originalFilename == null || (!originalFilename.endsWith(".html") && !originalFilename.endsWith(".zip"))) {
|
||||
throw new IllegalArgumentException("File must be either .html or .zip format.");
|
||||
}
|
||||
|
||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
Path tempInputFile;
|
||||
|
||||
if (originalFilename.endsWith(".html")) {
|
||||
tempInputFile = Files.createTempFile("input_", ".html");
|
||||
Files.write(tempInputFile, fileInput.getBytes());
|
||||
} else {
|
||||
tempInputFile = unzipAndGetMainHtml(fileInput);
|
||||
Path tempInputFile = null;
|
||||
byte[] pdfBytes;
|
||||
try {
|
||||
if (originalFilename.endsWith(".html")) {
|
||||
tempInputFile = Files.createTempFile("input_", ".html");
|
||||
Files.write(tempInputFile, fileInput.getBytes());
|
||||
} else {
|
||||
tempInputFile = unzipAndGetMainHtml(fileInput);
|
||||
}
|
||||
|
||||
List<String> command = new ArrayList<>();
|
||||
command.add("weasyprint");
|
||||
command.add(tempInputFile.toString());
|
||||
command.add(tempOutputFile.toString());
|
||||
int returnCode = 0;
|
||||
if (originalFilename.endsWith(".zip")) {
|
||||
returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
|
||||
.runCommandWithOutputHandling(command, tempInputFile.getParent().toFile());
|
||||
} else {
|
||||
|
||||
returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
|
||||
.runCommandWithOutputHandling(command);
|
||||
}
|
||||
|
||||
pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
} finally {
|
||||
// Clean up temporary files
|
||||
Files.delete(tempOutputFile);
|
||||
Files.delete(tempInputFile);
|
||||
|
||||
if (originalFilename.endsWith(".zip")) {
|
||||
GeneralUtils.deleteDirectory(tempInputFile.getParent());
|
||||
}
|
||||
}
|
||||
|
||||
List<String> command = new ArrayList<>();
|
||||
command.add("weasyprint");
|
||||
command.add(tempInputFile.toString());
|
||||
command.add(tempOutputFile.toString());
|
||||
int returnCode = 0;
|
||||
if (originalFilename.endsWith(".zip")) {
|
||||
returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
|
||||
.runCommandWithOutputHandling(command, tempInputFile.getParent().toFile());
|
||||
} else {
|
||||
|
||||
returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
|
||||
.runCommandWithOutputHandling(command);
|
||||
}
|
||||
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
// Clean up temporary files
|
||||
Files.delete(tempOutputFile);
|
||||
Files.delete(tempInputFile);
|
||||
if (originalFilename.endsWith(".zip")) {
|
||||
GeneralUtils.deleteDirectory(tempInputFile.getParent());
|
||||
}
|
||||
|
||||
String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") + ".pdf"; // Remove file extension and append .pdf
|
||||
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
|
||||
}
|
||||
|
||||
|
||||
|
||||
private Path unzipAndGetMainHtml(MultipartFile zipFile) throws IOException {
|
||||
Path tempDirectory = Files.createTempDirectory("unzipped_");
|
||||
try (ZipInputStream zipIn = new ZipInputStream(new ByteArrayInputStream(zipFile.getBytes()))) {
|
||||
ZipEntry entry = zipIn.getNextEntry();
|
||||
while (entry != null) {
|
||||
Path filePath = tempDirectory.resolve(entry.getName());
|
||||
if (!entry.isDirectory()) {
|
||||
Files.copy(zipIn, filePath);
|
||||
}
|
||||
zipIn.closeEntry();
|
||||
entry = zipIn.getNextEntry();
|
||||
}
|
||||
}
|
||||
return tempDirectory.resolve("index.html");
|
||||
}
|
||||
private Path unzipAndGetMainHtml(MultipartFile zipFile) throws IOException {
|
||||
Path tempDirectory = Files.createTempDirectory("unzipped_");
|
||||
try (ZipInputStream zipIn = new ZipInputStream(new ByteArrayInputStream(zipFile.getBytes()))) {
|
||||
ZipEntry entry = zipIn.getNextEntry();
|
||||
while (entry != null) {
|
||||
Path filePath = tempDirectory.resolve(entry.getName());
|
||||
if (entry.isDirectory()) {
|
||||
Files.createDirectories(filePath); // Explicitly create the directory structure
|
||||
} else {
|
||||
Files.createDirectories(filePath.getParent()); // Create parent directories if they don't exist
|
||||
Files.copy(zipIn, filePath);
|
||||
}
|
||||
zipIn.closeEntry();
|
||||
entry = zipIn.getNextEntry();
|
||||
}
|
||||
}
|
||||
|
||||
//search for the main HTML file.
|
||||
try (Stream<Path> walk = Files.walk(tempDirectory)) {
|
||||
List<Path> htmlFiles = walk.filter(file -> file.toString().endsWith(".html"))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
if (htmlFiles.isEmpty()) {
|
||||
throw new IOException("No HTML files found in the unzipped directory.");
|
||||
}
|
||||
|
||||
// Prioritize 'index.html' if it exists, otherwise use the first .html file
|
||||
for (Path htmlFile : htmlFiles) {
|
||||
if (htmlFile.getFileName().toString().equals("index.html")) {
|
||||
return htmlFile;
|
||||
}
|
||||
}
|
||||
|
||||
return htmlFiles.get(0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -34,27 +34,30 @@ public class ConvertWebsiteToPDF {
|
||||
String URL) throws IOException, InterruptedException {
|
||||
|
||||
// Validate the URL format
|
||||
if(!URL.matches("^https?://.*") && GeneralUtils.isValidURL(URL)) {
|
||||
if(!URL.matches("^https?://.*") || !GeneralUtils.isValidURL(URL)) {
|
||||
throw new IllegalArgumentException("Invalid URL format provided.");
|
||||
}
|
||||
|
||||
// Prepare the output file path
|
||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
|
||||
// Prepare the OCRmyPDF command
|
||||
List<String> command = new ArrayList<>();
|
||||
command.add("weasyprint");
|
||||
command.add(URL);
|
||||
command.add(tempOutputFile.toString());
|
||||
|
||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT).runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the optimized PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
// Clean up the temporary files
|
||||
Files.delete(tempOutputFile);
|
||||
|
||||
Path tempOutputFile = null;
|
||||
byte[] pdfBytes;
|
||||
try {
|
||||
// Prepare the output file path
|
||||
tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
|
||||
// Prepare the OCRmyPDF command
|
||||
List<String> command = new ArrayList<>();
|
||||
command.add("weasyprint");
|
||||
command.add(URL);
|
||||
command.add(tempOutputFile.toString());
|
||||
|
||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT).runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the optimized PDF file
|
||||
pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
}
|
||||
finally {
|
||||
// Clean up the temporary files
|
||||
Files.delete(tempOutputFile);
|
||||
}
|
||||
// Convert URL to a safe filename
|
||||
String outputFilename = convertURLToFileName(URL);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user