# Description of Changes

This pull request includes several changes primarily focused on improving configuration management, removing deprecated methods, and updating paths for external dependencies. The most important changes are summarized below:

### Configuration Management Improvements:

* Added a new `RuntimePathConfig` class to manage dynamic paths for operations and pipeline configurations (`src/main/java/stirling/software/SPDF/config/RuntimePathConfig.java`).
* Removed the `bookAndHtmlFormatsInstalled` bean and its associated logic from `AppConfig` and `EndpointConfiguration` (`src/main/java/stirling/software/SPDF/config/AppConfig.java`, `src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java`). [[1]](diffhunk://#diff-4d774ec79aa55750c0a4739bee971b68877078b73654e863fd40ee924347e143L130-L138) [[2]](diffhunk://#diff-750f31f6ecbd64b025567108a33775cad339e835a04360affff82a09410b697dL12-L35) [[3]](diffhunk://#diff-750f31f6ecbd64b025567108a33775cad339e835a04360affff82a09410b697dL275-L280)

### External Dependency Path Updates:

* Updated paths for `weasyprint` and `unoconvert` in `ExternalAppDepConfig` to use values from `RuntimePathConfig` (`src/main/java/stirling/software/SPDF/config/ExternalAppDepConfig.java`). [[1]](diffhunk://#diff-c47af298c07c2622aa98b038b78822c56bdb002de71081e102d344794e7832a6R12-L33) [[2]](diffhunk://#diff-c47af298c07c2622aa98b038b78822c56bdb002de71081e102d344794e7832a6L104-R115)

### Minor Adjustments:

* Corrected a typo from "Unoconv" to "Unoconvert" in `EndpointConfiguration` (`src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java`).
---

## Checklist

### General

- [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable)
- [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed)
- [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details.
236 lines
9.4 KiB
Java
236 lines
9.4 KiB
Java
package stirling.software.SPDF.utils;
|
|
|
|
import java.io.*;
|
|
import java.nio.charset.StandardCharsets;
|
|
import java.nio.file.FileVisitResult;
|
|
import java.nio.file.Files;
|
|
import java.nio.file.Path;
|
|
import java.nio.file.SimpleFileVisitor;
|
|
import java.nio.file.attribute.BasicFileAttributes;
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
import java.util.stream.Collectors;
|
|
import java.util.stream.Stream;
|
|
import java.util.zip.ZipEntry;
|
|
import java.util.zip.ZipInputStream;
|
|
import java.util.zip.ZipOutputStream;
|
|
|
|
import io.github.pixee.security.ZipSecurity;
|
|
|
|
import stirling.software.SPDF.model.api.converters.HTMLToPdfRequest;
|
|
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
|
|
|
public class FileToPdf {
|
|
|
|
public static byte[] convertHtmlToPdf(
|
|
String weasyprintPath,
|
|
HTMLToPdfRequest request,
|
|
byte[] fileBytes,
|
|
String fileName,
|
|
boolean disableSanitize)
|
|
throws IOException, InterruptedException {
|
|
|
|
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
|
Path tempInputFile = null;
|
|
byte[] pdfBytes;
|
|
try {
|
|
if (fileName.endsWith(".html")) {
|
|
tempInputFile = Files.createTempFile("input_", ".html");
|
|
String sanitizedHtml =
|
|
sanitizeHtmlContent(
|
|
new String(fileBytes, StandardCharsets.UTF_8), disableSanitize);
|
|
Files.write(tempInputFile, sanitizedHtml.getBytes(StandardCharsets.UTF_8));
|
|
} else if (fileName.endsWith(".zip")) {
|
|
tempInputFile = Files.createTempFile("input_", ".zip");
|
|
Files.write(tempInputFile, fileBytes);
|
|
sanitizeHtmlFilesInZip(tempInputFile, disableSanitize);
|
|
} else {
|
|
throw new IllegalArgumentException("Unsupported file format: " + fileName);
|
|
}
|
|
|
|
List<String> command = new ArrayList<>();
|
|
command.add(weasyprintPath);
|
|
command.add("-e");
|
|
command.add("utf-8");
|
|
command.add("-v");
|
|
command.add(tempInputFile.toString());
|
|
command.add(tempOutputFile.toString());
|
|
|
|
ProcessExecutorResult returnCode =
|
|
ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
|
|
.runCommandWithOutputHandling(command);
|
|
|
|
pdfBytes = Files.readAllBytes(tempOutputFile);
|
|
} catch (IOException e) {
|
|
pdfBytes = Files.readAllBytes(tempOutputFile);
|
|
if (pdfBytes.length < 1) {
|
|
throw e;
|
|
}
|
|
} finally {
|
|
Files.deleteIfExists(tempOutputFile);
|
|
Files.deleteIfExists(tempInputFile);
|
|
}
|
|
|
|
return pdfBytes;
|
|
}
|
|
|
|
private static String sanitizeHtmlContent(String htmlContent, boolean disableSanitize) {
|
|
return (!disableSanitize) ? CustomHtmlSanitizer.sanitize(htmlContent) : htmlContent;
|
|
}
|
|
|
|
private static void sanitizeHtmlFilesInZip(Path zipFilePath, boolean disableSanitize)
|
|
throws IOException {
|
|
Path tempUnzippedDir = Files.createTempDirectory("unzipped_");
|
|
try (ZipInputStream zipIn =
|
|
ZipSecurity.createHardenedInputStream(
|
|
new ByteArrayInputStream(Files.readAllBytes(zipFilePath)))) {
|
|
ZipEntry entry = zipIn.getNextEntry();
|
|
while (entry != null) {
|
|
Path filePath = tempUnzippedDir.resolve(sanitizeZipFilename(entry.getName()));
|
|
if (!entry.isDirectory()) {
|
|
Files.createDirectories(filePath.getParent());
|
|
if (entry.getName().toLowerCase().endsWith(".html")
|
|
|| entry.getName().toLowerCase().endsWith(".htm")) {
|
|
String content = new String(zipIn.readAllBytes(), StandardCharsets.UTF_8);
|
|
String sanitizedContent = sanitizeHtmlContent(content, disableSanitize);
|
|
Files.write(filePath, sanitizedContent.getBytes(StandardCharsets.UTF_8));
|
|
} else {
|
|
Files.copy(zipIn, filePath);
|
|
}
|
|
}
|
|
zipIn.closeEntry();
|
|
entry = zipIn.getNextEntry();
|
|
}
|
|
}
|
|
|
|
// Repack the sanitized files
|
|
zipDirectory(tempUnzippedDir, zipFilePath);
|
|
|
|
// Clean up
|
|
deleteDirectory(tempUnzippedDir);
|
|
}
|
|
|
|
private static void zipDirectory(Path sourceDir, Path zipFilePath) throws IOException {
|
|
try (ZipOutputStream zos =
|
|
new ZipOutputStream(new FileOutputStream(zipFilePath.toFile()))) {
|
|
Files.walk(sourceDir)
|
|
.filter(path -> !Files.isDirectory(path))
|
|
.forEach(
|
|
path -> {
|
|
ZipEntry zipEntry =
|
|
new ZipEntry(sourceDir.relativize(path).toString());
|
|
try {
|
|
zos.putNextEntry(zipEntry);
|
|
Files.copy(path, zos);
|
|
zos.closeEntry();
|
|
} catch (IOException e) {
|
|
throw new UncheckedIOException(e);
|
|
}
|
|
});
|
|
}
|
|
}
|
|
|
|
private static void deleteDirectory(Path dir) throws IOException {
|
|
Files.walkFileTree(
|
|
dir,
|
|
new SimpleFileVisitor<Path>() {
|
|
@Override
|
|
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
|
|
throws IOException {
|
|
Files.delete(file);
|
|
return FileVisitResult.CONTINUE;
|
|
}
|
|
|
|
@Override
|
|
public FileVisitResult postVisitDirectory(Path dir, IOException exc)
|
|
throws IOException {
|
|
Files.delete(dir);
|
|
return FileVisitResult.CONTINUE;
|
|
}
|
|
});
|
|
}
|
|
|
|
private static Path unzipAndGetMainHtml(byte[] fileBytes) throws IOException {
|
|
Path tempDirectory = Files.createTempDirectory("unzipped_");
|
|
try (ZipInputStream zipIn =
|
|
ZipSecurity.createHardenedInputStream(new ByteArrayInputStream(fileBytes))) {
|
|
ZipEntry entry = zipIn.getNextEntry();
|
|
while (entry != null) {
|
|
Path filePath = tempDirectory.resolve(sanitizeZipFilename(entry.getName()));
|
|
if (entry.isDirectory()) {
|
|
Files.createDirectories(filePath); // Explicitly create the directory structure
|
|
} else {
|
|
Files.createDirectories(
|
|
filePath.getParent()); // Create parent directories if they don't exist
|
|
Files.copy(zipIn, filePath);
|
|
}
|
|
zipIn.closeEntry();
|
|
entry = zipIn.getNextEntry();
|
|
}
|
|
}
|
|
|
|
// search for the main HTML file.
|
|
try (Stream<Path> walk = Files.walk(tempDirectory)) {
|
|
List<Path> htmlFiles =
|
|
walk.filter(file -> file.toString().endsWith(".html"))
|
|
.collect(Collectors.toList());
|
|
|
|
if (htmlFiles.isEmpty()) {
|
|
throw new IOException("No HTML files found in the unzipped directory.");
|
|
}
|
|
|
|
// Prioritize 'index.html' if it exists, otherwise use the first .html file
|
|
for (Path htmlFile : htmlFiles) {
|
|
if ("index.html".equals(htmlFile.getFileName().toString())) {
|
|
return htmlFile;
|
|
}
|
|
}
|
|
|
|
return htmlFiles.get(0);
|
|
}
|
|
}
|
|
|
|
public static byte[] convertBookTypeToPdf(byte[] bytes, String originalFilename)
|
|
throws IOException, InterruptedException {
|
|
if (originalFilename == null || originalFilename.lastIndexOf('.') == -1) {
|
|
throw new IllegalArgumentException("Invalid original filename.");
|
|
}
|
|
|
|
String fileExtension = originalFilename.substring(originalFilename.lastIndexOf('.'));
|
|
List<String> command = new ArrayList<>();
|
|
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
|
Path tempInputFile = null;
|
|
|
|
try {
|
|
// Create temp file with appropriate extension
|
|
tempInputFile = Files.createTempFile("input_", fileExtension);
|
|
Files.write(tempInputFile, bytes);
|
|
|
|
command.add("ebook-convert");
|
|
command.add(tempInputFile.toString());
|
|
command.add(tempOutputFile.toString());
|
|
ProcessExecutorResult returnCode =
|
|
ProcessExecutor.getInstance(ProcessExecutor.Processes.CALIBRE)
|
|
.runCommandWithOutputHandling(command);
|
|
|
|
return Files.readAllBytes(tempOutputFile);
|
|
} finally {
|
|
// Clean up temporary files
|
|
if (tempInputFile != null) {
|
|
Files.deleteIfExists(tempInputFile);
|
|
}
|
|
Files.deleteIfExists(tempOutputFile);
|
|
}
|
|
}
|
|
|
|
static String sanitizeZipFilename(String entryName) {
|
|
if (entryName == null || entryName.trim().isEmpty()) {
|
|
return entryName;
|
|
}
|
|
while (entryName.contains("../") || entryName.contains("..\\")) {
|
|
entryName = entryName.replace("../", "").replace("..\\", "");
|
|
}
|
|
return entryName;
|
|
}
|
|
}
|