Files
Stirling-PDF/src/main/java/stirling/software/SPDF/utils/FileToPdf.java

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

253 lines
10 KiB
Java
Raw Normal View History

2023-08-01 00:03:13 +01:00
package stirling.software.SPDF.utils;
import java.io.*;
2024-10-14 22:34:41 +01:00
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
2023-08-01 00:03:13 +01:00
import java.nio.file.Files;
import java.nio.file.Path;
2024-10-14 22:34:41 +01:00
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
2023-08-01 00:03:13 +01:00
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
2024-10-14 22:34:41 +01:00
import java.util.zip.ZipOutputStream;
2023-08-01 00:03:13 +01:00
2024-02-06 00:00:49 +00:00
import io.github.pixee.security.ZipSecurity;
2024-01-28 17:36:17 +00:00
import stirling.software.SPDF.model.api.converters.HTMLToPdfRequest;
2023-08-01 00:03:13 +01:00
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
public class FileToPdf {
2024-01-09 22:39:21 +00:00
public static byte[] convertHtmlToPdf(
2024-01-28 17:36:17 +00:00
HTMLToPdfRequest request,
byte[] fileBytes,
String fileName,
added option for disabling HTML Sanitize (#2831) # Description of Changes Please provide a summary of the changes, including: - added disableSanitize: false # set to 'true' to disable Sanitize HTML, set to false to enable Sanitize HTML; (can lead to injections in HTML) - Some users uses this on local boxes, and uses Google Fonts, and base64 image src. ### General - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [x] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [x] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [x] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details. --------- Co-authored-by: blaz.carli <blaz.carli@arctur.si> Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
2025-02-01 00:36:50 +01:00
boolean htmlFormatsInstalled,
Update sonarqube.yml and removal of gradle keys (#2866) # Description of Changes Please provide a summary of the changes, including: - What was changed - Why the change was made - Any challenges encountered Closes #(issue_number) --- ## Checklist ### General - [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [ ] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details.
2025-02-04 10:18:02 +00:00
boolean disableSanitize)
2023-08-01 00:03:13 +01:00
throws IOException, InterruptedException {
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
Path tempInputFile = null;
byte[] pdfBytes;
try {
if (fileName.endsWith(".html")) {
tempInputFile = Files.createTempFile("input_", ".html");
Update sonarqube.yml and removal of gradle keys (#2866) # Description of Changes Please provide a summary of the changes, including: - What was changed - Why the change was made - Any challenges encountered Closes #(issue_number) --- ## Checklist ### General - [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [ ] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details.
2025-02-04 10:18:02 +00:00
String sanitizedHtml =
sanitizeHtmlContent(
new String(fileBytes, StandardCharsets.UTF_8), disableSanitize);
2024-10-14 22:34:41 +01:00
Files.write(tempInputFile, sanitizedHtml.getBytes(StandardCharsets.UTF_8));
} else if (fileName.endsWith(".zip")) {
2024-02-09 23:24:25 +00:00
tempInputFile = Files.createTempFile("input_", ".zip");
Files.write(tempInputFile, fileBytes);
added option for disabling HTML Sanitize (#2831) # Description of Changes Please provide a summary of the changes, including: - added disableSanitize: false # set to 'true' to disable Sanitize HTML, set to false to enable Sanitize HTML; (can lead to injections in HTML) - Some users uses this on local boxes, and uses Google Fonts, and base64 image src. ### General - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [x] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [x] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [x] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details. --------- Co-authored-by: blaz.carli <blaz.carli@arctur.si> Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
2025-02-01 00:36:50 +01:00
sanitizeHtmlFilesInZip(tempInputFile, disableSanitize);
2024-10-14 22:34:41 +01:00
} else {
throw new IllegalArgumentException("Unsupported file format: " + fileName);
2023-08-01 00:03:13 +01:00
}
List<String> command = new ArrayList<>();
2024-01-09 22:39:21 +00:00
if (!htmlFormatsInstalled) {
[Test PR] Desktop fix and unoconv to unoserver (#2971) # Description of Changes This pull request includes several updates to the Docker configuration and Java application UI scaling. The changes enhance environment variable management, dependency installation, and UI responsiveness to different screen sizes. ### Docker Configuration Updates: * Added new environment variables `STIRLING_PDF_DESKTOP_UI`, `PYTHONPATH`, `UNO_PATH`, and `URE_BOOTSTRAP` to `Dockerfile` and `Dockerfile.fat` to improve the configuration and integration of the LibreOffice environment. [[1]](diffhunk://#diff-dd2c0eb6ea5cfc6c4bd4eac30934e2d5746747af48fef6da689e85b752f39557L38-R46) [[2]](diffhunk://#diff-571631582b988e88c52c86960cc083b0b8fa63cf88f056f26e9e684195221c27L40-R49) * Updated the `CMD` instruction in `Dockerfile` and `Dockerfile.fat` to run both the Java application and `unoserver` simultaneously. [[1]](diffhunk://#diff-dd2c0eb6ea5cfc6c4bd4eac30934e2d5746747af48fef6da689e85b752f39557L87-R96) [[2]](diffhunk://#diff-571631582b988e88c52c86960cc083b0b8fa63cf88f056f26e9e684195221c27L87-R100) * Modified the `RUN` instruction to include additional Python dependencies and setup a virtual environment. [[1]](diffhunk://#diff-dd2c0eb6ea5cfc6c4bd4eac30934e2d5746747af48fef6da689e85b752f39557L68-R81) [[2]](diffhunk://#diff-571631582b988e88c52c86960cc083b0b8fa63cf88f056f26e9e684195221c27R72-R86) ### Workflow Enhancements: * Added `STIRLING_PDF_DESKTOP_UI` environment variable to the GitHub Actions workflows (`PR-Demo-Comment.yml` and `push-docker.yml`) to ensure consistent environment settings. [[1]](diffhunk://#diff-145fe5c0ed8c24e4673c9ad39800dd171a2d0a2e8050497cff980fc7e3a3df0dR106) [[2]](diffhunk://#diff-76056236de05155107f6a660f1e3956059e37338011b8f0e72188afcb9b17b6fR41) ### Java Application UI Scaling: * Introduced `UIScaling` utility to dynamically adjust the size of UI components based on screen resolution in `DesktopBrowser` and `LoadingWindow` classes. [[1]](diffhunk://#diff-dff83b0fe53cba8ee80dc8cee96b9c2bfec612ec1f2c636ebdf22dedb36671e8L218-R219) [[2]](diffhunk://#diff-dff83b0fe53cba8ee80dc8cee96b9c2bfec612ec1f2c636ebdf22dedb36671e8L267-R270) [[3]](diffhunk://#diff-3e287daf297213b698b3c94d6e6ed4aae139d570ba6b115da459d72b5c36c42fL44-R64) [[4]](diffhunk://#diff-3e287daf297213b698b3c94d6e6ed4aae139d570ba6b115da459d72b5c36c42fL86-R102) * Improved the loading of icons by using the `UIScaling` utility for better visual quality. --- ## Checklist ### General - [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [ ] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details. --------- Co-authored-by: pixeebot[bot] <104101892+pixeebot[bot]@users.noreply.github.com> Co-authored-by: a <a>
2025-02-18 11:57:56 +00:00
command.add("/opt/venv/bin/weasyprint");
2024-10-14 22:34:41 +01:00
command.add("-e");
command.add("utf-8");
command.add("-v");
2024-02-09 23:24:25 +00:00
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
2024-01-09 22:39:21 +00:00
} else {
2024-02-09 23:24:25 +00:00
command.add("ebook-convert");
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
command.add("--paper-size");
command.add("a4");
2024-01-28 17:36:17 +00:00
2024-06-05 21:16:22 +01:00
if (request != null && request.getZoom() != 1.0) {
File tempCssFile = Files.createTempFile("customStyle", ".css").toFile();
2024-02-09 23:24:25 +00:00
try (FileWriter writer = new FileWriter(tempCssFile)) {
writer.write("body { zoom: " + request.getZoom() + "; }");
2024-01-28 17:36:17 +00:00
}
2024-02-09 23:24:25 +00:00
command.add("--extra-css");
command.add(tempCssFile.getAbsolutePath());
2024-01-28 17:36:17 +00:00
}
2024-01-09 22:39:21 +00:00
}
2024-01-10 00:33:07 +00:00
2024-10-14 22:34:41 +01:00
ProcessExecutorResult returnCode =
2024-02-09 23:24:25 +00:00
ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
.runCommandWithOutputHandling(command);
2023-08-01 00:03:13 +01:00
pdfBytes = Files.readAllBytes(tempOutputFile);
2024-01-18 23:28:39 +00:00
} catch (IOException e) {
pdfBytes = Files.readAllBytes(tempOutputFile);
if (pdfBytes.length < 1) {
throw e;
}
2023-08-01 00:03:13 +01:00
} finally {
2024-05-27 17:53:18 +01:00
Files.deleteIfExists(tempOutputFile);
Files.deleteIfExists(tempInputFile);
2023-12-30 19:11:27 +00:00
}
2023-08-01 00:03:13 +01:00
return pdfBytes;
2023-12-30 19:11:27 +00:00
}
added option for disabling HTML Sanitize (#2831) # Description of Changes Please provide a summary of the changes, including: - added disableSanitize: false # set to 'true' to disable Sanitize HTML, set to false to enable Sanitize HTML; (can lead to injections in HTML) - Some users uses this on local boxes, and uses Google Fonts, and base64 image src. ### General - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [x] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [x] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [x] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details. --------- Co-authored-by: blaz.carli <blaz.carli@arctur.si> Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
2025-02-01 00:36:50 +01:00
private static String sanitizeHtmlContent(String htmlContent, boolean disableSanitize) {
return (!disableSanitize) ? CustomHtmlSanitizer.sanitize(htmlContent) : htmlContent;
2024-10-14 22:34:41 +01:00
}
Update sonarqube.yml and removal of gradle keys (#2866) # Description of Changes Please provide a summary of the changes, including: - What was changed - Why the change was made - Any challenges encountered Closes #(issue_number) --- ## Checklist ### General - [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [ ] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details.
2025-02-04 10:18:02 +00:00
private static void sanitizeHtmlFilesInZip(Path zipFilePath, boolean disableSanitize)
throws IOException {
2024-10-14 22:34:41 +01:00
Path tempUnzippedDir = Files.createTempDirectory("unzipped_");
try (ZipInputStream zipIn =
ZipSecurity.createHardenedInputStream(
new ByteArrayInputStream(Files.readAllBytes(zipFilePath)))) {
ZipEntry entry = zipIn.getNextEntry();
while (entry != null) {
Path filePath = tempUnzippedDir.resolve(sanitizeZipFilename(entry.getName()));
2024-10-14 22:34:41 +01:00
if (!entry.isDirectory()) {
Files.createDirectories(filePath.getParent());
if (entry.getName().toLowerCase().endsWith(".html")
|| entry.getName().toLowerCase().endsWith(".htm")) {
String content = new String(zipIn.readAllBytes(), StandardCharsets.UTF_8);
added option for disabling HTML Sanitize (#2831) # Description of Changes Please provide a summary of the changes, including: - added disableSanitize: false # set to 'true' to disable Sanitize HTML, set to false to enable Sanitize HTML; (can lead to injections in HTML) - Some users uses this on local boxes, and uses Google Fonts, and base64 image src. ### General - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [x] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [x] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [x] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details. --------- Co-authored-by: blaz.carli <blaz.carli@arctur.si> Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
2025-02-01 00:36:50 +01:00
String sanitizedContent = sanitizeHtmlContent(content, disableSanitize);
2024-10-14 22:34:41 +01:00
Files.write(filePath, sanitizedContent.getBytes(StandardCharsets.UTF_8));
} else {
Files.copy(zipIn, filePath);
}
}
zipIn.closeEntry();
entry = zipIn.getNextEntry();
}
}
// Repack the sanitized files
zipDirectory(tempUnzippedDir, zipFilePath);
// Clean up
deleteDirectory(tempUnzippedDir);
}
private static void zipDirectory(Path sourceDir, Path zipFilePath) throws IOException {
try (ZipOutputStream zos =
new ZipOutputStream(new FileOutputStream(zipFilePath.toFile()))) {
Files.walk(sourceDir)
.filter(path -> !Files.isDirectory(path))
.forEach(
path -> {
ZipEntry zipEntry =
new ZipEntry(sourceDir.relativize(path).toString());
try {
zos.putNextEntry(zipEntry);
Files.copy(path, zos);
zos.closeEntry();
} catch (IOException e) {
throw new UncheckedIOException(e);
}
});
}
}
private static void deleteDirectory(Path dir) throws IOException {
Files.walkFileTree(
dir,
new SimpleFileVisitor<Path>() {
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
throws IOException {
Files.delete(file);
return FileVisitResult.CONTINUE;
}
@Override
public FileVisitResult postVisitDirectory(Path dir, IOException exc)
throws IOException {
Files.delete(dir);
return FileVisitResult.CONTINUE;
}
});
}
2023-08-01 00:03:13 +01:00
private static Path unzipAndGetMainHtml(byte[] fileBytes) throws IOException {
Path tempDirectory = Files.createTempDirectory("unzipped_");
2024-02-06 00:00:49 +00:00
try (ZipInputStream zipIn =
ZipSecurity.createHardenedInputStream(new ByteArrayInputStream(fileBytes))) {
2023-08-01 00:03:13 +01:00
ZipEntry entry = zipIn.getNextEntry();
while (entry != null) {
Path filePath = tempDirectory.resolve(sanitizeZipFilename(entry.getName()));
2023-08-01 00:03:13 +01:00
if (entry.isDirectory()) {
Files.createDirectories(filePath); // Explicitly create the directory structure
2023-12-30 19:11:27 +00:00
} else {
2023-08-01 00:03:13 +01:00
Files.createDirectories(
filePath.getParent()); // Create parent directories if they don't exist
Files.copy(zipIn, filePath);
2023-12-30 19:11:27 +00:00
}
2023-08-01 00:03:13 +01:00
zipIn.closeEntry();
entry = zipIn.getNextEntry();
2023-12-30 19:11:27 +00:00
}
}
2023-08-01 00:03:13 +01:00
// search for the main HTML file.
try (Stream<Path> walk = Files.walk(tempDirectory)) {
List<Path> htmlFiles =
walk.filter(file -> file.toString().endsWith(".html"))
.collect(Collectors.toList());
2023-12-30 19:11:27 +00:00
2023-08-01 00:03:13 +01:00
if (htmlFiles.isEmpty()) {
throw new IOException("No HTML files found in the unzipped directory.");
2023-12-30 19:11:27 +00:00
}
2023-08-01 00:03:13 +01:00
// Prioritize 'index.html' if it exists, otherwise use the first .html file
for (Path htmlFile : htmlFiles) {
if ("index.html".equals(htmlFile.getFileName().toString())) {
2023-08-01 00:03:13 +01:00
return htmlFile;
2023-12-30 19:11:27 +00:00
}
}
2023-08-01 00:03:13 +01:00
return htmlFiles.get(0);
}
2023-12-30 19:11:27 +00:00
}
2024-01-09 22:39:21 +00:00
public static byte[] convertBookTypeToPdf(byte[] bytes, String originalFilename)
throws IOException, InterruptedException {
if (originalFilename == null || originalFilename.lastIndexOf('.') == -1) {
throw new IllegalArgumentException("Invalid original filename.");
}
String fileExtension = originalFilename.substring(originalFilename.lastIndexOf('.'));
List<String> command = new ArrayList<>();
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
Path tempInputFile = null;
try {
// Create temp file with appropriate extension
tempInputFile = Files.createTempFile("input_", fileExtension);
Files.write(tempInputFile, bytes);
command.add("ebook-convert");
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
ProcessExecutorResult returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.CALIBRE)
.runCommandWithOutputHandling(command);
return Files.readAllBytes(tempOutputFile);
} finally {
// Clean up temporary files
if (tempInputFile != null) {
Files.deleteIfExists(tempInputFile);
}
Files.deleteIfExists(tempOutputFile);
}
}
static String sanitizeZipFilename(String entryName) {
if (entryName == null || entryName.trim().isEmpty()) {
return entryName;
}
while (entryName.contains("../") || entryName.contains("..\\")) {
entryName = entryName.replace("../", "").replace("..\\", "");
}
return entryName;
}
2023-08-01 00:03:13 +01:00
}