Added option for disabling HTML sanitization (#2831)

# Description of Changes Please provide a summary of the changes, including: - Added the `disableSanitize: false` setting: set it to `true` to disable HTML sanitization, or to `false` to keep HTML sanitization enabled (disabling it can lead to injections in HTML). - Some users run this on local boxes and use Google Fonts and base64 image src. ### General - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [x] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [x] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [x] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details. --------- Co-authored-by: blaz.carli <blaz.carli@arctur.si> Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
2025-02-01 00:36:50 +01:00
parent c5cffdcacb
commit 6ae2fddd48
6 changed files with 35 additions and 15 deletions
--- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java
+++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java
@@ -14,6 +14,7 @@ import io.swagger.v3.oas.annotations.Operation;
 import io.swagger.v3.oas.annotations.tags.Tag;

 import stirling.software.SPDF.model.api.converters.HTMLToPdfRequest;
+import stirling.software.SPDF.model.ApplicationProperties;
 import stirling.software.SPDF.service.CustomPDDocumentFactory;
 import stirling.software.SPDF.utils.FileToPdf;
 import stirling.software.SPDF.utils.WebResponseUtils;
@@ -27,12 +28,16 @@ public class ConvertHtmlToPDF {

    private final CustomPDDocumentFactory pdfDocumentFactory;

+	private final ApplicationProperties applicationProperties;
+
    @Autowired
    public ConvertHtmlToPDF(
            CustomPDDocumentFactory pdfDocumentFactory,
-            @Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled) {
+            @Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled,
+			ApplicationProperties applicationProperties) {
        this.pdfDocumentFactory = pdfDocumentFactory;
        this.bookAndHtmlFormatsInstalled = bookAndHtmlFormatsInstalled;
+		this.applicationProperties = applicationProperties;
    }

    @PostMapping(consumes = "multipart/form-data", value = "/html/pdf")
@@ -54,12 +59,16 @@ public class ConvertHtmlToPDF {
                || (!originalFilename.endsWith(".html") && !originalFilename.endsWith(".zip"))) {
            throw new IllegalArgumentException("File must be either .html or .zip format.");
        }
+
+		boolean disableSanitize = Boolean.TRUE.equals(applicationProperties.getSystem().getDisableSanitize());
+
        byte[] pdfBytes =
                FileToPdf.convertHtmlToPdf(
                        request,
                        fileInput.getBytes(),
                        originalFilename,
-                        bookAndHtmlFormatsInstalled);
+                        bookAndHtmlFormatsInstalled,
+						disableSanitize);

        pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes);

--- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java
+++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java
@@ -24,6 +24,7 @@ import io.swagger.v3.oas.annotations.Operation;
 import io.swagger.v3.oas.annotations.tags.Tag;

 import stirling.software.SPDF.model.api.GeneralFile;
+import stirling.software.SPDF.model.ApplicationProperties;
 import stirling.software.SPDF.service.CustomPDDocumentFactory;
 import stirling.software.SPDF.utils.FileToPdf;
 import stirling.software.SPDF.utils.WebResponseUtils;
@@ -37,12 +38,16 @@ public class ConvertMarkdownToPdf {

    private final CustomPDDocumentFactory pdfDocumentFactory;

+	private final ApplicationProperties applicationProperties;
+
    @Autowired
    public ConvertMarkdownToPdf(
            CustomPDDocumentFactory pdfDocumentFactory,
-            @Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled) {
+            @Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled,
+			ApplicationProperties applicationProperties) {
        this.pdfDocumentFactory = pdfDocumentFactory;
        this.bookAndHtmlFormatsInstalled = bookAndHtmlFormatsInstalled;
+		this.applicationProperties = applicationProperties;
    }

    @PostMapping(consumes = "multipart/form-data", value = "/markdown/pdf")
@@ -76,12 +81,15 @@ public class ConvertMarkdownToPdf {

        String htmlContent = renderer.render(document);

+		boolean disableSanitize = Boolean.TRUE.equals(applicationProperties.getSystem().getDisableSanitize());
+
        byte[] pdfBytes =
                FileToPdf.convertHtmlToPdf(
                        null,
                        htmlContent.getBytes(),
                        "converted.html",
-                        bookAndHtmlFormatsInstalled);
+                        bookAndHtmlFormatsInstalled,
+						disableSanitize);
        pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes);
        String outputFilename =
                originalFilename.replaceFirst("[.][^.]+$", "")
--- a/src/main/java/stirling/software/SPDF/model/ApplicationProperties.java
+++ b/src/main/java/stirling/software/SPDF/model/ApplicationProperties.java
@@ -283,6 +283,7 @@ public class ApplicationProperties {
        private Boolean enableAlphaFunctionality;
        private String enableAnalytics;
        private Datasource datasource;
+        private Boolean disableSanitize;
    }

    @Data
--- a/src/main/java/stirling/software/SPDF/utils/FileToPdf.java
+++ b/src/main/java/stirling/software/SPDF/utils/FileToPdf.java
@@ -26,7 +26,8 @@ public class FileToPdf {
            HTMLToPdfRequest request,
            byte[] fileBytes,
            String fileName,
-            boolean htmlFormatsInstalled)
+            boolean htmlFormatsInstalled,
+			boolean disableSanitize)
            throws IOException, InterruptedException {

        Path tempOutputFile = Files.createTempFile("output_", ".pdf");
@@ -35,13 +36,12 @@ public class FileToPdf {
        try {
            if (fileName.endsWith(".html")) {
                tempInputFile = Files.createTempFile("input_", ".html");
-                String sanitizedHtml =
-                        sanitizeHtmlContent(new String(fileBytes, StandardCharsets.UTF_8));
+                String sanitizedHtml = sanitizeHtmlContent(new String(fileBytes, StandardCharsets.UTF_8), disableSanitize);
                Files.write(tempInputFile, sanitizedHtml.getBytes(StandardCharsets.UTF_8));
            } else if (fileName.endsWith(".zip")) {
                tempInputFile = Files.createTempFile("input_", ".zip");
                Files.write(tempInputFile, fileBytes);
-                sanitizeHtmlFilesInZip(tempInputFile);
+                sanitizeHtmlFilesInZip(tempInputFile, disableSanitize);
            } else {
                throw new IllegalArgumentException("Unsupported file format: " + fileName);
            }
@@ -89,11 +89,11 @@ public class FileToPdf {
        return pdfBytes;
    }

-    private static String sanitizeHtmlContent(String htmlContent) {
-        return CustomHtmlSanitizer.sanitize(htmlContent);
+    private static String sanitizeHtmlContent(String htmlContent, boolean disableSanitize) {
+        return (!disableSanitize) ? CustomHtmlSanitizer.sanitize(htmlContent) : htmlContent;
    }

-    private static void sanitizeHtmlFilesInZip(Path zipFilePath) throws IOException {
+    private static void sanitizeHtmlFilesInZip(Path zipFilePath, boolean disableSanitize) throws IOException {
        Path tempUnzippedDir = Files.createTempDirectory("unzipped_");
        try (ZipInputStream zipIn =
                ZipSecurity.createHardenedInputStream(
@@ -106,7 +106,7 @@ public class FileToPdf {
                    if (entry.getName().toLowerCase().endsWith(".html")
                            || entry.getName().toLowerCase().endsWith(".htm")) {
                        String content = new String(zipIn.readAllBytes(), StandardCharsets.UTF_8);
-                        String sanitizedContent = sanitizeHtmlContent(content);
+                        String sanitizedContent = sanitizeHtmlContent(content, disableSanitize);
                        Files.write(filePath, sanitizedContent.getBytes(StandardCharsets.UTF_8));
                    } else {
                        Files.copy(zipIn, filePath);