added option for disabling HTML Sanitize (#2831)

# Description of Changes

Please provide a summary of the changes, including:

- added disableSanitize: false # set to 'true' to disable Sanitize HTML,
set to false to enable Sanitize HTML; (can lead to injections in HTML)
- Some users uses this on local boxes, and uses Google Fonts, and base64
image src.


### General

- [x] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md)
(if applicable)
- [x] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md)
(if applicable)
- [x] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [x] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing)
for more details.

---------

Co-authored-by: blaz.carli <blaz.carli@arctur.si>
Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
This commit is contained in:
Blaž Carli
2025-02-01 00:36:50 +01:00
committed by GitHub
parent c5cffdcacb
commit 6ae2fddd48
6 changed files with 35 additions and 15 deletions

View File

@@ -14,6 +14,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.converters.HTMLToPdfRequest;
import stirling.software.SPDF.model.ApplicationProperties;
import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.FileToPdf;
import stirling.software.SPDF.utils.WebResponseUtils;
@@ -27,12 +28,16 @@ public class ConvertHtmlToPDF {
private final CustomPDDocumentFactory pdfDocumentFactory;
private final ApplicationProperties applicationProperties;
@Autowired
public ConvertHtmlToPDF(
CustomPDDocumentFactory pdfDocumentFactory,
@Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled) {
@Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled,
ApplicationProperties applicationProperties) {
this.pdfDocumentFactory = pdfDocumentFactory;
this.bookAndHtmlFormatsInstalled = bookAndHtmlFormatsInstalled;
this.applicationProperties = applicationProperties;
}
@PostMapping(consumes = "multipart/form-data", value = "/html/pdf")
@@ -54,12 +59,16 @@ public class ConvertHtmlToPDF {
|| (!originalFilename.endsWith(".html") && !originalFilename.endsWith(".zip"))) {
throw new IllegalArgumentException("File must be either .html or .zip format.");
}
boolean disableSanitize = Boolean.TRUE.equals(applicationProperties.getSystem().getDisableSanitize());
byte[] pdfBytes =
FileToPdf.convertHtmlToPdf(
request,
fileInput.getBytes(),
originalFilename,
bookAndHtmlFormatsInstalled);
bookAndHtmlFormatsInstalled,
disableSanitize);
pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes);

View File

@@ -24,6 +24,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.GeneralFile;
import stirling.software.SPDF.model.ApplicationProperties;
import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.FileToPdf;
import stirling.software.SPDF.utils.WebResponseUtils;
@@ -37,12 +38,16 @@ public class ConvertMarkdownToPdf {
private final CustomPDDocumentFactory pdfDocumentFactory;
private final ApplicationProperties applicationProperties;
@Autowired
public ConvertMarkdownToPdf(
CustomPDDocumentFactory pdfDocumentFactory,
@Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled) {
@Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled,
ApplicationProperties applicationProperties) {
this.pdfDocumentFactory = pdfDocumentFactory;
this.bookAndHtmlFormatsInstalled = bookAndHtmlFormatsInstalled;
this.applicationProperties = applicationProperties;
}
@PostMapping(consumes = "multipart/form-data", value = "/markdown/pdf")
@@ -76,12 +81,15 @@ public class ConvertMarkdownToPdf {
String htmlContent = renderer.render(document);
boolean disableSanitize = Boolean.TRUE.equals(applicationProperties.getSystem().getDisableSanitize());
byte[] pdfBytes =
FileToPdf.convertHtmlToPdf(
null,
htmlContent.getBytes(),
"converted.html",
bookAndHtmlFormatsInstalled);
bookAndHtmlFormatsInstalled,
disableSanitize);
pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes);
String outputFilename =
originalFilename.replaceFirst("[.][^.]+$", "")

View File

@@ -283,6 +283,7 @@ public class ApplicationProperties {
private Boolean enableAlphaFunctionality;
private String enableAnalytics;
private Datasource datasource;
private Boolean disableSanitize;
}
@Data

View File

@@ -26,7 +26,8 @@ public class FileToPdf {
HTMLToPdfRequest request,
byte[] fileBytes,
String fileName,
boolean htmlFormatsInstalled)
boolean htmlFormatsInstalled,
boolean disableSanitize)
throws IOException, InterruptedException {
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
@@ -35,13 +36,12 @@ public class FileToPdf {
try {
if (fileName.endsWith(".html")) {
tempInputFile = Files.createTempFile("input_", ".html");
String sanitizedHtml =
sanitizeHtmlContent(new String(fileBytes, StandardCharsets.UTF_8));
String sanitizedHtml = sanitizeHtmlContent(new String(fileBytes, StandardCharsets.UTF_8), disableSanitize);
Files.write(tempInputFile, sanitizedHtml.getBytes(StandardCharsets.UTF_8));
} else if (fileName.endsWith(".zip")) {
tempInputFile = Files.createTempFile("input_", ".zip");
Files.write(tempInputFile, fileBytes);
sanitizeHtmlFilesInZip(tempInputFile);
sanitizeHtmlFilesInZip(tempInputFile, disableSanitize);
} else {
throw new IllegalArgumentException("Unsupported file format: " + fileName);
}
@@ -89,11 +89,11 @@ public class FileToPdf {
return pdfBytes;
}
private static String sanitizeHtmlContent(String htmlContent) {
return CustomHtmlSanitizer.sanitize(htmlContent);
private static String sanitizeHtmlContent(String htmlContent, boolean disableSanitize) {
return (!disableSanitize) ? CustomHtmlSanitizer.sanitize(htmlContent) : htmlContent;
}
private static void sanitizeHtmlFilesInZip(Path zipFilePath) throws IOException {
private static void sanitizeHtmlFilesInZip(Path zipFilePath, boolean disableSanitize) throws IOException {
Path tempUnzippedDir = Files.createTempDirectory("unzipped_");
try (ZipInputStream zipIn =
ZipSecurity.createHardenedInputStream(
@@ -106,7 +106,7 @@ public class FileToPdf {
if (entry.getName().toLowerCase().endsWith(".html")
|| entry.getName().toLowerCase().endsWith(".htm")) {
String content = new String(zipIn.readAllBytes(), StandardCharsets.UTF_8);
String sanitizedContent = sanitizeHtmlContent(content);
String sanitizedContent = sanitizeHtmlContent(content, disableSanitize);
Files.write(filePath, sanitizedContent.getBytes(StandardCharsets.UTF_8));
} else {
Files.copy(zipIn, filePath);