diff --git a/Dockerfile-fat b/Dockerfile-fat index f0b7bc6b..ea09e51c 100644 --- a/Dockerfile-fat +++ b/Dockerfile-fat @@ -40,6 +40,7 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \ apk upgrade --no-cache -a && \ apk add --no-cache \ + calibre@testing \ ca-certificates \ tzdata \ tini \ diff --git a/build.gradle b/build.gradle index 0c3669a2..0a27043e 100644 --- a/build.gradle +++ b/build.gradle @@ -134,6 +134,8 @@ dependencies { implementation "org.springframework.boot:spring-boot-starter-thymeleaf:$springBootVersion" implementation 'com.posthog.java:posthog:1.1.1' + implementation 'com.googlecode.owasp-java-html-sanitizer:owasp-java-html-sanitizer:20240325.1' + if (System.getenv("DOCKER_ENABLE_SECURITY") != "false") { implementation "org.springframework.boot:spring-boot-starter-security:$springBootVersion" diff --git a/cucumber/features/external.feature b/cucumber/features/external.feature index 92ca2f40..58c0a859 100644 --- a/cucumber/features/external.feature +++ b/cucumber/features/external.feature @@ -218,6 +218,28 @@ Feature: API Validation | .odt | | .pptx | | .rtf | - - + @calibre @positive @htmltopdf + Scenario: Convert HTML to PDF + Given I use an example file at "exampleFiles/example.html" as parameter "fileInput" + When I send the API request to the endpoint "/api/v1/convert/html/pdf" + Then the response status code should be 200 + And the response file should have size greater than 100 + And the response file should have extension ".pdf" + + @calibre @positive @zippedhtmltopdf + Scenario: Convert zipped HTML to PDF + Given I use an example file at "exampleFiles/example_html.zip" as parameter "fileInput" + When I send the API request to the endpoint "/api/v1/convert/html/pdf" + Then the response status code should be 200 + And the response file should have size greater than 100 + And the response file should have 
extension ".pdf" + + @calibre @positive @markdowntopdf + Scenario: Convert Markdown to PDF + Given I use an example file at "exampleFiles/example.md" as parameter "fileInput" + When I send the API request to the endpoint "/api/v1/convert/markdown/pdf" + Then the response status code should be 200 + And the response file should have size greater than 100 + And the response file should have extension ".pdf" + \ No newline at end of file diff --git a/exampleYmlFiles/docker-compose-latest-fat-security.yml b/exampleYmlFiles/docker-compose-latest-fat-security.yml index f29a8a9f..8a98513f 100644 --- a/exampleYmlFiles/docker-compose-latest-fat-security.yml +++ b/exampleYmlFiles/docker-compose-latest-fat-security.yml @@ -7,7 +7,7 @@ services: limits: memory: 4G healthcheck: - test: ["CMD-SHELL", "curl -f http://localhost:8080/api/v1/info/status | grep -q 'UP' && curl -fL http://localhost:8080/ | grep -q 'Please sign in'"] + test: ["CMD-SHELL", "curl -f http://localhost:8080/api/v1/info/status | grep -q 'UP'"] interval: 5s timeout: 10s retries: 16 @@ -19,7 +19,7 @@ services: - /stirling/latest/logs:/logs:rw environment: DOCKER_ENABLE_SECURITY: "true" - SECURITY_ENABLELOGIN: "true" + SECURITY_ENABLELOGIN: "false" PUID: 1002 PGID: 1002 UMASK: "022" @@ -30,4 +30,5 @@ services: SYSTEM_MAXFILESIZE: "100" METRICS_ENABLED: "true" SYSTEM_GOOGLEVISIBILITY: "true" + INSTALL_BOOK_AND_ADVANCED_HTML_OPS: "true" restart: on-failure:5 diff --git a/scripts/init-without-ocr.sh b/scripts/init-without-ocr.sh index 49218f76..593c4c63 100644 --- a/scripts/init-without-ocr.sh +++ b/scripts/init-without-ocr.sh @@ -12,8 +12,8 @@ fi umask "$UMASK" || true if [[ "$INSTALL_BOOK_AND_ADVANCED_HTML_OPS" == "true" && "$FAT_DOCKER" != "true" ]]; then - echo "issue with calibre in current version, feature currently disabled on Stirling-PDF" - #apk add --no-cache calibre@testing + #echo "issue with calibre in current version, feature currently disabled on Stirling-PDF" + apk add --no-cache calibre@testing fi 
if [[ "$FAT_DOCKER" != "true" ]]; then diff --git a/src/main/java/stirling/software/SPDF/EE/KeygenLicenseVerifier.java b/src/main/java/stirling/software/SPDF/EE/KeygenLicenseVerifier.java index f3f68257..e9b14ac4 100644 --- a/src/main/java/stirling/software/SPDF/EE/KeygenLicenseVerifier.java +++ b/src/main/java/stirling/software/SPDF/EE/KeygenLicenseVerifier.java @@ -5,6 +5,7 @@ import java.net.http.HttpClient; import java.net.http.HttpRequest; import java.net.http.HttpResponse; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import com.fasterxml.jackson.databind.JsonNode; @@ -12,19 +13,22 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.posthog.java.shaded.org.json.JSONObject; import lombok.extern.slf4j.Slf4j; +import stirling.software.SPDF.model.ApplicationProperties; import stirling.software.SPDF.utils.GeneralUtils; @Service @Slf4j public class KeygenLicenseVerifier { private static final String ACCOUNT_ID = "e5430f69-e834-4ae4-befd-b602aae5f372"; - private static final String PRODUCT_ID = "f9bb2423-62c9-4d39-8def-4fdc5aca751e"; private static final String BASE_URL = "https://api.keygen.sh/v1/accounts"; private static final ObjectMapper objectMapper = new ObjectMapper(); - // 23:26:20.344 [scheduling-1] INFO s.s.SPDF.EE.KeygenLicenseVerifier - - // validateLicenseResponse body: - // 
{"data":{"id":"808ed3c9-584b-46dd-8a80-c9217ef70915","type":"licenses","attributes":{"name":"userCounTest","key":"A7EW-KUPF-PRML-RRVL-HLMP-7THR-F7KE-XF4C","expiry":"2024-10-31T21:39:49.271Z","status":"ACTIVE","uses":0,"suspended":false,"scheme":null,"encrypted":false,"strict":true,"floating":true,"protected":true,"version":null,"maxMachines":1,"maxProcesses":null,"maxUsers":null,"maxCores":null,"maxUses":null,"requireHeartbeat":false,"requireCheckIn":false,"lastValidated":"2024-10-01T22:26:18.121Z","lastCheckIn":null,"nextCheckIn":null,"lastCheckOut":null,"metadata":{"users":10},"created":"2024-10-01T21:39:49.268Z","updated":"2024-10-01T21:39:49.268Z"},"relationships":{"account":{"links":{"related":"/v1/accounts/e5430f69-e834-4ae4-befd-b602aae5f372"},"data":{"type":"accounts","id":"e5430f69-e834-4ae4-befd-b602aae5f372"}},"environment":{"links":{"related":null},"data":null},"product":{"links":{"related":"/v1/accounts/e5430f69-e834-4ae4-befd-b602aae5f372/licenses/808ed3c9-584b-46dd-8a80-c9217ef70915/product"},"data":{"type":"products","id":"f9bb2423-62c9-4d39-8def-4fdc5aca751e"}},"policy":{"links":{"related":"/v1/accounts/e5430f69-e834-4ae4-befd-b602aae5f372/licenses/808ed3c9-584b-46dd-8a80-c9217ef70915/policy"},"data":{"type":"policies","id":"04caef06-9ac2-4084-bf3c-bca4a0d29143"}},"group":{"links":{"related":"/v1/accounts/e5430f69-e834-4ae4-befd-b602aae5f372/licenses/808ed3c9-584b-46dd-8a80-c9217ef70915/group"},"data":null},"owner":{"links":{"related":"/v1/accounts/e5430f69-e834-4ae4-befd-b602aae5f372/licenses/808ed3c9-584b-46dd-8a80-c9217ef70915/owner"},"data":null},"users":{"links":{"related":"/v1/accounts/e5430f69-e834-4ae4-befd-b602aae5f372/licenses/808ed3c9-584b-46dd-8a80-c9217ef70915/users"},"meta":{"count":0}},"machines":{"links":{"related":"/v1/accounts/e5430f69-e834-4ae4-befd-b602aae5f372/licenses/808ed3c9-584b-46dd-8a80-c9217ef70915/machines"},"meta":{"cores":0,"count":0}},"tokens":{"links":{"related":"/v1/accounts/e5430f69-e834-4ae4-befd-b602aae5f372/lice
nses/808ed3c9-584b-46dd-8a80-c9217ef70915/tokens"}},"entitlements":{"links":{"related":"/v1/accounts/e5430f69-e834-4ae4-befd-b602aae5f372/licenses/808ed3c9-584b-46dd-8a80-c9217ef70915/entitlements"}}},"links":{"self":"/v1/accounts/e5430f69-e834-4ae4-befd-b602aae5f372/licenses/808ed3c9-584b-46dd-8a80-c9217ef70915"}},"meta":{"ts":"2024-10-01T22:26:18.124Z","valid":false,"detail":"fingerprint is not activated (has no associated machines)","code":"NO_MACHINES","scope":{"fingerprint":"example-fingerprint"}}} + private final ApplicationProperties applicationProperties; + + @Autowired + public KeygenLicenseVerifier(ApplicationProperties applicationProperties) { + this.applicationProperties = applicationProperties; + } public boolean verifyLicense(String licenseKey) { try { @@ -68,7 +72,7 @@ public class KeygenLicenseVerifier { } } - private static JsonNode validateLicense(String licenseKey, String machineFingerprint) + private JsonNode validateLicense(String licenseKey, String machineFingerprint) throws Exception { HttpClient client = HttpClient.newHttpClient(); String requestBody = @@ -104,14 +108,24 @@ public class KeygenLicenseVerifier { log.debug("Validation detail: " + detail); log.debug("Validation code: " + code); + int users = + jsonResponse + .path("data") + .path("attributes") + .path("metadata") + .path("users") + .asInt(0); + applicationProperties.getEnterpriseEdition().setMaxUsers(users); + log.info(applicationProperties.toString()); + } else { log.error("Error validating license. 
Status code: " + response.statusCode()); } return jsonResponse; } - private static boolean activateMachine( - String licenseKey, String licenseId, String machineFingerprint) throws Exception { + private boolean activateMachine(String licenseKey, String licenseId, String machineFingerprint) + throws Exception { HttpClient client = HttpClient.newHttpClient(); String hostname; @@ -184,7 +198,7 @@ public class KeygenLicenseVerifier { } } - private static String generateMachineFingerprint() { + private String generateMachineFingerprint() { return GeneralUtils.generateMachineFingerprint(); } } diff --git a/src/main/java/stirling/software/SPDF/EE/LicenseKeyChecker.java b/src/main/java/stirling/software/SPDF/EE/LicenseKeyChecker.java index 1b1e8cf6..3da7da05 100644 --- a/src/main/java/stirling/software/SPDF/EE/LicenseKeyChecker.java +++ b/src/main/java/stirling/software/SPDF/EE/LicenseKeyChecker.java @@ -20,7 +20,6 @@ public class LicenseKeyChecker { private boolean enterpriseEnbaledResult = false; - // Inject your license service or configuration @Autowired public LicenseKeyChecker( KeygenLicenseVerifier licenseService, ApplicationProperties applicationProperties) { @@ -46,7 +45,6 @@ public class LicenseKeyChecker { log.info("License key is invalid."); } } - } public void updateLicenseKey(String newKey) throws IOException { diff --git a/src/main/java/stirling/software/SPDF/config/security/UserService.java b/src/main/java/stirling/software/SPDF/config/security/UserService.java index 39b26a0e..27e0baa2 100644 --- a/src/main/java/stirling/software/SPDF/config/security/UserService.java +++ b/src/main/java/stirling/software/SPDF/config/security/UserService.java @@ -44,6 +44,10 @@ public class UserService implements UserServiceInterface { @Autowired DatabaseBackupInterface databaseBackupHelper; + public long getTotalUserCount() { + return userRepository.count(); + } + // Handle OAUTH2 login and user auto creation. 
public boolean processOAuth2PostLogin(String username, boolean autoCreateUser) throws IllegalArgumentException, IOException { diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertBookToPDFController.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertBookToPDFController.java index 694d30ab..05bcbe55 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertBookToPDFController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertBookToPDFController.java @@ -1,30 +1,33 @@ package stirling.software.SPDF.controller.api.converters; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.ModelAttribute; import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; import org.springframework.web.multipart.MultipartFile; import io.github.pixee.security.Filenames; import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.tags.Tag; import stirling.software.SPDF.model.api.GeneralFile; import stirling.software.SPDF.service.CustomPDDocumentFactory; import stirling.software.SPDF.utils.FileToPdf; import stirling.software.SPDF.utils.WebResponseUtils; -// Disabled for now -// @RestController -// @Tag(name = "Convert", description = "Convert APIs") -// @RequestMapping("/api/v1/convert") +@RestController +@Tag(name = "Convert", description = "Convert APIs") +@RequestMapping("/api/v1/convert") public class ConvertBookToPDFController { private final boolean bookAndHtmlFormatsInstalled; private final CustomPDDocumentFactory pdfDocumentFactory; - // @Autowired + @Autowired public ConvertBookToPDFController( CustomPDDocumentFactory pdfDocumentFactory, 
@Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled) { @@ -66,6 +69,8 @@ public class ConvertBookToPDFController { } byte[] pdfBytes = FileToPdf.convertBookTypeToPdf(fileInput.getBytes(), originalFilename); + pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes); + String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") + ".pdf"; // Remove file extension and append .pdf diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java index 19ba1ac4..a4fe57bb 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java @@ -1,27 +1,39 @@ package stirling.software.SPDF.controller.api.converters; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.ModelAttribute; import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; import org.springframework.web.multipart.MultipartFile; import io.github.pixee.security.Filenames; import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.tags.Tag; import stirling.software.SPDF.model.api.converters.HTMLToPdfRequest; +import stirling.software.SPDF.service.CustomPDDocumentFactory; import stirling.software.SPDF.utils.FileToPdf; import stirling.software.SPDF.utils.WebResponseUtils; -// Disabled for now -// @RestController -// @Tag(name = "Convert", description = "Convert APIs") -// @RequestMapping("/api/v1/convert") +@RestController +@Tag(name = "Convert", description = "Convert APIs") 
+@RequestMapping("/api/v1/convert") public class ConvertHtmlToPDF { - // @Autowired - @Qualifier("bookAndHtmlFormatsInstalled") - private boolean bookAndHtmlFormatsInstalled; + private final boolean bookAndHtmlFormatsInstalled; + + private final CustomPDDocumentFactory pdfDocumentFactory; + + @Autowired + public ConvertHtmlToPDF( + CustomPDDocumentFactory pdfDocumentFactory, + @Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled) { + this.pdfDocumentFactory = pdfDocumentFactory; + this.bookAndHtmlFormatsInstalled = bookAndHtmlFormatsInstalled; + } @PostMapping(consumes = "multipart/form-data", value = "/html/pdf") @Operation( @@ -49,6 +61,8 @@ public class ConvertHtmlToPDF { originalFilename, bookAndHtmlFormatsInstalled); + pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes); + String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") + ".pdf"; // Remove file extension and append .pdf diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java index 5b2f3fdf..b378f479 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java @@ -10,28 +10,40 @@ import org.commonmark.node.Node; import org.commonmark.parser.Parser; import org.commonmark.renderer.html.AttributeProvider; import org.commonmark.renderer.html.HtmlRenderer; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.ModelAttribute; import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; import 
org.springframework.web.multipart.MultipartFile; import io.github.pixee.security.Filenames; import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.tags.Tag; import stirling.software.SPDF.model.api.GeneralFile; +import stirling.software.SPDF.service.CustomPDDocumentFactory; import stirling.software.SPDF.utils.FileToPdf; import stirling.software.SPDF.utils.WebResponseUtils; -// Disabled for now -// @RestController -// @Tag(name = "Convert", description = "Convert APIs") -// @RequestMapping("/api/v1/convert") +@RestController +@Tag(name = "Convert", description = "Convert APIs") +@RequestMapping("/api/v1/convert") public class ConvertMarkdownToPdf { - // @Autowired - @Qualifier("bookAndHtmlFormatsInstalled") - private boolean bookAndHtmlFormatsInstalled; + private final boolean bookAndHtmlFormatsInstalled; + + private final CustomPDDocumentFactory pdfDocumentFactory; + + @Autowired + public ConvertMarkdownToPdf( + CustomPDDocumentFactory pdfDocumentFactory, + @Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled) { + this.pdfDocumentFactory = pdfDocumentFactory; + this.bookAndHtmlFormatsInstalled = bookAndHtmlFormatsInstalled; + } @PostMapping(consumes = "multipart/form-data", value = "/markdown/pdf") @Operation( @@ -70,7 +82,7 @@ public class ConvertMarkdownToPdf { htmlContent.getBytes(), "converted.html", bookAndHtmlFormatsInstalled); - + pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes); String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") + ".pdf"; // Remove file extension and append .pdf diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToBookController.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToBookController.java index 181b5713..c8b9dd4d 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToBookController.java +++ 
b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToBookController.java @@ -6,27 +6,30 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.ModelAttribute; import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; import org.springframework.web.multipart.MultipartFile; import io.github.pixee.security.Filenames; import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.tags.Tag; import stirling.software.SPDF.model.api.converters.PdfToBookRequest; import stirling.software.SPDF.utils.ProcessExecutor; import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult; import stirling.software.SPDF.utils.WebResponseUtils; -// Disabled for now -// @RestController -// @Tag(name = "Convert", description = "Convert APIs") -// @RequestMapping("/api/v1/convert") +@RestController +@Tag(name = "Convert", description = "Convert APIs") +@RequestMapping("/api/v1/convert") public class ConvertPDFToBookController { - // @Autowired + @Autowired @Qualifier("bookAndHtmlFormatsInstalled") private boolean bookAndHtmlFormatsInstalled; diff --git a/src/main/java/stirling/software/SPDF/model/ApplicationProperties.java b/src/main/java/stirling/software/SPDF/model/ApplicationProperties.java index 758e6e8c..10ad434a 100644 --- a/src/main/java/stirling/software/SPDF/model/ApplicationProperties.java +++ b/src/main/java/stirling/software/SPDF/model/ApplicationProperties.java @@ -218,6 +218,7 @@ public class ApplicationProperties { public static class EnterpriseEdition { private boolean enabled; @ToString.Exclude private String key; + private int maxUsers; private 
CustomMetadata customMetadata = new CustomMetadata(); @Data 
diff --git a/src/main/java/stirling/software/SPDF/service/CustomPDDocumentFactory.java b/src/main/java/stirling/software/SPDF/service/CustomPDDocumentFactory.java
index 180eaff7..99a2a212 100644
--- a/src/main/java/stirling/software/SPDF/service/CustomPDDocumentFactory.java
+++ b/src/main/java/stirling/software/SPDF/service/CustomPDDocumentFactory.java
@@ -34,6 +34,71 @@ public class CustomPDDocumentFactory {
         return document;
     }
 
+    /**
+     * Loads a PDF from raw bytes and returns it re-serialized after its metadata has been
+     * re-applied.
+     *
+     * @param oldDocument bytes of an existing PDF
+     * @return bytes of the same PDF, pages included
+     * @throws IOException if the input cannot be parsed or the result cannot be written
+     */
+    public byte[] createNewBytesBasedOnOldDocument(byte[] oldDocument) throws IOException {
+        // The document is opened here, so it is closed here - even when saving fails.
+        try (PDDocument document = Loader.loadPDF(oldDocument)) {
+            return createNewBytesBasedOnOldDocument(document);
+        }
+    }
+
+    /**
+     * Loads a PDF from a file and returns it re-serialized after its metadata has been
+     * re-applied.
+     *
+     * @param oldDocument file containing an existing PDF
+     * @return bytes of the same PDF, pages included
+     * @throws IOException if the file cannot be parsed or the result cannot be written
+     */
+    public byte[] createNewBytesBasedOnOldDocument(File oldDocument) throws IOException {
+        // The document is opened here, so it is closed here - even when saving fails.
+        try (PDDocument document = Loader.loadPDF(oldDocument)) {
+            return createNewBytesBasedOnOldDocument(document);
+        }
+    }
+
+    /**
+     * Re-applies the metadata extracted from {@code oldDocument} and serializes that same
+     * document, pages included.
+     *
+     * <p>The document passed in is the one that gets saved: saving a freshly created, empty
+     * document instead would return a PDF that carries the metadata but none of the pages.
+     * The caller keeps ownership of {@code oldDocument} and is responsible for closing it.
+     *
+     * @param oldDocument an open document; the metadata service is invoked on it directly
+     * @return the serialized PDF
+     * @throws IOException if the document cannot be written
+     */
+    public byte[] createNewBytesBasedOnOldDocument(PDDocument oldDocument) throws IOException {
+        pdfMetadataService.setMetadataToPdf(
+                oldDocument, pdfMetadataService.extractMetadataFromPdf(oldDocument), true);
+
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        oldDocument.save(baos);
+        return baos.toByteArray();
+    }
+
+    public PDDocument createNewDocumentBasedOnOldDocument(byte[] oldDocument) throws IOException {
+        // NOTE(review): the loaded document is not closed in this method - confirm whether the
+        // PDDocument overload (or the caller) is expected to release it.
+        PDDocument document = Loader.loadPDF(oldDocument);
+        return createNewDocumentBasedOnOldDocument(document);
+    }
+
+    public PDDocument createNewDocumentBasedOnOldDocument(File oldDocument) throws IOException {
+        // NOTE(review): the loaded document is not closed in this method - confirm whether the
+        // PDDocument overload (or the caller) is expected to release it.
+        PDDocument document = Loader.loadPDF(oldDocument);
+        return createNewDocumentBasedOnOldDocument(document);
+    }
+
     public PDDocument createNewDocumentBasedOnOldDocument(PDDocument oldDocument)
             throws IOException {
         PDDocument document = new PDDocument();
diff --git a/src/main/java/stirling/software/SPDF/utils/CustomHtmlSanitizer.java b/src/main/java/stirling/software/SPDF/utils/CustomHtmlSanitizer.java new 
file mode 100644 index 00000000..d8c247cc --- /dev/null +++ b/src/main/java/stirling/software/SPDF/utils/CustomHtmlSanitizer.java @@ -0,0 +1,20 @@ +package stirling.software.SPDF.utils; + +import org.owasp.html.HtmlPolicyBuilder; +import org.owasp.html.PolicyFactory; +import org.owasp.html.Sanitizers; + +public class CustomHtmlSanitizer { + private static final PolicyFactory POLICY = + Sanitizers.FORMATTING + .and(Sanitizers.BLOCKS) + .and(Sanitizers.STYLES) + .and(Sanitizers.LINKS) + .and(Sanitizers.TABLES) + .and(Sanitizers.IMAGES) + .and(new HtmlPolicyBuilder().disallowElements("noscript").toFactory()); + + public static String sanitize(String html) { + return POLICY.sanitize(html); + } +} diff --git a/src/main/java/stirling/software/SPDF/utils/FileToPdf.java b/src/main/java/stirling/software/SPDF/utils/FileToPdf.java index c7123424..55fbd38e 100644 --- a/src/main/java/stirling/software/SPDF/utils/FileToPdf.java +++ b/src/main/java/stirling/software/SPDF/utils/FileToPdf.java @@ -2,16 +2,23 @@ package stirling.software.SPDF.utils; import java.io.ByteArrayInputStream; import java.io.File; +import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.FileVisitResult; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.SimpleFileVisitor; +import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; import java.util.stream.Stream; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; +import java.util.zip.ZipOutputStream; import io.github.pixee.security.ZipSecurity; @@ -33,10 +40,15 @@ public class FileToPdf { try { if (fileName.endsWith(".html")) { tempInputFile = Files.createTempFile("input_", ".html"); - Files.write(tempInputFile, fileBytes); - } else { + String sanitizedHtml = + sanitizeHtmlContent(new String(fileBytes, 
StandardCharsets.UTF_8)); + Files.write(tempInputFile, sanitizedHtml.getBytes(StandardCharsets.UTF_8)); + } else if (fileName.endsWith(".zip")) { tempInputFile = Files.createTempFile("input_", ".zip"); Files.write(tempInputFile, fileBytes); + sanitizeHtmlFilesInZip(tempInputFile); + } else { + throw new IllegalArgumentException("Unsupported file format: " + fileName); } List command = new ArrayList<>(); @@ -45,7 +57,6 @@ public class FileToPdf { command.add("-e utf-8"); command.add(tempInputFile.toString()); command.add(tempOutputFile.toString()); - } else { command.add("ebook-convert"); command.add(tempInputFile.toString()); @@ -54,10 +65,8 @@ public class FileToPdf { command.add("a4"); if (request != null && request.getZoom() != 1.0) { - // Create a temporary CSS file File tempCssFile = Files.createTempFile("customStyle", ".css").toFile(); try (FileWriter writer = new FileWriter(tempCssFile)) { - // Write the CSS rule to the file writer.write("body { zoom: " + request.getZoom() + "; }"); } command.add("--extra-css"); @@ -65,9 +74,7 @@ public class FileToPdf { } } - ProcessExecutorResult returnCode; - - returnCode = + ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT) .runCommandWithOutputHandling(command); @@ -78,8 +85,6 @@ public class FileToPdf { throw e; } } finally { - - // Clean up temporary files Files.deleteIfExists(tempOutputFile); Files.deleteIfExists(tempInputFile); } 
@@ -87,6 +92,119 @@ public class FileToPdf {
         return pdfBytes;
     }
 
+    /** Sanitizes an HTML string by delegating to {@link CustomHtmlSanitizer#sanitize(String)}. */
+    private static String sanitizeHtmlContent(String htmlContent) {
+        return CustomHtmlSanitizer.sanitize(htmlContent);
+    }
+
+    /**
+     * Rewrites the ZIP archive at {@code zipFilePath} in place: entries ending in {@code .html} or
+     * {@code .htm} are passed through {@link #sanitizeHtmlContent(String)}, all other entries are
+     * copied unchanged.
+     *
+     * @param zipFilePath archive to sanitize; it is overwritten with the repacked result
+     * @throws IOException if the archive cannot be read, an entry would be written outside the
+     *     extraction directory, or the repacked archive cannot be written
+     */
+    private static void sanitizeHtmlFilesInZip(Path zipFilePath) throws IOException {
+        Path tempUnzippedDir =
+                Files.createTempDirectory("unzipped_").toAbsolutePath().normalize();
+        try {
+            try (ZipInputStream zipIn =
+                    ZipSecurity.createHardenedInputStream(
+                            new ByteArrayInputStream(Files.readAllBytes(zipFilePath)))) {
+                ZipEntry entry = zipIn.getNextEntry();
+                while (entry != null) {
+                    Path filePath = tempUnzippedDir.resolve(entry.getName()).normalize();
+                    // Defence in depth: do not rely on the stream wrapper alone to reject
+                    // entry names that would resolve outside the extraction directory.
+                    if (!filePath.startsWith(tempUnzippedDir)) {
+                        throw new IOException(
+                                "Zip entry is outside of the target directory: "
+                                        + entry.getName());
+                    }
+                    if (!entry.isDirectory()) {
+                        Files.createDirectories(filePath.getParent());
+                        String entryName = entry.getName().toLowerCase();
+                        if (entryName.endsWith(".html") || entryName.endsWith(".htm")) {
+                            String content =
+                                    new String(zipIn.readAllBytes(), StandardCharsets.UTF_8);
+                            String sanitizedContent = sanitizeHtmlContent(content);
+                            Files.write(
+                                    filePath, sanitizedContent.getBytes(StandardCharsets.UTF_8));
+                        } else {
+                            Files.copy(zipIn, filePath);
+                        }
+                    }
+                    zipIn.closeEntry();
+                    entry = zipIn.getNextEntry();
+                }
+            }
+
+            // Repack the sanitized files
+            zipDirectory(tempUnzippedDir, zipFilePath);
+        } finally {
+            // Remove the extraction directory even when unpacking or repacking failed
+            deleteDirectory(tempUnzippedDir);
+        }
+    }
+
+    /**
+     * Packs every non-directory file below {@code sourceDir} into a new ZIP archive, using the
+     * path relative to {@code sourceDir} as the entry name.
+     *
+     * @param sourceDir directory whose files are archived
+     * @param zipFilePath archive to create; an existing file is overwritten
+     * @throws IOException if the directory cannot be traversed or the archive cannot be written
+     */
+    private static void zipDirectory(Path sourceDir, Path zipFilePath) throws IOException {
+        // The stream returned by Files.walk must be closed to release its directory handles.
+        try (ZipOutputStream zos =
+                        new ZipOutputStream(new FileOutputStream(zipFilePath.toFile()));
+                Stream<Path> paths = Files.walk(sourceDir)) {
+            List<Path> files =
+                    paths.filter(path -> !Files.isDirectory(path)).collect(Collectors.toList());
+            for (Path file : files) {
+                // ZIP entry names use '/' regardless of the platform's file separator.
+                String entryName =
+                        sourceDir.relativize(file).toString().replace(File.separatorChar, '/');
+                zos.putNextEntry(new ZipEntry(entryName));
+                Files.copy(file, zos);
+                zos.closeEntry();
+            }
+        } catch (UncheckedIOException e) {
+            // Traversal errors surface unchecked from the stream; report them as IOException.
+            throw e.getCause();
+        }
+    }
+
+    /**
+     * Deletes {@code dir} together with everything below it.
+     *
+     * @param dir root of the tree to remove
+     * @throws IOException if a file or directory cannot be deleted
+     */
+    private static void deleteDirectory(Path dir) throws IOException {
+        Files.walkFileTree(
+                dir,
+                new SimpleFileVisitor<Path>() {
+                    @Override
+                    public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
+                            throws IOException {
+                        Files.delete(file);
+                        return FileVisitResult.CONTINUE;
+                    }
+
+                    @Override
+                    public FileVisitResult postVisitDirectory(Path dir, IOException exc)
+                            throws IOException {
+                        // Children were visited first, so the directory is empty by now.
+                        Files.delete(dir);
+                        return FileVisitResult.CONTINUE;
+                    }
+                });
+    }
+
     private static Path unzipAndGetMainHtml(byte[] fileBytes) throws IOException {
         Path tempDirectory = Files.createTempDirectory("unzipped_");
         try (ZipInputStream zipIn =
diff --git a/src/main/java/stirling/software/SPDF/utils/GeneralUtils.java 
b/src/main/java/stirling/software/SPDF/utils/GeneralUtils.java index 85e1bb79..8e56c8df 100644 --- a/src/main/java/stirling/software/SPDF/utils/GeneralUtils.java +++ b/src/main/java/stirling/software/SPDF/utils/GeneralUtils.java @@ -5,16 +5,21 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; +import java.net.InetAddress; import java.net.MalformedURLException; +import java.net.NetworkInterface; import java.net.URI; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.nio.file.FileVisitResult; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.SimpleFileVisitor; import java.nio.file.attribute.BasicFileAttributes; +import java.security.MessageDigest; import java.util.ArrayList; +import java.util.Enumeration; import java.util.List; import java.util.UUID; @@ -30,11 +35,6 @@ import com.fathzer.soft.javaluator.DoubleEvaluator; import io.github.pixee.security.HostValidator; import io.github.pixee.security.Urls; -import java.net.InetAddress; -import java.net.NetworkInterface; -import java.nio.charset.StandardCharsets; -import java.security.MessageDigest; -import java.util.Enumeration; public class GeneralUtils { @@ -306,7 +306,7 @@ public class GeneralUtils { } settingsYml.save(); } - + public static String generateMachineFingerprint() { try { // Get the MAC address @@ -346,7 +346,7 @@ public class GeneralUtils { return fingerprint.toString(); } catch (Exception e) { - return "GenericID"; + return "GenericID"; } } } diff --git a/test.sh b/test.sh index 62a3ff3b..36cfeb11 100644 --- a/test.sh +++ b/test.sh @@ -82,18 +82,6 @@ main() { run_tests "Stirling-PDF" "./exampleYmlFiles/docker-compose-latest.yml" - if [ $? 
-eq 0 ]; then - cd cucumber - if behave; then - passed_tests+=("Stirling-PDF-Regression") - else - failed_tests+=("Stirling-PDF-Regression") - echo "Printing docker logs of failed regression" - docker logs "Stirling-PDF" - echo "Printed docker logs of failed regression" - fi - cd .. - fi docker-compose -f "./exampleYmlFiles/docker-compose-latest.yml" down export DOCKER_ENABLE_SECURITY=true @@ -117,6 +105,18 @@ main() { docker-compose -f "./exampleYmlFiles/docker-compose-latest-security.yml" down run_tests "Stirling-PDF-Security-Fat" "./exampleYmlFiles/docker-compose-latest-fat-security.yml" + if [ $? -eq 0 ]; then + cd cucumber + if python -m behave; then + passed_tests+=("Stirling-PDF-Regression") + else + failed_tests+=("Stirling-PDF-Regression") + echo "Printing docker logs of failed regression" + docker logs "Stirling-PDF-Security-Fat" + echo "Printed docker logs of failed regression" + fi + cd .. + fi docker-compose -f "./exampleYmlFiles/docker-compose-latest-fat-security.yml" down # Report results