show javascript, bug fixes

This commit is contained in:
Anthony Stirling
2023-08-06 21:56:02 +01:00
parent 379791a326
commit 54f53be5b5
14 changed files with 386 additions and 58 deletions

View File

@@ -0,0 +1,141 @@
package stirling.software.SPDF.controller.api.other;
import java.io.IOException;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RequestPart;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.GeneralUtils;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.WebResponseUtils;
import org.apache.pdfbox.pdmodel.*;
import org.apache.pdfbox.pdmodel.common.*;
import org.apache.pdfbox.pdmodel.PDPageContentStream.*;
import org.springframework.web.bind.annotation.*;
import org.springframework.http.*;
import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.*;
import io.swagger.v3.oas.annotations.media.*;
import io.swagger.v3.oas.annotations.parameters.*;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.text.TextPosition;
import org.apache.tomcat.util.http.ResponseUtil;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.ArrayList;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.itextpdf.io.font.constants.StandardFonts;
import com.itextpdf.kernel.font.PdfFont;
import com.itextpdf.kernel.font.PdfFontFactory;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfStream;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.PdfArray;
import com.itextpdf.kernel.pdf.PdfDictionary;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfName;
import com.itextpdf.kernel.pdf.PdfObject;
import com.itextpdf.kernel.pdf.PdfPage;
import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
import com.itextpdf.layout.Canvas;
import com.itextpdf.layout.element.Paragraph;
import com.itextpdf.layout.properties.TextAlignment;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.media.Schema;
import java.io.*;
import org.apache.pdfbox.pdmodel.*;
import org.apache.pdfbox.text.*;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.*;
import io.swagger.v3.oas.annotations.media.Schema;
import org.springframework.http.ResponseEntity;
@RestController
@Tag(name = "Other", description = "Other APIs")
public class ShowJavascript {
private static final Logger logger = LoggerFactory.getLogger(ShowJavascript.class);
@PostMapping(consumes = "multipart/form-data", value = "/show-javascript")
@Operation(summary = "Extract header from PDF file", description = "This endpoint accepts a PDF file and attempts to extract its title or header based on heuristics. Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> extractHeader(
@RequestPart(value = "fileInput") @Parameter(description = "The input PDF file from which the javascript is to be extracted.", required = true) MultipartFile inputFile)
throws Exception {
try (
PdfDocument itextDoc = new PdfDocument(new PdfReader(inputFile.getInputStream()))
) {
String name = "";
String script = "";
String entryName = "File: "+inputFile.getOriginalFilename() + ", Script: ";
//Javascript
PdfDictionary namesDict = itextDoc.getCatalog().getPdfObject().getAsDictionary(PdfName.Names);
if (namesDict != null) {
PdfDictionary javascriptDict = namesDict.getAsDictionary(PdfName.JavaScript);
if (javascriptDict != null) {
PdfArray namesArray = javascriptDict.getAsArray(PdfName.Names);
for (int i = 0; i < namesArray.size(); i += 2) {
if(namesArray.getAsString(i) != null)
name = namesArray.getAsString(i).toString();
PdfObject jsCode = namesArray.get(i+1);
if (jsCode instanceof PdfStream) {
byte[] jsCodeBytes = ((PdfStream)jsCode).getBytes();
String jsCodeStr = new String(jsCodeBytes, StandardCharsets.UTF_8);
script = "//" + entryName + name + "\n" +jsCodeStr;
} else if (jsCode instanceof PdfDictionary) {
// If the JS code is in a dictionary, you'll need to know the key to use.
// Assuming the key is PdfName.JS:
PdfStream jsCodeStream = ((PdfDictionary)jsCode).getAsStream(PdfName.JS);
if (jsCodeStream != null) {
byte[] jsCodeBytes = jsCodeStream.getBytes();
String jsCodeStr = new String(jsCodeBytes, StandardCharsets.UTF_8);
script = "//" + entryName + name + "\n" +jsCodeStr;
}
}
}
}
}
if(script.equals("")) {
script = "PDF '" +inputFile.getOriginalFilename() + "' does not contain Javascript";
}
return WebResponseUtils.bytesToWebResponse(script.getBytes(), name + ".js");
}
}
}

View File

@@ -43,6 +43,7 @@ import com.itextpdf.kernel.pdf.layer.PdfOCProperties;
import com.itextpdf.kernel.xmp.XMPException;
import com.itextpdf.kernel.xmp.XMPMeta;
import com.itextpdf.kernel.xmp.XMPMetaFactory;
import com.itextpdf.kernel.xmp.options.SerializeOptions;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
@@ -193,11 +194,13 @@ public class GetInfoOnPDF {
if (embeddedFiles != null) {
PdfArray namesArray = embeddedFiles.getAsArray(PdfName.Names);
for (int i = 0; i < namesArray.size(); i += 2) {
ObjectNode embeddedFileNode = objectMapper.createObjectNode();
embeddedFileNode.put("Name", namesArray.getAsString(i).toString());
// Add other details if required
embeddedFilesArray.add(embeddedFileNode);
if(namesArray != null) {
for (int i = 0; i < namesArray.size(); i += 2) {
ObjectNode embeddedFileNode = objectMapper.createObjectNode();
embeddedFileNode.put("Name", namesArray.getAsString(i).toString());
// Add other details if required
embeddedFilesArray.add(embeddedFileNode);
}
}
}
@@ -224,15 +227,33 @@ public class GetInfoOnPDF {
if (namesDict != null) {
PdfDictionary javascriptDict = namesDict.getAsDictionary(PdfName.JavaScript);
if (javascriptDict != null) {
PdfArray namesArray = javascriptDict.getAsArray(PdfName.Names);
for (int i = 0; i < namesArray.size(); i += 2) {
ObjectNode jsNode = objectMapper.createObjectNode();
jsNode.put("JS Name", namesArray.getAsString(i).toString());
jsNode.put("JS Code", namesArray.getAsString(i + 1).toString());
if(namesArray.getAsString(i) != null)
jsNode.put("JS Name", namesArray.getAsString(i).toString());
// Here we check for a PdfStream object and retrieve the JS code from it
PdfObject jsCode = namesArray.get(i+1);
if (jsCode instanceof PdfStream) {
byte[] jsCodeBytes = ((PdfStream)jsCode).getBytes();
String jsCodeStr = new String(jsCodeBytes, StandardCharsets.UTF_8);
jsNode.put("JS Script Length", jsCodeStr.length());
} else if (jsCode instanceof PdfDictionary) {
// If the JS code is in a dictionary, you'll need to know the key to use.
// Assuming the key is PdfName.JS:
PdfStream jsCodeStream = ((PdfDictionary)jsCode).getAsStream(PdfName.JS);
if (jsCodeStream != null) {
byte[] jsCodeBytes = jsCodeStream.getBytes();
String jsCodeStr = new String(jsCodeBytes, StandardCharsets.UTF_8);
jsNode.put("JS Script Character Length", jsCodeStr.length());
}
}
javascriptArray.add(jsNode);
}
}
}
other.set("JavaScript", javascriptArray);
@@ -305,16 +326,15 @@ public class GetInfoOnPDF {
}
other.set("Bookmarks/Outline/TOC", bookmarksArray);
byte[] xmpBytes = itextDoc.getXmpMetadata();
String xmpString = null;
try {
byte[] xmpBytes = itextDoc.getXmpMetadata();
if (xmpBytes != null) {
if (xmpBytes != null) {
try {
XMPMeta xmpMeta = XMPMetaFactory.parseFromBuffer(xmpBytes);
xmpString = xmpMeta.dumpObject();
xmpString = new String(XMPMetaFactory.serializeToBuffer(xmpMeta, new SerializeOptions()));
} catch (XMPException e) {
e.printStackTrace();
}
} catch (XMPException e) {
e.printStackTrace();
}
other.put("XMPMetadata", xmpString);
@@ -416,8 +436,10 @@ public class GetInfoOnPDF {
for (PdfAnnotation annotation : annotations) {
if (annotation instanceof PdfLinkAnnotation) {
PdfLinkAnnotation linkAnnotation = (PdfLinkAnnotation) annotation;
String uri = linkAnnotation.getAction().toString();
uniqueURIs.add(uri); // Add to set to ensure uniqueness
if(linkAnnotation != null && linkAnnotation.getAction() != null) {
String uri = linkAnnotation.getAction().toString();
uniqueURIs.add(uri); // Add to set to ensure uniqueness
}
}
}

View File

@@ -1,6 +1,7 @@
package stirling.software.SPDF.controller.api.security;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.PDPageTree;
@@ -21,7 +22,9 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.media.Schema;
import stirling.software.SPDF.utils.WebResponseUtils;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSString;
import java.io.IOException;
import java.io.InputStream;
@@ -75,8 +78,24 @@ public class SanitizeController {
return WebResponseUtils.pdfDocToWebResponse(document, inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_sanitized.pdf");
}
}
private void sanitizeJavaScript(PDDocument document) throws IOException {
for (PDPage page : document.getPages()) {
private void sanitizeJavaScript(PDDocument document) throws IOException {
// Get the root dictionary (catalog) of the PDF
PDDocumentCatalog catalog = document.getDocumentCatalog();
// Get the Names dictionary
COSDictionary namesDict = (COSDictionary) catalog.getCOSObject().getDictionaryObject(COSName.NAMES);
if (namesDict != null) {
// Get the JavaScript dictionary
COSDictionary javaScriptDict = (COSDictionary) namesDict.getDictionaryObject(COSName.getPDFName("JavaScript"));
if (javaScriptDict != null) {
// Remove the JavaScript dictionary
namesDict.removeItem(COSName.getPDFName("JavaScript"));
}
}
for (PDPage page : document.getPages()) {
for (PDAnnotation annotation : page.getAnnotations()) {
if (annotation instanceof PDAnnotationWidget) {
PDAnnotationWidget widget = (PDAnnotationWidget) annotation;
@@ -89,13 +108,28 @@ public class SanitizeController {
PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm();
if (acroForm != null) {
for (PDField field : acroForm.getFields()) {
if (field.getActions().getF() instanceof PDActionJavaScript) {
field.getActions().setF(null);
}
PDFormFieldAdditionalActions actions = field.getActions();
if(actions != null) {
if (actions.getC() instanceof PDActionJavaScript) {
actions.setC(null);
}
if (actions.getF() instanceof PDActionJavaScript) {
actions.setF(null);
}
if (actions.getK() instanceof PDActionJavaScript) {
actions.setK(null);
}
if (actions.getV() instanceof PDActionJavaScript) {
actions.setV(null);
}
}
}
}
}
}
}
private void sanitizeEmbeddedFiles(PDDocument document) {
PDPageTree allPages = document.getPages();

View File

@@ -31,7 +31,15 @@ public class OtherWebController {
modelAndView.addObject("currentPage", "extract-image-scans");
return modelAndView;
}
@GetMapping("/show-javascript")
@Hidden
public String extractJavascriptForm(Model model) {
model.addAttribute("currentPage", "show-javascript");
return "other/show-javascript";
}
@GetMapping("/add-page-numbers")
@Hidden
public String addPageNumbersForm(Model model) {