This commit is contained in:
Anthony Stirling
2023-06-26 18:23:06 +01:00
parent 3377af1305
commit 43b0e25bdb
5 changed files with 229 additions and 15 deletions

View File

@@ -42,16 +42,51 @@ public class PdfUtils {
private static final Logger logger = LoggerFactory.getLogger(PdfUtils.class);
public boolean hasImageInFile(PDDocument pdfDocument, String text, String pagesToCheck) throws IOException {
PDFTextStripper textStripper = new PDFTextStripper();
String pdfText = "";
if(pagesToCheck == null || pagesToCheck.equals("all")) {
pdfText = textStripper.getText(pdfDocument);
} else {
// remove whitespaces
pagesToCheck = pagesToCheck.replaceAll("\\s+", "");
String[] splitPoints = pagesToCheck.split(",");
for (String splitPoint : splitPoints) {
if (splitPoint.contains("-")) {
// Handle page ranges
String[] range = splitPoint.split("-");
int startPage = Integer.parseInt(range[0]);
int endPage = Integer.parseInt(range[1]);
for (int i = startPage; i <= endPage; i++) {
textStripper.setStartPage(i);
textStripper.setEndPage(i);
pdfText += textStripper.getText(pdfDocument);
}
} else {
// Handle individual page
int page = Integer.parseInt(splitPoint);
textStripper.setStartPage(page);
textStripper.setEndPage(page);
pdfText += textStripper.getText(pdfDocument);
}
}
}
pdfDocument.close();
return pdfText.contains(text);
}
public static boolean hasImagesOnPage(PDPage page) throws IOException {
ImageFinder imageFinder = new ImageFinder(page);
imageFinder.processPage(page);
return imageFinder.hasImages();
}
public static boolean hasTextOnPage(PdfPage page, String phrase) throws IOException {
String text = PdfTextExtractor.getTextFromPage(page, new SimpleTextExtractionStrategy());
return text.contains(phrase);
}
public static boolean hasText(PDDocument document, String phrase) throws IOException {
PDFTextStripper pdfStripper = new PDFTextStripper();
String text = pdfStripper.getText(document);
@@ -59,6 +94,86 @@ public class PdfUtils {
}
public boolean containsTextInFile(PDDocument pdfDocument, String text, String pagesToCheck) throws IOException {
PDFTextStripper textStripper = new PDFTextStripper();
String pdfText = "";
if(pagesToCheck == null || pagesToCheck.equals("all")) {
pdfText = textStripper.getText(pdfDocument);
} else {
// remove whitespaces
pagesToCheck = pagesToCheck.replaceAll("\\s+", "");
String[] splitPoints = pagesToCheck.split(",");
for (String splitPoint : splitPoints) {
if (splitPoint.contains("-")) {
// Handle page ranges
String[] range = splitPoint.split("-");
int startPage = Integer.parseInt(range[0]);
int endPage = Integer.parseInt(range[1]);
for (int i = startPage; i <= endPage; i++) {
textStripper.setStartPage(i);
textStripper.setEndPage(i);
pdfText += textStripper.getText(pdfDocument);
}
} else {
// Handle individual page
int page = Integer.parseInt(splitPoint);
textStripper.setStartPage(page);
textStripper.setEndPage(page);
pdfText += textStripper.getText(pdfDocument);
}
}
}
pdfDocument.close();
return pdfText.contains(text);
}
public boolean pageCount(PDDocument pdfDocument, int pageCount, String comparator) throws IOException {
int actualPageCount = pdfDocument.getNumberOfPages();
pdfDocument.close();
switch(comparator.toLowerCase()) {
case "greater":
return actualPageCount > pageCount;
case "equal":
return actualPageCount == pageCount;
case "less":
return actualPageCount < pageCount;
default:
throw new IllegalArgumentException("Invalid comparator. Only 'greater', 'equal', and 'less' are supported.");
}
}
public boolean pageSize(PDDocument pdfDocument, String expectedPageSize) throws IOException {
PDPage firstPage = pdfDocument.getPage(0);
PDRectangle mediaBox = firstPage.getMediaBox();
float actualPageWidth = mediaBox.getWidth();
float actualPageHeight = mediaBox.getHeight();
pdfDocument.close();
// Assumes the expectedPageSize is in the format "widthxheight", e.g. "595x842" for A4
String[] dimensions = expectedPageSize.split("x");
float expectedPageWidth = Float.parseFloat(dimensions[0]);
float expectedPageHeight = Float.parseFloat(dimensions[1]);
// Checks if the actual page size matches the expected page size
return actualPageWidth == expectedPageWidth && actualPageHeight == expectedPageHeight;
}
public static byte[] convertFromPdf(byte[] inputStream, String imageType, ImageType colorType, boolean singleImage, int DPI, String filename) throws IOException, Exception {
try (PDDocument document = PDDocument.load(new ByteArrayInputStream(inputStream))) {
PDFRenderer pdfRenderer = new PDFRenderer(document);