formatting
This commit is contained in:
@@ -48,83 +48,84 @@ public class ImageFinder extends org.apache.pdfbox.contentstream.PDFGraphicsStre
|
||||
super.processOperator(operator, operands);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
@Override
|
||||
public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
@Override
|
||||
public void drawImage(PDImage pdImage) throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clip(int windingRule) throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
@Override
|
||||
public void drawImage(PDImage pdImage) throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
@Override
|
||||
public void moveTo(float x, float y) throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void lineTo(float x, float y) throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
@Override
|
||||
public void clip(int windingRule) throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
@Override
|
||||
public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Point2D getCurrentPoint() throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
@Override
|
||||
public void moveTo(float x, float y) throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
@Override
|
||||
public void closePath() throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void endPath() throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
@Override
|
||||
public void lineTo(float x, float y) throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
@Override
|
||||
public void strokePath() throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fillPath(int windingRule) throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
@Override
|
||||
public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3)
|
||||
throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
@Override
|
||||
public void fillAndStrokePath(int windingRule) throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shadingFill(COSName shadingName) throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
@Override
|
||||
public Point2D getCurrentPoint() throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void closePath() throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void endPath() throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void strokePath() throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fillPath(int windingRule) throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fillAndStrokePath(int windingRule) throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shadingFill(COSName shadingName) throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
|
||||
// ... rest of the overridden methods
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
package stirling.software.SPDF.pdf;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
@@ -13,78 +14,80 @@ import stirling.software.SPDF.model.PDFText;
|
||||
|
||||
public class TextFinder extends PDFTextStripper {
|
||||
|
||||
private final String searchText;
|
||||
private final boolean useRegex;
|
||||
private final boolean wholeWordSearch;
|
||||
private final List<PDFText> textOccurrences = new ArrayList<>();
|
||||
private final String searchText;
|
||||
private final boolean useRegex;
|
||||
private final boolean wholeWordSearch;
|
||||
private final List<PDFText> textOccurrences = new ArrayList<>();
|
||||
|
||||
public TextFinder(String searchText, boolean useRegex, boolean wholeWordSearch) throws IOException {
|
||||
this.searchText = searchText.toLowerCase();
|
||||
this.useRegex = useRegex;
|
||||
this.wholeWordSearch = wholeWordSearch;
|
||||
setSortByPosition(true);
|
||||
}
|
||||
public TextFinder(String searchText, boolean useRegex, boolean wholeWordSearch)
|
||||
throws IOException {
|
||||
this.searchText = searchText.toLowerCase();
|
||||
this.useRegex = useRegex;
|
||||
this.wholeWordSearch = wholeWordSearch;
|
||||
setSortByPosition(true);
|
||||
}
|
||||
|
||||
private List<Integer> findOccurrencesInText(String searchText, String content) {
|
||||
List<Integer> indexes = new ArrayList<>();
|
||||
Pattern pattern;
|
||||
private List<Integer> findOccurrencesInText(String searchText, String content) {
|
||||
List<Integer> indexes = new ArrayList<>();
|
||||
Pattern pattern;
|
||||
|
||||
if (useRegex) {
|
||||
// Use regex-based search
|
||||
pattern = wholeWordSearch
|
||||
? Pattern.compile("(\\b|_|\\.)" + searchText + "(\\b|_|\\.)")
|
||||
: Pattern.compile(searchText);
|
||||
} else {
|
||||
// Use normal text search
|
||||
pattern = wholeWordSearch
|
||||
? Pattern.compile("(\\b|_|\\.)" + Pattern.quote(searchText) + "(\\b|_|\\.)")
|
||||
: Pattern.compile(Pattern.quote(searchText));
|
||||
}
|
||||
if (useRegex) {
|
||||
// Use regex-based search
|
||||
pattern =
|
||||
wholeWordSearch
|
||||
? Pattern.compile("(\\b|_|\\.)" + searchText + "(\\b|_|\\.)")
|
||||
: Pattern.compile(searchText);
|
||||
} else {
|
||||
// Use normal text search
|
||||
pattern =
|
||||
wholeWordSearch
|
||||
? Pattern.compile(
|
||||
"(\\b|_|\\.)" + Pattern.quote(searchText) + "(\\b|_|\\.)")
|
||||
: Pattern.compile(Pattern.quote(searchText));
|
||||
}
|
||||
|
||||
Matcher matcher = pattern.matcher(content);
|
||||
while (matcher.find()) {
|
||||
indexes.add(matcher.start());
|
||||
}
|
||||
return indexes;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void writeString(String text, List<TextPosition> textPositions) {
|
||||
for (Integer index : findOccurrencesInText(searchText, text.toLowerCase())) {
|
||||
if (index + searchText.length() <= textPositions.size()) {
|
||||
// Initial values based on the first character
|
||||
TextPosition first = textPositions.get(index);
|
||||
float minX = first.getX();
|
||||
float minY = first.getY();
|
||||
float maxX = first.getX() + first.getWidth();
|
||||
float maxY = first.getY() + first.getHeight();
|
||||
Matcher matcher = pattern.matcher(content);
|
||||
while (matcher.find()) {
|
||||
indexes.add(matcher.start());
|
||||
}
|
||||
return indexes;
|
||||
}
|
||||
|
||||
// Loop over the rest of the characters and adjust bounding box values
|
||||
for (int i = index; i < index + searchText.length(); i++) {
|
||||
TextPosition position = textPositions.get(i);
|
||||
minX = Math.min(minX, position.getX());
|
||||
minY = Math.min(minY, position.getY());
|
||||
maxX = Math.max(maxX, position.getX() + position.getWidth());
|
||||
maxY = Math.max(maxY, position.getY() + position.getHeight());
|
||||
}
|
||||
@Override
|
||||
protected void writeString(String text, List<TextPosition> textPositions) {
|
||||
for (Integer index : findOccurrencesInText(searchText, text.toLowerCase())) {
|
||||
if (index + searchText.length() <= textPositions.size()) {
|
||||
// Initial values based on the first character
|
||||
TextPosition first = textPositions.get(index);
|
||||
float minX = first.getX();
|
||||
float minY = first.getY();
|
||||
float maxX = first.getX() + first.getWidth();
|
||||
float maxY = first.getY() + first.getHeight();
|
||||
|
||||
textOccurrences.add(new PDFText(
|
||||
getCurrentPageNo() - 1,
|
||||
minX,
|
||||
minY,
|
||||
maxX,
|
||||
maxY,
|
||||
text
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
// Loop over the rest of the characters and adjust bounding box values
|
||||
for (int i = index; i < index + searchText.length(); i++) {
|
||||
TextPosition position = textPositions.get(i);
|
||||
minX = Math.min(minX, position.getX());
|
||||
minY = Math.min(minY, position.getY());
|
||||
maxX = Math.max(maxX, position.getX() + position.getWidth());
|
||||
maxY = Math.max(maxY, position.getY() + position.getHeight());
|
||||
}
|
||||
|
||||
public List<PDFText> getTextLocations(PDDocument document) throws Exception {
|
||||
this.getText(document);
|
||||
System.out.println("Found " + textOccurrences.size() + " occurrences of '" + searchText + "' in the document.");
|
||||
textOccurrences.add(
|
||||
new PDFText(getCurrentPageNo() - 1, minX, minY, maxX, maxY, text));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return textOccurrences;
|
||||
}
|
||||
public List<PDFText> getTextLocations(PDDocument document) throws Exception {
|
||||
this.getText(document);
|
||||
System.out.println(
|
||||
"Found "
|
||||
+ textOccurrences.size()
|
||||
+ " occurrences of '"
|
||||
+ searchText
|
||||
+ "' in the document.");
|
||||
|
||||
}
|
||||
return textOccurrences;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user