Compare commits

...

2 Commits

Author SHA1 Message Date
Anthony Stirling
e0c83adc9f fix docker merge 2025-02-07 12:55:39 +00:00
Anthony Stirling
1eee6ee356 use libreoffice for pdfa 2025-02-06 15:55:22 +00:00
6 changed files with 11 additions and 10 deletions

View File

@@ -56,13 +56,15 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et
openssl-dev \
openjdk21-jre \
# Doc conversion
gcompat \
libc6-compat \
libreoffice \
# pdftohtml
poppler-utils \
# OCR MY PDF (unpaper for descew and other advanced features)
tesseract-ocr-data-eng \
# CV
py3-opencv \
py3-opencv \
# python3/pip
python3 \
py3-pip && \

View File

@@ -57,6 +57,8 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et
openssl-dev \
openjdk21-jre \
# Doc conversion
gcompat \
libc6-compat \
libreoffice \
# pdftohtml
poppler-utils \
@@ -65,7 +67,7 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et
tesseract-ocr-data-eng \
font-terminus font-dejavu font-noto font-noto-cjk font-awesome font-noto-extra \
# CV
py3-opencv \
py3-opencv \
# python3/pip
python3 \
py3-pip && \

View File

@@ -265,9 +265,6 @@ public class EndpointConfiguration {
// Pdftohtml dependent endpoints
addEndpointToGroup("Pdftohtml", "pdf-to-html");
addEndpointToGroup("Pdftohtml", "pdf-to-markdown");
// disabled for now while we resolve issues
disableEndpoint("pdf-to-pdfa");
}
private void processEnvironmentConfigs() {

View File

@@ -73,8 +73,8 @@ public class ConvertPDFToPDFA {
// Determine PDF/A filter based on requested format
String pdfFilter =
"pdfa".equals(outputFormat)
? "writer_pdf_Export:{'SelectPdfVersion':{'Value':'2'}}:writer_pdf_Export"
: "writer_pdf_Export:{'SelectPdfVersion':{'Value':'1'}}:writer_pdf_Export";
? "pdf:writer_pdf_Export:{\"SelectPdfVersion\":{\"type\":\"long\",\"value\":\"2\"}}"
: "pdf:writer_pdf_Export:{\"SelectPdfVersion\":{\"type\":\"long\",\"value\":\"1\"}}";
// Prepare LibreOffice command
List<String> command =
@@ -84,7 +84,7 @@ public class ConvertPDFToPDFA {
"--headless",
"--nologo",
"--convert-to",
"pdf:" + pdfFilter,
pdfFilter,
"--outdir",
tempOutputDir.toString(),
tempInputFile.toString()));

View File

@@ -1185,7 +1185,7 @@ changeMetadata.submit=Change
#pdfToPDFA
pdfToPDFA.title=PDF To PDF/A
pdfToPDFA.header=PDF To PDF/A
pdfToPDFA.credit=This service uses qpdf for PDF/A conversion
pdfToPDFA.credit=This service uses libreoffice for PDF/A conversion
pdfToPDFA.submit=Convert
pdfToPDFA.tip=Currently does not work for multiple inputs at once
pdfToPDFA.outputFormat=Output format

View File

@@ -1185,7 +1185,7 @@ changeMetadata.submit=Change
#pdfToPDFA
pdfToPDFA.title=PDF To PDF/A
pdfToPDFA.header=PDF To PDF/A
pdfToPDFA.credit=This service uses qpdf for PDF/A conversion
pdfToPDFA.credit=This service uses libreoffice for PDF/A conversion
pdfToPDFA.submit=Convert
pdfToPDFA.tip=Currently does not work for multiple inputs at once
pdfToPDFA.outputFormat=Output format