docker and ocr updates

This commit is contained in:
Anthony Stirling
2023-12-10 22:02:30 +00:00
parent 8b55ffff96
commit 59c7978330
28 changed files with 100 additions and 110 deletions

View File

@@ -1,33 +1,35 @@
# Main stage
# Builds the shared `base` stage: a Java 17 runtime plus LibreOffice, Tesseract,
# OCRmyPDF, Ghostscript and a set of pinned Python packages.
# NOTE(review): two consecutive FROM lines both declare the stage name `base`.
# This looks like the removed and the added line of a diff shown together
# (the bellsoft image replaced by ubuntu) - confirm which one is intended,
# since only one stage may carry a given name.
FROM bellsoft/liberica-openjdk-debian:17 AS base
# NOTE(review): `ubuntu:latest` is a floating tag; consider pinning a release for reproducible builds.
FROM ubuntu:latest AS base
# software-properties-common provides add-apt-repository, which the next step uses.
RUN apt-get update && apt-get install -y --no-install-recommends software-properties-common gnupg2
# Register the alex-p/tesseract-ocr5 PPA and install tesseract-ocr.
# NOTE(review): this step uses `apt` while every other step uses `apt-get`; it also relies on
# package lists refreshed in a different layer - confirm this is intentional.
RUN add-apt-repository ppa:alex-p/tesseract-ocr5 && apt install -y --no-install-recommends tesseract-ocr
# Single layer that installs the OS packages, then the Python packages, then
# removes the apt lists and moves the stock Tesseract data aside.
RUN apt-get update && \
apt-get install -y --no-install-recommends \
openjdk-17-jre \
libreoffice-core-nogui \
libreoffice-common \
libreoffice-writer-nogui \
libreoffice-calc-nogui \
libreoffice-impress-nogui \
python3-uno \
ghostscript \
python3-pip \
unoconv \
pngquant \
unpaper \
ocrmypdf && \
# NOTE(review): `ocrmypdf` and `unoconv` each appear twice in this list, and the
# line above already closes the apt-get command with `&&`, so the next two lines
# would be run as a command rather than installed as packages. Presumably old and
# new diff lines shown together - confirm which variant is current.
ocrmypdf \
unoconv && \
pip install --upgrade pip && \
# NOTE(review): ocrmypdf is upgraded twice below (once with --user, once without),
# and the pinned-package line between them ends in `\` without `&&`, so the
# following `pip install ...` text becomes arguments of that command.
# Presumably diff residue as well - verify against the actual Dockerfile.
pip install --no-cache-dir --user --upgrade ocrmypdf && \
pip install --no-cache-dir --upgrade pillow==10.0.1 reportlab==3.6.13 wheel==0.38.1 setuptools==65.5.1 pyjwt==2.4.0 cryptography==39.0.1 \
pip install --no-cache-dir --upgrade ocrmypdf && \
# Pinned Python dependencies; opencv-python-headless is the only unpinned entry.
pip install --no-cache-dir \
pillow==10.0.1 \
lxml==4.6.5 \
reportlab==3.6.13 \
setuptools==65.5.1 \
pikepdf==4.4.1 \
wheel==0.38.1 \
cryptography==39.0.1 \
opencv-python-headless && \
# Drop the apt package lists in the same layer that created them.
rm -rf /var/lib/apt/lists/* && \
# Copy the stock /usr/share/tesseract-ocr content to tesseract-ocr-original and
# delete the original directory.
# NOTE(review): presumably so the original path can be supplied at runtime (e.g. a mounted volume) - confirm.
mkdir /usr/share/tesseract-ocr-original && \
cp -r /usr/share/tesseract-ocr/* /usr/share/tesseract-ocr-original && \
rm -rf /usr/share/tesseract-ocr
# Python packages stage
FROM base AS python-packages
# Install build tools and Python libraries
@@ -43,32 +45,4 @@ RUN apt-get update && \
# Final stage: start from the base stage and take over /usr/local from the
# python-packages stage.
FROM base
COPY --from=python-packages /usr/local /usr/local
# Install wkhtmltopdf (release 0.12.6.1-3, bullseye build) together with the
# font and X11 libraries it needs.
# Set a default value for TARGETARCH if it's not provided
ARG TARGETARCH=arm64
# Everything happens in one layer so the apt lists and the downloaded .deb are
# removed in the same layer that created them.
# The package architecture is resolved once into wk_arch and reused for the
# download, install and cleanup: arm64 gets the arm64 build, anything else
# falls back to the amd64 build. Previously the fallback downloaded the amd64
# file but then looked for a file named after $TARGETARCH, which did not exist.
# ca-certificates is listed explicitly because wget only recommends it and
# --no-install-recommends is in effect; the download is over HTTPS.
RUN case "$TARGETARCH" in \
        arm64) wk_arch=arm64 ;; \
        *) wk_arch=amd64 ;; \
    esac && \
    wk_deb="wkhtmltox_0.12.6.1-3.bullseye_${wk_arch}.deb" && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        fontconfig \
        libfontconfig1 \
        libfreetype6 \
        libx11-6 \
        libxext6 \
        libxrender1 \
        wget \
        xfonts-75dpi \
        xfonts-base && \
    wget "https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6.1-3/${wk_deb}" && \
    dpkg -i "${wk_deb}" && \
    rm "${wk_deb}" && \
    rm -rf /var/lib/apt/lists/*