init docker
This commit is contained in:
@@ -1,31 +1,39 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copy the original tesseract-ocr files to the volume directory without overwriting existing files

#######################################
# Copy every entry of a source directory into a destination directory,
# keeping any destination file that already exists.
# Arguments:
#   $1 - source directory; skipped silently when missing or empty
#   $2 - destination directory
# Returns:
#   0 always; the copy is best-effort.
#######################################
copy_missing_tessdata() {
  local src_dir=$1
  local dest_dir=$2
  local -a entries

  [[ -d "$src_dir" ]] || return 0

  entries=("$src_dir"/*)
  # An unmatched glob stays literal, so probe the first entry to detect an
  # empty source directory instead of handing a literal "*" to cp.
  [[ -e "${entries[0]}" || -L "${entries[0]}" ]] || return 0

  # -n keeps files that are already present in the destination. Depending on
  # the cp implementation, skipped files may be reported on stderr or through
  # a non-zero status, so both are deliberately ignored here.
  cp -rn -- "${entries[@]}" "$dest_dir" 2>/dev/null || true
}

echo "Copying original files without overwriting existing files"
mkdir -p /usr/share/tessdata
copy_missing_tessdata /usr/share/tessdata-original /usr/share/tessdata

# Copy additional tessdata if available. Order matters: with no-clobber
# copies, the first source that provides a file wins.
copy_missing_tessdata /usr/share/tesseract-ocr/4.00/tessdata /usr/share/tessdata
copy_missing_tessdata /usr/share/tesseract-ocr/5/tessdata /usr/share/tessdata
|
||||
|
||||
#######################################
# Download the traineddata file for every language listed in TESSERACT_LANGS.
# Globals:
#   TESSERACT_LANGS (read) - comma- and/or whitespace-separated language codes
# Arguments:
#   $1 - directory receiving the files (default: /usr/share/tessdata)
# Outputs:
#   Progress on stdout; skipped codes and failed downloads on stderr.
# Returns:
#   0 always; a failure for one language does not stop the others.
#######################################
install_tesseract_langs() {
  local dest_dir=${1:-/usr/share/tessdata}
  local pattern='^[a-zA-Z]{2,4}(_[a-zA-Z]{2,4})?$'
  local base_url='https://github.com/tesseract-ocr/tessdata/raw/main'
  local -a requested=()
  local lang_code target partial

  # Convert comma-separated values to a whitespace-separated list and split it
  # with read, which (unlike an unquoted expansion) never glob-expands.
  # read -d '' reports non-zero at end of input, hence the || true.
  read -r -d '' -a requested <<<"${TESSERACT_LANGS//,/ }" || true

  # Log available languages
  echo "Currently installed languages:"
  tesseract --list-langs

  echo "Requested additional languages: ${requested[*]}"

  # The loop variable is intentionally not named LANG: assigning to LANG
  # changes the locale setting, and an exported LANG would be inherited by
  # every command started afterwards.
  for lang_code in "${requested[@]}"; do
    # Only plain language codes may reach the URL and the file name.
    if [[ ! $lang_code =~ $pattern ]]; then
      printf 'Skipping invalid language code: %s\n' "$lang_code" >&2
      continue
    fi

    target="${dest_dir}/${lang_code}.traineddata"
    # Keep files that are already in place (bundled, user-supplied, or fetched
    # on a previous start) instead of downloading them again.
    if [[ -s "$target" ]]; then
      echo "Language pack for ${lang_code} already present, skipping download"
      continue
    fi

    # Download under a temporary name and rename on success so an interrupted
    # transfer never leaves a truncated .traineddata file behind.
    partial="${target}.part"
    if ! {
      wget -O "$partial" "${base_url}/${lang_code}.traineddata" \
        && [[ -s "$partial" ]] \
        && mv -f -- "$partial" "$target"
    }; then
      rm -f -- "$partial"
      echo "Failed to download language pack for ${lang_code}" >&2
    fi
  done
}

# Check if TESSERACT_LANGS environment variable is set and is not empty
if [[ -n "${TESSERACT_LANGS:-}" ]]; then
  install_tesseract_langs
fi
|
||||
|
||||
# Hand over to the main init script, forwarding all arguments. exec replaces
# this shell with the init script, so when this file is the container
# entrypoint the init script receives signals such as SIGTERM directly and its
# exit status is reported unchanged.
exec /scripts/init-without-ocr.sh "$@"
|
||||
Reference in New Issue
Block a user