@@ -22,7 +22,7 @@ RUN echo "deb http://http.us.debian.org/debian stable non-free" >/etc/apt/source
2222 # image processing, djvu
2323 mdbtools djvulibre-bin \
2424 libtiff5-dev \
25- libtiff-tools ghostscript librsvg2-bin jbig2dec \
25+ libtiff-tools ghostscript librsvg2-bin jbig2dec libopenjp2-7-dev \
2626 pst-utils libgif-dev \
2727 # ## tesseract
2828 tesseract-ocr-eng \
@@ -118,10 +118,11 @@ RUN groupadd -g 1000 -r app \
118118
119119# Download the ftm-typepredict model
120120RUN mkdir /models/ && \
121- curl -o "/models/model_type_prediction.ftz" "https://public.data.occrp.org/develop/models/types/type-08012020-7a69d1b.ftz"
121+ curl --keepalive-time 2 -o "/models/model_type_prediction.ftz" "https://public.data.occrp.org/develop/models/types/type-08012020-7a69d1b.ftz"
122122
123123COPY requirements.txt /tmp/
124- RUN pip3 install --no-cache-dir --no-binary "tesserocr" -r /tmp/requirements.txt
124+ RUN pip install --upgrade pip setuptools
125+ RUN pip3 install --no-cache-dir --no-binary "tesserocr" --no-binary "Pillow" -r /tmp/requirements.txt
125126
126127# Install spaCy models
127128RUN python3 -m spacy download en_core_web_sm \
@@ -147,11 +148,10 @@ RUN pip install --no-cache-dir --config-settings editable_mode=compat --use-pep5
147148RUN chown -R app:app /ingestors
148149
149150ENV ARCHIVE_TYPE=file \
150- ARCHIVE_PATH=/data \
151- FTM_STORE_URI=postgresql://aleph:aleph@postgres/aleph \
152- REDIS_URL=redis://redis:6379/0 \
153- TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata \
154- LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1
151+ ARCHIVE_PATH=/data \
152+ FTM_STORE_URI=postgresql://aleph:aleph@postgres/aleph \
153+ REDIS_URL=redis://redis:6379/0 \
154+ TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata
155155
156156# USER app
157157CMD ingestors process
0 commit comments