File size: 1,369 Bytes
19190c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c248a2a
 
 
 
 
 
19190c2
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# Image for a Python application started with `python app.py`, bundling the
# Tesseract OCR engine (English data) and the GL library OpenCV links against.
#
# Base image: Debian "buster" is end-of-life and its package repositories are
# no longer served from the regular mirrors, so `apt-get update` fails on
# buster-based images. Bookworm is used instead; Python stays at 3.9 so the
# interpreter that requirements.txt is resolved against does not change.
FROM python:3.9-slim-bookworm

# Where Tesseract looks for language data. Debian installs it in a versioned
# directory (Tesseract 5 on bookworm); there is no unversioned
# /usr/share/tesseract-ocr/tessdata path, so pointing there breaks every OCR call.
# Declared before the package install so that step can verify the path.
ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata/

# Set working directory (created automatically if missing)
WORKDIR /app

# System dependencies, one per line and sorted for easy diffing:
# - build-essential: compiler toolchain for Python packages that build from source.
# - libgl1: provides libGL.so.1 for OpenCV; replaces 'libgl1-mesa-glx', which
#   no longer exists in bookworm.
# - tesseract-ocr / tesseract-ocr-eng: the OCR engine and its English data.
# The final `test` fails the build immediately if TESSDATA_PREFIX does not
# contain the English data, instead of failing at the first OCR request.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        libgl1 \
        tesseract-ocr \
        tesseract-ocr-eng && \
    rm -rf /var/lib/apt/lists/* && \
    test -f "${TESSDATA_PREFIX}eng.traineddata"

# Dedicated unprivileged account with a fixed numeric UID/GID so runtimes can
# verify the container is non-root. /app itself is handed over as well, so the
# application can create files next to its code.
RUN groupadd --gid 1000 app && \
    useradd --uid 1000 --gid app --create-home --shell /usr/sbin/nologin app && \
    chown app:app /app

# Copy only the requirements file first so the dependency layer stays cached
# until requirements.txt itself changes. Installed as root into the system
# site-packages, which the unprivileged user can read.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code, owned by the runtime user
COPY --chown=app:app . .

# Runtime settings:
# - PYTHONUNBUFFERED: flush stdout/stderr immediately so container logs are live.
# - GRADIO_SERVER_NAME: Gradio listens on 127.0.0.1 by default, which is not
#   reachable through a published port. NOTE(review): assumes app.py launches
#   Gradio without an explicit server_name; an explicit argument overrides this.
# PATH is deliberately left as the base image defines it: tesseract installs
# into /usr/bin, which is already on PATH, and prepending /usr/bin would put it
# ahead of /usr/local/bin, where the base image keeps its Python interpreter.
ENV PYTHONUNBUFFERED=1 \
    GRADIO_SERVER_NAME=0.0.0.0

# Drop root privileges for everything that runs from here on
USER app

# Port Gradio listens on by default (documentation only; does not publish it)
EXPOSE 7860

# Command to run the application (exec form, so it receives stop signals directly)
CMD ["python", "app.py"]