# GPU image that runs two services in a single container:
#   * embedding_api.py - Python embedding API, probed at http://localhost:8001/health
#   * hono-proxy       - Node proxy run with `npx tsx`, started with PORT=7860
# The generated start.sh launches the embedding API in the background and then
# hands the foreground over to the proxy.

# Source of the Node.js toolchain. The `nodejs` package in Ubuntu 22.04's apt
# repositories is Node 12, which is too old for current pnpm releases and for
# tsx, so the official binaries are copied out of this stage instead.
FROM node:20-bookworm-slim AS node-runtime

FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04

# Install Python and system dependencies.
# DEBIAN_FRONTEND is set inline so package prompts cannot hang the build and
# the variable does not leak into the runtime environment.
# Recommended packages are left enabled on purpose: curl/git rely on them for
# CA certificates and pip relies on them for wheel-building support.
# NOTE(review): python3-pip and the `python3` command use the distribution's
# default Python 3, which may not be python3.11 - confirm which interpreter
# is actually intended.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        curl \
        git \
        python3-pip \
        python3.11 \
        python3.11-venv \
    && rm -rf /var/lib/apt/lists/*

# Install Node.js and npm from the official image, then pnpm globally.
# npm and npx are plain symlinks into node_modules in the upstream image, so
# they are recreated here rather than copied.
# NOTE(review): pnpm is unpinned; consider pinning a version for reproducible builds.
COPY --from=node-runtime /usr/local/bin/node /usr/local/bin/node
COPY --from=node-runtime /usr/local/lib/node_modules /usr/local/lib/node_modules
RUN ln -sf ../lib/node_modules/npm/bin/npm-cli.js /usr/local/bin/npm \
    && ln -sf ../lib/node_modules/npm/bin/npx-cli.js /usr/local/bin/npx \
    && npm install -g pnpm

# Create user for HF Spaces (UID 1000); everything below runs unprivileged.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# Install Python dependencies with GPU support.
# The requirements file is copied on its own so this layer stays cached when
# only application code changes.
COPY --chown=user requirements_embedding.txt .
RUN pip install --user --no-cache-dir -r requirements_embedding.txt

# Copy application files
COPY --chown=user embedding_api.py .
COPY --chown=user hono-proxy ./hono-proxy
COPY --chown=user vespa-certs ./vespa-certs

# Setup Vespa certificates under ~/.vespa/<application directory>.
# NOTE(review): this bakes the certificate files into image layers; if they
# include a private key, consider supplying them at runtime instead.
RUN mkdir -p "$HOME/.vespa/il-infra.colpali-server.default" \
    && cp vespa-certs/* "$HOME/.vespa/il-infra.colpali-server.default/"

# Install Node dependencies
WORKDIR $HOME/app/hono-proxy
RUN pnpm install

# Create startup script.
# Each printf argument becomes one line of start.sh; single quotes keep every
# $(...) and $VAR unexpanded until the script runs in the container.
# The script polls the health endpoint (30 attempts, 2 s apart) instead of one
# fixed sleep, and still starts the proxy if the API never reports healthy.
# The proxy settings are passed through `env` so they apply to the proxy only,
# and `exec` replaces the shell so the proxy receives container signals.
WORKDIR $HOME/app
RUN printf '%s\n' \
        '#!/bin/bash' \
        'echo "Starting services with GPU support..."' \
        'echo "Python version: $(python3 --version)"' \
        'echo "Node version: $(node --version)"' \
        'echo "CUDA available: $(python3 -c "import torch; print(torch.cuda.is_available())")"' \
        'echo "GPU device: $(python3 -c "import torch; print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else \"No GPU\")")"' \
        '' \
        '# Start embedding API with GPU' \
        'CUDA_VISIBLE_DEVICES=0 python3 embedding_api.py &' \
        'EMBED_PID=$!' \
        'echo "Started embedding API with PID: $EMBED_PID"' \
        '' \
        '# Wait for embedding API: poll the health endpoint for up to 60 seconds' \
        'healthy=0' \
        'for attempt in $(seq 1 30); do' \
        '    if curl -fsS http://localhost:8001/health > /dev/null 2>&1; then' \
        '        healthy=1' \
        '        break' \
        '    fi' \
        '    sleep 2' \
        'done' \
        '' \
        '# Report whether the embedding API came up; continue either way' \
        'if [ "$healthy" -eq 1 ]; then' \
        '    echo "Embedding API is healthy"' \
        'else' \
        '    echo "Embedding API health check failed"' \
        'fi' \
        '' \
        '# Start Hono proxy' \
        'cd hono-proxy' \
        'exec env PORT=7860 CORS_ORIGIN="*" EMBEDDING_API_URL="http://localhost:8001" VESPA_ENDPOINT="https://f5acf536.ed2ceb09.z.vespa-app.cloud" npx tsx src/index.ts' \
        > start.sh \
    && chmod +x start.sh

EXPOSE 7860

CMD ["./start.sh"]