File size: 2,713 Bytes
35a0354
5dfbe50
35a0354
5dfbe50
35a0354
 
 
04bb55c
 
5dfbe50
 
9fc7504
5dfbe50
 
04bb55c
 
 
 
5dfbe50
 
 
 
 
 
 
04bb55c
 
 
 
 
5dfbe50
35a0354
04bb55c
5dfbe50
04bb55c
5dfbe50
04bb55c
5dfbe50
04bb55c
 
 
5dfbe50
04bb55c
5dfbe50
04bb55c
35a0354
 
04bb55c
35a0354
 
04bb55c
35a0354
 
04bb55c
 
 
 
35a0354
04bb55c
 
 
 
 
 
 
 
 
 
 
 
5dfbe50
 
 
04bb55c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# syntax=docker/dockerfile:1
FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04

# Install Python and system dependencies.
# NOTE(review): on Ubuntu 22.04, `python3-pip` and the bare `python3` command
# both resolve to Python 3.10 — the `python3.11` package here does NOT become
# the default interpreter, so the pip install and start.sh below run on 3.10.
# Confirm which interpreter version is actually intended.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    git \
    nodejs \
    npm \
    python3-pip \
    python3.11 \
    python3.11-venv \
    && rm -rf /var/lib/apt/lists/*

# Install pnpm globally (needs root; must precede the USER switch)
RUN npm install -g pnpm

# Create the non-root user HF Spaces expects (fixed UID 1000)
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# Copy application files, owned by the non-root user
COPY --chown=user embedding_api.py .
COPY --chown=user requirements_embedding.txt .
COPY --chown=user hono-proxy ./hono-proxy
COPY --chown=user vespa-certs ./vespa-certs

# Install Python dependencies with GPU support (user-level site-packages,
# which is on PATH via ~/.local/bin above)
RUN pip install --user --no-cache-dir -r requirements_embedding.txt

# Install Node dependencies for the proxy
WORKDIR $HOME/app/hono-proxy
RUN pnpm install

# Place Vespa mTLS certificates where the Vespa client expects them
RUN mkdir -p $HOME/.vespa/il-infra.colpali-server.default && \
    cp ../vespa-certs/* $HOME/.vespa/il-infra.colpali-server.default/

# Startup script, written via heredoc (quoted delimiter: nothing is expanded
# at build time — all $(...) and $VARs evaluate when the container starts).
# It launches the embedding API in the background, waits, best-effort health
# checks it, then execs the Hono proxy in the foreground.
WORKDIR $HOME/app
COPY --chown=user --chmod=755 <<'EOF' ./start.sh
#!/bin/bash
echo "Starting services with GPU support..."
echo "Python version: $(python3 --version)"
echo "Node version: $(node --version)"
echo "CUDA available: $(python3 -c "import torch; print(torch.cuda.is_available())")"
echo "GPU device: $(python3 -c "import torch; print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else \"No GPU\")")"

# Start embedding API with GPU
CUDA_VISIBLE_DEVICES=0 python3 embedding_api.py &
EMBED_PID=$!
echo "Started embedding API with PID: $EMBED_PID"

# Wait for embedding API
sleep 15

# Check if embedding API is running (best-effort: a failed check is logged
# but does not abort startup)
if curl -f http://localhost:8001/health; then
    echo "Embedding API is healthy"
else
    echo "Embedding API health check failed"
fi

# Start Hono proxy; exec so it replaces the shell as PID 1 and receives
# SIGTERM from `docker stop` instead of being orphaned behind bash
cd hono-proxy
exec env PORT=7860 CORS_ORIGIN="*" EMBEDDING_API_URL="http://localhost:8001" VESPA_ENDPOINT="https://f5acf536.ed2ceb09.z.vespa-app.cloud" npx tsx src/index.ts
EOF

# Documentation of the service port (publishing still requires -p/Spaces config)
EXPOSE 7860

# Probe the embedding API's known /health endpoint (the only health route
# SOURCE demonstrates). Generous start period: model load on GPU is slow.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
    CMD curl -fsS http://localhost:8001/health || exit 1

CMD ["./start.sh"]