Spaces:
Running
on
L40S
Running
on
L40S
File size: 2,133 Bytes
ee86994 2d3be92 ebea37f ee86994 7e4b5d9 ebea37f ee86994 ebea37f ee86994 ebea37f ee86994 ebea37f ee86994 ebea37f ee86994 ebea37f 8934cd2 cf98dc6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
#!/bin/bash
# Entrypoint, part 1: launch the vLLM OpenAI-compatible API server in the
# background. Its PID is stored in VLLM_PID and its combined stdout/stderr is
# written to $HOME/app/vllm.log.
echo "=== Starting NuMarkdown-8B-Thinking Space ==="
echo "Starting vLLM server with optimized settings..."

# Start vLLM with HF Spaces optimizations.
# The flags live in an array so each one stays a separate word without relying
# on fragile backslash continuations.
vllm_args=(
  --model numind/NuMarkdown-8B-Thinking
  --port 8000
  --host 0.0.0.0
  --max-model-len 20000
  --gpu-memory-utilization 0.95
  --disable-log-requests
  --tensor-parallel-size 1
  --trust-remote-code
)

# The log path is quoted so a HOME containing spaces cannot cause an
# "ambiguous redirect" error.
python3 -m vllm.entrypoints.openai.api_server "${vllm_args[@]}" \
  > "$HOME/app/vllm.log" 2>&1 &
VLLM_PID=$!
echo "vLLM started with PID: $VLLM_PID"
# Poll the vLLM server until one of its HTTP endpoints answers successfully.
# Gives up after 180 attempts; with a 2 s sleep per attempt that is at least
# 6 minutes. Exits with status 1 if the server process (VLLM_PID) disappears
# while waiting.
echo "Waiting for vLLM server to start (this may take 5-10 minutes)..."
max_attempts=180
for ((i = 1; i <= max_attempts; i++)); do
  # Check liveness on every attempt so a crashed server is reported right
  # away. kill -0 only tests that the PID exists; it sends no signal.
  if ! kill -0 "$VLLM_PID" 2> /dev/null; then
    echo "❌ vLLM process died! Checking logs:"
    tail -20 "$HOME/app/vllm.log"
    exit 1
  fi

  # --fail makes curl return non-zero on HTTP error statuses, and --max-time
  # bounds the whole request so a hung connection cannot stall the loop.
  if curl -sf --connect-timeout 5 --max-time 10 \
    http://localhost:8000/health > /dev/null 2>&1; then
    echo "✓ vLLM health check passed!"
    break
  elif curl -sf --connect-timeout 5 --max-time 10 \
    http://localhost:8000/v1/models > /dev/null 2>&1; then
    echo "✓ vLLM server is ready!"
    break
  fi

  # Show progress every 10 attempts (roughly every 20 seconds or more).
  if ((i % 10 == 0)); then
    echo "Still waiting... ($i/$max_attempts) - checking vLLM process"
  fi
  sleep 2
done
# Final check: confirm the API really answers before starting the UI.
# If it does not, print the tail of the vLLM log and exit with status 1.
# --fail treats HTTP error statuses as failure; --max-time keeps a hung
# connection from blocking the script indefinitely.
if ! curl -sf --max-time 10 http://localhost:8000/v1/models > /dev/null; then
  echo "❌ vLLM server failed to start after 6 minutes!"
  echo "Last 50 lines of vLLM logs:"
  tail -50 "$HOME/app/vllm.log"
  exit 1
fi
echo "✅ vLLM server is ready!"
# Print diagnostics about the environment, then run the Gradio app in the
# foreground. The script's exit status is that of the python3 process.
echo "=== Starting Gradio App ==="
echo "Port 7860 status before launching Gradio:"
# Only report the port as free when netstat actually ran; without this guard
# a missing netstat would make grep see empty input and print "free".
if command -v netstat > /dev/null 2>&1; then
  netstat -tuln | grep ':7860' || echo "Port 7860 is free"
else
  echo "netstat not available; skipping port check"
fi
echo "Environment check:"
echo "PORT=${PORT:-7860}"
echo "PWD=$(pwd)"
echo "USER=$(whoami)"

# Launch Gradio with explicit error handling
echo "Launching Gradio..."
echo "Checking if app.py exists:"
ls -la "$HOME/app/app.py"
echo "Python path:"
# command -v is a shell builtin, so it works even where `which` is absent.
command -v python3
echo "Current directory contents:"
ls -la "$HOME/app/"
echo "=== Starting Gradio App ==="
echo "Running crash debug version..."
# No trailing pipe: an unterminated pipeline here is a syntax error and the
# app would never start.
python3 "$HOME/app/app.py"