#!/bin/bash
# Start llama-server in background
cd /llama.cpp/build
./bin/llama-server \
    --host 0.0.0.0 --port 8080 \
    --model /models/model.q8_0.gguf \
    --ctx-size 8192 --threads 2 &
# Wait for server to initialize
echo "Waiting for server to start..."
# -f makes curl fail on HTTP error responses, so we keep polling until the endpoint returns success
until curl -sf "http://localhost:8080/v1/models" >/dev/null; do
    sleep 5
done
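# Server is up; run the Python app in the foreground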
cd /
python3 app.py