File size: 766 Bytes

96ef4b5
 
57ba236

#!/bin/bash
#
# Launches the SGLang server (python3 -m sglang.launch_server) on 0.0.0.0:80.
#
# Environment:
#   MODEL_ID        (required) passed to --model-path
#   KV_CACHE_DTYPE  (required) passed to --kv-cache-dtype
#   TP_SIZE         (required) passed to both --tensor-parallel-size and
#                   --expert-parallel-size
#   QUANT_METHOD    (optional) when non-empty, passed to --quantization;
#                   when unset or empty the flag is omitted entirely

set -euo pipefail

# Fail fast with a clear message instead of letting an empty expansion
# produce a flag with no value further down.
: "${MODEL_ID:?MODEL_ID must be set}"
: "${KV_CACHE_DTYPE:?KV_CACHE_DTYPE must be set}"
: "${TP_SIZE:?TP_SIZE must be set}"

# Build the argument list once; an array keeps every value a single word
# regardless of spaces or glob characters it may contain.
server_args=(
  --model-path "$MODEL_ID"
  --kv-cache-dtype "$KV_CACHE_DTYPE"
  --tensor-parallel-size "$TP_SIZE"
  --expert-parallel-size "$TP_SIZE"
)

# ${QUANT_METHOD:-} keeps the emptiness check safe under `set -u`.
if [[ -z "${QUANT_METHOD:-}" ]]; then
  echo "Using native precision"
else
  echo "Using ${QUANT_METHOD} quantization schema"
  server_args+=(--quantization "$QUANT_METHOD")
fi

server_args+=(
  --enable-torch-compile
  --enable-ep-moe
  --tool-call-parser qwen25
  --host 0.0.0.0
  --port 80
)

python3 -m sglang.launch_server "${server_args[@]}"