File size: 766 Bytes
96ef4b5
 
57ba236
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#!/bin/bash
#
# Starts the SGLang server via `python3 -m sglang.launch_server`, listening on
# 0.0.0.0:80.
#
# Environment variables read:
#   MODEL_ID        value passed to --model-path
#   KV_CACHE_DTYPE  value passed to --kv-cache-dtype
#   TP_SIZE         value passed to both --tensor-parallel-size and
#                   --expert-parallel-size
#   QUANT_METHOD    optional; when non-empty it is passed to --quantization,
#                   when unset or empty no --quantization flag is given
#
# The script's exit status is that of the python3 command.

#######################################
# Builds the argument list for sglang.launch_server.
# Globals:
#   MODEL_ID, KV_CACHE_DTYPE, TP_SIZE, QUANT_METHOD (read)
#   SERVER_ARGS (written; indexed array, replaced on every call)
# Arguments:
#   None
#######################################
build_server_args() {
  # Every expansion is quoted so a value containing whitespace or glob
  # characters stays a single argument.
  SERVER_ARGS=(
    --model-path "${MODEL_ID}"
    --kv-cache-dtype "${KV_CACHE_DTYPE}"
    --tensor-parallel-size "${TP_SIZE}"
    --expert-parallel-size "${TP_SIZE}"
  )

  # The quantization flag is the only difference between the two modes.
  if [[ -n "${QUANT_METHOD:-}" ]]; then
    SERVER_ARGS+=(--quantization "${QUANT_METHOD}")
  fi

  SERVER_ARGS+=(
    --enable-torch-compile
    --enable-ep-moe
    --tool-call-parser qwen25
    --host 0.0.0.0
    --port 80
  )
}

if [[ -z "${QUANT_METHOD:-}" ]]; then
  echo "Using native precision"
else
  echo "Using ${QUANT_METHOD} quantization schema"
fi

build_server_args
python3 -m sglang.launch_server "${SERVER_ARGS[@]}"