#!/bin/bash
#
# Launch an SGLang inference server for Llama-3.3-70B-Instruct.
#
# Usage:
#   <script> <gpu_ids>
#
# Arguments:
#   $1 - comma-separated list of GPU ids (e.g. "0,1,2,3"). The list is
#        exported as CUDA_VISIBLE_DEVICES for the server process, and the
#        number of entries is used as the tensor-parallel size.
#
# The shebang must be the first line of the file to take effect; the script
# relies on Bash features (arrays, here-strings).

set -euo pipefail
set -x

# Fail early with a usage message instead of launching with an empty GPU list
# (which would yield --tp-size 0).
if [[ -z "${1:-}" ]]; then
  printf 'Usage: %s <gpu_ids>  (comma-separated, e.g. 0,1,2,3)\n' "${0##*/}" >&2
  exit 2
fi

model_path=meta-llama/Llama-3.3-70B-Instruct
tokenizer_path=meta-llama/Llama-3.3-70B-Instruct
dtype=bfloat16
mem_fraction_static=0.87

gpu=$1
port=50000

# Split the comma-separated GPU list; IFS is overridden for this `read` only.
IFS=',' read -ra GPU_ARRAY <<< "$gpu"

# One tensor-parallel rank per listed GPU.
tensor_parallel_size=${#GPU_ARRAY[@]}
printf '%s\n' "$tensor_parallel_size"

# Build the argument list as an array so each value stays a single word.
server_args=(
  --model-path "$model_path"
  --tokenizer-path "$tokenizer_path"
  --port "$port"
  --tp-size "$tensor_parallel_size"
  --trust-remote-code
  --dtype "$dtype"
  --mem-fraction-static "$mem_fraction_static"
)

CUDA_VISIBLE_DEVICES="$gpu" python3 -m sglang.launch_server "${server_args[@]}"
