#!/usr/bin/env bash
# Benchmark sweep configuration.
#
# The loop further down runs benchmarks/benchmark_serving.py once per entry
# of OUTPUT_LENS, pairing it with the entry of REQUEST_RATES at the same index.
# Bash is required: the sweep is driven by indexed arrays.

# Passed to the benchmark client as --backend.
BACKEND=vllm

# Passed to the benchmark client as --model. Exactly one MODEL line should be
# active; the commented-out lines are alternative hub ids / local snapshots.
# MODEL=/data/home/guoty/.cache/modelscope/hub/llm-research/meta-llama-3___1-8b-instruct
MODEL=Qwen/Qwen2.5-32B
# MODEL=/data/home/guoty/.cache/huggingface/hub/models--Qwen--Qwen2.5-32B/snapshots/1818d35814b8319459f4bd55ed1ac8709630f003
# MODEL=/data/home/guoty/.cache/huggingface/hub/models--Qwen--Qwen2.5-72B-Instruct/snapshots/d3d951150c1e5848237cd6a7ad11df4836aee842
# MODEL=/data/home/guoty/.cache/huggingface/hub/models--Qwen--Qwen2.5-14B/snapshots/97e1e76335b7017d8f67c08a19d103c0504298c9
# MODEL=Qwen/Qwen2.5-32B-Instruct
# MODEL=/mnt/sda/2022-0526/home/gtyinstinct/.cache/huggingface/hub/models--Qwen--Qwen2.5-32B-Instruct/snapshots/5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd
# MODEL=Qwen/Qwen2.5-14B-Instruct

# Passed as --dataset-path.
DATASET=/mnt/sda/2022-0526/home/xuhx/datasets/sharegpt_v3_unfiltered_cleaned_split.json
# Passed as --splitwise-path.
SPLITWISE_PATH=/mnt/sda/2022-0526/home/xuhx/datasets/AzureLLMInferenceTrace_code.csv
# SPLITWISE_PATH=/mnt/sda/2022-0526/home/xuhx/projects/gLLM/experiments/datasets/512_512.csv

# Sub-directory of the results tree that output files are written to.
# The value "dynamic" additionally makes the loop call the server's
# /v1/reset_layers endpoint before every run.
FOLDER=dynamic
# Passed to the benchmark client as --port.
PORT=65511

# Other client flags noted for reference (not all are used by the loop):
# --random-input-len --random-output-len  --arrival-stage 2 --stage-interval 300
# Passed as --dataset-name; also a component of the result path.
DATASET_NAME=random
# Passed as --random-input-len.
INPUT_LEN=512

# One benchmark run per entry; the value is passed as --random-output-len.
OUTPUT_LENS=(1) # (1 64 128 256)
# Passed as --request-rate. Must pair one-to-one with OUTPUT_LENS (same length, same order).
REQUEST_RATES=(32) # (32 32 20 20)  # 128 256
# Presumably a record of output-length -> request-rate pairings used so far (verify):
# 1->32,32->32,64->32,96->28,128->20,192->20,256->20

#######################################
# Assemble the benchmark client command line for one sweep point.
# The command is stored as an array (one element per argument) so values
# containing whitespace or shell metacharacters reach the client unchanged.
# Globals read:    BACKEND, SPLITWISE_PATH, MODEL, DATASET_NAME, DATASET,
#                  PORT, INPUT_LEN, FOLDER
# Globals written: NUM_PROMPTS, CMD (array)
# Arguments:       $1 - output length, passed as --random-output-len and
#                       used in the result file name
#                  $2 - request rate (integer), passed as --request-rate
#######################################
build_benchmark_cmd() {
    local output_len=$1
    local request_rate=$2

    # The prompt count scales with the rate: 128 prompts per unit of rate.
    NUM_PROMPTS=$((request_rate * 128))

    # The script path is relative, so this must be run from the directory
    # that contains benchmarks/.
    CMD=(
        python benchmarks/benchmark_serving.py
        --backend "$BACKEND" --splitwise-path "$SPLITWISE_PATH"
        --model "$MODEL" --dataset-name "$DATASET_NAME" --dataset-path "$DATASET"
        --num-prompts "$NUM_PROMPTS" --port "$PORT" --trust-remote-code
        --request-rate "$request_rate"
        --random-output-len "$output_len" --random-input-len "$INPUT_LEN"
        --save-result
        --result-filename "/mnt/sda/2022-0526/home/xuhx/projects/gLLM/experiments/results/32B/${DATASET_NAME}/${FOLDER}/output_${output_len}.json"
    )
}

# The two arrays are consumed index-by-index; a length mismatch would run the
# client with an empty request rate and zero prompts, so refuse to start.
if (( ${#OUTPUT_LENS[@]} != ${#REQUEST_RATES[@]} )); then
    echo "error: OUTPUT_LENS has ${#OUTPUT_LENS[@]} entries but REQUEST_RATES has ${#REQUEST_RATES[@]}" >&2
    exit 1
fi

for i in "${!OUTPUT_LENS[@]}"; do
    OUTPUT_LEN=${OUTPUT_LENS[$i]}
    REQUEST_RATE=${REQUEST_RATES[$i]}

    if [[ "$FOLDER" == "dynamic" ]]; then
        # Call the reset endpoint on the same port the benchmark targets.
        # Best effort: a failure is reported but does not stop the sweep.
        curl -X GET "http://127.0.0.1:${PORT}/v1/reset_layers" \
            || echo "warning: reset_layers request to port ${PORT} failed" >&2
        sleep 1
    fi

    build_benchmark_cmd "$OUTPUT_LEN" "$REQUEST_RATE"

    echo "Running with REQUEST_RATE=${REQUEST_RATE}"
    # Log the command shell-quoted so it can be pasted back into a terminal.
    printf '%q ' "${CMD[@]}"
    printf '\n'
    "${CMD[@]}"
done