#!/usr/bin/env bash
# Request-rate sweep driver for benchmarks/benchmark_serving.py.
#
# The settings in this section are shared by every sweep in this file.
# The benchmark script is referenced by a relative path, so run this file
# from the directory that contains benchmarks/.

# Value passed to --backend.
BACKEND=vllm

# Model passed to --model. The commented lines are alternative checkpoints
# kept for quick switching; only one MODEL assignment should be active.
# MODEL=/data/home/guoty/.cache/modelscope/hub/llm-research/meta-llama-3___1-8b-instruct
# MODEL=Qwen/Qwen2.5-32B
# MODEL=/data/home/guoty/.cache/huggingface/hub/models--Qwen--Qwen2.5-32B/snapshots/1818d35814b8319459f4bd55ed1ac8709630f003
# MODEL=/data/home/guoty/.cache/huggingface/hub/models--Qwen--Qwen2.5-72B-Instruct/snapshots/d3d951150c1e5848237cd6a7ad11df4836aee842
# MODEL=/data/home/guoty/.cache/huggingface/hub/models--Qwen--Qwen2.5-14B/snapshots/97e1e76335b7017d8f67c08a19d103c0504298c9
# MODEL=Qwen/Qwen2.5-32B-Instruct
# MODEL=/mnt/sda/2022-0526/home/gtyinstinct/.cache/huggingface/hub/models--Qwen--Qwen2.5-32B-Instruct/snapshots/5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd
MODEL=Qwen/Qwen2.5-32B

# File passed to --dataset-path.
DATASET=/mnt/sda/2022-0526/home/xuhx/datasets/sharegpt_v3_unfiltered_cleaned_split.json

# Trace CSV passed to --splitwise-path (alternative kept below).
SPLITWISE_PATH=/mnt/sda/2022-0526/home/xuhx/datasets/AzureLLMInferenceTrace_code.csv
# SPLITWISE_PATH=/mnt/sda/2022-0526/home/xuhx/projects/gLLM/experiments/datasets/512_512.csv

# Label of the configuration under test. It becomes a directory component of
# the result file path; the value "dynamic" additionally makes each sweep
# issue a reset_layers request before every run.
FOLDER=vllm

# Port passed to --port.
PORT=65511

# Active sweep: run benchmarks/benchmark_serving.py once per request rate and
# save each run's result as rate_<REQUEST_RATE>.json under RESULT_DIR.
#
# Flags not passed by this sweep, kept here for reference:
# --random-input-len --random-output-len  --arrival-stage 2 --stage-interval 300
DATASET_NAME=sharegpt # random sharegpt splitwise

# Directory that receives one JSON result file per request rate.
RESULT_DIR=/mnt/sda/2022-0526/home/xuhx/projects/gLLM/experiments/results/32B/${DATASET_NAME}/${FOLDER}

# Print a command line, then execute it with every argument passed verbatim.
# Unlike building a string and eval-ing it, the arguments are not parsed by
# the shell a second time, so values containing spaces or shell
# metacharacters stay intact.
run_logged() {
    printf '%s\n' "$*"
    "$@"
}

# Best effort: make sure the result directory exists before spending time on
# a run. A failure is reported but does not stop the sweep.
mkdir -p "$RESULT_DIR" \
    || echo "warning: could not create result directory $RESULT_DIR" >&2

# 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 32 34 36 48
for REQUEST_RATE in 6 12 18
do
    # The number of prompts scales with the rate: 128 prompts per unit of rate.
    NUM_PROMPTS=$((REQUEST_RATE * 128))

    if [ "$FOLDER" = "dynamic" ]; then
        # Ask the server on $PORT to reset its layers before the run, then
        # pause for a second. A failed request is reported but not fatal.
        curl -X GET "http://127.0.0.1:${PORT}/v1/reset_layers" \
            || echo "warning: reset_layers request failed" >&2
        sleep 1
    fi

    echo "Running with REQUEST_RATE=${REQUEST_RATE}"
    run_logged python benchmarks/benchmark_serving.py \
        --backend "$BACKEND" --splitwise-path "$SPLITWISE_PATH" \
        --model "$MODEL" --dataset-name "$DATASET_NAME" --dataset-path "$DATASET" \
        --num-prompts "$NUM_PROMPTS" --port "$PORT" --trust-remote-code --request-rate "$REQUEST_RATE" \
        --save-result --result-filename "${RESULT_DIR}/rate_${REQUEST_RATE}.json"
done
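# --- Disabled sweep: splitwise dataset with fractional request rates;
# --- results go under 32B/${DATASET_NAME}_code/. Uncomment to run. ---
# DATASET_NAME=splitwise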
# # 1 2 3 4 5
# for REQUEST_RATE in 0.5 1.0 1.5 2.0 2.5
# do
#     NUM_PROMPTS=$(awk -v rate="$REQUEST_RATE" 'BEGIN{printf("%.0f", rate * 128)}')
#     echo "Running with REQUEST_RATE=$REQUEST_RATE, NUM_PROMPTS=$NUM_PROMPTS"

#     if [ "$FOLDER" == "dynamic" ]; then
#         curl -X GET http://127.0.0.1:65511/v1/reset_layers
#         sleep 1
#     fi

#     CMD="python benchmarks/benchmark_serving.py \
#         --backend $BACKEND --splitwise-path $SPLITWISE_PATH \
#         --model $MODEL --dataset-name $DATASET_NAME --dataset-path $DATASET \
#         --num-prompts $NUM_PROMPTS --port $PORT --trust-remote-code --request-rate $REQUEST_RATE \
#         --save-result --result-filename /mnt/sda/2022-0526/home/xuhx/projects/gLLM/experiments/results/32B/${DATASET_NAME}_code/${FOLDER}/rate_${REQUEST_RATE}.json"
    
#     echo "Running with REQUEST_RATE=${REQUEST_RATE}"
#     echo $CMD
#     eval $CMD
# done
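# --- Disabled sweep: splitwise dataset on AzureLLMInferenceTrace_conv.csv. ---
# NOTE(review): the result path below is under 14B/ while the active MODEL is
# Qwen/Qwen2.5-32B; confirm the intended directory before re-enabling.
# DATASET_NAME=splitwise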
# SPLITWISE_PATH=/mnt/sda/2022-0526/home/xuhx/datasets/AzureLLMInferenceTrace_conv.csv
# # 
# for REQUEST_RATE in 4 8 12 16 20
# do
#     NUM_PROMPTS=$((REQUEST_RATE * 128))

#     if [ "$FOLDER" == "dynamic" ]; then
#         curl -X GET http://127.0.0.1:65511/v1/reset_layers
#         sleep 1
#     fi

#     CMD="python benchmarks/benchmark_serving.py \
#         --backend $BACKEND --splitwise-path $SPLITWISE_PATH \
#         --model $MODEL --dataset-name $DATASET_NAME --dataset-path $DATASET \
#         --num-prompts $NUM_PROMPTS --port $PORT --trust-remote-code --request-rate $REQUEST_RATE \
#         --save-result --result-filename /mnt/sda/2022-0526/home/xuhx/projects/gLLM/experiments/results/14B/${DATASET_NAME}_conv_filiter/${FOLDER}/rate_${REQUEST_RATE}.json"
    
#     echo "Running with REQUEST_RATE=${REQUEST_RATE}"
#     echo $CMD
#     eval $CMD
# done

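# --- Disabled sweep: DATASET_NAME=shuffle on AzureLLMInferenceTrace_code.csv;
# --- results go under results/${DATASET_NAME}/ (no model-size directory). ---
# DATASET_NAME=shuffle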
# SPLITWISE_PATH=/mnt/sda/2022-0526/home/xuhx/datasets/AzureLLMInferenceTrace_code.csv
# for REQUEST_RATE in 0.2 0.4 0.6 0.8 1.0
# do
#     NUM_PROMPTS=$(awk -v rate="$REQUEST_RATE" 'BEGIN{printf("%.0f", rate * 128)}')
#     echo "Running with REQUEST_RATE=$REQUEST_RATE, NUM_PROMPTS=$NUM_PROMPTS"


#     if [ "$FOLDER" == "dynamic" ]; then
#         curl -X GET http://127.0.0.1:65511/v1/reset_layers
#         sleep 1
#     fi

#     CMD="python benchmarks/benchmark_serving.py \
#         --backend $BACKEND --splitwise-path $SPLITWISE_PATH \
#         --model $MODEL --dataset-name $DATASET_NAME --dataset-path $DATASET \
#         --num-prompts $NUM_PROMPTS --port $PORT --trust-remote-code --request-rate $REQUEST_RATE \
#         --save-result --result-filename /mnt/sda/2022-0526/home/xuhx/projects/gLLM/experiments/results/${DATASET_NAME}/${FOLDER}/rate_${REQUEST_RATE}.json"
    
#     echo "Running with REQUEST_RATE=${REQUEST_RATE}"
#     echo $CMD
#     eval $CMD
# done

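# --- Disabled run: splitwise dataset on AzureLLMInferenceTrace_conv.csv at
# --- rate 18 only. The result path is fixed (splitwise_conv_filiter/windows/)
# --- and does not depend on FOLDER or DATASET_NAME. ---
# SPLITWISE_PATH=/mnt/sda/2022-0526/home/xuhx/datasets/AzureLLMInferenceTrace_conv.csv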
# DATASET_NAME=splitwise # random sharegpt splitwise
# for REQUEST_RATE in 18
# do
#     NUM_PROMPTS=$((REQUEST_RATE * 128))

#     if [ "$FOLDER" == "dynamic" ]; then
#         curl -X GET http://127.0.0.1:65511/v1/reset_layers
#         sleep 1
#     fi

#     CMD="python benchmarks/benchmark_serving.py \
#         --backend $BACKEND --splitwise-path $SPLITWISE_PATH \
#         --model $MODEL --dataset-name $DATASET_NAME --dataset-path $DATASET \
#         --num-prompts $NUM_PROMPTS --port $PORT --trust-remote-code --request-rate $REQUEST_RATE \
#         --save-result --result-filename /mnt/sda/2022-0526/home/xuhx/projects/gLLM/experiments/results/32B/splitwise_conv_filiter/windows/size30_rate_${REQUEST_RATE}.json"
    
#     echo "Running with REQUEST_RATE=${REQUEST_RATE}"
#     echo $CMD
#     eval $CMD
# done