#!/bin/bash


# Model identifier handed to --model. To benchmark a different model,
# comment out the active assignment and enable one of the alternatives.
#model='meta-llama/Llama-3.2-3B'
#model='meta-llama/Meta-Llama-3.1-8B-Instruct'
model='Qwen/Qwen2-7B'

# Dataset selector handed to --dataset-name on every benchmark run.
dataset_name='random'

#######################################
# Launch one invocation of ../benchmark_serving.py (path is relative to the
# current working directory) with --backend vllm and --port 8000.
# Globals:
#   model         (read) value passed to --model
#   dataset_name  (read) value passed to --dataset-name
# Arguments:
#   $1 - value for --random-input-len
#   $2 - value for --random-output-len
#   $3 - value for --num-prompts
#   $4 - value for --random-range-ratio
#   $5 - value for --request-rate
# Returns:
#   2 (without launching anything) when fewer than five arguments are given;
#   otherwise the exit status of the python3 command.
#######################################
run() {
    # Refuse to build a command line with missing values: an empty positional
    # would otherwise leave a flag without its argument.
    if (( $# < 5 )); then
        printf 'usage: run INPUT_LEN OUTPUT_LEN NUM_PROMPTS RANGE_RATIO REQUEST_RATE\n' >&2
        return 2
    fi

    local input_len=$1
    local output_len=$2
    local num_prompts=$3
    local range_ratio=$4
    local request_rate=$5

    # Every expansion is quoted so each value reaches python3 as exactly one
    # argument, even if it contains whitespace or glob characters.
    #
    # Alternative, finer-grained percentile list:
    #   --metric-percentiles 99,95,90,80,70,60,50,40,30,20,10,5,1
    python3 ../benchmark_serving.py \
          --backend vllm \
          --model "$model" \
          --percentile-metrics ttft,tpot,itl \
          --metric-percentiles 99,95,90 \
          --dataset-name "$dataset_name" \
          --random-input-len "$input_len" \
          --random-output-len "$output_len" \
          --num-prompts "$num_prompts" \
          --port 8000 \
          --request-rate "$request_rate" \
          --random-range-ratio "$range_ratio"
}

# Benchmark sweep. Arguments handed to run, in order:
#   input length, output length, number of prompts, range ratio, rps
# Every (input, output) pair is executed twice: first with rps "inf",
# then with rps 2.0. All runs use 60 prompts and a range ratio of 0.5.
for in_len in 2000 4000 6000 8000; do
    # Only the 2000 input length additionally covers the 600 output length.
    case "$in_len" in
        2000) out_lens=(600 400 200 100 20) ;;
        *)    out_lens=(400 200 100 20) ;;
    esac

    for out_len in "${out_lens[@]}"; do
        for rate in inf 2.0; do
            run "$in_len" "$out_len" 60 0.5 "$rate"
        done
    done
done
