#!/bin/bash


# Benchmark configuration read by run() below.
# model:        value forwarded to benchmark_serving.py as --model.
# dataset_name: value forwarded to benchmark_serving.py as --dataset-name.
model='meta-llama/Meta-Llama-3.1-8B-Instruct'
dataset_name='random'

#######################################
# Launch one benchmark_serving.py run against the vllm backend on port 8000,
# reporting TTFT at the 99/95/90/80 percentiles.
# Globals:
#   model         (read) value passed as --model
#   dataset_name  (read) value passed as --dataset-name
# Arguments:
#   $1 - value for --random-input-len
#   $2 - value for --random-output-len
#   $3 - value for --num-prompts
#   $4 - value for --random-range-ratio
#   $5 - value for --request-rate
# Returns:
#   2 when fewer than five arguments are supplied; otherwise the exit status
#   of the python3 command.
#######################################
run() {
    if (( $# < 5 )); then
        printf 'usage: run INPUT_LEN OUTPUT_LEN NUM_PROMPTS RANGE_RATIO REQUEST_RATE\n' >&2
        return 2
    fi

    local input_len=$1
    local output_len=$2
    local num_prompts=$3
    local range_ratio=$4
    local request_rate=$5

    # The script path is relative, so it is resolved against the current
    # working directory of the caller. Every expansion is quoted so each value
    # reaches python3 as exactly one argument (no word-splitting or globbing).
    python3 ../benchmark_serving.py \
        --backend vllm \
        --model "$model" \
        --percentile-metrics ttft \
        --metric-percentiles 99,95,90,80 \
        --dataset-name "$dataset_name" \
        --random-input-len "$input_len" \
        --random-output-len "$output_len" \
        --num-prompts "$num_prompts" \
        --port 8000 \
        --request-rate "$request_rate" \
        --random-range-ratio "$range_ratio"
}

# Sweep the input length from 100 to 8000 (inclusive) in steps of 50, calling
# run once per length with output length 1, 30 prompts, range ratio 1.0 and
# request rate "inf". A bash arithmetic loop is used instead of $(seq ...) to
# avoid an external process and an unquoted command substitution.
for (( inlen = 100; inlen <= 8000; inlen += 50 )); do
    #   input     output  num     ratio   rps
    run "$inlen"  1       30      1.0     inf
done
