#!/bin/bash
#
# Benchmark sweep: runs ../benchmark_serving2.py once for every combination
# of random input length and request rate (QPS) listed in the loops below,
# and asks it to save one JSON result file per combination into
# $result_folder, named "input-<input_len>-qps-<qps>.json".
#
# The benchmark script path is relative, so this must be launched from a
# directory whose parent contains benchmark_serving2.py.
#
# Exit status: 0 if every run succeeded, 1 if the result directory could not
# be created or if at least one benchmark invocation failed.

# Treat a reference to an unset variable as an error rather than silently
# passing an empty argument to the benchmark.
set -u

model="meta-llama/Meta-Llama-3.1-8B-Instruct"
#model="meta-llama/Meta-Llama-3.1-70B-Instruct"
dataset_name="random"
num_prompts=200
# input_len is not fixed here; it is supplied by the outer sweep loop below.
output_len=100

result_folder=result_sweep_org2

# Without the result directory no run can save its output, so stop early
# instead of spending the whole sweep on runs whose results would be lost.
if ! mkdir -p -- "$result_folder"; then
	printf 'error: cannot create result directory: %s\n' "$result_folder" >&2
	exit 1
fi

# Number of benchmark invocations that returned a non-zero status.
failed_runs=0

# Alternative input-length sets kept for quick switching:
#for input_len in 1024 2048 4096; do
#for input_len in 3072; do
for input_len in 1024 2048 3072 4096; do
	for qps in 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0; do
		# A failed run is reported and counted, but the sweep continues so
		# the remaining data points are still collected.
		if ! python3 ../benchmark_serving2.py \
			--backend vllm \
			--model "$model" \
			--percentile-metrics ttft,itl \
			--metric-percentiles 99,95,90,80 \
			--dataset-name "$dataset_name" \
			--random-input-len "$input_len" \
			--random-output-len "$output_len" \
			--num-prompts "$num_prompts" \
			--port 8000 \
			--request-rate "$qps" \
			--save-result \
			--result-dir "$result_folder" \
			--result-filename "input-${input_len}-qps-${qps}.json"; then
			printf 'warning: benchmark failed (input_len=%s, qps=%s)\n' \
				"$input_len" "$qps" >&2
			failed_runs=$((failed_runs + 1))
		fi
	done
done

# Surface partial failure to the caller instead of only reflecting the
# status of the very last run.
if (( failed_runs > 0 )); then
	printf 'error: %d benchmark run(s) failed\n' "$failed_runs" >&2
	exit 1
fi
