#!/bin/sh
# Runs efficiency.py once for every context length listed in max_lengths,
# with CUDA_VISIBLE_DEVICES=0 set for each run.
#
# Optional toggles: a toggle adds its flag only when its value is 1. Toggles
# that are unset or empty count as disabled. prefetch and test_TPOT are not
# assigned below, so they stay off unless supplied via the environment or
# enabled here.

# Model selection: keep exactly one model_name/model_path pair active.
# model_name="Llama-3.1-8B-Instruct"
# model_path="/mnt/Models/meta-llama/Llama-3.1-8B-Instruct"
model_name="Qwen2.5-14B-Instruct"
model_path="/mnt/Models/Qwen/Qwen2.5-14B-Instruct"

# The full sweep is kept for reference; the second assignment overrides it and
# is the one in effect. Remove or comment it out to run the whole sweep.
max_lengths="8000 16000 32000 64000 128000"
max_lengths="32000"

sparse_attn=1
test_latency=1
page_size=32
budgets=1024
batch_size=4
# prefetch=1

# is_enabled VALUE
# Succeeds when VALUE is the integer 1. An empty VALUE is treated as 0 so that
# an unset toggle does not make `[` print "integer expression expected".
is_enabled() {
    [ "${1:-0}" -eq 1 ]
}

# run_efficiency MAX_LENGTH
# Builds the efficiency.py argument list for one context length, logs it, and
# runs it. Returns the exit status of the python process.
run_efficiency() {
    # Build the command as positional parameters rather than a string passed
    # to eval: every value stays a single argument and is never re-parsed by
    # the shell. "$1" is expanded before `set` replaces the parameters.
    set -- python efficiency.py \
        --model_name "$model_name" \
        --model_path "$model_path" \
        --max_length "$1" \
        --page_size "$page_size" \
        --budgets "$budgets" \
        --batch_size "$batch_size"

    # Append the optional flags, in a fixed order.
    if is_enabled "${sparse_attn:-}"; then
        set -- "$@" --sparse_attn
    fi
    if is_enabled "${prefetch:-}"; then
        set -- "$@" --prefetch
    fi
    if is_enabled "${test_latency:-}"; then
        set -- "$@" --test_latency
    fi
    if is_enabled "${test_TPOT:-}"; then
        set -- "$@" --test_TPOT
    fi

    # Log the command, then execute it.
    printf 'Running: CUDA_VISIBLE_DEVICES=0 %s\n' "$*"
    CUDA_VISIBLE_DEVICES=0 "$@"
}

# max_lengths is deliberately left unquoted: it is a space-separated list.
# A failed run does not stop the sweep; the remaining lengths still execute.
for max_length in $max_lengths; do
    run_efficiency "$max_length"
done