#!/usr/bin/env bash
# Launches test.py once for every context length listed in max_lengths,
# pinned to GPU 0 via CUDA_VISIBLE_DEVICES.
#
# The command line is kept as a Bash array and executed directly (no eval),
# so configuration values are passed to test.py verbatim even if they contain
# spaces, quotes, `$`, or other shell metacharacters.

model_name="Llama-3.1-8B-Instruct"
model_path="/mnt/Models/meta-llama/Llama-3.1-8B-Instruct"
sparse_attn=1          # 1 => append --sparse_attn; any other integer omits the flag
max_lengths=(128000)   # one run per entry; each is passed as --max_length
page_size=32
# Deliberately larger than every max_length above; per the original author's
# note, such a large budget amounts to full-cache attention in practice.
budgets=200000

for max_length in "${max_lengths[@]}"; do
    # One array element per argv word: no manual quote-escaping required.
    cmd=(
        python test.py
        --model_name "$model_name"
        --model_path "$model_path"
        --max_length "$max_length"
        --page_size "$page_size"
        --budgets "$budgets"
    )

    # Enable the optional sparse-attention flag only when requested.
    if [[ "$sparse_attn" -eq 1 ]]; then
        cmd+=(--sparse_attn)
    fi

    # Log the command in shell-quoted form so it can be copy-pasted as-is.
    printf 'Running: CUDA_VISIBLE_DEVICES=0'
    printf ' %q' "${cmd[@]}"
    printf '\n'

    # Run it; the env-var prefix applies to this single invocation only.
    CUDA_VISIBLE_DEVICES=0 "${cmd[@]}"
done