#!/usr/bin/env bash
#
# Runs efficiency_cot.py once per batch size listed in $batch_sizes, passing
# the settings below as command-line options. Boolean toggles are turned into
# bare flags (--sparse_attn, --prefetch, --test_latency, --test_TPOT) when
# their value is 1.
#
# The script keeps going when one run fails; its exit status is the status of
# the last run.

# ---- Settings ---------------------------------------------------------------
model_name="Qwen3-4B"
model_path="/mnt/Models/Qwen3-4B"
# model_name="Qwen3-8B"
# model_path="/mnt/Models/Qwen3-8B"
max_length="32000"
page_size=32
budgets=1024
# Whitespace-separated list; one run is launched per entry.
batch_sizes="1 2"

# ---- Toggles (1 = pass the flag, anything else numeric = omit it) -----------
sparse_attn=1
test_latency=1
# Optional toggles. They are left unset by default and are treated as 0 in
# that case; uncomment to enable (a value inherited from the environment is
# honoured as well).
# prefetch=1
# test_TPOT=1

# Launch a single benchmark run.
# Globals:   model_name, model_path, max_length, page_size, budgets (read)
#            sparse_attn, prefetch, test_latency, test_TPOT (read, default 0)
# Arguments: $1 - batch size for this run
# Outputs:   prints the command line to stdout before running it
# Returns:   exit status of the python command
run_benchmark() {
    # Build the argument vector in the positional parameters instead of a
    # string that is later eval'ed: every value stays exactly one argument,
    # whatever characters it contains. "$1" is expanded before `set` replaces
    # the positional parameters, so the batch size is captured here.
    set -- python efficiency_cot.py \
        --model_name "$model_name" \
        --model_path "$model_path" \
        --max_length "$max_length" \
        --page_size "$page_size" \
        --budgets "$budgets" \
        --batch_size "$1"

    # ${var:-0} keeps `[ ... -eq 1 ]` well-formed when a toggle is unset or
    # empty (otherwise test reports "integer expression expected").
    if [ "${sparse_attn:-0}" -eq 1 ]; then
        set -- "$@" --sparse_attn
    fi
    if [ "${prefetch:-0}" -eq 1 ]; then
        set -- "$@" --prefetch
    fi
    if [ "${test_latency:-0}" -eq 1 ]; then
        set -- "$@" --test_latency
    fi
    if [ "${test_TPOT:-0}" -eq 1 ]; then
        set -- "$@" --test_TPOT
    fi

    # Log the command, then execute it with the device selection applied to
    # this one command only.
    printf '%s\n' "Running: CUDA_VISIBLE_DEVICES=0 $*"
    CUDA_VISIBLE_DEVICES=0 "$@"
}

# $batch_sizes is deliberately unquoted: it is split on whitespace into the
# individual batch sizes.
for batch_size in $batch_sizes; do
    run_benchmark "$batch_size"
done