#!/usr/bin/env bash
# Run configuration. These values are forwarded to pred.py by run_pred below.
# bash is required: the script exports a shell function with `export -f`.

START_GPU=0      # first rank; also forwarded as --start_gpu
NUM_GPUS=1       # number of parallel ranks; forwarded as --world_size
# model="Llama-3.1-8B-Instruct"
model="Qwen2.5-14B-Instruct"
sparse_attn=1    # 1 adds --sparse_attn; 0 means full KV cache (baseline)
page_size=32
budgets=2048     # default budgets value

#######################################
# Launch pred.py for a single rank.
# Arguments:
#   $1 - rank; also used as the CUDA_VISIBLE_DEVICES value for the process
#   $2 - value for --world_size
#   $3 - value for --model
#   $4 - integer flag: 1 appends --sparse_attn, anything else omits it
#   $5 - value for --page_size
#   $6 - value for --budgets
#   $7 - value for --start_gpu
# Returns:
#   Exit status of the pred.py invocation.
#######################################
run_pred() {
    # Locals keep a direct call from overwriting the caller's model,
    # page_size, budgets and START_GPU.
    local rank="$1"
    local num_gpus="$2"
    local model="$3"
    local sparse="$4"
    local page_size="$5"
    local budgets="$6"
    local start_gpu="$7"

    # Build the argument vector as an array instead of an eval'd string so
    # each value reaches pred.py as exactly one argument, with no re-parsing.
    local -a args=(
        --world_size "$num_gpus"
        --model "$model"
        --page_size "$page_size"
        --budgets "$budgets"
        --start_gpu "$start_gpu"
    )
    if [ "$sparse" -eq 1 ]; then
        args+=(--sparse_attn)
    fi

    CUDA_VISIBLE_DEVICES="$rank" python pred.py "${args[@]}"
}

# Make run_pred visible to the `bash -c` children spawned by xargs.
export -f run_pred

# For each budgets value (the configured one first), start one run_pred per
# rank in START_GPU .. START_GPU+NUM_GPUS-1, up to NUM_GPUS at a time, then
# run result.py once that round has finished.
for budgets in "$budgets" 1024 512 256 4096; do
    # -I already implies one input line per invocation, so no -n is given.
    # Values are handed to the child shell as positional parameters rather
    # than spliced into the command string, so they are never re-parsed.
    seq "$START_GPU" "$((START_GPU + NUM_GPUS - 1))" |
        xargs -P "$NUM_GPUS" -I {} \
            bash -c 'run_pred "$@"' _ {} "$NUM_GPUS" "$model" "$sparse_attn" "$page_size" "$budgets" "$START_GPU" ||
        echo "warning: prediction stage failed for budgets=$budgets" >&2
    python result.py
done