#!/usr/bin/env bash
# Evaluation driver configuration: the checkpoint steps and benchmark names
# that the loops later in this script iterate over.
# Bash is required because the lists below are Bash arrays.

# Exported so that the python child processes inherit it.
# NOTE(review): presumably selects the Intel MKL threading layer - confirm.
export MKL_SERVICE_FORCE_INTEL=1

# Training steps (checkpoints) to evaluate.
steps=(140)

# Benchmarks to evaluate on, one name per line so diffs stay small.
benchmarks=(
    'p-bench'
    'p-bench-crop'
    'mmstar'
    'hrbench-4k'
    'hrbench-8k'
    'cvbench-2d'
    'cvbench-3d'
    'vstar'
    'countqa'
    'colorbench'
    'babyvision'
    'mme-realworld'
    'mme-realworld-cn'
)

# Build the checkpoint directory for one training step.
# Arguments: $1 - global training step number
# Outputs:   the actor checkpoint path for that step, on stdout
model_path_for_step() {
    printf './verl_exp/box_in_image/global_step_%s/actor/huggingface\n' "$1"
}

# Runs whose inference or judge command exited non-zero, for the final summary.
failed_runs=()

# Iterate over every step
for step in "${steps[@]}"; do
    # Derive the path from the loop variable; a hard-coded step number here
    # would make every iteration evaluate the same checkpoint.
    model_path="$(model_path_for_step "$step")"
    # model_name="self1"
    model_name="box_in_image"
    # Name handed to the judge script.
    # NOTE(review): presumably the inference script stores its results under
    # this "_seed42"-suffixed name - confirm against simple_inference_qwen3vl.py.
    model_names="${model_name}_seed42"

    echo "=========================================="
    echo "Processing step: $step"
    echo "Model path: $model_path"
    echo "Model name: $model_name"
    echo "=========================================="

    # Iterate over every benchmark
    for benchmark in "${benchmarks[@]}"; do
        echo "Running inference for model: $model_name on benchmark: $benchmark"
        if ! python simple_inference_qwen3vl.py \
            --benchmark "$benchmark" \
            --model "$model_name" \
            --model_path "$model_path" \
            --gpus 16; then
            # Judging after a failed inference would score missing or stale
            # output, so skip it but keep going with the remaining benchmarks.
            echo "ERROR: inference failed for $model_name on $benchmark (step $step); skipping judge" >&2
            failed_runs+=("step=$step benchmark=$benchmark stage=inference")
            continue
        fi

        echo "Running judge for model: $model_name on benchmark: $benchmark"
        if ! python judge_qwenlm.py \
            --benchmark "$benchmark" \
            --model "$model_names"; then
            echo "ERROR: judge failed for $model_names on $benchmark (step $step)" >&2
            failed_runs+=("step=$step benchmark=$benchmark stage=judge")
            continue
        fi

        echo "---------- Completed $benchmark for $model_name ----------"
    done

    echo "========== Completed all benchmarks for $model_name =========="
    echo ""
done

echo "All steps completed!"

# Report every failed run on stderr so it is not lost in the long log.
if (( ${#failed_runs[@]} > 0 )); then
    printf 'Failed runs (%d):\n' "${#failed_runs[@]}" >&2
    printf '  %s\n' "${failed_runs[@]}" >&2
fi

# As the last command, this makes the script's exit status non-zero when any
# run failed, without calling exit.
[[ ${#failed_runs[@]} -eq 0 ]]
