#!/usr/bin/env bash
#
# Benchmark sweep driver.
#
# For every (dataset, mode, model) combination this script:
#   1. launches one gen_vllm_judge.py worker per shard in the background,
#      pinning worker i to GPU i via CUDA_VISIBLE_DEVICES and passing
#      --local_index i;
#   2. waits for all workers and checks each worker's exit status;
#   3. runs merge.py over the shard outputs;
#   4. deletes the per-shard files, but only after the merge succeeded;
#   5. runs process_result.py on the merged file.
#
# A combination that fails at any step is reported on stderr and skipped;
# its shard files are left in place so they can be inspected or re-merged.
# The sweep continues with the remaining combinations and the script exits
# non-zero at the end if any combination failed.
#
# All paths are relative, so the script must be run from its own directory.

set -u

output_dir=../results
n=1
# Number of parallel workers; worker i runs on GPU i with --local_index i.
num_shards=4

# Whitespace-separated list of model paths.
models="Qwen/Qwen3-4B"

# Becomes 1 as soon as any combination fails; used as the exit status.
sweep_status=0

for dataset_name in skywork mixture judgebench reward_bench reward_bench_v2; do
    for mode in instruct reasoning; do
        # Intentionally unquoted: ${models} is split on whitespace into
        # individual model paths.
        for model_path in ${models}; do
            # Last path component, e.g. "Qwen/Qwen3-4B" -> "Qwen3-4B".
            model_name=${model_path##*/}
            combo="dataset=${dataset_name} mode=${mode} model=${model_path}"

            # Fan out: one background worker per shard.
            pids=""
            shard=0
            while [ "${shard}" -lt "${num_shards}" ]; do
                CUDA_VISIBLE_DEVICES=${shard} python ../eval_benchmark/gen_vllm_judge.py \
                    --model_path "${model_path}" \
                    --dataset_name "${dataset_name}" \
                    --local_index "${shard}" \
                    --output_dir "${output_dir}" \
                    --mode "${mode}" \
                    --num_gen "${n}" &
                pids="${pids} $!"
                shard=$((shard + 1))
            done

            # Barrier: wait on every PID individually. A bare `wait` always
            # returns 0 and would hide a crashed worker.
            gen_failed=0
            for pid in ${pids}; do
                wait "${pid}" || gen_failed=1
            done
            if [ "${gen_failed}" -ne 0 ]; then
                printf 'error: generation failed (%s); skipping merge\n' "${combo}" >&2
                sweep_status=1
                continue
            fi

            # Prefix shared by the shard files (<prefix><i>.json) and the
            # merged file (<prefix>.json).
            train_output_dir=${output_dir}/${model_name}_${mode}/${dataset_name}/generations

            if ! python ../eval_benchmark/merge.py \
                --base_path "${train_output_dir}" \
                --output_dir "${train_output_dir}.json" \
                --num_datasets "${num_shards}"; then
                # Keep the shards: they are the only copy of the generations.
                printf 'error: merge failed (%s); shard files kept\n' "${combo}" >&2
                sweep_status=1
                continue
            fi

            # The merge succeeded, so the per-shard files are now redundant.
            shard=0
            while [ "${shard}" -lt "${num_shards}" ]; do
                rm -- "${train_output_dir}${shard}.json" \
                    || printf 'warning: could not remove %s\n' "${train_output_dir}${shard}.json" >&2
                shard=$((shard + 1))
            done

            if ! python ../eval_benchmark/process_result.py --res_path "${train_output_dir}.json"; then
                printf 'error: process_result failed (%s)\n' "${combo}" >&2
                sweep_status=1
            fi
        done
    done
done

exit "${sweep_status}"

