#!/usr/bin/env bash
# Train and evaluate the unified policy once per Qwen3 model size.
#
# Every path used here is relative (../opt, ../embeddings,
# ../racer_results_<size>), so they are resolved against the current working
# directory: launch the script from the directory it is meant to run in.
#
# The sweep is best-effort: a failed run does not stop the remaining sizes,
# but each failure is reported on stderr and remembered in failed_sizes so
# that the sweep as a whole ends with a non-zero status.
failed_sizes=""

for size in 1.7 4 8 14; do
    # NOTE(review): --dataset_base_path points at ../embeddings/Qwen3-4B for
    # every model size; confirm that sharing the 4B directory is intended.
    python ../opt/train_eval_unified.py \
        --model_name_instruct "Qwen3-${size}B_instruct" \
        --model_name_reasoning "Qwen3-${size}B_reasoning" \
        --dataset_name combined \
        --dataset_base_path ../embeddings/Qwen3-4B \
        --budgets "2.0,2.5,3.0,3.5,4.0,5.0,7.0,10.0" \
        --replications 10 \
        --num_gpus 4 \
        --jobs_per_gpu 20 \
        --tau_r 1.0 \
        --tau_g 0 \
        --batch_size 64 \
        --lr 1e-4 \
        --dual_lr 1e-3 \
        --epochs 60 \
        --ood_datasets "reward_bench,reward_bench_v2,judgebench" \
        --save_dir "../racer_results_${size}" \
        --delete_policy_after_eval \
        --use_separate_embeddings || {
        # Capture the status first: any later command would overwrite $?.
        run_status=$?
        printf 'train_eval_unified.py failed for size %s (exit status %s)\n' \
            "$size" "$run_status" >&2
        failed_sizes="${failed_sizes:+$failed_sizes }$size"
    }
done

if [ -n "$failed_sizes" ]; then
    printf 'Runs failed for model sizes: %s\n' "$failed_sizes" >&2
    # Leave a non-zero status without calling `exit`, so commands added after
    # this sweep still run, while a script that ends here reports the failure.
    false
fi



# for tau_r in 10.0 5.0 2.0 1.0 0.5 0.1 0.05; do
# echo "========================================"
# echo "Training with tau_r=$tau_r, tau_g=0"
# echo "========================================"

# python ../opt/train_eval_unified_new.py \
#     --model_name_instruct "Qwen3-8B_instruct" \
#     --model_name_reasoning "Qwen3-8B_reasoning" \
#     --dataset_name "combined" \
#     --ood_datasets "reward_bench_v2,rm_bench,judgebench,reward_bench" \
#     --dataset_base_path ../embeddings/Qwen3-4B \
#     --num_gpus 4 \
#     --jobs_per_gpu 10 \
#     --tau_r $tau_r \
#     --tau_g 0 \
#     --budgets "2.0,2.5,3.0,3.5,4.0,5.0,7.0" \
#     --replications 2 \
#     --batch_size 64 \
#     --lr 1e-4 \
#     --dual_lr 1e-3 \
#     --epochs 60 \
#     --save_dir ../res_grid_search_latest \
#     --delete_policy_after_eval
# done

# echo "========================================" 
# echo "Grid search completed!"
# echo "Analyzing results..."