
# Evaluation launcher: restarts a local Ray head node and runs
# src/evaluation.py in "meta_reward" mode on the "rewardbench" language
# benchmark, using the checkpoint configured in EVAL_MODEL.
#
# Must be run from the directory that contains src/evaluation.py
# (the script path below is relative).

# Expose GPUs 0-7 to the processes started below.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
# NOTE: HOME is overridden for everything launched from this script, so any
# tool that resolves paths through $HOME will use the project directory.
export HOME="/data//GRM-Omni-v1"

# Tear down any existing Ray runtime and start a fresh head node. Abort when
# this fails so the evaluation is never launched against a missing or stale
# Ray cluster.
if ! { ray stop --force && ray start --head --disable-usage-stats --num-cpus=24; }; then
    echo "error: failed to restart the Ray head node; aborting evaluation" >&2
    exit 1
fi

# Checkpoint to evaluate and the modality value forwarded to evaluation.py.
EVAL_MODEL="/data//GRM-Omni-ckpt/GRM-Omni-SFT-CKPT500/final"
MODALITY="omni"

# exploration exploitation
# NOTE(review): the label above is the original author's; presumably it refers
# to the sampling settings (--sampling_n/--top_p/--temperature) — TODO confirm.
# Variable expansions are quoted so the values are passed as single arguments.
python src/evaluation.py \
    --output_dir "/data//GRM-Omni-v1/results/grm_omni_ckpt500_v1/" \
    --evaluation_model "$EVAL_MODEL" \
    --modality "$MODALITY" \
    --mode "meta_reward" \
    --benchmark_modality "language" \
    --benchmark "rewardbench" \
    --benchmark_dir "/data//GRM-Omni-v1/dataset/testing/benchmark/language/rewardbench_v1" \
    --batch_size 256 \
    --workers 8 \
    --sampling_n 1 \
    --top_p 0.8 \
    --temperature 0.1
