
# Environment shared by every process launched below; all three variables are
# exported together at the end of this group.
# GPU indices 0 through 7 are listed as visible to CUDA.
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
# NOTE(review): presumably lets vLLM accept a max model length above the
# limit derived from the model config — confirm against the vLLM docs.
VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
# HOME is overridden: anything started after this point that resolves $HOME
# sees this path instead of the invoking user's home directory.
HOME="/data//GRM-Omni-v1/src"
export CUDA_VISIBLE_DEVICES VLLM_ALLOW_LONG_MAX_MODEL_LEN HOME

# Restart Ray before the run: force-stop whatever is currently running and,
# only if that succeeds, start a new head node limited to 24 CPUs with usage
# stats disabled.
# Alternative that keeps an already-running cluster instead of restarting it:
# ray status || ray start --head --disable-usage-stats --num-cpus=24
ray stop --force \
  && ray start --head --disable-usage-stats --num-cpus=24

# EVAL_MODEL="/data//GRM-Omni-ckpt/GRM-Omni-SFT-CKPT500/final"
# EVAL_MODEL="/data//GRM-Omni-ckpt/0915_GRM_omni_all_step_final"
# EVAL_MODEL="/data//GRM-Omni-ckpt/0915_GRM_omni_lang_dpo_3k_final"
# EVAL_MODEL="/data//GRM-Omni-ckpt/0918_GRM_omni_lang_dpo_3k_temperature_0_7_final"
# Checkpoint passed to --inference_model below. To evaluate a different
# checkpoint, comment this line out and enable one of the candidates around it.
EVAL_MODEL='/data//hf_models/models/Qwen2.5-Omni-7B'

# EVAL_MODEL="/data//GRM-Omni-v1/GRM-Omni-ckpt/0922_GRM_omni_lang_dpo_20k_All_Step_final"
# EVAL_MODEL="/data//GRM-Omni-v1/GRM-Omni-ckpt/0922_GRM_omni_lang_sft_140k_mix_All_step_final"
# EVAL_MODEL="/data//GRM-Omni-ckpt/0923_grm_omni_dpo_mix2_29k_300_step_not_merged/final"
# EVAL_MODEL="/data//GRM-Omni-ckpt/0924_grm_image_dpo_6k_final"
# EVAL_MODEL="/data//GRM-Omni-ckpt/0923_grm_omni_dpo_mix2_29k_All_step_final"
# EVAL_MODEL="/data//GRM-Omni-ckpt/0925_grm_lang_dpo_high_score_final"

# BENCHDIR="/data//GRM-Omni-v1/dataset/testing/benchmark/language/rewardbench_v1/processed/"
# BENCH="rewardbench"
# # OUTDIR="/data//GRM-Omni-v1/results/language/rewardbench_v2"
# OUTDIR="/data//GRM-Omni-v1/925_test/language/rewardbench_dpo"

# BENCHDIR="/data//GRM-Omni-v1/dataset/testing/benchmark/image/vl_rewardbench/processed/"
# BENCH="vl_rewardbench"
# # OUTDIR="/data//GRM-Omni-v1/results/language/vl_rewardbench"
# OUTDIR="/data//GRM-Omni-v1/922_test/image/vl_rewardbench_dpo_v3"

# BENCHDIR="/data//GRM-Omni-v1/dataset/testing/benchmark/video/genai_bench/processed"
# BENCH="genai_video"
# # OUTDIR="/data//GRM-Omni-v1/results/language/genai_bench_video"
# OUTDIR="/data//GRM-Omni-v1/922_test/video/genai_bench_video_dpo_v2"

# Active benchmark selection (audio). Exactly one BENCH/BENCHDIR/OUTDIR group
# in this section should be uncommented at a time.
# NOTE: the audio benchmark is meant to be run with --batch_size 1; check the
# value passed to the inference command at the bottom of this script.
BENCH="audio_bench"
BENCHDIR="/data//GRM-Omni-v1/dataset/testing/benchmark/audio/align_anything/processed"
OUTDIR="/data//GRM-Omni-v1/922_test/language/audio_bench_raw"

# BENCHDIR="/data//GRM-Omni-v1/dataset/testing/benchmark/image/multimodal_bench/processed"
# BENCH="multimodal_rewardbench"
# OUTDIR="/data//GRM-Omni-v1/922_test/image/multimodal_rewardbench_dpo_v2"

# BENCHDIR="/data//GRM-Omni-v1/dataset/testing/benchmark/image/genai_bench/processed"
# BENCH="genai_image"
# OUTDIR="/data//GRM-Omni-v1/922_test/image/genai_bench_image_dpo_v2"

# BENCHDIR="/data//GRM-Omni-v1/dataset/testing/benchmark/ppe_bench/processed/"
# BENCH="ppe_bench"
# OUTDIR="/data//GRM-Omni-v1/922_test/language/ppe_bench_dpo_v2"

# BENCHDIR="/data//GRM-Omni-v1/dataset/testing/benchmark/language/rmb/processed"
# BENCH="rmb"
# OUTDIR="/data//GRM-Omni-v1/922_test/language/rmb_dpo_bon_harmlessness/"

# Run pairwise-judge inference for the selected model and benchmark.
#
# Reads:   EVAL_MODEL, BENCHDIR, BENCH, OUTDIR (assigned earlier in this script).
# Outputs: one summary line on stdout, then whatever src/inference.py prints.
# Returns: the exit status of the python command, which runs last.

# Abort with a clear message when a selection is missing (for example after
# every candidate assignment above was commented out) instead of launching
# python with an empty model, benchmark or output path.
: "${EVAL_MODEL:?EVAL_MODEL is not set; enable one of the EVAL_MODEL assignments}"
: "${BENCHDIR:?BENCHDIR is not set; enable one of the benchmark groups}"
: "${BENCH:?BENCH is not set; enable one of the benchmark groups}"
: "${OUTDIR:?OUTDIR is not set; enable one of the benchmark groups}"

# The note next to the audio benchmark settings says audio must be evaluated
# with batch size 1; every other benchmark keeps the previous value of 2.
case "$BENCH" in
  audio_bench) batch_size=1 ;;
  *) batch_size=2 ;;
esac

# All expansions are quoted so paths containing spaces or glob characters are
# passed through as single, unmodified arguments.
echo "benchmark:${BENCH} output:${OUTDIR}"
python src/inference.py \
    --inference_model "$EVAL_MODEL" \
    --inference_model_modality "omni" \
    --benchmark_dir "$BENCHDIR" \
    --benchmark "$BENCH" \
    --method "pairwise_judge" \
    --manner "direct" \
    --criteria_step 1 \
    --batch_size "$batch_size" \
    --workers 8 \
    --tensor_parallel 1 \
    --top_p 0.8 \
    --temperature 0.7 \
    --seed 123 \
    --output_dir "$OUTDIR"
