#!/usr/bin/env bash
# Environment setup and Ray bootstrap for the inference run below.

# Raise the per-user process limit for this shell and its children.
# If the limit cannot be raised, bash prints an error and the script continues.
ulimit -u 65536

# Expose GPUs 0-7 to every process started from this script.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# NOTE(review): HOME is redirected into the project source tree, so anything
# that resolves "~" or "$HOME" will read/write there — confirm this is intended.
export HOME="/data//GRM-Omni-v1/src"

# Restart the local Ray head node. Each step is checked explicitly so that a
# failed stop/start aborts the script instead of silently falling through to
# the inference step without the cluster this script tried to create.
if ! ray stop --force; then
    echo "error: 'ray stop --force' failed; aborting" >&2
    exit 1
fi
if ! ray start --head --disable-usage-stats --num-cpus=32; then
    echo "error: 'ray start --head' failed; aborting" >&2
    exit 1
fi

# Checkpoint used as the inference model. Alternative checkpoints are kept
# here, commented out, so one can be swapped in by hand:
#   INF_MODEL="/data//GRM-Omni-v1/GRM-Omni-ckpt/0922_GRM_omni_lang_sft_140k_mix_All_step_final"
#   INF_MODEL="/data//GRM-Omni-ckpt/0923_grm_omni_dpo_mix2_29k_150_step_not_merged/final"
INF_MODEL='/data//GRM-Omni-ckpt/0923_grm_omni_dpo_mix2_29k_300_step_not_merged/final'

# Models handed to the inference script as its refinement and ranking models.
REFINE_MODEL='/data//hf_models/models/Qwen3-32B-AWQ'
SCORE_MODEL='/data//hf_models/models/Skywork-Reward-V2-Llama-3.1-8B-40M'

# Input JSONL file and the value forwarded as --criteria_step.
infer_data_file='/data//GRM-Omni-v1/dataset/testing/benchmark/language/rewardbench_v1/processed/safety.jsonl'
criteria_step=1

# Run the inference script with the models and input file configured above.
# The script path and --output_dir are relative, so this must be launched from
# the directory that contains src/.
# Every expansion is double-quoted so a path containing spaces or glob
# characters is passed through as a single, literal argument.
python src/inference.py \
    --inference_model "$INF_MODEL" \
    --inference_model_modality "omni" \
    --refinement_model "$REFINE_MODEL" \
    --refinement_model_modality "language" \
    --ranking_model "$SCORE_MODEL" \
    --ranking_model_modality "language" \
    --method "rule_filter" \
    --manner "direct" \
    --criteria_step "$criteria_step" \
    --batch_size 128 \
    --workers 8 \
    --tensor_parallel 1 \
    --top_p 0.8 \
    --temperature 0.0 \
    --seed 132 \
    --input_file "$infer_data_file" \
    --output_dir "0923_omni_results/0923_filter_rewardbench_safety"
