
# Expose GPU indices 0-7 to every process launched from this script.
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export CUDA_VISIBLE_DEVICES

# Point HOME at the GRM-Omni src directory for all child processes.
HOME="/home/export/base/ycsc_1/1/online1//GRM-Omni/src"
export HOME

# Force-stop Ray first; the new head node (usage stats disabled, 48 CPUs
# declared) is started only when the stop command succeeds.
ray stop --force \
  && ray start \
    --head \
    --disable-usage-stats \
    --num-cpus=48

# Model locations. To switch a model, replace the active assignment with one
# of the commented alternatives listed above it.
CKPT_ROOT="/data//GRM-Omni-ckpt"
HF_MODEL_ROOT="/data//hf_models/models"

# Alternatives:
#   INF_MODEL="${CKPT_ROOT}/GRM-Omni-SFT-CKPT500/final"
#   INF_MODEL="${CKPT_ROOT}/GRM-Omni-Lang-SFT"
#   INF_MODEL="${HF_MODEL_ROOT}/Qwen3-32B-AWQ"
INF_MODEL="${CKPT_ROOT}/GRM-Omni-sft-0912"

REFINE_MODEL="${HF_MODEL_ROOT}/Qwen3-32B-AWQ"

# Alternatives:
#   SCORE_MODEL="${HF_MODEL_ROOT}/Skywork-Reward-Llama-3.1-8B"
#   SCORE_MODEL="${HF_MODEL_ROOT}/Skywork-Reward-V2-Llama-3.1-8B"
SCORE_MODEL="${HF_MODEL_ROOT}/Skywork-Reward-V2-Llama-3.1-8B-40M"


# python src/inference.py \
#     --inference_model $INF_MODEL \
#     --inference_model_modality "language" \
#     --refinement_model $REFINE_MODEL \
#     --refinement_model_modality "language" \
#     --score_model $SCORE_MODEL \
#     --score_model_modality "language" \
#     --method "meta_reward" \
#     --manner "direct" \
#     --criteria_n 3 \
#     --sampling_n 1 \
#     --batch_size 32 \
#     --workers 1 \
#     --temperature 0.1 \
#     --seed 132 \
#     --input_file "/data//GRM-Omni-v1/dataset/testing/benchmark/language/rewardbench_v1/chat_hard.jsonl" \
#     --output_dir "/data//GRM-Omni-v1/results/chat_meta_reward_grm_omni_sft"

# Launch src/inference.py (path is relative to the current working directory)
# with the "meta_reward" method in "stepwise" manner on the RewardBench v1
# processed chat split, writing to the omni_results directory given below.
#
# The three model variables are double-quoted so a path containing spaces or
# glob characters is passed as a single argument. The ":?" form aborts before
# python starts if a variable is unset or empty; unquoted, an empty value
# would vanish and the following "--flag" would be consumed as the model path.
python src/inference.py \
    --inference_model "${INF_MODEL:?INF_MODEL must be set}" \
    --inference_model_modality "omni" \
    --refinement_model "${REFINE_MODEL:?REFINE_MODEL must be set}" \
    --refinement_model_modality "language" \
    --ranking_model "${SCORE_MODEL:?SCORE_MODEL must be set}" \
    --ranking_model_modality "language" \
    --method "meta_reward" \
    --manner "stepwise" \
    --criteria_step 10 \
    --batch_size 48 \
    --workers 8 \
    --tensor_parallel 1 \
    --top_p 0.8 \
    --temperature 0.1 \
    --seed 132 \
    --input_file "/data//GRM-Omni-v1/dataset/testing/benchmark/language/rewardbench_v1/processed/chat.jsonl" \
    --output_dir "/data//GRM-Omni-v1/omni_results/chat_meta_reward_0914"
