# Launch bon_eval.py in "inference" mode, with the process restricted to CUDA
# device 2 via CUDA_VISIBLE_DEVICES (set for this one command only, not
# exported to the rest of the shell).
#
#   input  : processbench.json
#   output : prometheus_output_stage2
#   models : PRIME-RL/EurusPRM-Stage2 as --model_path,
#            Qwen/Qwen2.5-Math-7B-Instruct as --ref_model_path
#
# NOTE(review): --tokenizer_path points at EurusPRM-Stage1 while --model_path
# is EurusPRM-Stage2 -- presumably both stages share a tokenizer; confirm.
# The meaning of --coef is defined by bon_eval.py and is not visible here.
CUDA_VISIBLE_DEVICES=2 \
  python bon_eval.py --mode inference \
    --input_file processbench.json --output_file prometheus_output_stage2 \
    --model_path PRIME-RL/EurusPRM-Stage2 \
    --ref_model_path Qwen/Qwen2.5-Math-7B-Instruct \
    --tokenizer_path PRIME-RL/EurusPRM-Stage1 \
    --batch_size 1 --coef 0.001