#!/usr/bin/env bash
# This script relies on Bash-only features ([[ ]], read -d, here-docs with
# expansion), so pin the interpreter instead of depending on the caller's shell.
# Trace each command (after expansion) to stderr so the launched command line
# is visible in the job log.
set -x

#######################################
# Abort the script if the command that ran immediately before this call
# failed. Must be invoked directly after the command being checked, because
# it inspects $? on entry.
# Arguments: $1 - human-readable label of the step being checked
# Outputs:   "FAILED <label>" on stdout when the previous command failed
# Returns:   0 when the previous command succeeded; otherwise exits the
#            shell with status 1
#######################################
checkSuccess() {
    # Grab the caller's last exit status before any other command overwrites it.
    local prev_status=$?

    if (( prev_status == 0 )); then
        return 0
    fi

    echo "FAILED $1"
    exit 1
}

# Output path handed to the evaluator via --output_path.
OUTPUT_PATH=./datas/URM-LLaMa-3-8B/rm_evaluation

# Batch inference command: the Python module to run followed by its arguments.
# Stored as an array (rather than a here-doc string expanded unquoted) so each
# argument stays exactly one word: no word splitting or glob expansion, and
# values such as OUTPUT_PATH may safely contain spaces.
batch_infer_commands=(
    openrlhf.cli.reward_evaluate
    --eval_task rm
    --pretrain /shared/VauAI/maxiaoya/checkpoints/URM-LLaMa-3-8B-1
    --bf16
    --prompt_max_len 4096
    --max_new_tokens 1024
    --dataset HuggingFaceH4/ultrafeedback_binarized
    --input_key prompt
    --apply_chat_template
    --chosen_key chosen
    --rejected_key rejected
    --zero_stage 0
    --micro_batch_size 4
    --output_path "$OUTPUT_PATH"
    --dataset_split test_prefs
    --value_head_prefix score
)
# NOTE(review): the original carried a bare "value_head" note here, presumably
# an alternative value for --value_head_prefix -- confirm against the checkpoint.

# Run batch inference.
# Alternative launcher kept from the original (disabled):
#   deepspeed --module "${batch_infer_commands[@]}"
CUDA_VISIBLE_DEVICES=0 python -m "${batch_infer_commands[@]}"
# Must directly follow the python invocation: checkSuccess inspects its $?.
checkSuccess "Reward model evaluation"
