# Trace every command to stderr as it runs, so the exact launch line ends up in the job log.
set -o xtrace

#######################################
# Abort the script if the command that ran immediately before this call failed.
# Must be invoked directly after the command being checked, because it reads $?.
# Arguments: $1 - label for the step; printed after "FAILED " on failure.
# Outputs:   writes "FAILED <label>" to stdout on failure, nothing on success.
# Returns:   0 when the previous command succeeded; otherwise exits with status 1.
#######################################
checkSuccess() {
    # Grab the caller's exit status first; any later command would overwrite $?.
    local last_status=$?

    if (( last_status == 0 )); then
        return 0
    fi

    echo "FAILED $1"
    exit 1
}

# Input file handed to the scoring module and the file its results are written to.
INPUT_PATH=datas/ultrafeedback_binarized-1/llama-3.1-8b-instruct-test-prefs_sft_generations.jsonl
OUTPUT_PATH=datas/ultrafeedback_binarized-1/llama-3.1-8b-instruct-test-prefs_sft_proxy_0.jsonl

# Batch-inference command: the module name followed by its arguments.
# Held in an array so every argument reaches the launcher as exactly one word.
# This avoids re-splitting a heredoc string, which would break on paths with
# whitespace or glob characters, and avoids `read -d ''`, which always returns
# non-zero at end of input.
batch_infer_commands=(
    openrlhf.adv_utils.average_proxy_scores
    --pretrain Skywork/Skywork-Reward-Llama-3.1-8B-v0.2
    --bf16
    --prompt_max_len 4096
    --max_new_tokens 1024
    --apply_chat_template
    --zero_stage 0
    --micro_batch_size 4
    --input_file "$INPUT_PATH"
    --output_file "$OUTPUT_PATH"
    --value_head_prefix score
)

# Run batch inference.
# Alternative launcher, kept for reference:
# deepspeed --module "${batch_infer_commands[@]}"
# NOTE(review): two GPUs are exposed but no --nproc_per_node is passed, so
# torchrun presumably starts a single worker — confirm this is intended.
CUDA_VISIBLE_DEVICES=4,7 torchrun --master_port=29503 -m "${batch_infer_commands[@]}"
# Must stay directly after the torchrun line: checkSuccess inspects its exit status.
checkSuccess "Reward model evaluation"
