#!/usr/bin/env bash
# This script relies on bash-only syntax ([[ ... ]]), so bash is requested
# explicitly instead of whatever shell the caller happens to provide.

# Trace every command to stderr as it is executed.
set -x

# Abort the script when the immediately preceding command failed.
#
# Must be called directly after the command being checked: the function reads
# that command's exit status from $? on entry.
#
# Arguments: $1 - label of the step, included in the failure message
# Outputs:   "FAILED <label>" on stderr when the previous status is non-zero
# Returns:   status 0 when the previous command succeeded; otherwise exits the
#            shell with status 1
checkSuccess() {
    # Capture $? first, before any other command can overwrite it.
    local status=$?
    if (( status != 0 )); then
        printf 'FAILED %s\n' "$1" >&2
        exit 1
    fi
}


# Define the batch inference command: the Python module to launch followed by
# its command-line flags.
# Stored as an array so each argument reaches the launcher as exactly one
# word, with no dependence on heredoc line continuation or on unquoted
# word splitting / globbing of a flat string.
batch_infer_commands=(
    openrlhf.cli.filter_most_diff
    --eval_task rm
    --pretrain Skywork/Skywork-Reward-Llama-3.1-8B-v0.2
    --bf16
    --prompt_max_len 4096
    --max_new_tokens 1024
    --dataset HuggingFaceH4/ultrafeedback_binarized
    --input_key prompt
    --apply_chat_template
    --chosen_key chosen
    --rejected_key rejected
    --zero_stage 0
    --micro_batch_size 4
    --dataset_split train_prefs
    --value_head_prefix score
)
# NOTE(review): "value_head" looks like an alternative value for
# --value_head_prefix that was kept as a reminder — confirm.
# Run batch inference.
# Alternative launcher (disabled): deepspeed --module "${batch_infer_commands[@]}"
# CUDA_VISIBLE_DEVICES=1 exposes only the GPU with index 1 to the job.
CUDA_VISIBLE_DEVICES=1 torchrun --master_port=29505 -m "${batch_infer_commands[@]}"
# checkSuccess reads $? of the previous command, so it must stay directly
# after the torchrun invocation.
checkSuccess "Reward Scoring"
