#!/usr/bin/env bash
#
# Run the `openrlhf.cli.serve_rm_vllm_chosen` module via `python -m`.
#
# Usage:
#   [REWARD_MODEL_PATH=...] [POLICY_MODEL_PATH=...] [JUDGE_MODEL_PATH=...] \
#   [VALUE_HEAD_PREFIXS=...] bash <this-script> [extra module arguments...]
#
# Environment (all optional; when unset or empty the original hard-coded
# value is used, so a bare invocation builds the same command line as before):
#   REWARD_MODEL_PATH   value passed to --reward_pretrain
#                       (default: reward_model_path)
#   VALUE_HEAD_PREFIXS  value passed to --value_head_prefixs (default: score)
#   POLICY_MODEL_PATH   value passed to --pretrain
#                       (default: policy_model_path)
#   JUDGE_MODEL_PATH    value passed to --LLM_judge
#                       (default: judge_model_path)
#
# Any positional arguments given to this script are appended unchanged after
# the four options above.
#
# NOTE(review): the default paths look like placeholders rather than real
# model locations -- confirm and override them before a real run.
# NOTE(review): flag spellings (including `--value_head_prefixs`) are kept
# exactly as they were; verify them against the module's argument parser.

# Every expansion is double-quoted so paths containing spaces or glob
# characters reach the module as a single argument each.
python -m openrlhf.cli.serve_rm_vllm_chosen \
    --reward_pretrain "${REWARD_MODEL_PATH:-reward_model_path}" \
    --value_head_prefixs "${VALUE_HEAD_PREFIXS:-score}" \
    --pretrain "${POLICY_MODEL_PATH:-policy_model_path}" \
    --LLM_judge "${JUDGE_MODEL_PATH:-judge_model_path}" \
    "$@"