# Model, reward-model and tokenizer settings.
# NOTE(review): presumably consumed by a MORLHF evaluation/merging script
# (the checkpoint paths live under ./logs_morlhf) -- confirm with the loader.

# Local checkpoint directories of the base models.
base_model_name_1: ./logs_morlhf/rlhf_harmless/batch_400
base_model_name_2: ./logs_morlhf/rlhf_helpful/batch_832
# Explicit null: no third base model is set in this configuration.
base_model_name_3: null

# Reward models, one per objective (harmless / helpful).
# NOTE(review): values look like Hugging Face Hub repo ids -- confirm.
reward_peft_path1: Ray2333/gpt2-large-harmless-reward_model
reward_peft_path2: Ray2333/gpt2-large-helpful-reward_model

# NOTE(review): "tokenier" looks like a typo for "tokenizer". The key is kept
# unchanged because the consuming code presumably looks it up by this exact
# name -- rename only together with the loader.
tokenier_name: meta-llama/Llama-2-7b-hf
