# Model, reward-model and tokenizer settings for a run that covers two
# objectives: "harmless" and "humor".
# NOTE(review): the code that loads this file is not visible from here. Key
# names are kept exactly as they were because the loader presumably looks them
# up verbatim -- confirm before renaming anything.

# Base models, one entry per objective. The values are paths relative to the
# working directory.
# NOTE(review): these look like checkpoints written by earlier runs under
# ./logs_morlhf -- confirm they exist before launching.
base_model_name_1: ./logs_morlhf/rlhf_harmless/batch_400
base_model_name_2: ./logs_morlhf/rlhf_humor/batch_400
# No third base model is configured. The explicit null is the same value the
# previous bare "key:" form parsed to; it just makes the intent visible.
base_model_name_3: null

# Reward models: harmless first, then humor, matching the base-model entries
# above by name.
# NOTE(review): the values look like Hugging Face Hub repo IDs -- confirm.
reward_peft_path1: Ray2333/gpt2-large-harmless-reward_model
reward_peft_path2: mohameddhiab/humor-no-humor

# NOTE(review): "tokenier" looks like a typo for "tokenizer". The key is left
# untouched because the loader presumably reads this exact spelling; rename it
# only together with the consuming code.
tokenier_name: meta-llama/Llama-2-7b-hf
