# ----------------------------New---------------------------------
# CUDA_VISIBLE_DEVICES=2,3 accelerate launch eval_ppo_single_model.py  \
#     --reward_names 'helpful,humor,harmless' \
#     --exp_type 'assistant' \
#     --wandb_name 'eval_ori'

# CUDA_VISIBLE_DEVICES=0,1 accelerate launch --main_process_port 29400\
#     eval_ppo_single_model.py  \
#     --base_model_name logs_trl/merged_model\
#     --reward_names 'helpful,humor,harmless' \
#     --exp_type 'assistant' \
#     --wandb_name 'eval_sft_merged'


# CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch --main_process_port 29501 eval_ppo_single_model.py  \
#     --base_model_name logs_morlhf/rlhf_helpful_harmless_pref0.5_0.5/batch_200\
#     --reward_names 'helpful,harmless' \
#     --exp_type 'assistant' \
#     --wandb_name 'eval_helpful_harmless_batch_200' 

# CUDA_VISIBLE_DEVICES=4,5,6,7 accelerate launch --main_process_port 29501 eval_ppo_single_model.py  \
#     --base_model_name logs_morlhf/rlhf_helpful_harmless_0.7_0.3_pref0.7_0.3/batch_200\
#     --reward_names 'helpful,harmless' \
#     --exp_type 'assistant' \
#     --wandb_name 'eval_helpful_harmless_0.7_0.3_batch_200' 

# CUDA_VISIBLE_DEVICES=4,5,6,7 accelerate launch --main_process_port 29502 eval_ppo_single_model.py  \
#     --base_model_name logs_morlhf/rlhf_helpful_harmless_0.3_0.7_pref0.3_0.7/batch_200\
#     --reward_names 'helpful,harmless' \
#     --exp_type 'assistant' \
#     --wandb_name 'eval_helpful_harmless_0.3_0.7_batch_200' 


# CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch --main_process_port 29502 eval_ppo_single_model.py  \
#     --base_model_name logs_morlhf/rlhf_helpful_harmless_0.1_0.9_pref0.1_0.9/batch_200\
#     --reward_names 'helpful,harmless' \
#     --exp_type 'assistant' \
#     --wandb_name 'eval_helpful_harmless_0.1_0.9_batch_200' 


# CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch --main_process_port 29502 eval_ppo_single_model.py  \
#     --base_model_name logs_morlhf/rlhf_helpful_harmless_0.9_0.1_pref0.9_0.1/batch_200\
#     --reward_names 'helpful,harmless' \
#     --exp_type 'assistant' \
#     --wandb_name 'eval_helpful_harmless_0.9_0.1_batch_200' 

# Launch eval_ppo_single_model.py through `accelerate launch` for one
# helpful/harmless model directory under logs_morlhf/ (its batch_200 subdir).
#
# The weight pair is passed once and expanded into both the model path and
# the wandb run name, so the two can no longer drift apart through a partial
# copy-paste edit.
#
# Arguments:
#   $1 - helpful weight, spelled exactly as in the directory name (e.g. 0.2)
#   $2 - harmless weight, spelled exactly as in the directory name (e.g. 0.8)
# Environment (optional):
#   EVAL_GPUS - value assigned to CUDA_VISIBLE_DEVICES (default: 0,1,2,3)
#   EVAL_PORT - value passed to --main_process_port (default: 29502)
# Returns:
#   The exit status of the `accelerate launch` command.
run_helpful_harmless_eval() {
  local helpful_weight=$1
  local harmless_weight=$2
  # e.g. "0.2_0.8"; appears twice in the model directory name and once in
  # the wandb run name.
  local tag="${helpful_weight}_${harmless_weight}"

  CUDA_VISIBLE_DEVICES="${EVAL_GPUS:-0,1,2,3}" accelerate launch \
    --main_process_port "${EVAL_PORT:-29502}" \
    eval_ppo_single_model.py \
    --base_model_name "logs_morlhf/rlhf_helpful_harmless_${tag}_pref${tag}/batch_200" \
    --reward_names 'helpful,harmless' \
    --exp_type 'assistant' \
    --wandb_name "eval_helpful_harmless_${tag}_batch_200"
}

# The runs execute one after another. Nothing here checks the exit status of
# a run, so a failed run does not by itself prevent the following ones.
run_helpful_harmless_eval 0.2 0.8
run_helpful_harmless_eval 0.4 0.6
run_helpful_harmless_eval 0.6 0.4
run_helpful_harmless_eval 0.8 0.2