#harmless, helpful, humor

# CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch morlhf.py \
#     --preference1 0.5 --preference2 0.5 \
#     --base_model_name logs_trl/merged_model_assistant \
#     --reward_names 'helpful,harmless' \
#     --exp_type 'assistant' \
#     --wandb_name 'rlhf_helpful_harmless'

# CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch morlhf.py \
#     --preference1 0.7 --preference2 0.3 \
#     --base_model_name logs_trl/merged_model_assistant \
#     --reward_names 'helpful,harmless' \
#     --exp_type 'assistant' \
#     --wandb_name 'rlhf_helpful_harmless_0.7_0.3'

# CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch morlhf.py \
#     --preference1 0.3 --preference2 0.7 \
#     --base_model_name logs_trl/merged_model_assistant \
#     --reward_names 'helpful,harmless' \
#     --exp_type 'assistant' \
#     --wandb_name 'rlhf_helpful_harmless_0.3_0.7'

# CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch morlhf.py \
#     --preference1 0.1 --preference2 0.9 \
#     --base_model_name logs_trl/merged_model_assistant \
#     --reward_names 'helpful,harmless' \
#     --exp_type 'assistant' \
#     --wandb_name 'rlhf_helpful_harmless_0.1_0.9'

# CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch morlhf.py \
#     --preference1 0.9 --preference2 0.1 \
#     --base_model_name logs_trl/merged_model_assistant \
#     --reward_names 'helpful,harmless' \
#     --exp_type 'assistant' \
#     --wandb_name 'rlhf_helpful_harmless_0.9_0.1'

# CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch morlhf.py \
#     --preference1 0.2 --preference2 0.8 \
#     --base_model_name logs_trl/merged_model_assistant \
#     --reward_names 'helpful,harmless' \
#     --exp_type 'assistant' \
#     --wandb_name 'rlhf_helpful_harmless_0.2_0.8'

# CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch morlhf.py \
#     --preference1 0.4 --preference2 0.6 \
#     --base_model_name logs_trl/merged_model_assistant \
#     --reward_names 'helpful,harmless' \
#     --exp_type 'assistant' \
#     --wandb_name 'rlhf_helpful_harmless_0.4_0.6'

#######################################
# Launch one morlhf.py run through `accelerate launch`.
#
# Every run uses the same base model, reward names ('helpful,harmless'),
# experiment type, GPU set (CUDA_VISIBLE_DEVICES=4,5,6,7) and main process
# port (29501); only the two preference values change between runs.
# The W&B run name is built from the two values so that it always matches
# the weights actually passed on the command line.
#
# Arguments:
#   $1 - value passed as --preference1
#   $2 - value passed as --preference2
# Returns:
#   The exit status of `accelerate launch`.
#######################################
launch_morlhf() {
  local pref1=$1
  local pref2=$2

  CUDA_VISIBLE_DEVICES=4,5,6,7 accelerate launch --main_process_port 29501 morlhf.py \
    --preference1 "${pref1}" --preference2 "${pref2}" \
    --base_model_name logs_trl/merged_model_assistant \
    --reward_names 'helpful,harmless' \
    --exp_type 'assistant' \
    --wandb_name "rlhf_helpful_harmless_${pref1}_${pref2}"
}

# The runs execute one after another. No fail-fast is enabled on purpose:
# a failed run does not prevent the following one from starting.
launch_morlhf 0.6 0.4
launch_morlhf 0.8 0.2


