# Launches two `ppo.py` jobs one after the other through `accelerate launch`.
# Both jobs pass the same --base_model_name (logs_trl/merged_model_summary) and
# the same --exp_type (summary); they differ in --reward_name and --wandb_name,
# and the second job additionally passes --main_process_port 29501.
# No `set -e` is visible in this script, so the second job is started even if
# the first one exits with a non-zero status.
# NOTE(review): only the second launch sets --main_process_port; confirm
# whether the first launch is meant to rely on the launcher's default.

# Run `accelerate launch` with CUDA_VISIBLE_DEVICES=0,1 set for that command only.
# Arguments: "$@" - forwarded unchanged to `accelerate launch`.
# Returns:   the exit status of `accelerate launch`.
launch_on_gpus() {
    CUDA_VISIBLE_DEVICES=0,1 accelerate launch "$@"
}

# Job 1: reward "summary", logged to wandb as "rlhf_summary".
launch_on_gpus ppo.py \
    --base_model_name logs_trl/merged_model_summary \
    --reward_name "summary" \
    --exp_type "summary" \
    --wandb_name "rlhf_summary"

# Job 2: reward "faithful", logged to wandb as "rlhf_faithful".
launch_on_gpus --main_process_port 29501 ppo.py \
    --base_model_name logs_trl/merged_model_summary \
    --reward_name "faithful" \
    --exp_type "summary" \
    --wandb_name "rlhf_faithful"