#!/usr/bin/env bash
# Settings for the `accelerate launch` of run_debias_reward_models_train.py
# that happens further down in this script.

# --- Hardware ---------------------------------------------------------------
# `devices` is exported as CUDA_VISIBLE_DEVICES and `n_gpu` is handed to
# `accelerate launch --num_processes`; keep the two in sync.
devices=0,1,2,3,4,5,6,7
n_gpu=8
# Single-GPU alternative:
# devices=0
# n_gpu=1
# Port given to `accelerate launch --main_process_port`.
main_process_port=35272

# --- Data and model ---------------------------------------------------------
# NOTE(review): these paths begin with the literal directory name `ROOT`,
# presumably a placeholder for the real storage root -- confirm before running.
# They are relative paths and the launch happens after `cd ./reward_models`.
train_dataset_name='ROOT/saved_data/Skywork-Reward-Preference-80K-v0.2'
# train_dataset_name='ROOT/saved_data/HelpSteer3'
eval_dataset_name="ROOT/benchs/RM-Bench-main/data/total_dataset.json"
# eval_dataset_name="ROOT/saved_data/reward-bench"
# eval_dataset_name=None
base_model='ROOT/saved_llms/Meta-Llama-3.1-8B-Instruct'
# base_model='ROOT/saved_llms/Skywork-Reward-Llama-3.1-8B-v0.2'

# --- Run identity and output ------------------------------------------------
# Passed as --wandb_name (the launch command itself reports to tensorboard).
# NOTE(review): the name mentions "from-SK-v0.2" while the active base_model
# is Meta-Llama-3.1-8B-Instruct -- confirm the name matches the intended run.
wandb_name="DB_Difference-0.1_from-SK-v0.2_Debug22"
# Relative path; resolved after the script changes into ./reward_models.
log_dir="./my_outputs/"

# --- Optimisation -----------------------------------------------------------
learning_rate=2e-6
num_train_epochs=1
# Defined once here (a second, identical assignment used to follow further
# down and would have silently overridden any edit made to this one).
max_length=4096
gradient_accumulation_steps=16
per_device_train_batch_size=1
per_device_eval_batch_size=1
# Precision setting corresponding to the --bf16 flag of the launch command.
bf16=True

# --- Checkpointing and evaluation schedule ----------------------------------
save_strategy=steps
save_steps=601
eval_steps=50
eval_on_start=True

# --- Debiasing --------------------------------------------------------------
# Forwarded as --use_debias / --debias_type / --debias_factor / --debias_task;
# their meaning is defined by the training script.
use_debias=True
debias_type="difference"
debias_factor=0.1
debias_task='length'

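# Manual GPU cleanup, to be run by hand only: force-kills (kill -9) every
# process that currently has an NVIDIA device file open.
# sudo fuser -v /dev/nvidia* | awk '{for(i=1;i<=NF;i++)print "kill -9 " $i;}' | sudo sh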

# Enter the directory that holds the training entry point. Abort if that
# fails: the script name and the --deepspeed config below are relative paths,
# so launching from any other directory would pick up the wrong files.
cd ./reward_models || {
    echo "error: cannot change directory to ./reward_models" >&2
    exit 1
}

# Launch the training script through `accelerate`, restricted to the GPUs in
# ${devices} and using ${n_gpu} processes. Every expansion is quoted so that a
# value containing spaces or glob characters is passed as exactly one argument.
CUDA_VISIBLE_DEVICES="${devices}" accelerate launch \
    --num_processes "${n_gpu}" \
    --main_process_port "${main_process_port}" \
    run_debias_reward_models_train.py \
    --base_model "${base_model}" \
    --wandb_name "${wandb_name}" \
    --log_dir "${log_dir}" \
    --report_to tensorboard \
    --num_train_epochs "${num_train_epochs}" \
    --max_length "${max_length}" \
    --gradient_accumulation_steps "${gradient_accumulation_steps}" \
    --learning_rate "${learning_rate}" \
    --train_dataset "${train_dataset_name}" \
    --eval_dataset "${eval_dataset_name}" \
    --per_device_train_batch_size "${per_device_train_batch_size}" \
    --per_device_eval_batch_size "${per_device_eval_batch_size}" \
    --bf16 "${bf16}" \
    --save_strategy "${save_strategy}" \
    --save_steps "${save_steps}" \
    --eval_steps "${eval_steps}" \
    --eval_on_start "${eval_on_start}" \
    --use_debias "${use_debias}" \
    --debias_type "${debias_type}" \
    --debias_task "${debias_task}" \
    --debias_factor "${debias_factor}" \
    --deepspeed ../deepspeed_configs/deepspeed_1.json