#!/bin/bash
#
# Launch a DeepSpeed fine-tuning run of train_decoderonly_hf.py with the
# hyperparameters configured below.
#
# All paths (data, DeepSpeed config, training script, output directory) are
# relative, so run this script from the directory that contains
# train_decoderonly_hf.py and ds_config_hf.json.

# Abort on errors and on unset variables so a typo in a variable name cannot
# silently produce an empty path or flag value.
set -euo pipefail

# --- Data -------------------------------------------------------------------
data_name=ultrachat1k_harmbenchpos100neg100/llama3_format
data_dir="../data/finetune_data/${data_name}"

root_dir=.

# --- Model ------------------------------------------------------------------
model_name=llama3.1   # only used as a tag in the output directory name
model_dir=meta-llama/Llama-3.1-8B-Instruct
tokenizer_path=meta-llama/Llama-3.1-8B-Instruct

# --- Loss / method hyperparameters --------------------------------------------
loss_type=DPO

theta_gd=0.01
dpo_alpha=0.3
dpo_beta=1.0 # 1.0 for DPO_2 and 5.0 for DPO_1

# --- Optimisation -------------------------------------------------------------
max_epoch=4
max_length=1536
seed=12
lr=5e-6
batch_size=6   # value passed as --batch_size
grad_accum=2   # value passed as --gradient_accumulation

# --- Launcher -----------------------------------------------------------------
gpu_ids=0,1,2      # comma-separated device list passed to `deepspeed --include`
master_port=20900

# Effective batch size used in the run tag. With the defaults this is
# 6 * 2 * 3 = 36, i.e. the "bs36" suffix of the output directory.
# NOTE(review): assumes --batch_size is per-GPU and that the "bs" tag means
# batch_size * gradient_accumulation * number of GPUs -- confirm.
IFS=',' read -r -a gpu_list <<< "${gpu_ids}"
num_gpus=${#gpu_list[@]}
effective_bs=$(( batch_size * grad_accum * num_gpus ))

method_params="alpha${dpo_alpha}_beta${dpo_beta}_theta${theta_gd}_bs${effective_bs}"

# Output directory; the name encodes the main hyperparameters of the run.
save_dir="./hf_save/${data_name}_${loss_type}/${model_name}_lr${lr}_maxlen${max_length}_seed${seed}_max${max_epoch}_${method_params}"

# Arguments for train_decoderonly_hf.py. Kept in an array so every value stays
# a single, correctly quoted word and no line-continuation backslashes are
# needed.
# --pretrained_model_path, --ckpt_file and --fp16 are passed with empty values.
# NOTE(review): presumably the trainer treats an empty value as "unset/false"
# -- confirm against train_decoderonly_hf.py.
train_args=(
  --ds_config="${root_dir}/ds_config_hf.json"
  --train_path="${data_dir}/train.json"
  --valid_path="${data_dir}/dev.json"
  --model_dir="${model_dir}"
  --tokenizer_path="${tokenizer_path}"
  --pretrained_model_path=
  --batch_size="${batch_size}" --val_batch_size=1
  --gradient_accumulation="${grad_accum}"
  --incontext_learn=0
  --savedmodel_path="${save_dir}" --ckpt_file=
  --max_epochs="${max_epoch}" --warmup_steps=0 --warmup_ratio=0
  --learning_rate="${lr}" --fp16=
  --seed="${seed}"
  --max_length="${max_length}" --eval_step=75 --save_step=75
  --lr_decay=linear --patience=1
  --ema=0 --ema_start_epoch=0 --loss_type="${loss_type}"
  --theta_GD="${theta_gd}"
  --dpo_alpha="${dpo_alpha}" --dpo_beta="${dpo_beta}"
)

# TOKENIZERS_PARALLELISM is set only in the environment of the launched command.
TOKENIZERS_PARALLELISM=false deepspeed \
  --include "localhost:${gpu_ids}" --master_port="${master_port}" \
  train_decoderonly_hf.py "${train_args[@]}"
