#!/usr/bin/env bash
#
# Launches train_dpo.py through the DeepSpeed launcher.
#
# Usage: edit the placeholder paths below, then run this script from the
# directory that contains train_dpo.py, ds_config.json and ./data/.
#
# All path variables are expanded inside double quotes so that paths containing
# spaces or glob characters reach the launcher as a single argument.

# Abort on the first failing command and on any use of an unset variable.
set -eu

# NOTE(review): LOGFILE_PATH is assigned but never referenced in this script;
# kept so existing edits/workflows that rely on it are not broken.
LOGFILE_PATH="modify/to/your/path"

# File handed to the launcher via --hostfile.
hostfile='modify/to/your/path'

# NOTE(review): SEED is assigned but not forwarded to train_dpo.py here.
# Presumably it was meant to be passed as a seed flag -- confirm against
# train_dpo.py's argument parser before wiring it in.
SEED=1234

# Value passed as --model_name_or_path.
BASE_MODEL="modify/to/your/path"

# Value passed as --output_dir.
# (The "MEDEL" spelling is kept as-is to avoid breaking anything that greps or
# edits this variable by name.)
PATH_TO_DPO_MEDEL="modify/to/your/path"

# Value passed as --data_cache_dir.
DATA_CACHE_DIR="modify/to/your/path"

# Print which deepspeed executable will be used; fail early with a clear
# message if it is not on PATH instead of failing later at launch.
if ! command -v deepspeed; then
	printf 'error: deepspeed not found in PATH\n' >&2
	exit 127
fi

deepspeed --hostfile="$hostfile" train_dpo.py \
	--data_file ./data/test_data.json5 \
	--model_name_or_path "$BASE_MODEL" \
	--output_dir "$PATH_TO_DPO_MEDEL" \
	--loss_average response \
	--model_max_length 2048 \
	--num_train_epochs 1 \
	--per_device_train_batch_size 2 \
	--gradient_accumulation_steps 2 \
	--save_strategy epoch \
	--save_total_limit 2 \
	--learning_rate 5e-7 \
	--lr_scheduler_type cosine \
	--adam_beta1 0.9 \
	--adam_beta2 0.98 \
	--adam_epsilon 1e-8 \
	--max_grad_norm 1.0 \
	--weight_decay 1e-4 \
	--warmup_ratio 0.0 \
	--logging_steps 1 \
	--gradient_checkpointing True \
	--deepspeed ds_config.json \
	--bf16 True \
	--tf32 True \
	--data_cache_dir "$DATA_CACHE_DIR" \
	--use_dpo_loss True \
	--use_sft_loss False \
	--use_kl_loss False \
	--overwrite_data_cache True \
	--dpo_beta 0.1 \
	--kl_topk 50