# Training configuration for the run written to
# llama3-8b/llama3-8b_dpo_tie_ratio_0 (see output_dir below).
# Keys are grouped by concern; key order carries no meaning in a YAML mapping.

# --- Model and numeric precision ---
model_type: auto
model_name_or_path: kykim0/llama3-8b-ultrachat-sft
torch_dtype: bfloat16
bf16: true
load_in_4bit: false

# --- Data and output locations ---
dataset_path: ultrafeedback_tied/train/mixed_data_ratio_0.jsonl
output_dir: llama3-8b/llama3-8b_dpo_tie_ratio_0
max_prompt_length: 512
max_length: 1024
dataloader_num_workers: 1
remove_unused_columns: false

# --- Preference loss ---
# Loss names noted by the original author (presumably the values the training
# script accepts for loss_type -- confirm against the script):
#   "sigmoid", "hinge", "ipo", "kto_pair", "tie_loss"
loss_type: sigmoid
dpo_beta: 0.01
dpo_theta: -0.5
trainable_theta: false

# --- Low-rank adapter settings ---
# NOTE(review): low_rank_training is off while lora_* values are still set;
# presumably they are only read when it is enabled -- confirm.
low_rank_training: false
lora_rank: 16
lora_alpha: 16
lora_dropout: 0

# --- Optimization schedule ---
do_train: true
seed: 42
num_train_epochs: 3
per_device_train_batch_size: 4
per_device_eval_batch_size: 2
gradient_accumulation_steps: 4
gradient_checkpointing: true
optim: adamw_torch
# Written in plain decimal on purpose: some YAML 1.1 loaders read an exponent
# form such as 1e-6 as a string rather than a float.
learning_rate: 0.000001
lr_scheduler_type: cosine
warmup_ratio: 0.1
weight_decay: 0

# --- Logging and checkpointing ---
report_to: tensorboard
logging_strategy: steps
logging_first_step: true
logging_steps: 1
save_strategy: steps
save_steps: 100
save_total_limit: 1
# Evaluation is left disabled. If this line is re-enabled, keep the value
# quoted: a bare `no` is resolved to boolean false by YAML 1.1 loaders.
# evaluation_strategy: "no"

