
# DPO fine-tuning run configuration.
# Flat mapping of hyper-parameters; keys are grouped by concern below.
# Key order carries no meaning in a YAML mapping.

# --- Model -----------------------------------------------------------------
model_type: auto
model_name_or_path: alignment-handbook/zephyr-7b-sft-full
torch_dtype: bfloat16
bf16: true
load_in_4bit: false

# --- Data and output -------------------------------------------------------
# NOTE(review): the "ratio_0" suffix appears in both dataset_path and
# output_dir; presumably they must be changed together -- confirm.
dataset_path: ultrafeedback_tied/train/mixed_data_ratio_0.jsonl
output_dir: Mistral-7B-v1/sft_then_dpo/zephyr_dpo_tie_ratio_0
max_prompt_length: 512
max_length: 1024
remove_unused_columns: false
dataloader_num_workers: 1

# --- Preference loss -------------------------------------------------------
# Listed loss_type options: "sigmoid", "hinge", "ipo", "kto_pair", "tie_loss"
loss_type: sigmoid
dpo_beta: 0.01
# NOTE(review): dpo_theta / trainable_theta are project-specific keys; they
# presumably only matter for the tie-aware loss -- verify in the trainer.
dpo_theta: -0.5
trainable_theta: false

# --- Low-rank (LoRA) adaptation ---------------------------------------------
# low_rank_training is off here. NOTE(review): the lora_* values below are
# presumably only read when it is enabled -- verify against the training script.
low_rank_training: false
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.05

# --- Optimisation ------------------------------------------------------------
do_train: true
seed: 42
num_train_epochs: 3
per_device_train_batch_size: 8
per_device_eval_batch_size: 2
gradient_accumulation_steps: 1
gradient_checkpointing: true
optim: adamw_torch
weight_decay: 0
# 5e-7, kept in plain decimal notation because exponent notation is not
# read as a float by every YAML loader.
learning_rate: 0.0000005
lr_scheduler_type: cosine
warmup_ratio: 0.1

# --- Logging -----------------------------------------------------------------
report_to: tensorboard
logging_strategy: steps
logging_steps: 1
logging_first_step: true
# Evaluation is left unset. If it is ever enabled here, keep the value quoted:
# an unquoted `no` is parsed as boolean false by YAML 1.1 loaders.
# evaluation_strategy: "no"

# --- Checkpointing -----------------------------------------------------------
# NOTE(review): with step-based saving and an interval of 99999 steps,
# intermediate checkpoints are presumably never written in a normal run --
# confirm that this is intended.
save_strategy: steps
# Alternative: save once per epoch instead.
# save_strategy: epoch
save_steps: 99999
save_total_limit: 1