# Hydra defaults list: `base` is composed first and this file (`_self_`) last,
# so the values defined below take precedence over those coming from `base`.
defaults:
  - base
  - _self_

# Training length and sequence budget.
# num_train_epochs and max_seq_length are forwarded to trainer_args below.
num_train_epochs: 1
# NOTE(review): presumably -1 means "no step cap, train by epochs" as in
# Hugging Face TrainingArguments — confirm where this key is consumed.
max_steps: -1
max_seq_length: 4096

# Checkpointing and evaluation.
# NOTE(review): this stanza was previously labelled "only save best model",
# but nothing visible here selects a best checkpoint; `save_strategy: epoch`
# asks for a checkpoint per epoch. If best-model-only saving is intended,
# confirm it is configured in `base`.
save_strategy: epoch
# NOTE(review): presumably unused while save_strategy is `epoch` (in Hugging
# Face TrainingArguments, save_steps applies to the `steps` strategy) — confirm.
save_steps: 1
# Forwarded to trainer_args.do_eval below.
do_eval: true

# NOTE(review): presumably the distributed process-group timeout in seconds
# (as in Hugging Face TrainingArguments.ddp_timeout); at this magnitude it is
# effectively "never time out" — confirm where this key is consumed.
ddp_timeout: 180000000

# Batch sizing.
# NOTE(review): train_batch_size looks like the global effective batch and
# per_device_train_batch_size the per-device micro-batch, with gradient
# accumulation derived from the two elsewhere — confirm against the consumer.
train_batch_size: 64
per_device_train_batch_size: 4

# Optimisation settings.
# Gradient checkpointing is enabled; the kwargs select the non-reentrant
# variant (use_reentrant: false).
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
learning_rate: 2.0e-4
# NOTE(review): if these keys reach Hugging Face TrainingArguments, the plain
# `constant` schedule has no warmup phase, so warmup_ratio below would be
# ignored; `constant_with_warmup` is the variant that honours it. Left
# unchanged to avoid altering training behaviour — confirm the intent.
lr_scheduler_type: constant
warmup_ratio: 0.1

# Name used for this run's trainer logs. Not referenced by any interpolation
# visible here, so it is presumably consumed by `base` or the launcher —
# confirm.
trainer_log_name: sft

# Forwarded to trainer_args.packing below.
packing: true

# Keyword arguments for the SFT config object; `_target_` names the class to
# build. Every other value interpolates the top-level key of the same name, so
# overriding a top-level key flows through to this mapping.
trainer_args:
  _target_: hydra_utils.trl.SFTConfig
  # Evaluation and sample construction.
  do_eval: ${do_eval}
  packing: ${packing}
  max_seq_length: ${max_seq_length}
  # Training length.
  num_train_epochs: ${num_train_epochs}

# Trainer object; `_target_` names the class to build. No other arguments are
# set in this block, so any further ones presumably come from `base` or the
# call site — confirm.
trainer:
  _target_: hydra_utils.trl.SFTTrainer