# Model identifiers. Both values look like placeholders that must be replaced
# before use -- NOTE(review): confirm how the consuming script resolves them
# (local path vs. hub identifier); the loader is not visible in this file.
model:
  name: "base_model_path"  # presumably the base model to start from
  new_model: "trained_model_path"  # presumably the name/path for the trained result

# Dataset location. The value looks like a placeholder -- replace it before use.
# NOTE(review): the expected format (file, directory, dataset id) depends on the
# consuming script, which is not visible here.
dataset:
  path: "dataset_path"

# Training hyperparameters. Key names appear to mirror Hugging Face
# `TrainingArguments` fields -- NOTE(review): confirm against the script that
# loads this file, since the consumer is not visible here.
training:
  output_dir: "output_directory_path"  # looks like a placeholder; replace before use
  num_train_epochs: 5
  per_device_train_batch_size: 4
  per_device_eval_batch_size: 4
  gradient_accumulation_steps: 1

  # Optimizer and learning-rate schedule.
  # Small floats are deliberately spelled out in decimal notation: exponent
  # forms without a dot (e.g. `1e-5`) are read as strings by some YAML 1.1
  # loaders, so keep this style when editing.
  optim: "adamw_torch"
  adam_beta2: 0.95
  adam_epsilon: 0.00001
  max_grad_norm: 1.0
  lr_scheduler_type: "cosine"
  learning_rate: 0.000003

  # presumably the maximum sequence length in tokens -- TODO confirm unit
  max_length: 2048

  # Checkpoint / logging cadence, expressed in steps.
  save_steps: 5000
  logging_steps: 100

  # Canonical lowercase booleans (`true`/`false`) parse the same as the
  # capitalized forms but are unambiguous across YAML 1.1/1.2 tooling.
  # Exactly one of the two half-precision flags is enabled here.
  group_by_length: true
  bf16: true
  fp16: false

  warmup_steps: 100
  weight_decay: 0.1
