# @package _global_

# Defaults list: names other configs to compose into this one.
# NOTE(review): `data` is presumably resolved by the config framework's
# defaults-list lookup; confirm where that config lives in the config tree.
defaults:
  - data

# Training hyperparameters.
# NOTE(review): key names match Hugging Face `TrainingArguments`; confirm
# that is the consumer before relying on the semantics described below.
training:
  # Constant schedule with zero warmup steps.
  lr_scheduler_type: constant
  warmup_steps: 0

  # Presumably 4 samples x 4 accumulation steps = 16 samples per optimizer
  # step on each device; verify against the training loop.
  per_device_train_batch_size: 4
  gradient_accumulation_steps: 4
  per_device_eval_batch_size: 4
  # Keep the explicit decimal point: YAML 1.1 loaders (e.g. PyYAML) resolve
  # a bare `5e-5` as the string "5e-5" rather than a float.
  learning_rate: 5.0e-5

  # Same interval for both settings; with max_steps at 16000 this presumably
  # means a save and an eval at steps 8000 and 16000.
  save_steps: 8000
  eval_steps: 8000

  max_steps: 16000  # 8k steps may already be enough.
