# Trainer configuration: names the trainer handler and lists the arguments
# supplied to it.
# NOTE(review): the keys under `args` look like Hugging Face
# `TrainingArguments` fields -- confirm against the code that consumes this.
handler: FinetuneTrainer
args:
  per_device_train_batch_size: 8
  per_device_eval_batch_size: 16
  gradient_accumulation_steps: 4
  # Explicit mantissa dot: YAML 1.1 loaders (e.g. PyYAML) read a bare `1e-5`
  # as a string, while `1.0e-5` is a float under every loader.
  learning_rate: 1.0e-5
  bf16: true
  bf16_full_eval: true
  logging_steps: 5
  output_dir: ${paths.output_dir}
  # References `trainer.args.output_dir`; presumably this file is mounted
  # under the `trainer` config group, making this a self-reference to the
  # `output_dir` key above -- confirm.
  logging_dir: ${trainer.args.output_dir}/logs
  report_to: tensorboard
  # Real YAML null. The bare word `None` is not a null in YAML; it loads as
  # the string "None", which a consumer checking `is not None` would treat
  # as a set (truthy) value.
  ddp_find_unused_parameters: null
  gradient_checkpointing: false
  optim: paged_adamw_32bit
  # Quoted on purpose: an unquoted `no` is a boolean false in YAML 1.1.
  save_strategy: 'no'
  save_only_model: true
  weight_decay: 0.00
  do_train: true
  do_eval: true
  eval_on_start: true
  eval_strategy: epoch
  num_train_epochs: 10
  seed: 0