  # Keyword arguments for Hugging Face `TrainingArguments`.
  # NOTE(review): `()` is presumably the factory key the config loader uses to
  # pick the class to instantiate -- confirm against the loader.
  (): transformers.training_args.TrainingArguments
  # NOTE(review): null here; presumably supplied by the caller at runtime.
  output_dir: null
  overwrite_output_dir: true
  # Per the TrainingArguments docs a positive `max_steps` (set below) overrides
  # `num_train_epochs`, so this value has no effect while `max_steps` is set.
  num_train_epochs: 1
  per_device_train_batch_size: 4
  # NOTE(review): this comment used to read "6 x 8 gpus = 48 batch size", which
  # does not match the per-device value of 4 above (4 x 8 GPUs would be 32).
  # Confirm the intended per-device size and GPU count.
  # gradient_accumulation_steps: 4
  per_device_eval_batch_size: 4
  # NOTE(review): 10 steps looks like a smoke-test setting. It is smaller than
  # `eval_steps` (50), `save_steps` (500) and `warmup_steps` (100) below, so no
  # step-based eval/save interval is reached and the run ends during warmup.
  # Raise or remove `max_steps` for a real training run.
  max_steps: 10
  eval_strategy: "steps"
  # dataloader_num_workers: 8
  # bf16: true
  save_steps: 500
  logging_steps: 10
  eval_steps: 50
  warmup_steps: 100
  # Keep the decimal point: YAML 1.1 loaders (e.g. PyYAML) read a bare `5e-5`
  # as the string "5e-5" rather than a float.
  learning_rate: 5.0e-5
  save_total_limit: 1
  optim: "paged_adamw_8bit"