# Training hyper-parameters for a Hugging Face `TrainingArguments` object.
# NOTE(review): presumably the config loader instantiates the class named
# under `()` with the remaining keys as keyword arguments — confirm against
# the loader in use.
(): transformers.training_args.TrainingArguments
# NOTE(review): left null here; presumably supplied or overridden by the
# caller at load time — confirm.
output_dir: null
overwrite_output_dir: true
num_train_epochs: 1
per_device_train_batch_size: 4
# NOTE(review): an earlier note here read "6 x 8 gpus = 48 batch size", which
# does not match the per-device value of 4 above. With 8 GPUs and gradient
# accumulation disabled (see below) the effective batch size would be
# 4 x 8 = 32 — confirm the GPU count and the intended global batch size.
# gradient_accumulation_steps: 4
per_device_eval_batch_size: 4
# Evaluate on a step schedule (every `eval_steps` optimizer steps).
eval_strategy: "steps"
# dataloader_num_workers: 8
# bf16: true
save_steps: 500
logging_steps: 10
eval_steps: 50
warmup_steps: 100
# Written with an explicit decimal point on purpose: YAML 1.1 loaders such as
# PyYAML resolve a bare `5e-5` as the *string* "5e-5" rather than a float.
learning_rate: 5.0e-5
# Keep only the most recent checkpoint on disk.
save_total_limit: 1
optim: "paged_adamw_8bit"
