# Training arguments
# Run control: checkpoint handling, output location, and which phases run.
resume_from_checkpoint: false
save_total_limit: 10
run_name: ""
output_dir: out
do_train: true
do_eval: true

# Optimisation: step budget, learning-rate schedule, and optimizer settings.
max_steps: 20000
# The explicit !!float tag forces a float; YAML 1.1 loaders may otherwise read
# a dot-less exponent literal such as 1e-3 as a string.
learning_rate: !!float 1e-3
lr_scheduler_type: warmup_stable_decay
lr_scheduler_kwargs:
  num_decay_steps: 4000
  min_lr_ratio: 0.01
# NOTE(review): presumably a fraction of max_steps -- confirm with the trainer.
warmup_ratio: 0.1
weight_decay: 0.01
optim: adamw_torch_fused
adam_beta2: 0.98
adam_epsilon: !!float 1e-12  # tagged for the same reason as learning_rate

# Logging, evaluation, and checkpoint cadence.
logging_steps: 50
eval_strategy: steps
# NOTE(review): values below 1 are presumably interpreted as a ratio of the
# total training steps rather than an absolute step count -- confirm.
eval_steps: 0.1
save_steps: 0.1
remove_unused_columns: false
eval_on_start: false

# Batch sizes and metric inputs.
per_device_train_batch_size: 512
per_device_eval_batch_size: 1024
gradient_accumulation_steps: 1
include_inputs_for_metrics: true

# Compilation and numeric precision.
torch_compile: false
bf16: true
tf32: true

# Data-loading behaviour.
accelerator_config:
  dispatch_batches: false
  split_batches: true
ignore_data_skip: true
dataset_kwargs:
  skip_prepare_dataset: true
