# Training-run settings.
# NOTE(review): the key names match PyTorch Lightning `Trainer` arguments;
# presumably this mapping is forwarded to the Trainer. Confirm with the loader.

# Hardware, numeric precision and distribution strategy.
accelerator: 'gpu'
devices: 2
precision: '16-mixed'
strategy: 'deepspeed_stage_2_offload'

# Output location. `${result_dir}` is an interpolation placeholder that is not
# defined in the visible part of this file; the config loader is expected to
# resolve it.
default_root_dir: '${result_dir}'

# Run length.
max_epochs: 1
limit_train_batches: 4

# Optimisation.
accumulate_grad_batches: 2
gradient_clip_val: 1.0

# Logging.
# NOTE(review): this interval (25) is larger than limit_train_batches (4);
# confirm that is intended for this run.
log_every_n_steps: 25