# Flat mapping of training-run settings, grouped by concern.
# NOTE(review): the key names look like PyTorch Lightning `Trainer`
# arguments - confirm against the code that loads this file.

# Hardware and distribution
accelerator: 'gpu'
devices: 4
# NOTE(review): presumably selects DeepSpeed ZeRO stage 2 with offload;
# verify against the strategy names the consumer accepts.
strategy: 'deepspeed_stage_2_offload'
# Kept quoted so the value is always read as a string.
precision: '16-mixed'

# Optimisation schedule
max_epochs: 10
accumulate_grad_batches: 8
gradient_clip_val: 1.0

# Output and logging
# `${result_dir}` is stored here as a literal placeholder.
# NOTE(review): presumably interpolated by the config loader - confirm
# where `result_dir` is defined.
default_root_dir: '${result_dir}'
log_every_n_steps: 10