# Training-run settings.
# NOTE(review): the key names look like PyTorch Lightning `Trainer` arguments;
# confirm against the code that loads this file.

# Hardware, numeric precision and distribution strategy.
accelerator: 'gpu'
devices: 1
precision: '16-mixed'
# NOTE(review): a DDP strategy variant is selected while `devices` is 1;
# confirm this combination is intentional.
strategy: 'ddp_find_unused_parameters_true'

# Output location. `${result_dir}` is a placeholder, not a literal path; it is
# presumably resolved by the config loader (e.g. OmegaConf/Hydra) — verify.
default_root_dir: '${result_dir}'

# Training length and gradient handling.
max_epochs: 10
accumulate_grad_batches: 8
gradient_clip_val: 1.0

# Logging cadence, in steps.
log_every_n_steps: 25