# Optimizer and learning-rate schedule settings.
# NOTE(review): the exact meaning of each key is defined by the code that
# consumes this file, which is not visible here; confirm any hedged note
# below against that code.

# Booleans are written in canonical lowercase (`true` / `false`) so they
# resolve identically under YAML 1.1 and YAML 1.2 loaders.
zero_wd_1d_param: false
base_lr_scale_num_shards: true
base_lr: 0.0001
cosine_after_warmup: false
# Written as `1.0e-6` rather than `1e-6`: YAML 1.1 loaders (e.g. PyYAML) only
# resolve exponent notation to a float when the mantissa contains a `.`;
# without it the value is loaded as the string "1e-6".
cosine_end_lr: 1.0e-6
warmup_start_lr: 1.0e-6
# NOTE(review): 0.0 presumably means no warmup phase — confirm.
warmup_epochs: 0.0
lr_policy: cosine
max_epoch: 200
weight_decay: 0.05
optimizing_method: adam
# NOTE(review): 0 presumably disables gradient clipping — confirm.
grad_clip_val: 0
grad_clip_strategy: value
