# Optimizer and learning-rate schedule settings.
# NOTE(review): `defaults` looks like a Hydra-style defaults list that composes
# `optim_defaults` with the keys below -- confirm against the config loader.
defaults:
  - optim_defaults

# Booleans are written in canonical lowercase form (`true`/`false`).
# presumably turns off weight decay for 1-D parameters -- verify in the optimizer builder
zero_wd_1d_param: true
# presumably scales base_lr by the number of shards -- verify
base_lr_scale_num_shards: true
base_lr: 0.0001

# Learning-rate schedule: warmup from `warmup_start_lr`, cosine policy ending
# at `cosine_end_lr`. Exact semantics depend on the consumer -- TODO confirm.
cosine_after_warmup: true
# Exponent floats carry an explicit mantissa dot (`1.0e-6`, not `1e-6`):
# YAML 1.1 resolvers such as stock PyYAML load the dot-less form as a string.
cosine_end_lr: 1.0e-6
warmup_start_lr: 1.0e-6
warmup_epochs: 30.0
lr_policy: cosine
max_epoch: 200

# Optimizer choice and its hyper-parameters.
# NOTE(review): how `momentum` is consumed when the method is adamw is not
# visible from this file -- confirm.
momentum: 0.9
weight_decay: 0.05
optimizing_method: adamw
