# SGD optimizer settings using a cosine learning-rate policy with warmup.
# NOTE(review): `defaults` looks like a Hydra-style defaults list that pulls in
# `optim_defaults` as the base config; confirm against the config loader.
defaults:
  - optim_defaults
optimizing_method: sgd
lr_policy: cosine
# NOTE(review): presumably scales base_lr by the number of shards; confirm.
base_lr_scale_num_shards: true
base_lr: 0.05  # 1 machine
# Written with an explicit decimal point in the mantissa: YAML 1.1 loaders
# such as PyYAML resolve a bare `5e-5` as a string rather than a float.
weight_decay: 5.0e-5
# NOTE(review): warmup_epochs is a float while max_epoch is an integer;
# presumably fractional warmup lengths are allowed. Confirm with the consumer.
warmup_epochs: 35.0
max_epoch: 256
warmup_start_lr: 0.01
# NOTE(review): presumably controls whether 1-D parameters get zero weight
# decay; verify against the optimizer construction code.
zero_wd_1d_param: false
momentum: 0.9
dampening: 0.0
nesterov: true