# Runtime toggles and EMA settings. The meaning of each key is defined by the
# consuming training code, which is not visible in this file.
# NOTE(review): presumably enables automatic mixed precision — confirm.
use_amp: true
# NOTE(review): presumably activates the `ema` section below — confirm.
use_ema: true
ema:
  type: ModelEMA
  decay: 0.9996
  # NOTE(review): units of `warmups` and `start` (steps vs. epochs) are not
  # shown here — confirm against the ModelEMA implementation.
  warmups: 250
  start: 0


# NOTE(review): presumably the total number of training epochs. The spelling
# `epoches` is the key as written; do not rename without checking the consumer.
epoches: 36
# NOTE(review): presumably the maximum gradient norm used for clipping — confirm.
clip_max_norm: 0.1


optimizer:
  type: AdamW
  # Per-group overrides. Each entry's `params` value is a regex; presumably the
  # consumer matches it against model parameter names — confirm.
  params:
    # Matches strings that contain "backbone" and do not contain "norm";
    # this group gets its own (lower) learning rate.
    - params: '^(?=.*backbone)(?!.*norm).*$'
      lr: 0.00004
    # Matches strings that contain "encoder" or "decoder" and also contain
    # "norm" or "bn"; this group has weight decay set to zero.
    - params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0

  # NOTE(review): presumably the defaults for every parameter not overridden
  # by a group above — confirm against the optimizer builder.
  lr: 0.0008
  betas: [0.9, 0.999]
  weight_decay: 0.000025


# Learning-rate schedule: a single milestone with a 0.1 multiplier.
# NOTE(review): if milestones are counted in epochs, 500 is beyond the 36
# configured under `epoches`, so the decay would never trigger — confirm
# whether this is intentional.
lr_scheduler:
  type: MultiStepLR
  milestones: [500]
  gamma: 0.1


# Learning-rate warmup settings.
# NOTE(review): the unit of `warmup_duration` (steps vs. epochs) is not shown
# here — confirm against the LinearWarmup implementation.
lr_warmup_scheduler:
  type: LinearWarmup
  warmup_duration: 125