# Feature switches, written as canonical lowercase YAML booleans.
# NOTE(review): `use_amp` presumably enables automatic mixed precision in the
# training loop — confirm against the code that reads this file.
use_amp: true
# NOTE(review): `use_ema` presumably toggles use of the `ema` section of this
# file — confirm.
use_ema: true
# Settings for the weight-averaging helper named by `type`.
# NOTE(review): the keys other than `type` are presumably forwarded to the
# ModelEMA constructor as keyword arguments — confirm.
ema:
  type: ModelEMA
  decay: 0.9999
  # NOTE(review): units of `warmups` and `start` (updates vs. epochs) are not
  # visible in this file — confirm before tuning.
  warmups: 1000
  start: 0


# Training length. The key is spelled `epoches` (sic); do not "fix" the
# spelling here — presumably the consumer looks it up under this exact name.
epoches: 72
# NOTE(review): presumably the maximum gradient norm used for clipping —
# confirm against the training loop.
clip_max_norm: 0.1


# Optimizer configuration.
# `params` is a list of per-group overrides. Each entry's own `params` value is
# a regular expression selecting the group's members.
# NOTE(review): the pattern is presumably matched against model parameter
# names, with unmatched parameters using the defaults at the bottom — confirm.
optimizer:
  type: AdamW
  params:
    # Matches names that contain "backbone" and do not contain "norm".
    # This group's lr is one tenth of the default lr below.
    - params: '^(?=.*backbone)(?!.*norm).*$'
      lr: 0.00002
    # Matches names that contain "encoder" or "decoder" and also contain
    # "norm" or "bn". Weight decay is disabled for this group.
    - params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0
  # Optimizer-level settings (defaults for anything a group does not override).
  lr: 0.0002
  betas: [0.9, 0.999]
  weight_decay: 0.0001


# Learning-rate schedule: the scheduler named by `type`, with one milestone
# and a decay factor `gamma`.
# NOTE(review): the only milestone (500) is larger than `epoches` (72). If the
# scheduler is stepped once per epoch, the `gamma` decay never fires — confirm
# this is intentional.
lr_scheduler:
  type: MultiStepLR
  milestones: [500]
  gamma: 0.1


# Learning-rate warmup settings for the scheduler named by `type`.
# NOTE(review): units of `warmup_duration` (iterations vs. epochs) are not
# visible in this file — confirm against the LinearWarmup implementation.
lr_warmup_scheduler:
  type: LinearWarmup
  warmup_duration: 500