
# Exponential-moving-average (EMA) settings: `use_ema` is the on/off flag and
# `ema` holds the options for the `ModelEMA` type named below.
# NOTE(review): the unit of `warmups` is not visible in this file
# (presumably update steps) — confirm against the consumer.
use_ema: true
ema:
  type: ModelEMA
  decay: 0.9999
  warmups: 2000


# Run length and gradient-clipping threshold.
# NOTE(review): `epoches` looks like a misspelling of "epochs", but the key is
# presumably read verbatim by the config loader — do not rename it here
# without changing the consumer as well.
epoches: 72
# Presumably the maximum gradient norm used for clipping — confirm in the trainer.
clip_max_norm: 0.1


# Optimizer of type `AdamW`. Each entry under `params` selects a subset of
# parameters with a regex (presumably matched against parameter names —
# confirm in the loader) and overrides `lr` and/or `weight_decay` for it.
# Keys an entry does not set presumably fall back to the defaults at the
# bottom of this mapping.
optimizer:
  type: AdamW
  params:
    # Names containing "backbone" but neither "norm" nor "bn": reduced lr.
    - params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'
      lr: 0.00001
    # Names containing "backbone" and also "norm" or "bn": reduced lr,
    # weight decay set to zero.
    - params: '^(?=.*backbone)(?=.*(?:norm|bn)).*$'
      weight_decay: 0.0
      lr: 0.00001
    # Names containing "encoder" or "decoder" and also "norm", "bn" or
    # "bias": weight decay set to zero, lr not overridden.
    - params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
      weight_decay: 0.0

  # Optimizer-level values (10x the backbone lr set above).
  lr: 0.0001
  betas: [0.9, 0.999]
  weight_decay: 0.0001


# Learning-rate schedule: type `MultiStepLR` with one milestone and gamma 0.1.
# NOTE(review): the single milestone (1000) is larger than `epoches` (72) in
# this file; if milestones are counted in epochs, the decay step would never
# be reached — confirm that an effectively constant schedule is intended.
lr_scheduler:
  type: MultiStepLR
  milestones: [1000]
  gamma: 0.1


# Warmup schedule of type `LinearWarmup`.
# NOTE(review): the unit of `warmup_duration` is not visible in this file
# (presumably optimizer iterations rather than epochs) — confirm.
lr_warmup_scheduler:
  type: LinearWarmup
  warmup_duration: 2000