# Model hyper-parameters for the `mae` model entry.
# NOTE(review): key meanings below are inferred from their names; the code
# that consumes them is not part of this file — confirm against the model.
model_config:
  mae:
    # Presumably the fraction of input patches hidden from the encoder.
    mask_ratio: 0.75
    mae_vit:
      # Architecture identifier; presumably selects a base-size ViT encoder
      # with 16x16 patches — verify against the model registry.
      arch: mae_base_patch16
      # Extra arguments for the selected architecture.
      params:
        # Presumably normalizes target pixels per patch before the loss.
        norm_pix_loss: true
        # Decoder size: 512 / 16 = 32 dims per head, assuming the embedding
        # is split evenly across heads — TODO confirm.
        decoder_embed_dim: 512
        decoder_depth: 8
        decoder_num_heads: 16

# Optimizer selection and its arguments.
optimizer:
  type: adamw
  params:
    # Scaled learning rate: 1.0e-4 * batch_size / 256, with
    # training.batch_size = 4096 in this file -> 1.0e-4 * 16 = 1.6e-3.
    # Keep this value in sync by hand if training.batch_size changes.
    lr: 1.6e-3
    # NOTE(review): presumably toggles optimizer state sharding — confirm.
    use_oss: false
    weight_decay: 0.05
    # Presumably separate weight-decay values applied to normalization-layer
    # parameters and to biases (both disabled here) — verify against the
    # optimizer builder.
    wd_norm: 0.0
    wd_bias: 0.0
    eps: 1.0e-08
    # Two-element list: presumably the (beta1, beta2) moment coefficients.
    betas:
      - 0.9
      - 0.95

# Learning-rate schedule: cosine annealing with a warmup phase.
scheduler:
  type: cosine_annealing
  params:
    # Interpolation reference: takes its value from training.max_update
    # (144000 in this file), so the two cannot drift apart.
    T_max: ${training.max_update}
    eta_min: 0.0
    use_warmup: true
    # NOTE(review): presumably the LR multiplier at the start of warmup
    # (0.0 = start from zero) — confirm against the scheduler code.
    warmup_factor: 0.0
    # 12000 of the 144000 configured updates, i.e. 1/12 of the run.
    warmup_iterations: 12000

# Training-loop settings.
training:
  # Also referenced by scheduler.params.T_max via ${training.max_update}.
  max_update: 144000
  # The optimizer lr comment derives the learning rate from this value;
  # update optimizer.params.lr when changing it.
  batch_size: 4096
  # NOTE(review): presumably the number of iterations accumulated per
  # optimizer update (1 = no accumulation) — confirm.
  iter_per_update: 1
  find_unused_parameters: false
  tensorboard: true
  # NOTE(review): the units of the three intervals below (epochs vs.
  # updates/iterations) are not visible in this file — verify in the trainer.
  evaluation_interval: 5
  checkpoint_interval: 1
  log_interval: 200
  run_type: train
  # Explicit null: no IoU type is set for this run.
  iou_type: null
