# Hydra-style defaults list (this file also uses `_target_` entries).
# NOTE(review): presumably selects the `cdiv` option of the `target_models`
# config group - confirm against the config directory layout.
defaults:
  - target_models: cdiv

# Training length in epochs (read by the training code, not shown here).
epochs: 50
# NOTE(review): presumably the fraction of total training steps spent in
# learning-rate warm-up (0.1 = first 10%) - confirm against the trainer.
warm_up_steps_ratio: 0.1

# Reference learning rate. Written with an explicit decimal point because
# YAML 1.1 loaders such as plain PyYAML only resolve exponent floats that
# contain a "."; a bare `2e-3` would be loaded as the string "2e-3" there.
base_lr: 2.0e-3
batch_size: 64  # alternative value noted by the author: 16
# NOTE(review): -1 looks like a sentinel value - confirm its meaning in the
# code that reads this key.
random_target_models: -1
num_workers: 8
# Base training settings used to scale the learning rate; rarely changed.
base_batch_size: 64
base_world_size: 8

# NOTE(review): presumably forwarded to the optimizer as its weight-decay
# coefficient - confirm where the training code reads it.
weight_decay: 0.01


# Optimizer specification: `_target_` holds the dotted path of the optimizer
# class (torch.optim.AdamW); the remaining keys are its settings.
# NOTE(review): learning rate and weight decay are not set here; presumably
# the training code supplies them from the top-level keys - confirm.
optimizer:
  _target_: torch.optim.AdamW
  betas: [0.9, 0.999]

# Learning-rate scheduler specification: `_target_` is the dotted path of the
# scheduler factory in theia.lr_schedulers.
lr_scheduler:
  _target_: theia.lr_schedulers.get_constant_lrs_with_linear_warm_up
  # Written with an explicit decimal point so YAML 1.1 loaders such as plain
  # PyYAML resolve it as a float rather than the string "1e-2".
  # NOTE(review): presumably the warm-up starts at this fraction of the target
  # learning rate - confirm against the scheduler implementation.
  warm_up_lr_start_factor: 1.0e-2


# Gradient-clipping settings; clipping is switched off while `grad_clip` is
# false.
# NOTE(review): presumably `grad_clip_norm_warmup` is the norm limit applied
# during warm-up and `grad_clip_norm` the limit afterwards - confirm against
# the training loop.
grad_clip: false
grad_clip_norm_warmup: 10.0
grad_clip_norm: 1.0

# Translator training controls.
# NOTE(review): presumably `freeze_translator_start_steps_ratio` is the
# fraction of training steps after which the translator is frozen (only
# relevant when `freeze_translator` is true), and `translator_lr_factor`
# multiplies the learning rate for translator parameters - confirm against
# the training code.
freeze_translator: false
freeze_translator_start_steps_ratio: 0.2
translator_lr_factor: 1.0

# Identifier of the main training loss.
# NOTE(review): `cos_l1` presumably names a cosine + L1 combination - confirm
# against the loss registry in the training code.
main_loss: cos_l1
