# Optimizer settings: `type` names the optimizer and the remaining keys are its
# hyperparameters (presumably forwarded to the optimizer constructor; confirm
# against the consuming code).
type: adamw

# Scientific-notation floats carry an explicit decimal point (`1.0e-4`, not
# `1e-4`): YAML 1.1 loaders such as PyYAML resolve the dotless form as a
# string instead of a float.
lr: 1.0e-4
weight_decay: 1.0e-4
eps: 1.0e-6
amsgrad: false
# NOTE(review): no beta1 key is visible in this file; presumably the consumer
# falls back to its own default for it. Confirm.
beta2: 0.95

# Scheduler type; choose one of: cosine, inverse_sqrt, cosine_with_restarts,
# cosine_with_min_lr, warmup_stable_decay.
scheduler_type: cosine

# NOTE(review): presumably the warmup length expressed as a fraction of
# max_iters (0.1 = first 10%). Confirm against the scheduler code.
warmup: 0.1
# `???` looks like the OmegaConf/Hydra mandatory-value marker, i.e. this must
# be supplied by an override or a parent config before use. Confirm the loader.
max_iters: ???