# Optimizer / learning-rate schedule settings.
# NOTE(review): nothing in this file shows how these keys are consumed; the
# per-key notes below are assumptions to confirm against the training code.

# Identifier assembled from the sibling keys `optimizer`, `learning_rate` and
# `scheduler` via `${.key}` interpolation (looks like OmegaConf/Hydra
# relative-interpolation syntax -- confirm the loader resolves it).
name: "${.optimizer}_${.learning_rate}_${.scheduler}"
optimizer: "adamw"
# Presumably the (beta1, beta2) coefficients for the optimizer -- confirm.
betas: [0.9, 0.99]
# Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML) only
# resolve exponent notation to a float when the mantissa contains a ".", so a
# bare `3e-4` would be loaded as the string "3e-4".
learning_rate: 3.0e-4
weight_decay: 0.0
# Unit (steps vs. epochs) is not visible here -- TODO confirm with consumer.
warmup: 0
scheduler: "cosine"
# Presumably a gradient-clipping threshold -- confirm whether it is a norm or
# a value bound in the training code.
grad_clip: 1
