# Training hyperparameters. How each key is consumed is not visible from this
# file; notes on semantics below are assumptions to confirm against the loader.

# Loop sizes (presumably outer epochs, inner epochs per outer epoch, and
# samples per batch; TODO confirm).
num_epochs: 10
num_inner_epochs: 1000
training_batch_size: 1000

# Keep the decimal point in the mantissa: YAML 1.1 loaders such as PyYAML
# resolve a bare `1e-4` as the string "1e-4" rather than the float 0.0001.
learning_rate: 1.0e-4
clip_max_norm: 10.0
# Plain string; presumably mapped to the optax.adamw optimizer by the consumer.
optimizer: optax.adamw
scheduler: cosine

# EMA settings; ema_decay presumably only applies when ema is true (confirm).
ema: false
ema_decay: 0.99