# Config composition list (Hydra-style `defaults`; assumes this file is loaded
# by Hydra — confirm). Entries are applied in the order listed.
defaults:
  # Selects the `lpe_mlp_gine_moses` option from the `encoder` config group.
  - encoder: lpe_mlp_gine_moses
  # Selects the `moses` option from the `dataset` config group.
  - dataset: moses
  # `_self_` is listed last, so under Hydra's merge order the keys defined in
  # this file take precedence over the same keys coming from the groups above.
  - _self_

# Both values are unset (null) by default.
# NOTE(review): presumably `root` is a data/output directory and `checkpoint`
# a path to a saved model to load, supplied at launch time — confirm against
# the script that consumes this config.
root: null
checkpoint: null

# Training
# Floats in scientific notation are written with an explicit decimal point and
# a signed exponent (e.g. 2.0e-4). YAML 1.1 loaders such as PyYAML's default
# resolver only recognise that form as a float; a bare `2e-4` is loaded as the
# string "2e-4" there.
seed: 0
lr_scheduler: 'cosine'
lr: 2.0e-4
min_lr: 1.0e-4
# NOTE(review): loads as a float (20000.0), not an int — confirm the consumer
# casts it wherever an integer iteration count is required.
lr_decay_iters: 2.0e+4
weight_decay: 1.0e-4
gradient_norm: 1.0
batch_size: 512
# NOTE(review): loads as a float (20000.0), not an int — same caveat as
# `lr_decay_iters`.
num_steps: 2.0e+4
num_warmup_steps: 400
num_workers: 4
# NOTE(review): presumably intervals measured in training steps — confirm.
log_after: 100
val_after: 2000
kl_weight: 1.0e-6
# A weight of 0.0 / dropout of 0.0: both are zero by default.
equi_weight: 0.0
dropout: 0.0

# Misc
# All three are unset (null) by default.
# NOTE(review): presumably Weights & Biases project / entity / run-name
# settings, with null meaning "not configured" — confirm how the training
# script treats null here.
wandb_project: null
wandb_entity: null
wandb_name: null