_target_: lambo.models.lm_elements.LanguageModel
name: mlm_transformer
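# `_target_` is the class Hydra instantiates for this surrogate; `name` is presumably
# the identifier used to select or log this model variant.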

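# Architecture of the underlying masked language model encoder. The field meanings
# below are a best-effort reading of the lambo.models.shared_elements.Transformer
# constructor and should be treated as assumptions:
#   embed_dim     - token embedding width
#   latent_dim    - width of the latent (bottleneck) representation
#   ff_dim        - hidden width of the position-wise feed-forward layers
#   num_heads     - attention heads per transformer layer
#   num_layers    - number of transformer layers
#   out_dim       - output feature width
#   p             - dropout probability (0.0 disables dropout)
#   max_len_delta - extra sequence-length slack beyond ${task.max_len}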
model:
  _target_: lambo.models.shared_elements.Transformer
  tokenizer: ${tokenizer}
  max_len: ${task.max_len}
  embed_dim: 64
  latent_dim: 16
  ff_dim: 256
  num_heads: 2
  num_layers: 4
  out_dim: 16
  p: 0.0
  max_len_delta: 0

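# Training hyperparameters for MLM pre-training. Interpretations below are inferred
# from the field names and are not guaranteed by the source:
#   batch_size - minibatch size
#   num_epochs - maximum number of training epochs
#   patience   - early-stopping patience, in epochs
#   lr         - optimizer learning rate
#   max_shift  - maximum random token shift used for augmentation (0 disables it)
#   mask_ratio - fraction of tokens masked per sequence for the MLM objective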
batch_size: 32
num_epochs: 128
patience: 32
lr: 1.0e-3
max_shift: 0
mask_ratio: 0.125
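
# A minimal instantiation sketch (assumptions: this file is composed into the full
# Hydra config so that ${tokenizer} and ${task.max_len} resolve, and the LanguageModel
# and Transformer constructors accept these fields as keyword arguments):
#
#   from hydra.utils import instantiate
#   surrogate = instantiate(cfg)  # recursively builds the Transformer, then LanguageModel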