# @package _global_
# Hydra defaults list: each entry picks an option from the named config group.
# The leading "/" makes the group path absolute rather than relative to this
# file's own location in the config tree.
defaults:
  - /trainer: default
  - /loader: default
  - /dataset: thepile
  - /optimizer: adamw
  - /scheduler: cosine_warmup
  # Several options are selected from the callbacks group at once.
  - /callbacks:
      - base
      - checkpoint
      - norm_monitor

# Names the value to monitor during training and the direction to optimize it.
# NOTE(review): presumably read by checkpointing / early-stopping logic that is
# not visible in this file - confirm against the training code.
train:
  monitor: val/loss
  mode: min  # presumably "lower is better" for the monitored value - confirm

# Task settings. How each value is interpreted is decided by the consuming
# code, which is not visible in this file.
task:
  # NOTE(review): presumably a registry key for the language-modeling task
  # - confirm against the task registry.
  _name_: lm
  loss: cross_entropy
  # Metrics requested for this task, listed by name.
  torchmetrics:
    - perplexity
    - num_tokens

# Both entries are explicitly set to null.
# NOTE(review): presumably this means no separate encoder/decoder is configured
# for this experiment - confirm against the model-building code.
encoder: null
decoder: null
