# @package _global_

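# To execute this experiment, run the command below, replacing `example`
# with this config's name (its file name without the extension):
# python train.py experiment=example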

# Hydra defaults list: selects which option of each config group this
# experiment builds on. Each `override /group: option` entry replaces the
# selection for that group made by the primary config.
defaults:
  - override /data: text8
  - override /model: text8_sfm_gpt_fancy
  - override /callbacks: default
  - override /trainer: default

# all parameters below will be merged with parameters from default configurations set above
# this allows you to overwrite only specified parameters

# Labels for this run; logger.wandb.tags reuses this list via ${tags}.
tags:
  - text8
  - gpt

# Seed value for this experiment.
seed: 12345

# Overrides applied on top of the `/trainer: default` config chosen in `defaults`.
trainer:
  min_epochs: 1
  # Also read by model.scheduler.T_max through ${trainer.max_epochs}.
  max_epochs: 200
  gradient_clip_val: 1.0
  # NOTE(review): if this is gradient accumulation in the usual sense, the
  # effective batch is presumably 4 x data.batch_size per device — confirm.
  accumulate_grad_batches: 4

# Overrides for the model config selected by `/model: text8_sfm_gpt_fancy`.
model:
  optimizer:
    lr: 0.001
  scheduler:
    # Follows trainer.max_epochs (200 in this file unless overridden elsewhere).
    T_max: ${trainer.max_epochs}
  net:
    # Both sizes are interpolated from the `data` section of this file
    # (data.k and data.dim), so they stay in sync with the dataset settings.
    block_size: ${data.k}
    vocab_size: ${data.dim}
  manifold: sphere
  compile: false
  eval_ppl: true
  gpt_nll_eval: true
  # NOTE(review): the unit of the two `*_every` intervals (epochs vs. steps)
  # is not visible from this file — confirm in the model code.
  eval_gpt_nll_every: 5
  eval_ppl_every: 5
  gpt_nll_samples: 512
  # NOTE(review): YAML parses a bare `None` as the string "None", not as null.
  # Confirm the model code expects that string; use `null` if a real None
  # value is intended.
  ot_method: None
  ema: true
  ema_decay: 0.99
  fast_matmul: true
  inference_steps: 20

# Overrides for the `/data: text8` config chosen in `defaults`.
data:
  batch_size: 128
  # Referenced by model.net.block_size via ${data.k}.
  k: 256
  # Referenced by model.net.vocab_size via ${data.dim}.
  dim: 28

# Run metadata for the `wandb` and `aim` logger entries.
logger:
  wandb:
    # Reuses the top-level `tags` list of this file.
    tags: ${tags}
    group: text8
    name: sfm_gpt
    project: sfm_gpt
  aim:
    experiment: text8
