# @package _global_

# To run this experiment, pass this file's name (without the .yaml extension):
# python train.py experiment=<this_file_name>

# Hydra defaults list: each `override` entry replaces the option chosen for
# that config group (data, model, callbacks, trainer) with the one named here.
defaults:
  - override /data: text8
  - override /model: text8_sfm_gpt
  - override /callbacks: default
  - override /trainer: default

# all parameters below will be merged with parameters from default configurations set above
# this allows you to overwrite only specified parameters

# Run tags; also forwarded to the wandb logger through the ${tags}
# interpolation under logger.wandb.tags.
tags:
  - "text8"
  - "gpt"

# Seed value for this experiment. How it is consumed is defined outside this
# file (presumably used to seed random number generators -- confirm in train.py).
seed: 12345

# Overrides applied on top of the `/trainer: default` config selected in the
# defaults list.
# NOTE(review): key names look like PyTorch Lightning Trainer arguments --
# confirm against the trainer config's target class.
trainer:
  min_epochs: 1
  max_epochs: 200
  # presumably the gradient clipping threshold -- confirm how the trainer uses it
  gradient_clip_val: 1.0

# Overrides applied on top of the `/model: text8_sfm_gpt` config selected in
# the defaults list.
model:
  optimizer:
    lr: 0.001
  net:
    # Both values are interpolated from the `data` section of this file, so
    # they resolve to data.k and data.dim (256 and 28 unless overridden).
    block_size: ${data.k}
    vocab_size: ${data.dim}
    n_layer: 4
    n_head: 4
    n_embd: 256
  manifold: sphere
  compile: false
  eval_ppl: true
  # NOTE(review): gpt_nll_eval is off here, so eval_gpt_nll_every and
  # gpt_nll_samples presumably have no effect -- confirm in the model code.
  gpt_nll_eval: false
  eval_gpt_nll_every: 5
  eval_ppl_every: 25
  gpt_nll_samples: 512
  ot_method: exact

# Overrides applied on top of the `/data: text8` config selected in the
# defaults list.
data:
  batch_size: 256
  # k and dim are also read by model.net (block_size and vocab_size) through
  # the ${data.k} / ${data.dim} interpolations, so changing them here changes
  # the network configuration as well.
  k: 256
  dim: 28

# Per-logger settings for the wandb and aim loggers.
logger:
  wandb:
    # Reuses the top-level `tags` list via interpolation.
    tags: ${tags}
    group: "text8"
    name: "sfm_gpt"
    project: "sfm_gpt"
  aim:
    experiment: "text8"
