# Hydra defaults list: the config-group options composed into the final config.
# Order matters: entries listed later override entries listed earlier.
defaults:
  # `_self_` is first, so the values defined in this file are merged first and
  # can be overridden by the group options listed below.
  - _self_
  - training: default
  - model: stp
  # `null` means no option from this group is selected by default; one can
  # still be chosen with an override (e.g. `circuit=<option>`).
  - circuit: null
  - mt_head: null
  # NOTE(review): `none` is a plain string, not YAML null, so this selects an
  # option literally named `none` (presumably adaptor/none.yaml; confirm).
  - adaptor: none
  - lm: nanogpt
  - data: fineweb10B
  - generate: default

# Top-level run settings; these are consumed by application code outside this file.
# NOTE(review): presumably toggles model compilation (e.g. torch.compile); confirm in the consumer.
compile: true
device: cuda   # if set to cpu, also set the source's num processes to 1
# Example of a checkpoint path value:
# from_checkpoint: path/to/model@dddd.pth
# null: no checkpoint path is configured by default.
from_checkpoint: null
# null: no model path is configured for loading the MTP head by default.
load_mtp_head_from_model: null

# Run name; reused below as the Hydra job name via the `${name}` interpolation.
name: nanogpt
# Overrides for Hydra's own runtime configuration.
hydra:
  job:
    # Change the process working directory to the run's output directory
    # (see `run.dir`) when the job starts.
    chdir: true
    # Job name is taken from the top-level `name` key.
    name: ${name}
  run:
    # Output directory per run: ./logs/<date>/<time>, built with the `now`
    # resolver; the relative path is resolved from the launch directory.
    dir: ./logs/${now:%Y-%m-%d}/${now:%H-%M-%S}

  job_logging:
    formatters:
      simple:
        # Log line layout: timestamp in brackets followed by the message only
        # (no logger name or level appears in this format string).
        format: '[%(asctime)s] - %(message)s'
