# Hydra defaults list: picks one option from each config group.
# Entries are composed in the order listed, and `_self_` stands for this
# file. With `_self_` first, the group configs listed after it take
# precedence over values defined in this file when keys collide.
defaults:
  - _self_
  - training: default
  - model: stp
  - lm: nanogpt
  - data: fineweb10B
  - generate: default

# Run identifier; hydra.job.name interpolates it as ${name}.
name: nanogpt

# Target device. When switching to cpu, also set the source's number of
# processes to 1.
device: cuda
# Compilation toggle (presumably torch.compile; confirm in training code).
compile: true

# Checkpoint to resume from; null means no checkpoint is specified.
# Example value: path/to/model@dddd.pth
from_checkpoint: null

# DeepSpeed adaptation: local rank placeholder (presumably filled in by
# the launcher; confirm against the entry point).
local_rank: null
# Selected ZeRO stage.
zero_stage: 0
hydra:
  # Per-run output directory: ./logs/<date>/<time>, stamped by the `now`
  # resolver.
  run:
    dir: ./logs/${now:%Y-%m-%d}/${now:%H-%M-%S}
  job:
    # Job name mirrors the top-level `name` key.
    name: ${name}
    # Change the working directory to the run's output directory.
    chdir: true
  # Override the `simple` log formatter: timestamp and message only.
  job_logging:
    formatters:
      simple:
        format: '[%(asctime)s] - %(message)s'
