# Parent config files this file builds on.
# NOTE(review): path resolution and merge precedence are defined by the config
# loader, which is not visible here -- confirm before reordering entries.
inherit:
    - models/GPT2.yaml
    - wandb.yaml

# Model settings declared by this file.
# NOTE(review): presumably layered on top of whatever models/GPT2.yaml
# provides via `inherit` -- confirm with the config loader.
model:
    # Same value as training.curriculum.dims.end (64) in this file;
    # presumably the two are meant to stay equal -- TODO confirm.
    n_dims: 64
    # NOTE(review): presumably the maximum sequence length -- TODO confirm.
    n_positions: 256

# Training-run settings.
# NOTE(review): no `task` key is set in this section; if the trainer expects
# one, it has to come from an inherited file or an override -- confirm.
training:
    data: gaussian
    # Explicit empty mapping (a bare `task_kwargs:` would parse as null).
    task_kwargs: {}
    batch_size: 64
    learning_rate: 0.0001
    # NOTE(review): presumably two checkpoint cadences (frequent save vs.
    # long-term keep) -- verify against the training loop.
    save_every_steps: 1000
    keep_every_steps: 100000
    # One more than keep_every_steps; presumably so the loop reaches step
    # 100000 and that checkpoint is written -- TODO confirm loop bounds.
    train_steps: 100001
    curriculum:
        # Schedule parameters for `dims`: start 16, end 64, inc 16.
        # NOTE(review): assumes the value grows by `inc` every `interval`
        # training steps until it reaches `end` (3 increments, i.e. about
        # 6000 steps) -- verify against the curriculum implementation.
        dims:
            start: 16
            end: 64
            inc: 16
            interval: 2000
