# Model settings. How each key is consumed is decided by the training code,
# which is not part of this file.
model:
    # Same value as training.curriculum.dims start/end (8).
    n_dims: 8
    # Same value as training.curriculum.points start/end (101).
    n_positions: 101
    family: gpt2
    n_embd: 256
    n_layer: 12
    n_head: 8
    # NOTE(review): presumably one output each for mean and sigma, going by
    # the task name linear_uncertainty_muSigma -- confirm in the model code.
    n_outdims: 2
    uncertainty_model: true
    
# Training run settings.
training:
    data: varsig
    # Extra keyword arguments for the task; all three noise values are zero.
    task_kwargs:
        noise_std: 0.0
        noise_std_min: 0.0
        noise_std_max: 0.0
    batch_size: 64
    learning_rate: 0.0001
    save_every_steps: 5000
    keep_every_steps: 40000
    # NOTE(review): 200001 rather than 200000 -- presumably so that step
    # 200000 is still reached and checkpointed; confirm in the training loop.
    train_steps: 200001
    task: linear_uncertainty_muSigma
    # NOTE(review): start equals end in both schedules below, so inc and
    # interval presumably have no effect in this config -- confirm.
    curriculum:
        points:
            start: 101
            end: 101
            inc: 10
            interval: 5000
        dims:
            start: 8
            end: 8
            inc: 2
            interval: 10000

# NOTE(review): no output directory is set in this file (value is null);
# presumably supplied per run -- confirm before launching.
out_dir: null

# Weights & Biases logging settings.
wandb:
    # NOTE(review): project, entity and name are unset (null) in this file;
    # presumably filled in per user/run -- confirm before launching.
    project: null
    entity: null
    name: null
    notes: ""
    log_every_steps: 500

# Multi-GPU settings.
multigpu:
    # NOTE(review): presumably the distributed communication backend name
    # handed to the training launcher -- confirm against the caller.
    dist_backend: nccl