# Global run settings shared by the sections below.
# NOTE(review): presumably the floating-point precision used by the model/data code — confirm.
dtype: float32
# Root output directory; hydra.run.dir and hydra.sweep.dir interpolate it as ${output_dir}.
output_dir: ./outputs
# NOTE(review): looks like a logging level name — confirm how the application consumes it.
log_level: INFO

# Training-task / data-generation settings.
# NOTE(review): field meanings are inferred from key names; confirm against the
# code that reads this section.
task:
  name: noisy_linear_regression
  # NOTE(review): 0 is presumably a sentinel rather than a literal count — confirm.
  n_tasks: 0
  n_data: 0
  n_dims: 2
  # 32 matches model.n_points and eval.eval_n_points elsewhere in this file.
  n_points: 32
  n_max_points: 32
  batch_size: 8
  # Distinct seeds for the data, task and noise streams (eval uses 104-106).
  data_seed: 101
  task_seed: 102
  noise_seed: 103
  data_scale: 1.0
  task_scale: 1.0
  noise_scale: 0.5
  # null: no clip value configured.
  clip: null
  use_weights: false
  distrib_name: generalized_normal
  # Base value only: hydra.sweeper.params overrides it with 1.0 and 1.5.
  distrib_param: 2

# Model settings.
# NOTE(review): field meanings are inferred from key names; confirm against the
# model constructor.
model:
  name: transformer
  # Same value (32) as task.n_points / task.n_max_points in this file.
  n_points: 32
  n_layer: 2
  # NOTE(review): n_embd equals n_head here (2 and 2), i.e. presumably one
  # embedding dimension per head — confirm this tiny size is intended.
  n_embd: 2
  n_head: 2
  # Model seed (100) is distinct from the task seeds (101-103) and eval seeds (104-106).
  seed: 100
  use_ln: true
  use_linear_attention: false

# Optimisation settings.
training:
  # NOTE(review): not a stock optimizer name; presumably resolved by project code — confirm.
  optimizer: adamw_attn
  # Base value only: hydra.sweeper.params sweeps training.lr over 1e-4 and 1e-3.
  lr: 1.0e-4
  schedule: triangle
  # Warmup spans half of the run (10 of 20 steps).
  warmup_steps: 10
  total_steps: 20
  # Base value only: hydra.sweeper.params sweeps training.weight_decay over 0.0 and 0.01.
  weight_decay: 0.01

# Evaluation settings.
# NOTE(review): field meanings are inferred from key names; confirm against the
# evaluation code.
eval:
  n_samples: 16
  batch_size: 8
  # Seeds 104-106 are distinct from the training-task seeds (101-103).
  data_seed: 104
  task_seed: 105
  noise_seed: 106
  # NOTE(review): presumably "evaluate every N training steps" — confirm.
  every: 1
  # Same value (32) as task.n_points and model.n_points.
  eval_n_points: 32
  task_centers: [2.0, 4.0, 6.0, 8.0]

# Hydra settings
# Multirun is the configured mode, so a launch expands the sweeper grid below.
# Job directories are timestamped under output_dir; within a sweep each job
# gets a subdirectory named after its job number.
hydra:
  mode: MULTIRUN
  run:
    dir: '${output_dir}/${now:%Y-%m-%d_%H-%M-%S}'
  sweep:
    dir: '${output_dir}/multirun/${now:%Y-%m-%d_%H-%M-%S}'
    subdir: '${hydra.job.num}'
  sweeper:
    # Each entry is a comma-separated list of override values for the named
    # key; three keys with two values each give 2 x 2 x 2 = 8 jobs.
    # These replace the base values task.distrib_param (2),
    # training.weight_decay (0.01) and training.lr (1.0e-4).
    params:
      task.distrib_param: '1.0, 1.5'
      training.weight_decay: '0.0, 0.01'
      training.lr: '1e-4, 1e-3'
