# Global settings shared by the whole run.
# NOTE(review): presumably the numeric precision used for data and model
# parameters -- confirm against the consuming code.
dtype: float32
# Root directory for outputs. The hydra.run.dir and hydra.sweep.dir entries
# in this file are built from it via the ${output_dir} interpolation.
output_dir: ./outputs
# NOTE(review): presumably a logging level name -- confirm how it is applied.
log_level: INFO

# Settings for the training task / data generation.
# NOTE(review): the exact meaning of each key is defined by the consuming
# code, which is not visible here; the notes below only state what this file
# shows and mark everything else as an assumption.
task:
  name: noisy_linear_regression
  n_tasks: 2
  n_data: 2
  n_dims: 2
  # Same value (4) as model.n_points, eval.eval_n_points and n_max_points.
  # NOTE(review): presumably these must agree -- confirm before changing one.
  n_points: 4
  batch_size: 64
  # Three separate seeds; each differs from the others and from the model
  # and eval seeds used elsewhere in this file.
  data_seed: 101
  task_seed: 102
  noise_seed: 103
  data_scale: 1.0
  task_scale: 1.0
  # Zero in this configuration.
  # NOTE(review): presumably this disables the noise term despite the
  # "noisy" task name -- confirm that this is intended.
  noise_scale: 0.
  distrib_name: normal
  # Explicit null: no distribution parameter is supplied.
  distrib_param: null
  n_max_points: 4

# Model architecture settings.
# NOTE(review): key semantics are inferred from their names only; confirm
# against the model constructor.
model:
  name: transformer
  # Same value (4) as task.n_points in this file.
  n_points: 4
  n_layer: 4
  # 16 is divisible by n_head (2), i.e. 8 per head if the embedding is split
  # evenly across heads -- TODO confirm that the model does so.
  n_embd: 16
  n_head: 2
  # Distinct from every task and eval seed in this file.
  seed: 100
  # NOTE(review): presumably toggles layer normalization -- confirm.
  use_ln: true
  # NOTE(review): presumably selects a linear attention variant -- confirm.
  use_linear_attention: true
training:
  optimizer: adamw
  lr: 1.0e-2
  schedule: warmup_cosine_decay
  warmup_steps: 50
  total_steps: 200
  weight_decay: 0.01

# Evaluation settings.
# NOTE(review): key semantics are inferred from their names only; confirm
# against the evaluation code.
eval:
  # n_samples equals batch_size here (16 each).
  n_samples: 16
  batch_size: 16
  # Seeds differ from the task seeds (101-103) and the model seed (100).
  data_seed: 104
  task_seed: 105
  noise_seed: 106
  # NOTE(review): presumably an interval in training steps; with
  # training.total_steps of 200 that would divide the run evenly into
  # 8 intervals -- confirm.
  every: 25
  # Same value (4) as task.n_points in this file.
  eval_n_points: 4
  eval_ridge: false
  # A list of four floats.
  # NOTE(review): how the centers are used is not visible here -- confirm.
  task_centers:
    - 2.0
    - 4.0
    - 6.0
    - 8.0

# Hydra settings: where Hydra places the output directory of each run.
# Both patterns are rooted at the top-level output_dir key.
hydra:
  run:
    # Single run: a folder under output_dir named by the launch timestamp
    # (the now resolver formats the time with the given strftime pattern).
    dir: ${output_dir}/${now:%Y-%m-%d_%H-%M-%S}
  sweep:
    # Multirun: a timestamped folder under output_dir/multirun ...
    dir: ${output_dir}/multirun/${now:%Y-%m-%d_%H-%M-%S}
    # ... with one subdirectory per job, named by the job number.
    subdir: ${hydra.job.num}
