# Model section: a single `_target_` key holding a dotted path.
# NOTE(review): `_target_` looks like the Hydra instantiate convention
# (import path of the class to construct) — confirm against the loader.
model:
  _target_: models.toy_transformer.ToyTransformer
# Dataset section: selects the dataset by name and sets its parameters.
# NOTE(review): `batch_size`, `iters`, `train_data_size` and
# `test_data_size` also appear under `training` with different values;
# which section wins for each consumer is not visible here — confirm.
dataset:
  name: single_location_linear_regression
  dimension: 256
  sequence_length: 256
  p_repeat: 0
  burstiness: 1
  # The two `*_eval` keys are null; presumably they fall back to the
  # non-eval values above — TODO confirm in the dataset code.
  burstiness_eval: null
  p_repeat_eval: null
  show_relevant_token: false
  random_relevant_token_positions: false
  # batch_size * iters = 64 * 64 = 4096, which equals train_data_size
  # below; whether that relationship is required is not shown here.
  batch_size: 64
  iters: 64
  train_data_size: 4096
  test_data_size: 5120
# Training section: optimisation loop settings (iteration count, batch
# size, optimiser choice, evaluation/checkpoint/plot cadence, loss name).
training:
  iters: 10000
  batch_size: 32
  # NOTE(review): `1` parses as an integer, not a float; verify the
  # consumer accepts an int learning rate.
  lr: 1
  wd: 0.0
  opt: sgd
  # `none` (lowercase) is a plain string in YAML, not null.
  scheduler: none
  eval_interval: 25
  save_checkpoint: true
  save_interval: 25
  # NOTE(review): the next two values parse as floats (10000.0) although
  # the key names suggest integer counts — confirm the consumer casts
  # them before using them as sizes or intervals.
  plot_interval: 10000.0
  test_data_size: 10000.0
  # null here while dataset.train_data_size is 4096; presumably the
  # dataset value is used instead — TODO confirm.
  train_data_size: null
  loss: ce_softmax
# Run section: per-run bookkeeping (seeds, identifiers, output location,
# device, dtype, experiment-tracking switches).
# NOTE(review): `pod_name`, `out_dir` and `device` are written as `None`,
# which YAML parses as the string "None", not null (this file uses
# `null` elsewhere, e.g. `wandb_entity`). Confirm whether the consumer
# expects the string or a real null before changing them.
run:
  pod_name: None
  # Two separate seeds are configured; how each is applied is not
  # visible in this file.
  random_seed: 21
  model_seed: 42
  det_run: false
  hash: VeWrLDoEVQnO7sn4fduK
  out_dir: None
  device: None
  aim_writer: false
  dtype: float32
  start_from_scratch: true
  exp_name: test_exp
  wandb_writer: true
  wandb_entity: null
