---
# Sweep definition "ar_figure_4".
# Layout follows the method / parameters / program sweep-config shape
# (presumably consumed by Weights & Biases sweeps -- confirm with the launcher).
#
# `method: grid` with the multi-valued parameters below gives
# 4 (n_pairs) x 4 (vocab_size) x 3 (random_seed) x 3 (lr) = 144 combinations.
method: grid
name: ar_figure_4
parameters:
  # ---- Dataset -----------------------------------------------------------
  dataset:
    value: associative_recall
  # Swept: 4 settings.
  dataset.n_pairs:
    values:
      - 8
      - 16
      - 32
      - 64
  # Train and test burstiness are pinned to the same value.
  dataset.test_dist_args.burstiness:
    value: 1
  dataset.train_dist_args.burstiness:
    value: 1
  # Swept: 4 settings.
  dataset.vocab_size:
    values:
      - 32
      - 64
      - 128
      - 256

  # ---- Model (fixed across the sweep) ------------------------------------
  model:
    value: transformer
  model.embedding_dim:
    value: 256
  model.enable_mlp:
    value: true
  model.enable_norm:
    value: true
  model.enable_skip:
    value: true
  model.n_heads:
    value: 4
  model.n_layers:
    value: 4
  model.pos_enc:
    value: sin_cos

  # NOTE(review): this `program` is a sweep *parameter*, distinct from the
  # top-level `program` key at the end of the file. Presumably it names the
  # script that the launcher dispatches to -- confirm against
  # sweeps/run_with_hydra.py.
  program:
    value: ar_ic_learning.py

  # ---- Run control -------------------------------------------------------
  run.det_run:
    value: false
  # Swept: 3 seeds per configuration.
  run.random_seed:
    values:
      - 5
      - 6
      - 7
  run.start_from_scratch:
    value: true
  run.wandb_writer:
    value: true

  # ---- Training ----------------------------------------------------------
  training.batch_size:
    value: 32
  training.eval_interval:
    value: 100
  training.iters:
    value: 1000000
  # Swept: 3 learning rates.
  # The exponent forms carry an explicit ".0" in the mantissa: YAML 1.1
  # loaders (e.g. PyYAML) only resolve exponent notation as a float when the
  # mantissa contains a decimal point; a bare `3e-05` would load as a string.
  training.lr:
    values:
      - 0.0001
      - 3.0e-05
      - 1.0e-05
  # Larger than training.iters (1000000); presumably chosen so that periodic
  # plotting never triggers during a run -- TODO confirm.
  training.plot_interval:
    value: 10000000000
  training.save_checkpoint:
    value: false
  training.test_data_size:
    value: 4096
  training.train_data_size:
    value: 32768

# Entry point executed for every combination in the grid.
program: sweeps/run_with_hydra.py