# Hyperparameter sweep definition for the "lr_figure_3_cross_sample" experiment.
# NOTE(review): the method/name/parameters/program layout looks like a
# Weights & Biases sweep config - confirm against whatever launches this file.
#
# Under `parameters`, an entry with `value` pins one fixed setting and an entry
# with `values` lists the settings to sweep over. Four entries are swept here:
#   dataset.dimension (9) x dataset.p_repeat (4) x run.random_seed (5)
#   x training.lr (2)
# so a full grid over them is 9 * 4 * 5 * 2 = 360 combinations.
method: grid
name: lr_figure_3_cross_sample
parameters:
  # --- Dataset settings ---
  dataset:
    value: single_location_linear_regression
  dataset.burstiness:
    value: 1
  # Swept. The list is not in ascending order: the powers of two 16..256 come
  # first, followed by four sizes that each fall between two consecutive
  # powers of two (23, 45, 91, 180).
  # NOTE(review): list order may influence the order in which grid runs are
  # scheduled - keep the order unless that is confirmed not to matter.
  dataset.dimension:
    values:
      - 16
      - 32
      - 64
      - 128
      - 256
      - 23
      - 45
      - 91
      - 180
  # Not the same key as `training.iters` further down; the two are set
  # independently (64 here, 100000 there).
  dataset.iters:
    value: 64
  # Swept over four values.
  dataset.p_repeat:
    values:
      - 0
      - 0.1
      - 0.2
      - 0.4
  # Fixed at 0 for every run, regardless of the swept `dataset.p_repeat`.
  # NOTE(review): presumably the evaluation-time counterpart of
  # `dataset.p_repeat` - verify in the dataset code.
  dataset.p_repeat_eval:
    value: 0
  dataset.random_relevant_token_positions:
    value: true
  dataset.sequence_length:
    value: 256
  dataset.show_relevant_token:
    value: true
  dataset.train_data_size:
    value: 4096
  # --- Model settings (all fixed) ---
  model:
    value: transformer
  model.embedding_dim:
    value: 256
  model.n_heads:
    value: 4
  model.n_layers:
    value: 2
  model.pos_enc:
    value: sin_cos
  # This is a sweep *parameter* that happens to be named `program`; it is a
  # different key from the top-level `program` on the last line of this file.
  # NOTE(review): presumably names the script that the top-level wrapper
  # should run - verify in sweeps/run_with_hydra.py.
  program:
    value: sl_lr_learning.py
  # --- Run settings ---
  run.det_run:
    value: false
  # Swept: five seeds (5 through 9) per configuration.
  run.random_seed:
    values:
      - 5
      - 6
      - 7
      - 8
      - 9
  run.start_from_scratch:
    value: true
  run.wandb_writer:
    value: true
  # --- Training settings ---
  training.batch_size:
    value: 32
  training.eval_interval:
    value: 50
  training.iters:
    value: 100000
  # Swept over two learning rates.
  training.lr:
    values:
      - 0.0003
      - 0.0001
  training.opt:
    value: adam
  training.save_checkpoint:
    value: false
# Top-level entry point for the sweep (not a swept parameter).
# NOTE(review): the dotted parameter names and this script's file name suggest
# the parameters are forwarded as Hydra-style overrides - confirm in the script.
program: sweeps/run_with_hydra.py