# Grid sweep "20240801_sraven_heads"; entry point is run.py (see `program`).
# NOTE(review): layout looks like a Weights & Biases sweep definition - confirm.
#
# Axes with more than one value:
#   config (3) x config.lr (2) x config.model.num_heads (5)
#   x config.seed (3) x config.weight_decay (2) = 180 combinations.
# Every other parameter below is pinned to a single value.
method: grid
name: '20240801_sraven_heads'
parameters:
  # Preset selectors; quoted because they contain ':' and ';'.
  config:
    values:
      - 'configs/raven.py:raven_3x3_4features;linear_hypatt'
      - 'configs/raven.py:raven_3x3_4features;linear_attention'
      - 'configs/raven.py:raven_3x3_4features;softmax_attention'

  # Pinned to one value.
  config.data.num_train:
    values: [20000000]

  config.lr:
    values: [0.001, 0.0003]

  # The axis this sweep is named after: powers of two from 1 to 16.
  config.model.num_heads:
    values: [1, 2, 4, 8, 16]

  # Pinned to one value.
  config.model.num_layers:
    values: [4]

  # Three repetitions per combination of the other axes.
  config.seed:
    values: [0, 1, 2]

  # Pinned to one value.
  config.warmup_steps:
    values: [1000]

  config.weight_decay:
    values: [0.1, 0.3]

  # Pinned to one value.
  config.width_multiplier:
    values: [1]

  # Constants (`value`, not `values`); note these keys carry no `config.`
  # prefix, unlike the parameters above.
  eval_every:
    value: 100000
  log_every:
    value: 100000
  log_level:
    value: 1
program: run.py