# Don't sweep the full grid of positional_dim x additive_positional; only the
# pairs (0, True) and (64, False) are meant to be run.
# The values in this file select the (64, False) pair.
# NOTE(review): the (0, True) pair is presumably configured elsewhere — confirm.
model.positional_dim:
  - 64

# Paired with positional_dim (see the note at the top of this file).
# Written as canonical lowercase `false`; it loads as the same boolean value
# as the capitalised spelling.
model.additive_positional:
  - false

# Data seeds 0 through 4 (five values).
data.seed: [0, 1, 2, 3, 4]

# WARNING: every nheads value must divide the model dim AFTER positional_dim
# has been added to it.
model.nheads:
  - 1
  - 2
  - 4
  - 8
  - 16
  - 32
  - 64
  - 128  # DO NOT USE this unless positional_dim is 64 (it is 64 in this file)

# Values listed for num_layers; 5 is currently disabled (commented out).
model.num_layers:
  - 1
  - 3
  # - 5

# Values listed for width_multiplier; 64 is currently disabled (commented out).
model.width_multiplier:
  - 1
  - 8
  # - 64
