---
# Hyperparameter sweep definition. The ${...} macros used in the `command`
# list of this file look like Weights & Biases sweep macros — confirm against
# the launcher actually used.

# Script that is substituted for ${program} in the `command` list.
program: run.py

# Name given to this sweep.
name: pretrain_vary

# Search strategy. With `grid`, every combination of the multi-valued
# entries under `parameters` is enumerated.
method: grid

# Objective associated with the sweep.
# NOTE(review): presumably run.py logs a value under this exact name —
# verify, since the script is not visible from this file.
metric:
  name: avg_fitness
  goal: minimize
# Hyperparameter search space.
# Entries with `values` are swept; entries with `value` are held constant.
# Swept axes: n_embd (3) x n_layer (4) x alpha (6) x sigma_shift (3)
# x parallel_generations_per_gpu (3) = 648 combinations under grid search.
parameters:
  # --- swept ---
  n_embd:
    values: [16, 64, 256]
  n_layer:
    values: [1, 2, 4, 8]
  # NOTE(review): the list mixes small fractions (0.0625-0.25) with
  # integers (3-5); confirm both ranges are intended for the same parameter.
  alpha:
    values: [0.0625, 0.125, 0.25, 3, 4, 5]
  sigma_shift:
    values: [3, 4, 5]
  parallel_generations_per_gpu:
    values: [262144, 32768, 4096]

  # --- held constant ---
  num_perturbations:
    value: 0
  # Booleans are written in canonical lowercase form; the `command` list in
  # this file uses ${args_no_boolean_flags}, so check how run.py expects to
  # receive boolean arguments.
  use_clt:
    value: true
  fast_fitness:
    value: true
  noise_reuse:
    value: 1
  track:
    value: true
  group_size:
    value: 512
  tokens_per_update:
    value: 100
  validate_every:
    value: 10

# Command line used to launch each run. Every entry is a ${...} macro that
# the sweep launcher expands; the macros are quoted so the braces are
# unambiguously literal string content to the YAML parser.
command:
  - '${env}'
  - '${interpreter}'
  # Expands to the `program` entry of this file.
  - '${program}'
  # Expands to the arguments built from the `parameters` section.
  # NOTE(review): the exact formatting of boolean parameters under this
  # macro should be confirmed against the launcher documentation.
  - '${args_no_boolean_flags}'