# Grid sweep over three optimizer settings: opt.batch_size, opt.muon_lr and
# opt.muon_t1. Every run launches main.py via the command template at the
# bottom of this file.
name: lm11m_muon_grid_1
program: main.py
# grid: every combination of the parameter values below is run.
method: grid
parameters:
  # 7 values: powers of 4, from 4^0 up to 4^6.
  opt.batch_size:
    values: [1, 4, 16, 64, 256, 1024, 4096]
  # 10 values spanning 1e-4 .. 1e-1, roughly log-spaced and written to four
  # decimal places.
  # NOTE(review): 0.0002 and 0.0005 look like 0.000215 / 0.000464 rounded to
  # four decimals, which makes the low end of the grid slightly uneven --
  # confirm this is intended.
  opt.muon_lr:
    values: [0.0001, 0.0002, 0.0005, 0.001, 0.0022, 0.0046, 0.01, 0.0215, 0.0464, 0.1]
  # 10 log-spaced integers from 1e3 to 1e7.
  # NOTE(review): the meaning and units of muon_t1 are defined by the training
  # code, not by this file -- see main.py / the hparams config.
  opt.muon_t1:
    values: [1000, 2783, 7743, 21544, 59948, 166810, 464159, 1291550, 3593814, 10000000]
  # Grid size: 7 * 10 * 10 = 700 combinations.
# Command template. The ${...} entries are macros filled in by the sweep
# agent; per the W&B sweep docs, ${args_no_hyphens} expands to the swept
# parameters as `name=value` arguments (no leading `--`). The three literal
# entries are fixed overrides passed on every run, ahead of the swept values.
command:
  - ${env}
  - ${interpreter}
  - ${program}
  # Presumably a Hydra-style "+" override that adds the base hparams group;
  # the optimizer itself is switched to muon on the next line.
  - +hparams=lm11m_fwedu_adam
  # The single quotes are part of the argument string that is passed through.
  - "opt.optimizer='muon'"
  - opt.max_microbatch_size=16
  - ${args_no_hyphens}
