# Experiment configuration that references a parent config file.
# NOTE(review): the key names suggest the loader merges `parent` into this
# file and validates it against `md5_parent_hash` — confirm in the loader.
parent: research/conditional/train/configs/baselines/gpt/expert_choice/common.yaml
# Quoted so the digest is always loaded as a string: a regenerated hash made
# only of digits (or digits around a single `e`) would otherwise be parsed as
# a number and no longer compare equal to a hex-digest string.
md5_parent_hash: "921cee6a3f88c58b719f0b9830cc5f52"
# Quoted to keep the value a string under every YAML schema; colon-separated
# digit groups such as `12:00:00` become sexagesimal integers in YAML 1.1.
# NOTE(review): looks like a D-HH:MM:SS job time limit (7 days) — confirm.
time: "7-00:00:00"

# Overrides applied on top of the parent configuration.
# NOTE(review): presumably model width / depth / attention-head count; if so,
# 1024 / 16 gives 64 dimensions per head — verify against the model code.
params:
  name: "expert_choice_base"
  dmodel: 1024
  n_blocks: 24
  n_att_heads: 16

