# Settings for an MoE upscaling step.
# NOTE(review): purpose inferred from the key names in this file; confirm
# against the code that consumes it.
# `name` may alternatively be set to "mixtral_moe_upscaling".
name: mixtral_for_causal_lm_moe_upscaling

# NOTE(review): presumably the total number of experts in the upscaled
# model; confirm against the consumer.
num_experts: 4
# NOTE(review): presumably the number of experts selected for each token,
# expected to be <= num_experts; confirm against the consumer.
experts_per_token: 2
# Path to save the upscaled model.
# NOTE(review): null presumably means no checkpoint is written; confirm.
save_checkpoint: null
