# Selects the upscaling variant. The other accepted value is
# "mixtral_for_causal_lm_moe_upscaling".
name: mixtral_moe_upscaling

# NOTE(review): presumably the number of experts each token is routed to;
# confirm against the code that consumes this config.
experts_per_token: 2

# Path to save the upscaled model.
# NOTE(review): null presumably means no checkpoint is written; confirm
# against the consumer.
save_checkpoint: null
