# Settings for the entry identified by `name` below.
# NOTE(review): the code that consumes this file is not visible from here;
# notes marked "presumably" are inferred from key names — confirm them
# against the loader before relying on them.
name: smile_mistral_upscaling
# `device` and `accelerator` deliberately name two different targets here
# (cpu vs. cuda).
# NOTE(review): presumably one is where the models are held and the other is
# where the heavy computation runs — confirm which key plays which role.
device: cpu
accelerator: cuda
# Path to save/load the model. `null` means no path is configured.
model_path: null
# NOTE(review): presumably the dtype used for the model weights — confirm.
model_dtype: float16
# NOTE(review): presumably the number of experts selected per token
# (top-k routing) — confirm.
num_experts_per_tok: 1
# NOTE(review): presumably the rank used for the low-rank router — confirm.
rank_of_router: 8
# If rank_of_expert < 0, a dense expert is used.
# NOTE(review): otherwise presumably the rank of each low-rank expert —
# confirm.
rank_of_expert: 512
