# Configuration "base_attn_ffn_tp" for the MoETransformerBlock model.
# Key meanings noted below are inferred from the key names only; confirm
# against the code that consumes this file before relying on them.
name: base_attn_ffn_tp
model: MoETransformerBlock

# Expert settings, given separately for the attn_* and ffn_* parts.
# NOTE(review): *_k is presumably the number of experts selected per input
# (top-k routing) out of *_num_experts -- confirm against the model code.
attn_k: 3
ffn_k: 6
attn_num_experts: 4
ffn_num_experts: 8
task_num: 3
attn_expert_bias: false
ffn_expert_bias: true
attn_expert_dim_divisor: 4
ffn_expert_dim_divisor: 8
shared_routers: false

# Attention settings.
num_heads: 8
qkv_bias: false
# Must be `null`, not `None`: YAML has no `None` keyword, so a plain `None`
# loads as the string "None" (which is truthy) instead of a null value.
qk_scale: null
attn_drop: 0.0
proj_drop: 0.0

# Weights (w_*).
# NOTE(review): presumably loss-term weights (MI = mutual information,
# H = entropy) -- confirm against the training code.
w_MI: 0.0005
w_H: 0
w_finetune_MI: 0

noisy_gating: false
drop_path: 0.0