# Configuration "base_attn_ffn_ap" for the model named MoETransformerBlock.
# NOTE(review): the per-key notes below are inferred from the key names only;
# confirm them against the code that consumes this file.
name: base_attn_ffn_ap
model: MoETransformerBlock

# Expert settings, given separately for the attention (attn_*) and
# feed-forward (ffn_*) parts.
# presumably *_k is the number of experts selected per input - TODO confirm
attn_k: 2
ffn_k: 2
attn_num_experts: 6
ffn_num_experts: 6
task_num: 3
attn_expert_bias: false
ffn_expert_bias: true
attn_expert_dim_divisor: 2
ffn_expert_dim_divisor: 2
shared_routers: false

# Attention settings.
num_heads: 4
qkv_bias: false
# Use YAML null here: a plain `None` is not a YAML null and loads as the
# string "None", which is truthy in Python.
# NOTE(review): assumes the consumer expects "no value" for qk_scale - confirm.
qk_scale: null
attn_drop: 0.0
proj_drop: 0.0

# presumably weights of auxiliary loss terms - TODO confirm
w_MI: 0.0005
w_H: 0
w_finetune_MI: 0

noisy_gating: true
drop_path: 0.0