{
    "activate_fn": "silu",
    "architectures": [
        "CPMDragonflyForCausalLM"
    ],
    "dim_ff": 1920,
    "dim_head": 128,
    "dim_model": 2048,
    "dim_model_base": 256,
    "dropout_p": 0.0,
    "eps": 1e-05,
    "ffn_gated": true,
    "flash_attn_mask_shape": "2d",
    "half_type": "bf16",
    "init_std": 0.1,
    "model_type": "cpm",
    "num_heads": 6,
    "num_kv_heads": 2,
    "num_layers": 48,
    "qk_norm": false,
    "scale": false,
    "scale_depth": -1,
    "scale_emb": 12,
    "tie_lm_head": true,
    "use_flash_attn": true,
    "use_att": false,
    "use_ffn": false,
    "use_mamba": true,
    "mamba_config": {
        "d_inner": 4096
    },
    "vocab_size": 50280,
    "residual_in_fp32": false
}