{
    "normal": [
        "model.transformer.wte.weight"
    ],
    "no_weight_decay": [
        "model.transformer.h.0.ln_1.weight",
        "model.transformer.h.0.attn.o_norm.weight",
        "model.transformer.h.0.ln_2.weight",
        "model.transformer.h.1.ln_1.weight",
        "model.transformer.h.1.attn.o_norm.weight",
        "model.transformer.h.1.ln_2.weight",
        "model.transformer.h.2.ln_1.weight",
        "model.transformer.h.2.attn.o_norm.weight",
        "model.transformer.h.2.ln_2.weight",
        "model.transformer.h.3.ln_1.weight",
        "model.transformer.h.3.ln_2.weight",
        "model.transformer.ln_f.weight"
    ],
    "mup": [
        "model.transformer.h.0.attn.c_attn.weight",
        "model.transformer.h.0.attn.q_conv1d.weight",
        "model.transformer.h.0.attn.k_conv1d.weight",
        "model.transformer.h.0.attn.v_conv1d.weight",
        "model.transformer.h.0.attn.b_proj.weight",
        "model.transformer.h.0.attn.o_proj.weight",
        "model.transformer.h.0.mlp.c_fc.weight",
        "model.transformer.h.0.mlp.c_proj.weight",
        "model.transformer.h.1.attn.c_attn.weight",
        "model.transformer.h.1.attn.q_conv1d.weight",
        "model.transformer.h.1.attn.k_conv1d.weight",
        "model.transformer.h.1.attn.v_conv1d.weight",
        "model.transformer.h.1.attn.b_proj.weight",
        "model.transformer.h.1.attn.o_proj.weight",
        "model.transformer.h.1.mlp.c_fc.weight",
        "model.transformer.h.1.mlp.c_proj.weight",
        "model.transformer.h.2.attn.c_attn.weight",
        "model.transformer.h.2.attn.q_conv1d.weight",
        "model.transformer.h.2.attn.k_conv1d.weight",
        "model.transformer.h.2.attn.v_conv1d.weight",
        "model.transformer.h.2.attn.b_proj.weight",
        "model.transformer.h.2.attn.o_proj.weight",
        "model.transformer.h.2.mlp.c_fc.weight",
        "model.transformer.h.2.mlp.c_proj.weight",
        "model.transformer.h.3.attn.c_attn.weight",
        "model.transformer.h.3.attn.c_proj.weight",
        "model.transformer.h.3.mlp.c_fc.weight",
        "model.transformer.h.3.mlp.c_proj.weight"
    ]
}
