{
  "model_large": {
    "Feature": 960,
    "ATTN Feature": 960,
    "FFN Feature": 3840,
    "Head Count": 12,
    "Decoder Count": 24,
    "Init Scalar": 0.03125,
    "RoPE Base": 100000,
    "Max Length": 8192
  },
  "model_middle": {
    "Feature": 960,
    "ATTN Feature": 960,
    "FFN Feature": 3840,
    "Head Count": 12,
    "Decoder Count": 16,
    "Init Scalar": 0.03125,
    "RoPE Base": 100000,
    "Max Length": 8192
  },
  "model_small": {
    "Feature": 960,
    "ATTN Feature": 960,
    "FFN Feature": 3840,
    "Head Count": 12,
    "Decoder Count": 10,
    "Init Scalar": 0.03125,
    "RoPE Base": 100000,
    "Max Length": 8192
  },
  "cfg_pretrain": {
    "Peak LR": 0.0001,
    "Accumulation": 2,
    "Grads Clipping": 1,
    "Weight Decay": 0.00001,
    "Total Steps": 2000,
    "Warmup Steps": 400,
    "Anneal Steps": 800,
    "Batch Size": 64,
    "Context Length": 1024
  },
  "cfg_posttrain": {
    "Peak LR": 0.0001,
    "Accumulation": 2,
    "Grads Clipping": 1,
    "Weight Decay": 0.00001,
    "Total Steps": 4000,
    "Warmup Steps": 400,
    "Anneal Steps": 800,
    "Batch Size": 64,
    "Context Length": 1024
  }
}
