{
  "model": {
    "Feature": 960,
    "ATTN Feature": 960,
    "FFN Feature": 3840,
    "Head Count": 12,
    "Decoder Count": 4,
    "Encoder Count": 4,
    "Init Scalar": 0.03125,
    "RoPE Base": 100000,
    "Max Length": 8192
  },
  "teacher-forced": {
    "Peak LR": 0.003,
    "Accumulation": 16,
    "Grads Clipping": 1,
    "Weight Decay": 0.00001,
    "Total Steps": 40000,
    "Warmup Steps": 4000,
    "Anneal Steps": 8000,
    "Batch Size": 96,
    "Context Length": 3072
  },
  "adaptive": {
    "Peak LR": 0.003,
    "Accumulation": 8,
    "Grads Clipping": 1,
    "Weight Decay": 0.00001,
    "Total Steps": 10000,
    "Warmup Steps": 800,
    "Anneal Steps": 800,
    "Batch Size": 32,
    "Context Length": 512
  }
}