{
  "model": {
    "Feature": 384,
    "ATTN Feature": 768,
    "FFN Feature": 1536,
    "Head Count": 16,
    "Decoder Count": 18,
    "Init Scalar": 0.046,
    "RoPE Base": 500,
    "Max Length": 64
  },
  "pretrain": {
    "Peak LR": 0.0005,
    "Grads Clipping": 1,
    "Weight Decay": 0.00001,
    "Total Steps": 20000,
    "Warmup Steps": 1000,
    "Anneal Steps": 1600,
    "Accumulation": 1,
    "Batch Size": 512
  },
  "sst": {
    "Peak LR": 0.000025,
    "Grads Clipping": 1,
    "Weight Decay": 0.00001,
    "Total Steps": 6000,
    "Warmup Steps": 2000,
    "Accumulation": 1,
    "Batch Size": 512
  },
  "rolling": {
    "Peak LR": 0.00008,
    "Grads Clipping": 1,
    "Weight Decay": 0.00001,
    "Total Steps": 3000,
    "Warmup Steps": 800,
    "Anneal Steps": 1200,
    "Accumulation": 1,
    "Batch Size": 512
  }
}