{
  "train_batch_size": 65536,
  "train_micro_batch_size_per_gpu": 128,
  "steps_per_print": 1000,
  "prescale_gradients": false,
  "optimizer": {
    "type": "Lamb",
    "params": {
      "lr": 0.011,
      "weight_decay": 0,
      "bias_correction": true,
      "max_coeff": 10,
      "min_coeff": 0.0
    }
  },
  "gradient_clipping": 1.0,

  "wall_clock_breakdown": false,

  "bf16": {
    "enabled": true
  },
  "sparse_attention": {
    "mode": "fixed",
    "block": 16,
    "different_layout_per_head": true,
    "num_local_blocks": 4,
    "num_global_blocks": 1,
    "attention": "bidirectional",
    "horizontal_global_attention": false,
    "num_different_global_patterns": 4
  }
}
