{
  "train_micro_batch_size_per_gpu": 4,
  "bf16": { "enabled": true },
  "gradient_accumulation_steps": 128,
  "zero_optimization": {
    "stage": 3,
    "zero_init_enabled": false,
    "overlap_comm": false,
    "offload_param": { "device": "none" },
    "offload_optimizer": { "device": "cpu" }
  }
}

