{
  "fp16": { "enabled": true },
  "zero_optimization": {
    "stage": 2,
    "offload_optimizer": { "device": "none" },
    "offload_param": { "device": "none" },
    "overlap_comm": true,
    "contiguous_gradients": true
  },
  "train_micro_batch_size_per_gpu": 4,
  "gradient_accumulation_steps": 2
}

