{
  "train_micro_batch_size_per_gpu": 160,
  "gradient_accumulation_steps": 4,
  "fp16": {
    "enabled": false
  },
  "zero_optimization": {
    "stage": 0,
    "overlap_comm": true,
    "contiguous_gradients": true
  },
  "optimizer": {
    "type": "Adam",
    "params": {
      "lr": 1e-3,
      "betas": [0.8, 0.999],
      "eps": 1e-8,
      "weight_decay": 3e-7
    }
  },
  "scheduler": {
    "type": "WarmupLR",
    "params": {
      "warmup_num_steps": 100,
      "warmup_min_lr": 0,
      "warmup_max_lr": 5e-4
    }
  },
  "activation_checkpointing": {
    "partition_activations": true,
    "cpu_checkpointing": true,
    "contiguous_memory_optimization": false,
    "synchronize_checkpoint_boundary": false,
    "number_checkpoints": null,
    "profile": true
  }
}