{
  "train_micro_batch_size_per_gpu": 24,
  "gradient_accumulation_steps": 2,
  "optimizer": {
    "type": "Adam",
    "params": {
      "lr": 0.00005,
      "betas": [
        0.9,
        0.999
      ],
      "eps": 1e-8
    }
  },
  "scheduler": {
    "type": "OneCycle",
    "params": {
      "cycle_first_step_size": 2000,
      "cycle_first_stair_count": 1000,
      "cycle_second_step_size": 2000,
      "cycle_second_stair_count": 1000,
      "decay_step_size": 10000,
      "cycle_min_lr": 0.00001,
      "cycle_max_lr": 0.00005,
      "decay_lr_rate": 0.001,
      "cycle_min_mom": 0.85,
      "cycle_max_mom": 0.99,
      "decay_mom_rate": 0.0
    }
  },
  "fp16": {
    "enabled": true,
    "auto_cast": true
  },
  "zero_optimization": {
    "stage": 2,
    "reduce_bucket_size": 5e6,
    "zero_hpz_partition_size": 4,
    "contiguous_gradients": true,
    "overlap_comm": true,
    "reduce_scatter": true,
    "offload_optimizer": {
      "device": "cpu",
      "pin_memory": true
    }
  },
  "steps_per_print": 1,
  "flops_profiler": {
    "enabled": true,
    "profile_step": 1,
    "module_depth": -1,
    "top_modules": 5,
    "detailed": true
  },
  "tensorboard": {
    "enabled": true,
    "output_path": "tensorboard_log/",
    "job_name": "demo"
  }
}