{
    "train_batch_size": 256,
    "train_micro_batch_size_per_gpu": 4,
    "optimizer": {
        "type": "AdamW",
        "params": {
            "lr": 0.0001
        }
    },
    "zero_optimization": {
        "stage": 1
    },
    "bf16": {
        "enabled": true
    },
    "zero_allow_untested_optimizer": true,
    "gradient_clipping": 1.0,
    "wall_clock_breakdown": false,
    "steps_per_print": 1
}