{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 7.948148148148148,
  "eval_steps": 500,
  "global_step": 540,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.2962962962962963,
      "grad_norm": 0.8044357895851135,
      "learning_rate": 1.998147167378645e-05,
      "loss": 1.6837,
      "step": 20
    },
    {
      "epoch": 0.5925925925925926,
      "grad_norm": 0.9130570292472839,
      "learning_rate": 1.954021593775401e-05,
      "loss": 1.5419,
      "step": 40
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 0.5896225571632385,
      "learning_rate": 1.8535930890373467e-05,
      "loss": 1.3284,
      "step": 60
    },
    {
      "epoch": 1.1777777777777778,
      "grad_norm": 0.33998429775238037,
      "learning_rate": 1.7027885831450318e-05,
      "loss": 1.1974,
      "step": 80
    },
    {
      "epoch": 1.474074074074074,
      "grad_norm": 0.2458440661430359,
      "learning_rate": 1.5105080169021792e-05,
      "loss": 1.1353,
      "step": 100
    },
    {
      "epoch": 1.7703703703703704,
      "grad_norm": 0.22086204588413239,
      "learning_rate": 1.2880990993652379e-05,
      "loss": 1.1027,
      "step": 120
    },
    {
      "epoch": 2.0592592592592593,
      "grad_norm": 0.17722076177597046,
      "learning_rate": 1.0486876067740253e-05,
      "loss": 1.0807,
      "step": 140
    },
    {
      "epoch": 2.3555555555555556,
      "grad_norm": 0.16011089086532593,
      "learning_rate": 8.064027463374702e-06,
      "loss": 1.0701,
      "step": 160
    },
    {
      "epoch": 2.651851851851852,
      "grad_norm": 0.15892483294010162,
      "learning_rate": 5.755433011241851e-06,
      "loss": 1.0602,
      "step": 180
    },
    {
      "epoch": 2.948148148148148,
      "grad_norm": 0.14870895445346832,
      "learning_rate": 3.6973376719429134e-06,
      "loss": 1.0524,
      "step": 200
    },
    {
      "epoch": 3.237037037037037,
      "grad_norm": 0.1515774130821228,
      "learning_rate": 2.0112028473093294e-06,
      "loss": 1.0558,
      "step": 220
    },
    {
      "epoch": 3.533333333333333,
      "grad_norm": 0.1488105058670044,
      "learning_rate": 7.965381643084069e-07,
      "loss": 1.0474,
      "step": 240
    },
    {
      "epoch": 3.8296296296296295,
      "grad_norm": 0.14492806792259216,
      "learning_rate": 1.2502877393158587e-07,
      "loss": 1.0459,
      "step": 260
    },
    {
      "epoch": 4.118518518518519,
      "grad_norm": 0.14900843799114227,
      "learning_rate": 1.3702939346142286e-05,
      "loss": 1.0514,
      "step": 280
    },
    {
      "epoch": 4.4148148148148145,
      "grad_norm": 0.10958821326494217,
      "learning_rate": 1.2783372345817013e-05,
      "loss": 1.0335,
      "step": 300
    },
    {
      "epoch": 4.711111111111111,
      "grad_norm": 0.09521809220314026,
      "learning_rate": 1.1837495178165706e-05,
      "loss": 1.0281,
      "step": 320
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.09393677115440369,
      "learning_rate": 1.0874248862735445e-05,
      "loss": 1.0227,
      "step": 340
    },
    {
      "epoch": 5.296296296296296,
      "grad_norm": 0.08191889524459839,
      "learning_rate": 9.902738603065839e-06,
      "loss": 1.0148,
      "step": 360
    },
    {
      "epoch": 5.592592592592593,
      "grad_norm": 0.07811534404754639,
      "learning_rate": 8.932147718639562e-06,
      "loss": 1.01,
      "step": 380
    },
    {
      "epoch": 5.888888888888889,
      "grad_norm": 0.07469596713781357,
      "learning_rate": 7.971650838432414e-06,
      "loss": 1.0167,
      "step": 400
    },
    {
      "epoch": 6.177777777777778,
      "grad_norm": 0.07056716084480286,
      "learning_rate": 7.030327176611471e-06,
      "loss": 1.0072,
      "step": 420
    },
    {
      "epoch": 6.474074074074074,
      "grad_norm": 0.07154300063848495,
      "learning_rate": 6.117074710153366e-06,
      "loss": 1.0048,
      "step": 440
    },
    {
      "epoch": 6.770370370370371,
      "grad_norm": 0.06861760467290878,
      "learning_rate": 5.240526069629265e-06,
      "loss": 1.0037,
      "step": 460
    },
    {
      "epoch": 7.059259259259259,
      "grad_norm": 0.06891117990016937,
      "learning_rate": 4.4089669382091746e-06,
      "loss": 1.0015,
      "step": 480
    },
    {
      "epoch": 7.355555555555555,
      "grad_norm": 0.06881330162286758,
      "learning_rate": 3.630257730228979e-06,
      "loss": 1.0032,
      "step": 500
    },
    {
      "epoch": 7.651851851851852,
      "grad_norm": 0.062052953988313675,
      "learning_rate": 2.911759289663101e-06,
      "loss": 0.9989,
      "step": 520
    },
    {
      "epoch": 7.948148148148148,
      "grad_norm": 0.06376505643129349,
      "learning_rate": 2.2602633108470794e-06,
      "loss": 1.002,
      "step": 540
    }
  ],
  "logging_steps": 20,
  "max_steps": 680,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 20,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 8.410800525515162e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
