{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.8603213844252164,
  "eval_steps": 500,
  "global_step": 580,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09888751545117429,
      "grad_norm": 0.4664214253425598,
      "learning_rate": 9.268292682926831e-06,
      "loss": 1.3966,
      "step": 20
    },
    {
      "epoch": 0.19777503090234858,
      "grad_norm": 0.6125897765159607,
      "learning_rate": 1.902439024390244e-05,
      "loss": 1.3778,
      "step": 40
    },
    {
      "epoch": 0.29666254635352285,
      "grad_norm": 0.4593203067779541,
      "learning_rate": 1.9972833899736445e-05,
      "loss": 1.2992,
      "step": 60
    },
    {
      "epoch": 0.39555006180469715,
      "grad_norm": 0.24967049062252045,
      "learning_rate": 1.9879115872313054e-05,
      "loss": 1.2167,
      "step": 80
    },
    {
      "epoch": 0.49443757725587145,
      "grad_norm": 0.23520821332931519,
      "learning_rate": 1.9719138947345632e-05,
      "loss": 1.1712,
      "step": 100
    },
    {
      "epoch": 0.5933250927070457,
      "grad_norm": 0.18342271447181702,
      "learning_rate": 1.9493976084683814e-05,
      "loss": 1.1537,
      "step": 120
    },
    {
      "epoch": 0.69221260815822,
      "grad_norm": 0.1719566285610199,
      "learning_rate": 1.920513744406665e-05,
      "loss": 1.1415,
      "step": 140
    },
    {
      "epoch": 0.7911001236093943,
      "grad_norm": 0.1517576426267624,
      "learning_rate": 1.8854560256532098e-05,
      "loss": 1.1268,
      "step": 160
    },
    {
      "epoch": 0.8899876390605687,
      "grad_norm": 0.1562473326921463,
      "learning_rate": 1.844459583147367e-05,
      "loss": 1.1194,
      "step": 180
    },
    {
      "epoch": 0.9888751545117429,
      "grad_norm": 0.18168605864048004,
      "learning_rate": 1.7977993786487483e-05,
      "loss": 1.1187,
      "step": 200
    },
    {
      "epoch": 1.084054388133498,
      "grad_norm": 0.16773328185081482,
      "learning_rate": 1.7457883605779632e-05,
      "loss": 1.111,
      "step": 220
    },
    {
      "epoch": 1.1829419035846724,
      "grad_norm": 0.20241492986679077,
      "learning_rate": 1.6887753650820976e-05,
      "loss": 1.1118,
      "step": 240
    },
    {
      "epoch": 1.2818294190358468,
      "grad_norm": 0.18657644093036652,
      "learning_rate": 1.6271427764024205e-05,
      "loss": 1.0983,
      "step": 260
    },
    {
      "epoch": 1.380716934487021,
      "grad_norm": 0.17775581777095795,
      "learning_rate": 1.56130396223615e-05,
      "loss": 1.0977,
      "step": 280
    },
    {
      "epoch": 1.4796044499381953,
      "grad_norm": 0.17411239445209503,
      "learning_rate": 1.4917005012932292e-05,
      "loss": 1.1065,
      "step": 300
    },
    {
      "epoch": 1.5784919653893696,
      "grad_norm": 0.23075193166732788,
      "learning_rate": 1.418799221642782e-05,
      "loss": 1.1003,
      "step": 320
    },
    {
      "epoch": 1.677379480840544,
      "grad_norm": 0.16849099099636078,
      "learning_rate": 1.3430890697129727e-05,
      "loss": 1.0881,
      "step": 340
    },
    {
      "epoch": 1.7762669962917181,
      "grad_norm": 0.16026577353477478,
      "learning_rate": 1.265077830943773e-05,
      "loss": 1.0889,
      "step": 360
    },
    {
      "epoch": 1.8751545117428925,
      "grad_norm": 0.17469868063926697,
      "learning_rate": 1.1852887240871145e-05,
      "loss": 1.0898,
      "step": 380
    },
    {
      "epoch": 1.9740420271940669,
      "grad_norm": 0.1756419986486435,
      "learning_rate": 1.1042568919963323e-05,
      "loss": 1.0942,
      "step": 400
    },
    {
      "epoch": 2.069221260815822,
      "grad_norm": 0.15648303925991058,
      "learning_rate": 1.0225258124410682e-05,
      "loss": 1.0891,
      "step": 420
    },
    {
      "epoch": 2.168108776266996,
      "grad_norm": 0.1966937631368637,
      "learning_rate": 9.406436530201641e-06,
      "loss": 1.0939,
      "step": 440
    },
    {
      "epoch": 2.2669962917181707,
      "grad_norm": 0.204082190990448,
      "learning_rate": 8.591595946200348e-06,
      "loss": 1.0837,
      "step": 460
    },
    {
      "epoch": 2.365883807169345,
      "grad_norm": 0.18748576939105988,
      "learning_rate": 7.786201480769532e-06,
      "loss": 1.0905,
      "step": 480
    },
    {
      "epoch": 2.464771322620519,
      "grad_norm": 0.18722127377986908,
      "learning_rate": 6.995654887472689e-06,
      "loss": 1.0912,
      "step": 500
    },
    {
      "epoch": 2.5636588380716936,
      "grad_norm": 0.1826847791671753,
      "learning_rate": 6.225258335694818e-06,
      "loss": 1.0854,
      "step": 520
    },
    {
      "epoch": 2.6625463535228677,
      "grad_norm": 0.22083158791065216,
      "learning_rate": 5.4801788491708655e-06,
      "loss": 1.0813,
      "step": 540
    },
    {
      "epoch": 2.761433868974042,
      "grad_norm": 0.2242550402879715,
      "learning_rate": 4.7654136509314985e-06,
      "loss": 1.0865,
      "step": 560
    },
    {
      "epoch": 2.8603213844252164,
      "grad_norm": 0.18581761419773102,
      "learning_rate": 4.0857566470964846e-06,
      "loss": 1.0759,
      "step": 580
    }
  ],
  "logging_steps": 20,
  "max_steps": 808,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 20,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.137446012567683e+18,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}
