{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.9968,
  "global_step": 1562,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06,
      "learning_rate": 1e-05,
      "loss": 4.3173,
      "step": 10
    },
    {
      "epoch": 0.13,
      "learning_rate": 9.957228400342174e-06,
      "loss": 2.4146,
      "step": 20
    },
    {
      "epoch": 0.19,
      "learning_rate": 9.914456800684347e-06,
      "loss": 1.8759,
      "step": 30
    },
    {
      "epoch": 0.26,
      "learning_rate": 9.87168520102652e-06,
      "loss": 1.7977,
      "step": 40
    },
    {
      "epoch": 0.32,
      "learning_rate": 9.828913601368692e-06,
      "loss": 1.7983,
      "step": 50
    },
    {
      "epoch": 0.38,
      "learning_rate": 9.786142001710865e-06,
      "loss": 1.8282,
      "step": 60
    },
    {
      "epoch": 0.45,
      "learning_rate": 9.743370402053037e-06,
      "loss": 1.7548,
      "step": 70
    },
    {
      "epoch": 0.51,
      "learning_rate": 9.70059880239521e-06,
      "loss": 1.793,
      "step": 80
    },
    {
      "epoch": 0.58,
      "learning_rate": 9.657827202737383e-06,
      "loss": 1.7059,
      "step": 90
    },
    {
      "epoch": 0.64,
      "learning_rate": 9.615055603079556e-06,
      "loss": 1.8209,
      "step": 100
    },
    {
      "epoch": 0.7,
      "learning_rate": 9.57228400342173e-06,
      "loss": 1.7114,
      "step": 110
    },
    {
      "epoch": 0.77,
      "learning_rate": 9.529512403763903e-06,
      "loss": 1.6945,
      "step": 120
    },
    {
      "epoch": 0.83,
      "learning_rate": 9.486740804106074e-06,
      "loss": 1.6624,
      "step": 130
    },
    {
      "epoch": 0.9,
      "learning_rate": 9.443969204448248e-06,
      "loss": 1.6431,
      "step": 140
    },
    {
      "epoch": 0.96,
      "learning_rate": 9.401197604790419e-06,
      "loss": 1.7382,
      "step": 150
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.4677734375,
      "eval_runtime": 8.0571,
      "eval_samples_per_second": 124.114,
      "eval_steps_per_second": 7.819,
      "step": 156
    },
    {
      "epoch": 1.02,
      "learning_rate": 9.358426005132592e-06,
      "loss": 1.5212,
      "step": 160
    },
    {
      "epoch": 1.09,
      "learning_rate": 9.315654405474765e-06,
      "loss": 1.4706,
      "step": 170
    },
    {
      "epoch": 1.15,
      "learning_rate": 9.272882805816939e-06,
      "loss": 1.3626,
      "step": 180
    },
    {
      "epoch": 1.22,
      "learning_rate": 9.230111206159112e-06,
      "loss": 1.3665,
      "step": 190
    },
    {
      "epoch": 1.28,
      "learning_rate": 9.187339606501285e-06,
      "loss": 1.3762,
      "step": 200
    },
    {
      "epoch": 1.34,
      "learning_rate": 9.144568006843457e-06,
      "loss": 1.4179,
      "step": 210
    },
    {
      "epoch": 1.41,
      "learning_rate": 9.10179640718563e-06,
      "loss": 1.3904,
      "step": 220
    },
    {
      "epoch": 1.47,
      "learning_rate": 9.059024807527801e-06,
      "loss": 1.4837,
      "step": 230
    },
    {
      "epoch": 1.54,
      "learning_rate": 9.016253207869975e-06,
      "loss": 1.3727,
      "step": 240
    },
    {
      "epoch": 1.6,
      "learning_rate": 8.973481608212148e-06,
      "loss": 1.4353,
      "step": 250
    },
    {
      "epoch": 1.66,
      "learning_rate": 8.930710008554321e-06,
      "loss": 1.4216,
      "step": 260
    },
    {
      "epoch": 1.73,
      "learning_rate": 8.887938408896494e-06,
      "loss": 1.3115,
      "step": 270
    },
    {
      "epoch": 1.79,
      "learning_rate": 8.845166809238667e-06,
      "loss": 1.4047,
      "step": 280
    },
    {
      "epoch": 1.86,
      "learning_rate": 8.802395209580839e-06,
      "loss": 1.3769,
      "step": 290
    },
    {
      "epoch": 1.92,
      "learning_rate": 8.759623609923012e-06,
      "loss": 1.3999,
      "step": 300
    },
    {
      "epoch": 1.98,
      "learning_rate": 8.716852010265184e-06,
      "loss": 1.3121,
      "step": 310
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.3994140625,
      "eval_runtime": 8.0518,
      "eval_samples_per_second": 124.196,
      "eval_steps_per_second": 7.824,
      "step": 312
    },
    {
      "epoch": 2.05,
      "learning_rate": 8.674080410607357e-06,
      "loss": 1.1751,
      "step": 320
    },
    {
      "epoch": 2.11,
      "learning_rate": 8.63130881094953e-06,
      "loss": 1.0655,
      "step": 330
    },
    {
      "epoch": 2.18,
      "learning_rate": 8.588537211291703e-06,
      "loss": 1.1074,
      "step": 340
    },
    {
      "epoch": 2.24,
      "learning_rate": 8.545765611633877e-06,
      "loss": 1.119,
      "step": 350
    },
    {
      "epoch": 2.3,
      "learning_rate": 8.50299401197605e-06,
      "loss": 1.0707,
      "step": 360
    },
    {
      "epoch": 2.37,
      "learning_rate": 8.460222412318221e-06,
      "loss": 1.096,
      "step": 370
    },
    {
      "epoch": 2.43,
      "learning_rate": 8.417450812660394e-06,
      "loss": 1.1375,
      "step": 380
    },
    {
      "epoch": 2.5,
      "learning_rate": 8.374679213002566e-06,
      "loss": 1.0796,
      "step": 390
    },
    {
      "epoch": 2.56,
      "learning_rate": 8.33190761334474e-06,
      "loss": 1.1475,
      "step": 400
    },
    {
      "epoch": 2.62,
      "learning_rate": 8.289136013686912e-06,
      "loss": 1.0872,
      "step": 410
    },
    {
      "epoch": 2.69,
      "learning_rate": 8.246364414029086e-06,
      "loss": 1.157,
      "step": 420
    },
    {
      "epoch": 2.75,
      "learning_rate": 8.203592814371259e-06,
      "loss": 1.1479,
      "step": 430
    },
    {
      "epoch": 2.82,
      "learning_rate": 8.160821214713432e-06,
      "loss": 1.1673,
      "step": 440
    },
    {
      "epoch": 2.88,
      "learning_rate": 8.118049615055604e-06,
      "loss": 1.1173,
      "step": 450
    },
    {
      "epoch": 2.94,
      "learning_rate": 8.075278015397777e-06,
      "loss": 1.131,
      "step": 460
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.4365234375,
      "eval_runtime": 8.0589,
      "eval_samples_per_second": 124.086,
      "eval_steps_per_second": 7.817,
      "step": 468
    },
    {
      "epoch": 3.01,
      "learning_rate": 8.032506415739948e-06,
      "loss": 1.0934,
      "step": 470
    },
    {
      "epoch": 3.07,
      "learning_rate": 7.989734816082122e-06,
      "loss": 0.9046,
      "step": 480
    },
    {
      "epoch": 3.14,
      "learning_rate": 7.946963216424295e-06,
      "loss": 0.9161,
      "step": 490
    },
    {
      "epoch": 3.2,
      "learning_rate": 7.904191616766468e-06,
      "loss": 0.9046,
      "step": 500
    },
    {
      "epoch": 3.26,
      "learning_rate": 7.861420017108641e-06,
      "loss": 0.8453,
      "step": 510
    },
    {
      "epoch": 3.33,
      "learning_rate": 7.818648417450814e-06,
      "loss": 0.8856,
      "step": 520
    },
    {
      "epoch": 3.39,
      "learning_rate": 7.775876817792986e-06,
      "loss": 0.8545,
      "step": 530
    },
    {
      "epoch": 3.46,
      "learning_rate": 7.733105218135159e-06,
      "loss": 0.8408,
      "step": 540
    },
    {
      "epoch": 3.52,
      "learning_rate": 7.69033361847733e-06,
      "loss": 0.9151,
      "step": 550
    },
    {
      "epoch": 3.58,
      "learning_rate": 7.647562018819504e-06,
      "loss": 0.8425,
      "step": 560
    },
    {
      "epoch": 3.65,
      "learning_rate": 7.604790419161677e-06,
      "loss": 0.9097,
      "step": 570
    },
    {
      "epoch": 3.71,
      "learning_rate": 7.56201881950385e-06,
      "loss": 0.8963,
      "step": 580
    },
    {
      "epoch": 3.78,
      "learning_rate": 7.5192472198460235e-06,
      "loss": 0.8434,
      "step": 590
    },
    {
      "epoch": 3.84,
      "learning_rate": 7.476475620188196e-06,
      "loss": 0.9026,
      "step": 600
    },
    {
      "epoch": 3.9,
      "learning_rate": 7.433704020530369e-06,
      "loss": 0.8637,
      "step": 610
    },
    {
      "epoch": 3.97,
      "learning_rate": 7.390932420872541e-06,
      "loss": 0.8843,
      "step": 620
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.44140625,
      "eval_runtime": 8.0541,
      "eval_samples_per_second": 124.16,
      "eval_steps_per_second": 7.822,
      "step": 625
    },
    {
      "epoch": 4.03,
      "learning_rate": 7.348160821214714e-06,
      "loss": 0.7666,
      "step": 630
    },
    {
      "epoch": 4.1,
      "learning_rate": 7.305389221556887e-06,
      "loss": 0.6669,
      "step": 640
    },
    {
      "epoch": 4.16,
      "learning_rate": 7.262617621899059e-06,
      "loss": 0.6223,
      "step": 650
    },
    {
      "epoch": 4.22,
      "learning_rate": 7.219846022241233e-06,
      "loss": 0.6663,
      "step": 660
    },
    {
      "epoch": 4.29,
      "learning_rate": 7.177074422583406e-06,
      "loss": 0.707,
      "step": 670
    },
    {
      "epoch": 4.35,
      "learning_rate": 7.134302822925578e-06,
      "loss": 0.6756,
      "step": 680
    },
    {
      "epoch": 4.42,
      "learning_rate": 7.091531223267751e-06,
      "loss": 0.6525,
      "step": 690
    },
    {
      "epoch": 4.48,
      "learning_rate": 7.048759623609923e-06,
      "loss": 0.6788,
      "step": 700
    },
    {
      "epoch": 4.54,
      "learning_rate": 7.005988023952096e-06,
      "loss": 0.6807,
      "step": 710
    },
    {
      "epoch": 4.61,
      "learning_rate": 6.963216424294269e-06,
      "loss": 0.6657,
      "step": 720
    },
    {
      "epoch": 4.67,
      "learning_rate": 6.920444824636442e-06,
      "loss": 0.6693,
      "step": 730
    },
    {
      "epoch": 4.74,
      "learning_rate": 6.877673224978615e-06,
      "loss": 0.668,
      "step": 740
    },
    {
      "epoch": 4.8,
      "learning_rate": 6.834901625320788e-06,
      "loss": 0.668,
      "step": 750
    },
    {
      "epoch": 4.86,
      "learning_rate": 6.7921300256629605e-06,
      "loss": 0.6732,
      "step": 760
    },
    {
      "epoch": 4.93,
      "learning_rate": 6.749358426005134e-06,
      "loss": 0.7372,
      "step": 770
    },
    {
      "epoch": 4.99,
      "learning_rate": 6.706586826347305e-06,
      "loss": 0.6962,
      "step": 780
    },
    {
      "epoch": 5.0,
      "eval_loss": 1.5791015625,
      "eval_runtime": 8.0715,
      "eval_samples_per_second": 123.893,
      "eval_steps_per_second": 7.805,
      "step": 781
    },
    {
      "epoch": 5.06,
      "learning_rate": 6.6638152266894785e-06,
      "loss": 0.5123,
      "step": 790
    },
    {
      "epoch": 5.12,
      "learning_rate": 6.621043627031652e-06,
      "loss": 0.4862,
      "step": 800
    },
    {
      "epoch": 5.18,
      "learning_rate": 6.578272027373824e-06,
      "loss": 0.4761,
      "step": 810
    },
    {
      "epoch": 5.25,
      "learning_rate": 6.535500427715997e-06,
      "loss": 0.4629,
      "step": 820
    },
    {
      "epoch": 5.31,
      "learning_rate": 6.4927288280581705e-06,
      "loss": 0.5087,
      "step": 830
    },
    {
      "epoch": 5.38,
      "learning_rate": 6.449957228400343e-06,
      "loss": 0.5158,
      "step": 840
    },
    {
      "epoch": 5.44,
      "learning_rate": 6.407185628742516e-06,
      "loss": 0.5063,
      "step": 850
    },
    {
      "epoch": 5.5,
      "learning_rate": 6.364414029084688e-06,
      "loss": 0.4991,
      "step": 860
    },
    {
      "epoch": 5.57,
      "learning_rate": 6.321642429426861e-06,
      "loss": 0.522,
      "step": 870
    },
    {
      "epoch": 5.63,
      "learning_rate": 6.278870829769034e-06,
      "loss": 0.473,
      "step": 880
    },
    {
      "epoch": 5.7,
      "learning_rate": 6.236099230111206e-06,
      "loss": 0.5144,
      "step": 890
    },
    {
      "epoch": 5.76,
      "learning_rate": 6.19332763045338e-06,
      "loss": 0.5029,
      "step": 900
    },
    {
      "epoch": 5.82,
      "learning_rate": 6.150556030795553e-06,
      "loss": 0.4997,
      "step": 910
    },
    {
      "epoch": 5.89,
      "learning_rate": 6.107784431137725e-06,
      "loss": 0.5104,
      "step": 920
    },
    {
      "epoch": 5.95,
      "learning_rate": 6.065012831479898e-06,
      "loss": 0.4855,
      "step": 930
    },
    {
      "epoch": 6.0,
      "eval_loss": 1.708984375,
      "eval_runtime": 8.0667,
      "eval_samples_per_second": 123.966,
      "eval_steps_per_second": 7.81,
      "step": 937
    },
    {
      "epoch": 6.02,
      "learning_rate": 6.02224123182207e-06,
      "loss": 0.4809,
      "step": 940
    },
    {
      "epoch": 6.08,
      "learning_rate": 5.979469632164243e-06,
      "loss": 0.3623,
      "step": 950
    },
    {
      "epoch": 6.14,
      "learning_rate": 5.936698032506416e-06,
      "loss": 0.3593,
      "step": 960
    },
    {
      "epoch": 6.21,
      "learning_rate": 5.893926432848589e-06,
      "loss": 0.3636,
      "step": 970
    },
    {
      "epoch": 6.27,
      "learning_rate": 5.851154833190762e-06,
      "loss": 0.3533,
      "step": 980
    },
    {
      "epoch": 6.34,
      "learning_rate": 5.808383233532935e-06,
      "loss": 0.3705,
      "step": 990
    },
    {
      "epoch": 6.4,
      "learning_rate": 5.7656116338751075e-06,
      "loss": 0.3531,
      "step": 1000
    },
    {
      "epoch": 6.46,
      "learning_rate": 5.722840034217281e-06,
      "loss": 0.3234,
      "step": 1010
    },
    {
      "epoch": 6.53,
      "learning_rate": 5.680068434559452e-06,
      "loss": 0.3523,
      "step": 1020
    },
    {
      "epoch": 6.59,
      "learning_rate": 5.6372968349016254e-06,
      "loss": 0.3258,
      "step": 1030
    },
    {
      "epoch": 6.66,
      "learning_rate": 5.594525235243799e-06,
      "loss": 0.3481,
      "step": 1040
    },
    {
      "epoch": 6.72,
      "learning_rate": 5.551753635585971e-06,
      "loss": 0.3705,
      "step": 1050
    },
    {
      "epoch": 6.78,
      "learning_rate": 5.508982035928144e-06,
      "loss": 0.3714,
      "step": 1060
    },
    {
      "epoch": 6.85,
      "learning_rate": 5.4662104362703175e-06,
      "loss": 0.3398,
      "step": 1070
    },
    {
      "epoch": 6.91,
      "learning_rate": 5.42343883661249e-06,
      "loss": 0.3521,
      "step": 1080
    },
    {
      "epoch": 6.98,
      "learning_rate": 5.380667236954663e-06,
      "loss": 0.3485,
      "step": 1090
    },
    {
      "epoch": 7.0,
      "eval_loss": 1.7568359375,
      "eval_runtime": 8.0546,
      "eval_samples_per_second": 124.153,
      "eval_steps_per_second": 7.822,
      "step": 1093
    },
    {
      "epoch": 7.04,
      "learning_rate": 5.342172797262618e-06,
      "loss": 0.2915,
      "step": 1100
    },
    {
      "epoch": 7.1,
      "learning_rate": 5.299401197604791e-06,
      "loss": 0.2397,
      "step": 1110
    },
    {
      "epoch": 7.17,
      "learning_rate": 5.256629597946964e-06,
      "loss": 0.2605,
      "step": 1120
    },
    {
      "epoch": 7.23,
      "learning_rate": 5.213857998289137e-06,
      "loss": 0.2468,
      "step": 1130
    },
    {
      "epoch": 7.3,
      "learning_rate": 5.171086398631308e-06,
      "loss": 0.2497,
      "step": 1140
    },
    {
      "epoch": 7.36,
      "learning_rate": 5.128314798973482e-06,
      "loss": 0.2384,
      "step": 1150
    },
    {
      "epoch": 7.42,
      "learning_rate": 5.085543199315655e-06,
      "loss": 0.2615,
      "step": 1160
    },
    {
      "epoch": 7.49,
      "learning_rate": 5.042771599657827e-06,
      "loss": 0.2251,
      "step": 1170
    },
    {
      "epoch": 7.55,
      "learning_rate": 5e-06,
      "loss": 0.2247,
      "step": 1180
    },
    {
      "epoch": 7.62,
      "learning_rate": 4.957228400342174e-06,
      "loss": 0.24,
      "step": 1190
    },
    {
      "epoch": 7.68,
      "learning_rate": 4.914456800684346e-06,
      "loss": 0.2268,
      "step": 1200
    },
    {
      "epoch": 7.74,
      "learning_rate": 4.871685201026518e-06,
      "loss": 0.2187,
      "step": 1210
    },
    {
      "epoch": 7.81,
      "learning_rate": 4.8289136013686916e-06,
      "loss": 0.2635,
      "step": 1220
    },
    {
      "epoch": 7.87,
      "learning_rate": 4.786142001710865e-06,
      "loss": 0.2192,
      "step": 1230
    },
    {
      "epoch": 7.94,
      "learning_rate": 4.743370402053037e-06,
      "loss": 0.2337,
      "step": 1240
    },
    {
      "epoch": 8.0,
      "learning_rate": 4.7005988023952095e-06,
      "loss": 0.2311,
      "step": 1250
    },
    {
      "epoch": 8.0,
      "eval_loss": 1.91796875,
      "eval_runtime": 8.0907,
      "eval_samples_per_second": 123.599,
      "eval_steps_per_second": 7.787,
      "step": 1250
    },
    {
      "epoch": 8.06,
      "learning_rate": 4.657827202737383e-06,
      "loss": 0.1664,
      "step": 1260
    },
    {
      "epoch": 8.13,
      "learning_rate": 4.615055603079556e-06,
      "loss": 0.1622,
      "step": 1270
    },
    {
      "epoch": 8.19,
      "learning_rate": 4.572284003421728e-06,
      "loss": 0.1425,
      "step": 1280
    },
    {
      "epoch": 8.26,
      "learning_rate": 4.529512403763901e-06,
      "loss": 0.1451,
      "step": 1290
    },
    {
      "epoch": 8.32,
      "learning_rate": 4.486740804106074e-06,
      "loss": 0.1313,
      "step": 1300
    },
    {
      "epoch": 8.38,
      "learning_rate": 4.443969204448247e-06,
      "loss": 0.164,
      "step": 1310
    },
    {
      "epoch": 8.45,
      "learning_rate": 4.4011976047904195e-06,
      "loss": 0.1574,
      "step": 1320
    },
    {
      "epoch": 8.51,
      "learning_rate": 4.358426005132592e-06,
      "loss": 0.1595,
      "step": 1330
    },
    {
      "epoch": 8.58,
      "learning_rate": 4.315654405474765e-06,
      "loss": 0.1478,
      "step": 1340
    },
    {
      "epoch": 8.64,
      "learning_rate": 4.272882805816938e-06,
      "loss": 0.1413,
      "step": 1350
    },
    {
      "epoch": 8.7,
      "learning_rate": 4.230111206159111e-06,
      "loss": 0.1477,
      "step": 1360
    },
    {
      "epoch": 8.77,
      "learning_rate": 4.187339606501283e-06,
      "loss": 0.1452,
      "step": 1370
    },
    {
      "epoch": 8.83,
      "learning_rate": 4.144568006843456e-06,
      "loss": 0.152,
      "step": 1380
    },
    {
      "epoch": 8.9,
      "learning_rate": 4.1017964071856294e-06,
      "loss": 0.1656,
      "step": 1390
    },
    {
      "epoch": 8.96,
      "learning_rate": 4.059024807527802e-06,
      "loss": 0.162,
      "step": 1400
    },
    {
      "epoch": 9.0,
      "eval_loss": 2.068359375,
      "eval_runtime": 8.0861,
      "eval_samples_per_second": 123.669,
      "eval_steps_per_second": 7.791,
      "step": 1406
    },
    {
      "epoch": 9.02,
      "learning_rate": 4.016253207869974e-06,
      "loss": 0.1223,
      "step": 1410
    },
    {
      "epoch": 9.09,
      "learning_rate": 3.973481608212147e-06,
      "loss": 0.1027,
      "step": 1420
    },
    {
      "epoch": 9.15,
      "learning_rate": 3.930710008554321e-06,
      "loss": 0.0753,
      "step": 1430
    },
    {
      "epoch": 9.22,
      "learning_rate": 3.887938408896493e-06,
      "loss": 0.1049,
      "step": 1440
    },
    {
      "epoch": 9.28,
      "learning_rate": 3.845166809238665e-06,
      "loss": 0.0958,
      "step": 1450
    },
    {
      "epoch": 9.34,
      "learning_rate": 3.8023952095808385e-06,
      "loss": 0.0753,
      "step": 1460
    },
    {
      "epoch": 9.41,
      "learning_rate": 3.7596236099230118e-06,
      "loss": 0.0925,
      "step": 1470
    },
    {
      "epoch": 9.47,
      "learning_rate": 3.7168520102651845e-06,
      "loss": 0.0774,
      "step": 1480
    },
    {
      "epoch": 9.54,
      "learning_rate": 3.674080410607357e-06,
      "loss": 0.0932,
      "step": 1490
    },
    {
      "epoch": 9.6,
      "learning_rate": 3.6313088109495297e-06,
      "loss": 0.0773,
      "step": 1500
    },
    {
      "epoch": 9.66,
      "learning_rate": 3.588537211291703e-06,
      "loss": 0.0853,
      "step": 1510
    },
    {
      "epoch": 9.73,
      "learning_rate": 3.5457656116338757e-06,
      "loss": 0.101,
      "step": 1520
    },
    {
      "epoch": 9.79,
      "learning_rate": 3.502994011976048e-06,
      "loss": 0.0979,
      "step": 1530
    },
    {
      "epoch": 9.86,
      "learning_rate": 3.460222412318221e-06,
      "loss": 0.0818,
      "step": 1540
    },
    {
      "epoch": 9.92,
      "learning_rate": 3.417450812660394e-06,
      "loss": 0.0884,
      "step": 1550
    },
    {
      "epoch": 9.98,
      "learning_rate": 3.374679213002567e-06,
      "loss": 0.0918,
      "step": 1560
    },
    {
      "epoch": 10.0,
      "eval_loss": 2.203125,
      "eval_runtime": 8.0904,
      "eval_samples_per_second": 123.604,
      "eval_steps_per_second": 7.787,
      "step": 1562
    }
  ],
  "max_steps": 2340,
  "num_train_epochs": 15,
  "total_flos": 1.6190315413781873e+18,
  "trial_name": null,
  "trial_params": null
}
