step 0: train loss 11.0107, val loss 11.0322, B_tok: 0.000000
step 1000: train loss 5.5537, val loss 5.6564, B_tok: 0.000061
step 2000: train loss 4.6039, val loss 4.6401, B_tok: 0.000061
step 3000: train loss 4.0416, val loss 4.0546, B_tok: 0.000061
step 4000: train loss 3.8057, val loss 3.8332, B_tok: 0.000061
step 5000: train loss 3.6986, val loss 3.7235, B_tok: 0.000061
step 6000: train loss 3.6268, val loss 3.6583, B_tok: 0.000061
step 7000: train loss 3.5592, val loss 3.5831, B_tok: 0.000061
step 8000: train loss 3.5064, val loss 3.5502, B_tok: 0.000061
step 9000: train loss 3.4873, val loss 3.5307, B_tok: 0.000061
step 10000: train loss 3.4886, val loss 3.5361, B_tok: 0.000061
step 11000: train loss 3.4570, val loss 3.4919, B_tok: 0.000061
step 12000: train loss 3.4463, val loss 3.4879, B_tok: 0.000061
step 13000: train loss 3.4422, val loss 3.4890, B_tok: 0.000061
step 14000: train loss 3.4520, val loss 3.5002, B_tok: 0.000061
step 15000: train loss 3.4708, val loss 3.4870, B_tok: 0.000061
step 16000: train loss 3.4684, val loss 3.5079, B_tok: 0.000061
step 17000: train loss 3.4883, val loss 3.5506, B_tok: 0.000061
step 18000: train loss 3.4562, val loss 3.5175, B_tok: 0.000061
step 19000: train loss 3.4765, val loss 3.4927, B_tok: 0.000061
step 20000: train loss 3.4434, val loss 3.4926, B_tok: 0.000061
step 21000: train loss 3.4805, val loss 3.5082, B_tok: 0.000061
step 22000: train loss 3.5322, val loss 3.6524, B_tok: 0.000061
step 23000: train loss 3.5010, val loss 3.4879, B_tok: 0.000061
step 24000: train loss 3.4233, val loss 3.4660, B_tok: 0.000061
step 25000: train loss 3.4391, val loss 3.4762, B_tok: 0.000061
step 26000: train loss 3.4301, val loss 3.4840, B_tok: 0.000061
step 27000: train loss 3.4819, val loss 3.5015, B_tok: 0.000061
step 28000: train loss 3.4210, val loss 3.4722, B_tok: 0.000061
step 29000: train loss 3.4548, val loss 3.4592, B_tok: 0.000061
step 30000: train loss 3.4059, val loss 3.4479, B_tok: 0.000061
step 31000: train loss 3.4205, val loss 3.4384, B_tok: 0.000061
step 32000: train loss 3.3973, val loss 3.4792, B_tok: 0.000061
step 33000: train loss 3.3782, val loss 3.4233, B_tok: 0.000061
step 34000: train loss 3.3860, val loss 3.4260, B_tok: 0.000061
step 35000: train loss 3.3754, val loss 3.4046, B_tok: 0.000061
step 36000: train loss 3.3398, val loss 3.3738, B_tok: 0.000061
step 37000: train loss 3.3325, val loss 3.3749, B_tok: 0.000061
step 38000: train loss 3.3280, val loss 3.3705, B_tok: 0.000061
step 39000: train loss 3.3176, val loss 3.3755, B_tok: 0.000061
step 40000: train loss 3.3022, val loss 3.3347, B_tok: 0.000061
step 41000: train loss 3.2971, val loss 3.3288, B_tok: 0.000061
step 42000: train loss 3.2932, val loss 3.3267, B_tok: 0.000061
step 43000: train loss 3.2816, val loss 3.3238, B_tok: 0.000061
step 44000: train loss 3.2572, val loss 3.2931, B_tok: 0.000061
step 45000: train loss 3.2594, val loss 3.2945, B_tok: 0.000061
step 46000: train loss 3.2479, val loss 3.2917, B_tok: 0.000061
step 47000: train loss 3.2455, val loss 3.2870, B_tok: 0.000061
step 48000: train loss 3.2332, val loss 3.2599, B_tok: 0.000061
step 49000: train loss 3.2155, val loss 3.2524, B_tok: 0.000061
step 50000: train loss 3.2119, val loss 3.2479, B_tok: 0.000061
step 51000: train loss 3.2163, val loss 3.2500, B_tok: 0.000061
step 52000: train loss 3.1978, val loss 3.2272, B_tok: 0.000061
step 53000: train loss 3.1861, val loss 3.2197, B_tok: 0.000061
step 54000: train loss 3.1826, val loss 3.2099, B_tok: 0.000061
step 55000: train loss 3.1782, val loss 3.2148, B_tok: 0.000061
step 56000: train loss 3.1650, val loss 3.1965, B_tok: 0.000061
step 57000: train loss 3.1603, val loss 3.1898, B_tok: 0.000061
step 58000: train loss 3.1560, val loss 3.1867, B_tok: 0.000061
step 59000: train loss 3.1564, val loss 3.1829, B_tok: 0.000061
step 60000: train loss 3.1447, val loss 3.1661, B_tok: 0.000061
step 61000: train loss 3.1360, val loss 3.1613, B_tok: 0.000061
step 62000: train loss 3.1310, val loss 3.1580, B_tok: 0.000061
step 62000: train loss 3.1310, val loss 3.1580, B_tok: 0.000000
step 63000: train loss 3.1095, val loss 3.1527, B_tok: 0.000061
step 64000: train loss 3.1132, val loss 3.1549, B_tok: 0.000061
step 65000: train loss 3.1084, val loss 3.1418, B_tok: 0.000061
step 66000: train loss 3.1025, val loss 3.1301, B_tok: 0.000061
step 67000: train loss 3.0974, val loss 3.1329, B_tok: 0.000061
step 68000: train loss 3.0971, val loss 3.1345, B_tok: 0.000061
step 69000: train loss 3.0863, val loss 3.1173, B_tok: 0.000061
step 70000: train loss 3.0752, val loss 3.1126, B_tok: 0.000061
step 71000: train loss 3.0704, val loss 3.1083, B_tok: 0.000061
step 72000: train loss 3.0730, val loss 3.1139, B_tok: 0.000061
step 73000: train loss 3.0657, val loss 3.0966, B_tok: 0.000061
step 74000: train loss 3.0554, val loss 3.0871, B_tok: 0.000061
step 75000: train loss 3.0526, val loss 3.0854, B_tok: 0.000061
step 76000: train loss 3.0508, val loss 3.0869, B_tok: 0.000061
step 77000: train loss 3.0474, val loss 3.0773, B_tok: 0.000061
step 78000: train loss 3.0377, val loss 3.0674, B_tok: 0.000061
step 79000: train loss 3.0350, val loss 3.0664, B_tok: 0.000061
step 80000: train loss 3.0354, val loss 3.0668, B_tok: 0.000061
step 81000: train loss 3.0269, val loss 3.0552, B_tok: 0.000061
step 82000: train loss 3.0239, val loss 3.0502, B_tok: 0.000061
step 83000: train loss 3.0224, val loss 3.0495, B_tok: 0.000061
step 84000: train loss 3.0218, val loss 3.0493, B_tok: 0.000061
step 85000: train loss 3.0161, val loss 3.0413, B_tok: 0.000061
step 86000: train loss 3.0088, val loss 3.0343, B_tok: 0.000061
step 87000: train loss 3.0079, val loss 3.0359, B_tok: 0.000061
step 88000: train loss 3.0072, val loss 3.0356, B_tok: 0.000061
step 89000: train loss 3.0058, val loss 3.0326, B_tok: 0.000061
step 90000: train loss 3.0005, val loss 3.0246, B_tok: 0.000061
step 91000: train loss 3.0002, val loss 3.0239, B_tok: 0.000061
step 92000: train loss 2.9985, val loss 3.0224, B_tok: 0.000061
step 93000: train loss 2.9962, val loss 3.0199, B_tok: 0.000061
step 94000: train loss 2.9929, val loss 3.0163, B_tok: 0.000061
step 95000: train loss 2.9912, val loss 3.0147, B_tok: 0.000061
step 96000: train loss 2.9914, val loss 3.0146, B_tok: 0.000061
step 97000: train loss 2.9887, val loss 3.0110, B_tok: 0.000061
step 98000: train loss 2.9863, val loss 3.0064, B_tok: 0.000061
step 99000: train loss 2.9835, val loss 3.0061, B_tok: 0.000061
step 100000: train loss 2.9842, val loss 3.0099, B_tok: 0.000061
