A,B,E,alpha,beta,loss,r_squared,kaplan,data,loss_name,extrap_pred,extrap_loss
1437.597074638401,9950.276141142727,2.491327910346117,0.4206416093666477,0.4687899050491425,1.5130144899891891e-06,0.9989359161632114,False,smollm-corpus,eval/c4_val/CrossEntropyLoss,2.729310090083975,2.642553806304932
25.168939262390317,65.73129970744787,0.0008849265395158955,0.11772357610534413,0.1702869267122169,1.562495251459514e-05,0.9701587535318555,False,smollm-corpus,eval/downstream_ce_loss/arc_challenge_test_ce_loss,2.8964020090176783,2.836977481842041
33.27098073383463,186.53905218622188,0.01669666483764946,0.1264994809799035,0.22080376433186524,2.181378660593047e-05,0.9573882687258757,False,smollm-corpus,eval/downstream_ce_loss/arc_easy_test_ce_loss,2.904075791807372,2.706071615219116
672048609.2087054,21543134865.7771,1.871760744812452,1.149273499246623,1.1597433655483156,9.5330578480405e-05,0.8556709921000448,False,smollm-corpus,eval/downstream_ce_loss/boolq_test_ce_loss,1.887811642009127,4.256070613861084
2830.279444001197,12640.533601492145,2.099612894322889,0.4595936913111745,0.47938613607933794,1.970800582928246e-06,0.9985314221750999,False,smollm-corpus,eval/downstream_ce_loss/hellaswag_test_ce_loss,2.3125145927286246,2.2464566230773926
183.26376971113902,1579.3706557899718,2.53741185403666,0.284124437494398,0.36671532137637153,1.1378103445333573e-05,0.9769961655339141,False,smollm-corpus,eval/downstream_ce_loss/mmlu_humanities_test_ce_loss,3.08692274260235,2.9045870304107666
277.252003760315,1016.7932744935233,1.6414593686809422,0.2871389100243734,0.31978089155295325,9.73908198556837e-06,0.9921393423275501,False,smollm-corpus,eval/downstream_ce_loss/mmlu_other_test_ce_loss,2.5380982486254546,2.452460289001465
28.81693430614823,107.02944150244376,0.1707804395508156,0.12827336255166616,0.1993671638565619,1.3504107993672918e-05,0.9814658604459088,False,smollm-corpus,eval/downstream_ce_loss/mmlu_social_sciences_test_ce_loss,2.689432859640434,2.526923418045044
583.6666092273578,1707.7412788139993,2.1264096602123272,0.35550692309848314,0.3659908380604301,1.4410525939997351e-05,0.9774205913667924,False,smollm-corpus,eval/downstream_ce_loss/mmlu_stem_test_ce_loss,2.5741036419591556,2.2851953506469727
955.8235002009675,25706.043192346293,4.468985157796694,0.395873452224433,0.519253536485006,3.974627461355151e-06,0.9924708477865397,False,smollm-corpus,eval/downstream_ce_loss/openbook_qa_test_ce_loss,4.703094752413847,4.527759552001953
849.6247831939181,6039.31290283353,2.3658328546386014,0.3753336402514768,0.4332395245607907,4.688094112047521e-06,0.9964328697470557,False,smollm-corpus,eval/downstream_ce_loss/piqa_test_ce_loss,2.732339686108486,2.720210075378418
18.362088691271452,57.57954028310011,0.6623125748525232,0.10439610091908326,0.1684899424731928,3.303463902808169e-05,0.8318229679396323,False,smollm-corpus,eval/downstream_ce_loss/sciq_test_ce_loss,3.430692850957519,2.641565322875977
23042.594531882005,1505863.3585489718,2.896807486618969,0.5901697736476411,0.7248417223968363,3.8021055755614757e-06,0.9963484760659528,False,smollm-corpus,eval/downstream_ce_loss/winogrande_test_ce_loss,2.9786316669963346,2.842735767364502
1359.2592975228624,4916.664502610866,2.3828159529059794,0.41596544960387966,0.4307125982698554,1.5575977592679148e-06,0.9989357523903151,False,smollm-corpus,eval/fineweb_100b_val/CrossEntropyLoss,2.652713624318154,2.5680596828460693
1541.3126210050127,6940.094056665566,2.271357375904847,0.4240133576755377,0.4487925818763247,1.5620012127395726e-06,0.9989353300564536,False,smollm-corpus,eval/fineweb_1T_val/CrossEntropyLoss,2.5222921159030047,2.444973945617676
2119.8388229491293,7047.149707309116,2.037944153430052,0.44250814658632603,0.4484820460578543,1.5948356262616455e-06,0.9990898615806777,False,smollm-corpus,eval/fineweb_edu_100b_val/CrossEntropyLoss,2.2796479904628137,2.1882436275482178
2436.928519455241,17008.304100671885,1.7494782032239606,0.4541911601706172,0.493382860593896,2.07626699540551e-06,0.9977550921806402,False,smollm-corpus,eval/proof_pile_2_val/CrossEntropyLoss,1.9543161511578448,1.892138719558716
1705.0928798775788,8305.035010945165,2.2755397168402265,0.4287255482892253,0.4549669004607651,1.6122889621918e-06,0.9986745270177306,False,smollm-corpus,eval/slimpajama_val/CrossEntropyLoss,2.5291821371897583,2.44992470741272
2439.8335848803763,6916.032588373108,1.5515050666629593,0.4476990309586343,0.4436429165235239,2.241232711144815e-06,0.9987744939575729,False,smollm-corpus,eval/smollm_val/CrossEntropyLoss,1.8085518581154898,1.7137517929077148
3071.9242029243987,36804.97031865365,1.4601859214364508,0.46246641514428377,0.5247400829812117,3.5201611855266143e-06,0.9926956689904389,False,smollm-corpus,eval/starcoder_val/CrossEntropyLoss,1.6709025223471548,1.6067986488342283
67.1178601104149,401.3895777924891,2.0622440434568134,0.20964693626308692,0.2854971759121602,9.911579433736049e-06,0.9836017672299734,False,smollm-corpus,olmo_suite_ce_loss,3.093261874945725,2.9316823482513428
212.4957855836608,869.9546071799673,2.0321862136234143,0.2814935290787489,0.3242431768448502,1.1170461693578357e-05,0.9866247857128626,False,smollm-corpus,mmlu_suite_ce_loss,2.7711284266120426,2.542291522026062
2439.8335848803763,6916.032588373108,1.5515050666629593,0.4476990309586343,0.4436429165235239,2.241232711144815e-06,0.9987744939575729,False,smollm-corpus,val_loss,1.8085518581154898,1.7137517929077148
1794.391591788976,11145.077785416355,2.508699766878972,0.43492365962414997,0.47434098360174376,1.5510307143208786e-06,0.9989695237442234,False,fineweb-edu-100b,eval/c4_val/CrossEntropyLoss,2.732043636497944,2.649285078048706
25.384630008769342,103.94678865617831,0.3337774799404906,0.11587614655183961,0.20346377386493564,1.5161203408339295e-05,0.9654318472542822,False,fineweb-edu-100b,eval/downstream_ce_loss/arc_challenge_test_ce_loss,3.026488968020204,2.7264742851257324
35.808316240716195,120.54811808674378,0.007396108287825178,0.12923312714934396,0.20066054592992652,2.2597444257874666e-05,0.947006848617661,False,fineweb-edu-100b,eval/downstream_ce_loss/arc_easy_test_ce_loss,2.972996423768631,2.537889242172241
9183866.146655895,3184483917.121382,1.9315234663456546,0.8939045517330079,1.0526002857386592,7.085765942132752e-05,0.892913792234716,False,fineweb-edu-100b,eval/downstream_ce_loss/boolq_test_ce_loss,1.9772179998046882,5.0046586990356445
3730.6095017525618,17627.830810247156,2.1489883129260856,0.4770220635451865,0.4966172972110644,2.0312148751705797e-06,0.9985425224376759,False,fineweb-edu-100b,eval/downstream_ce_loss/hellaswag_test_ce_loss,2.3416780534245145,2.261918544769287
558.4698668114281,70332.87832075034,2.809451509919166,0.35489079614158825,0.5632669408694162,9.922312324465809e-06,0.9812396350044233,False,fineweb-edu-100b,eval/downstream_ce_loss/mmlu_humanities_test_ce_loss,3.108993703399629,2.928243398666382
195.62352873868593,1806.543327659543,1.6929906522404181,0.2644349821590023,0.35544763437806404,1.0061546985956255e-05,0.9907983755509673,False,fineweb-edu-100b,eval/downstream_ce_loss/mmlu_other_test_ce_loss,2.5710331176091805,2.411628246307373
26.019707250389086,73.99634049453965,0.002682580074345698,0.1214772713965357,0.17875195186080678,1.3981459040609303e-05,0.9769705140911407,False,fineweb-edu-100b,eval/downstream_ce_loss/mmlu_social_sciences_test_ce_loss,2.721684346265912,2.5014147758483887
86.81896291265532,1001.8376412973236,1.7089871097284715,0.2236747221223294,0.33511347763581917,1.3439498582472943e-05,0.9790930742984367,False,fineweb-edu-100b,eval/downstream_ce_loss/mmlu_stem_test_ce_loss,2.6132174320319272,2.3934459686279297
984.4916752938915,30900.47002236715,4.579647892006552,0.40340980610308064,0.5352788171867189,3.6159087158443537e-06,0.9922658981921206,False,fineweb-edu-100b,eval/downstream_ce_loss/openbook_qa_test_ce_loss,4.779551820858844,4.558957099914551
3082.84101825775,10271.903697710233,2.5825588184755985,0.45686080398994694,0.461443472709907,5.799401595224866e-06,0.9947688658035915,False,fineweb-edu-100b,eval/downstream_ce_loss/piqa_test_ce_loss,2.8388853257984525,2.7313506603240967
12.164048121414702,49.75102974054176,0.750951520584565,0.06999125404462023,0.18586642854462088,3.742125057530123e-05,0.7195362229520041,False,fineweb-edu-100b,eval/downstream_ce_loss/sciq_test_ce_loss,3.8839023467253018,2.5908124446868896
12563.211398861456,677674.8254996341,2.8650536360884478,0.5553694239569005,0.6862997688626437,4.254866008356859e-06,0.9949470075357576,False,fineweb-edu-100b,eval/downstream_ce_loss/winogrande_test_ce_loss,2.9605646526949174,2.8436450958251958
1616.8432703303974,5582.724478305957,2.385277325593376,0.4274729719699051,0.43741775529148486,1.612045947150518e-06,0.9989455156260664,False,fineweb-edu-100b,eval/fineweb_100b_val/CrossEntropyLoss,2.639364733993859,2.553007125854492
1790.5537714734094,7889.739020704551,2.3008494162906747,0.43403340222992837,0.45551194179197013,1.6259489344817703e-06,0.9989054045978764,False,fineweb-edu-100b,eval/fineweb_1T_val/CrossEntropyLoss,2.5382345316441453,2.4554443359375
2517.506177974272,7160.5596829948545,2.002191409489778,0.45401922795901956,0.4494721896113453,1.7403793220664928e-06,0.999022959588757,False,fineweb-edu-100b,eval/fineweb_edu_100b_val/CrossEntropyLoss,2.232919546728783,2.1262636184692383
66978.4281034083,532376865.14385706,4.176909411456503,0.6108383177213312,0.9787183710876127,1.854260768119511e-05,0.9583147298868977,False,fineweb-edu-100b,eval/proof_pile_2_val/CrossEntropyLoss,4.2973046548451785,4.166804313659668
2786.7987506539066,56642.74479423184,2.550597784985624,0.4538308616227278,0.5484949944009807,2.082455510986468e-06,0.9989763812419783,False,fineweb-edu-100b,eval/slimpajama_val/CrossEntropyLoss,2.7602703229831613,2.6699390411376958
3197.265769924049,88856.6390765435,2.243489954859767,0.4577281680702188,0.5690852041130795,2.3013570455030538e-06,0.9991489829160176,False,fineweb-edu-100b,eval/smollm_val/CrossEntropyLoss,2.456028918446733,2.3667337894439697
9873.184099178094,24831675.661958616,2.856781685049698,0.49722036863810115,0.8242122438842521,1.2319897842779851e-05,0.9877317613438897,False,fineweb-edu-100b,eval/starcoder_val/CrossEntropyLoss,3.0766942256898466,3.0782690048217773
40.922394443371395,58.46009347124477,1.5126780506207114,0.17253895397388264,0.18154166776356515,1.0636863566934624e-05,0.9771456576388919,False,fineweb-edu-100b,olmo_suite_ce_loss,3.1114886584814343,2.8930067675454274
114.66449125100374,730.4686256580216,1.8447870745357664,0.23869965463364792,0.3158216649649151,1.0281749826583992e-05,0.9868349271014476,False,fineweb-edu-100b,mmlu_suite_ce_loss,2.7617663554311993,2.5586830973625183
2517.506177974272,7160.5596829948545,2.002191409489778,0.45401922795901956,0.4494721896113453,1.7403793220664928e-06,0.999022959588757,False,fineweb-edu-100b,val_loss,2.232919546728783,2.1262636184692383
994.6423298253942,6957.46649219219,2.4558481607481206,0.3864690611270666,0.45385205965163583,4.153670921512725e-06,0.9966305591262298,False,slimpajama-chunk1,eval/c4_val/CrossEntropyLoss,2.7607562294732975,2.733860969543457
113.37235957026436,688.2542887074422,2.687054795255118,0.23843032251732468,0.30290635016579537,1.2074898427362818e-05,0.9758029017103708,False,slimpajama-chunk1,eval/downstream_ce_loss/arc_challenge_test_ce_loss,3.6906516718737192,3.2937986850738525
103.42669838421716,733.6578415016807,2.4526314948203485,0.21735834394317094,0.29388964219138336,1.6725234440748647e-05,0.9639542722593789,False,slimpajama-chunk1,eval/downstream_ce_loss/arc_easy_test_ce_loss,3.859676784579358,3.263295412063598
2758680.0438622152,21066289.465092853,1.8918685513406281,0.8287377336414412,0.8141172803757966,7.279193360115431e-05,0.8993440388270859,False,slimpajama-chunk1,eval/downstream_ce_loss/boolq_test_ce_loss,1.968274422907461,5.0746750831604
2125.6809418242656,10933.738407920999,2.088248850943594,0.4392963008776586,0.4681887452678197,1.7194581446608937e-06,0.9989776665243546,False,slimpajama-chunk1,eval/downstream_ce_loss/hellaswag_test_ce_loss,2.334747355351209,2.277791976928711
171.77610860080313,1004.0988447929517,2.7429241655972154,0.2865877148903695,0.3433515537067573,8.937896496469135e-06,0.9839573977785799,False,slimpajama-chunk1,eval/downstream_ce_loss/mmlu_humanities_test_ce_loss,3.276498733099795,2.9913547039031982
771.377408164432,10542.489722237546,2.456064542907398,0.35275518081148055,0.44114855616424603,8.288719608756433e-06,0.9930400202447128,False,slimpajama-chunk1,eval/downstream_ce_loss/mmlu_other_test_ce_loss,2.9945435372186138,2.667079448699951
32.6634435094138,291.7749967134469,1.6676342277621354,0.15018351514641143,0.26294307423407004,9.851566294783659e-06,0.9841041263186976,False,slimpajama-chunk1,eval/downstream_ce_loss/mmlu_social_sciences_test_ce_loss,3.3297157706003286,3.005012273788452
74.61493242300114,700.7785618622254,1.9967284963906764,0.21823931204890867,0.31451901437723445,1.1844388192321965e-05,0.980872654037232,False,slimpajama-chunk1,eval/downstream_ce_loss/mmlu_stem_test_ce_loss,2.9224983098958655,2.6991677284240723
394.2117725063568,47920.273075686586,4.592712216093291,0.33608985930383367,0.547050454170331,3.7130806747908637e-06,0.9930686198434001,False,slimpajama-chunk1,eval/downstream_ce_loss/openbook_qa_test_ce_loss,4.908704163359163,4.784531116485596
4738.501536667465,28368.48203116338,2.6587979861106397,0.47887297850606997,0.5084473806235624,4.439716223801703e-06,0.997114431249399,False,slimpajama-chunk1,eval/downstream_ce_loss/piqa_test_ce_loss,2.8923366532583437,2.774200439453125
30805.360725506405,650715.3940341996,4.08423742786248,0.5782179957104517,0.6434639436485684,2.6817257150212762e-05,0.929663216776861,False,slimpajama-chunk1,eval/downstream_ce_loss/sciq_test_ce_loss,4.265173472964377,3.17514443397522
15659.596412119601,1706639.9932798294,2.900137405271161,0.5720461898383851,0.7351662650759911,3.7380238659022507e-06,0.9961140869184162,False,slimpajama-chunk1,eval/downstream_ce_loss/winogrande_test_ce_loss,2.9793829367786584,2.827159881591797
1380.716037037478,8983.940207751139,2.5854462245907146,0.4059516175753427,0.46962467172431765,4.871666966799419e-06,0.9954618859390633,False,slimpajama-chunk1,eval/fineweb_100b_val/CrossEntropyLoss,2.8586666238189253,2.8753135204315186
1134.161277829345,15385.600983631537,2.5260709262727103,0.39191540073494446,0.4990179683585987,5.290515502778347e-06,0.9947731664111109,False,slimpajama-chunk1,eval/fineweb_1T_val/CrossEntropyLoss,2.8070368513224873,2.835650682449341
1639.833105008933,11720.04515956123,2.4061583327661857,0.41800499332507235,0.4800234414480997,3.747449252383286e-06,0.9973133393309122,False,slimpajama-chunk1,eval/fineweb_edu_100b_val/CrossEntropyLoss,2.6635822894805883,2.638636589050293
1930.3822990861768,24446.886053295424,1.7440260359399693,0.4466285927799579,0.5290844825020918,3.7027352081522406e-06,0.997238671907439,False,slimpajama-chunk1,eval/proof_pile_2_val/CrossEntropyLoss,1.9053284837656124,1.840466976165772
2048.3539339495346,6018.806526520867,2.0068496071220574,0.44091634411915875,0.43849091762026093,1.5198910903806237e-06,0.9988773750772344,False,slimpajama-chunk1,eval/slimpajama_val/CrossEntropyLoss,2.2589694821886765,2.1738879680633545
1686.6238814431954,19039.299860586914,2.1308907683841714,0.42183476910699247,0.5002982859796706,2.6652510206277157e-06,0.9986054747461417,False,slimpajama-chunk1,eval/smollm_val/CrossEntropyLoss,2.3777560107257805,2.3053719997406006
2229.5764402343916,295845.0379764381,1.4302379023018876,0.4479645442156886,0.6475208797754394,6.374635175612795e-06,0.9959185448289858,False,slimpajama-chunk1,eval/starcoder_val/CrossEntropyLoss,1.586262977459885,1.547000527381897
2127.5600244727448,16042.272758979881,3.2864487492209453,0.43116977378570764,0.47372561867911045,7.846710094105846e-06,0.9894199204133721,False,slimpajama-chunk1,olmo_suite_ce_loss,3.590240475840578,3.1994174207959856
97.2379409191931,1857.5913443489856,2.302251605964072,0.23270339458711972,0.3635727566168628,8.61088553591007e-06,0.9902050844386234,False,slimpajama-chunk1,mmlu_suite_ce_loss,3.133322793375477,2.8406535387039185
2048.3539339495346,6018.806526520867,2.0068496071220574,0.44091634411915875,0.43849091762026093,1.5198910903806237e-06,0.9988773750772344,False,slimpajama-chunk1,val_loss,2.2589694821886765,2.1738879680633545
1812.2463630955506,6189.873198278078,2.2852328416096017,0.4347396335108267,0.4435907814603418,1.627096832744058e-06,0.9991928817144183,False,fineweb-100b,eval/c4_val/CrossEntropyLoss,2.5276834885311716,2.4410955905914307
32.73353164420906,266.7700991413651,2.206502496209695,0.15680041311062684,0.25632179048421055,1.2148522278118997e-05,0.9631415555598478,False,fineweb-100b,eval/downstream_ce_loss/arc_challenge_test_ce_loss,3.7410640213023543,3.312608242034912
26.53461650699608,651.2930994501335,1.8144035039672084,0.1254407805109314,0.2943776401367779,1.6570170438013084e-05,0.9427066556931103,False,fineweb-100b,eval/downstream_ce_loss/arc_easy_test_ce_loss,3.9718512473753704,3.327625036239624
20817467.114091597,134188696943285.11,2.1200382176372563,0.9708035986124524,1.6048198852977305,7.453891391483251e-05,0.8045078091432614,False,fineweb-100b,eval/downstream_ce_loss/boolq_test_ce_loss,2.132862868861235,4.323652744293213
3818.7778595994223,18293.234584266596,2.0704086387074,0.4767218781627757,0.49744730692182815,1.77016556626809e-06,0.9992658524665892,False,fineweb-100b,eval/downstream_ce_loss/hellaswag_test_ce_loss,2.2677886563155325,2.209911346435547
1057.77707024117,45200.92050600171,3.119172268246051,0.39928355034561114,0.5409352172064515,6.986659854269094e-06,0.9891342115546813,False,fineweb-100b,eval/downstream_ce_loss/mmlu_humanities_test_ce_loss,3.3598499823493806,3.0759177207946777
1021.7904974999078,7722.768557566867,2.4874744495363954,0.3734187971875845,0.4275333313299963,7.186567265548861e-06,0.9947144797978967,False,fineweb-100b,eval/downstream_ce_loss/mmlu_other_test_ce_loss,2.97749376990096,2.654109477996826
102.50454106929763,482.89925679544086,2.5567881237197727,0.23834746884052732,0.2954816440990327,7.745271196213538e-06,0.9884275129400678,False,fineweb-100b,eval/downstream_ce_loss/mmlu_social_sciences_test_ce_loss,3.440862891255438,3.0161561965942383
187.46526386872918,983.2257384511712,2.4764641198345743,0.2803842447935144,0.33295384538205175,9.231450145709897e-06,0.9844145088016689,False,fineweb-100b,eval/downstream_ce_loss/mmlu_stem_test_ce_loss,3.1467537777105545,2.857668161392212
574.559689261567,5689.732568838844,4.538457824817154,0.3634635520536414,0.4396036771422331,3.3528049582543444e-06,0.9928208953463817,False,fineweb-100b,eval/downstream_ce_loss/openbook_qa_test_ce_loss,4.849944957602775,4.700038909912109
7273.290937959683,98939.24147053336,2.6411979918956257,0.5091399034322465,0.5777642830334312,4.306928884593359e-06,0.9971348154898584,False,fineweb-100b,eval/downstream_ce_loss/piqa_test_ce_loss,2.8095480763960308,2.6879100799560547
124158.33935978863,5516.16961760098,4.2975704324170945,0.6756548167413741,0.4160365358474887,2.554933276640582e-05,0.8560523269131869,False,fineweb-100b,eval/downstream_ce_loss/sciq_test_ce_loss,4.538103240975842,3.333494186401367
30380.69101616145,1316402.7366731644,2.877493894754377,0.6126960532031601,0.7247855424592571,3.9593522057052505e-06,0.9955122237655549,False,fineweb-100b,eval/downstream_ce_loss/winogrande_test_ce_loss,2.945198059629996,2.811347007751465
1636.5843435210331,4198.638775040238,2.1471759753034596,0.4267220125012678,0.421205669668254,1.4105020623875686e-06,0.9994445556944588,False,fineweb-100b,eval/fineweb_100b_val/CrossEntropyLoss,2.4193808258913045,2.328246593475342
1872.3015786419858,6453.284088261013,2.1136184560618534,0.43614842184206976,0.44471152658418717,1.5631472155355632e-06,0.9993539956188396,False,fineweb-100b,eval/fineweb_1T_val/CrossEntropyLoss,2.3578637165269787,2.268545627593994
1953.4934206095188,7374.620997934127,2.118447023302502,0.43711885763271313,0.4495243057688507,1.4660957431599937e-06,0.9994529691479473,False,fineweb-100b,eval/fineweb_edu_100b_val/CrossEntropyLoss,2.3671972924669142,2.2674803733825684
97121.2109442879,30368437219.859177,4.4792937025301365,0.6340308820362847,1.182346257870903,1.251275721396392e-05,0.9774317884808398,False,fineweb-100b,eval/proof_pile_2_val/CrossEntropyLoss,4.575461142722157,4.401041507720947
2637.8376733771297,51594.884903984035,2.4425844204589375,0.4502344170301093,0.5437099291327601,2.12144142855151e-06,0.999007521235711,False,fineweb-100b,eval/slimpajama_val/CrossEntropyLoss,2.65738134591765,2.569021701812744
2117.5524889622006,104625.40982943635,2.353916620656909,0.4328375059001424,0.5762879641176003,2.631507786879371e-06,0.9989131964570184,False,fineweb-100b,eval/smollm_val/CrossEntropyLoss,2.5855031890522926,2.495053768157959
15594.311510561734,196078357.44554558,3.0716714082332053,0.5262368383998298,0.9272887803139924,1.062083737535031e-05,0.9896848676643761,False,fineweb-100b,eval/starcoder_val/CrossEntropyLoss,3.2475410404993315,3.222542524337769
986.7616888977012,5411.251571075463,3.2561610405750625,0.39042046190257457,0.4245313666455849,8.715727109951672e-06,0.9810336433521045,False,fineweb-100b,olmo_suite_ce_loss,3.6004685889306254,3.197562115533011
334.678319343405,2531.317018688598,2.684572322809546,0.3151420339502758,0.38209827901448234,6.104099937798449e-06,0.9932878632683849,False,fineweb-100b,mmlu_suite_ce_loss,3.225296600497537,2.9009628891944885
1636.5843435210331,4198.638775040238,2.1471759753034596,0.4267220125012678,0.421205669668254,1.4105020623875686e-06,0.9994445556944588,False,fineweb-100b,val_loss,2.4193808258913045,2.328246593475342
1022.8910359541036,2636.90960847699,2.91711098242774,0.39732208005206743,0.387615473650608,1.1421013039490767e-06,0.9994532587621623,False,proof-pile-2,eval/c4_val/CrossEntropyLoss,3.2733428774732207,3.1742208003997803
71.72370776731783,578.077819414805,2.9835466592689905,0.21193424067253222,0.29686584045470304,9.815799547474362e-06,0.979255155245419,False,proof-pile-2,eval/downstream_ce_loss/arc_challenge_test_ce_loss,4.056856927599611,3.498213529586792
63.91350821834418,294.5702221597714,2.6368597473161426,0.18956159475831028,0.24985100233944219,1.3129541003735994e-05,0.9687321497259411,False,proof-pile-2,eval/downstream_ce_loss/arc_easy_test_ce_loss,4.263297162737539,3.4872355461120605
3244.997014599312,5794.6997357800055,2.37989214697009e-37,0.40785376274416835,0.3644180011462061,0.00010764770533369871,0.8576984701833481,False,proof-pile-2,eval/downstream_ce_loss/boolq_test_ce_loss,1.1546187719668053,3.6646692752838135
797.4615739592382,3167.3639214768014,2.522244819739164,0.3751859793067845,0.39230311183470407,2.1004826089222627e-06,0.9990352139184489,False,proof-pile-2,eval/downstream_ce_loss/hellaswag_test_ce_loss,2.9366110247166857,2.825848340988159
159.7248377810243,1118.14809437498,3.357450130710477,0.2809350691115236,0.3445614725177752,8.646347421441825e-06,0.9809676047916117,False,proof-pile-2,eval/downstream_ce_loss/mmlu_humanities_test_ce_loss,3.925064332043038,3.576704502105713
66.33850049736678,195.03789592008349,1.64740634027196,0.18758159800849,0.22131159033598727,8.457955860451927e-06,0.9895825367963156,False,proof-pile-2,eval/downstream_ce_loss/mmlu_other_test_ce_loss,3.5687912304944653,3.3571393489837646
65.14909358973244,361.58139331809286,2.727252831519473,0.20486798571347886,0.2701230033797996,7.720766581654334e-06,0.9876710126362828,False,proof-pile-2,eval/downstream_ce_loss/mmlu_social_sciences_test_ce_loss,3.9226030750027006,3.3494341373443604
89.4338369650296,483.02667679148504,1.9776964496741012,0.23713023708976988,0.29837633900509614,1.0657675698218555e-05,0.9826325095223798,False,proof-pile-2,eval/downstream_ce_loss/mmlu_stem_test_ce_loss,2.7816362723341763,2.4539883136749268
152.7521982374653,1032.814499212464,4.557795603584523,0.2685625199083616,0.3297251006380635,3.397309211277704e-06,0.9947433259633243,False,proof-pile-2,eval/downstream_ce_loss/openbook_qa_test_ce_loss,5.287450844244875,5.145816326141357
168.46896154194727,443.9851984377045,2.7216026655955825,0.2718030236538979,0.2805159303574943,5.134651753876676e-06,0.9945524034460059,False,proof-pile-2,eval/downstream_ce_loss/piqa_test_ce_loss,3.598972810135259,3.46978497505188
326.3593535818434,20951.331482545356,3.1200993529840213,0.2964654186768122,0.4683200364489995,2.6204306444270867e-05,0.9232797742639695,False,proof-pile-2,eval/downstream_ce_loss/sciq_test_ce_loss,3.8152728857301996,2.620037078857422
6378.758867970335,26245.023280034136,3.0752831525990687,0.49675617471855577,0.49596294238252586,4.043495001187696e-06,0.9972092278800471,False,proof-pile-2,eval/downstream_ce_loss/winogrande_test_ce_loss,3.323519521457095,3.211418390274048
984.7273684700416,2311.834769085274,2.9287516769971877,0.39356589730808145,0.37983983468938315,1.147161386912188e-06,0.9994949511769152,False,proof-pile-2,eval/fineweb_100b_val/CrossEntropyLoss,3.3042304877007997,3.206017255783081
1041.8390688226282,2637.3366360711043,2.8117398432497325,0.395829970989622,0.385736557234005,1.0939001018861765e-06,0.9995534577725163,False,proof-pile-2,eval/fineweb_1T_val/CrossEntropyLoss,3.1857289608100663,3.090193510055542
1288.8283818041125,3225.8073848292124,2.5853470566038,0.40693922462778476,0.39458880553919823,1.0256359329478293e-06,0.9996172040748359,False,proof-pile-2,eval/fineweb_edu_100b_val/CrossEntropyLoss,2.950696226565921,2.857412338256836
3770.7575896035546,3588.0840903163994,1.3300416858591686,0.5071265705258877,0.4349393065619421,1.932519686653331e-06,0.999029522809581,False,proof-pile-2,eval/proof_pile_2_val/CrossEntropyLoss,1.4655611226923775,1.4032412767410278
1344.8981850837774,3232.4928354453587,2.706584020299298,0.41384923187038575,0.39813353476072516,1.1184157575475933e-06,0.9995112939562064,False,proof-pile-2,eval/slimpajama_val/CrossEntropyLoss,3.0383776613845965,2.9498729705810547
1489.4639112996736,2676.728063637836,2.0965416899253255,0.41513086576969627,0.3849920603045216,1.2537379725347737e-06,0.9995659443540251,False,proof-pile-2,eval/smollm_val/CrossEntropyLoss,2.4658645411205824,2.372230291366577
4619.2449190001835,16798.92143198847,1.3802411547589768,0.49961660058887136,0.49311844128186477,3.311718913409137e-06,0.9982593203612564,False,proof-pile-2,eval/starcoder_val/CrossEntropyLoss,1.5499066906295105,1.4932903051376345
294.1991682261516,3330.513110827794,3.2919232757061416,0.3028796324298908,0.3867725862945132,6.6067653750273484e-06,0.9912198487450665,False,proof-pile-2,olmo_suite_ce_loss,3.9184550808985112,3.465479169573103
81.91011010834693,335.23579039190463,2.4729611629581947,0.2222383922234357,0.2679414409937546,7.047689617027116e-06,0.9904917423467622,False,proof-pile-2,mmlu_suite_ce_loss,3.555352917174532,3.184316575527191
3770.7575896035546,3588.0840903163994,1.3300416858591686,0.5071265705258877,0.4349393065619421,1.932519686653331e-06,0.999029522809581,False,proof-pile-2,val_loss,1.4655611226923775,1.4032412767410278
1091.0397084323195,2475.4837138067846,3.0011828546386607,0.400193620816371,0.3806297096860118,1.4702304197429996e-06,0.9992331751946651,False,starcoder,eval/c4_val/CrossEntropyLoss,3.379255856656977,3.268944025039673
961.3780907681788,513.356193168053,3.9121565839543386,0.3799595235937081,0.2802187851040251,1.2410473673269773e-05,0.9658796644876391,False,starcoder,eval/downstream_ce_loss/arc_challenge_test_ce_loss,4.658943034406746,4.323122024536133
122.89299722976145,52.96345244963668,2.259822956517531,0.2368958822726115,0.13364879125088175,1.705745602773231e-05,0.9446488852893067,False,starcoder,eval/downstream_ce_loss/arc_easy_test_ce_loss,4.909027618634028,4.568445682525635
4318.792292237308,31629.578982585575,5.0381628321923415e-12,0.41267577066399536,0.46129708030615174,0.00010165329535560016,0.8794648101151562,False,starcoder,eval/downstream_ce_loss/boolq_test_ce_loss,0.8748823541866159,3.229860782623291
1180.1945608566332,5989.332863995853,2.6905119585338397,0.39916566081404603,0.42304837075489865,1.4963611892020416e-06,0.9994405657096087,False,starcoder,eval/downstream_ce_loss/hellaswag_test_ce_loss,3.0553546279863943,2.907686233520508
870.8058290310923,2897.44034342484,3.7352111725335737,0.38737557510016213,0.38757794325325723,6.875188084554776e-06,0.9847538881730037,False,starcoder,eval/downstream_ce_loss/mmlu_humanities_test_ce_loss,4.120063730306688,4.123088359832764
167.73400961504493,109.97544512581725,2.125557208314728,0.25237636010536113,0.18315440492315735,1.0605116824100377e-05,0.9828467262466896,False,starcoder,eval/downstream_ce_loss/mmlu_other_test_ce_loss,3.994880933023373,3.918388605117798
157.64791300009082,432.56286541000577,3.5171481788324166,0.2683198281724485,0.2775100091474789,7.533030425702874e-06,0.9829828538026921,False,starcoder,eval/downstream_ce_loss/mmlu_social_sciences_test_ce_loss,4.42061056778569,4.203283786773682
155.04567652646082,122.71503129839358,2.0210350761317604,0.266756248669235,0.21037990925173417,1.1218026273621779e-05,0.972962113967605,False,starcoder,eval/downstream_ce_loss/mmlu_stem_test_ce_loss,3.1564474458843983,3.0979409217834477
1336.023296052102,1345.1884695810982,5.2872619853904705,0.4035991442363496,0.3390494484407745,3.008110976352944e-06,0.9958262604182319,False,starcoder,eval/downstream_ce_loss/openbook_qa_test_ce_loss,5.795826234277978,5.801511764526367
891.6887979946082,1329.6327076731238,3.169590333521593,0.3771418079016377,0.3381307466524195,4.959289834066206e-06,0.9950340216541007,False,starcoder,eval/downstream_ce_loss/piqa_test_ce_loss,3.7185439016217976,3.5099611282348637
58.680507171949834,1338.0992904405332,1.4555902618882106,0.1617884049285,0.3095043917046508,3.203211174290058e-05,0.9136465196385894,False,starcoder,eval/downstream_ce_loss/sciq_test_ce_loss,3.7990618327496124,3.817956924438477
3103.0120284950003,24838.49034313178,3.395868975679844,0.4598941450012936,0.4938680261812878,4.366348147617432e-06,0.9959367576040843,False,starcoder,eval/downstream_ce_loss/winogrande_test_ce_loss,3.654534900545611,3.4577558040618896
1064.6248796017792,2340.8009702804143,3.0268171512587156,0.3971858372512953,0.37681927506304075,1.4375167184996987e-06,0.9992769952892031,False,starcoder,eval/fineweb_100b_val/CrossEntropyLoss,3.42012579253959,3.312097787857056
965.3139201566938,2364.033108494215,2.872076819604219,0.38925110339643004,0.37587216921649275,1.5581931542703682e-06,0.9992572231939655,False,starcoder,eval/fineweb_1T_val/CrossEntropyLoss,3.2866331728089238,3.1881885528564453
1012.3671009372093,2617.6878205523576,2.765282140931811,0.38968618892966667,0.3783386804075271,1.5777561665297301e-06,0.9993419474830477,False,starcoder,eval/fineweb_edu_100b_val/CrossEntropyLoss,3.196639367130534,3.0918307304382324
4357.889410444944,7556.409622050922,1.8384741859310094,0.49117851807123303,0.44923315044500134,2.1773905396939997e-06,0.9978422790698591,False,starcoder,eval/proof_pile_2_val/CrossEntropyLoss,2.048035280931939,1.9821343421936035
1307.6769337096066,2736.3136875226414,2.764084720867752,0.4119090821409509,0.3865841061548466,1.4309316360838081e-06,0.999304046963325,False,starcoder,eval/slimpajama_val/CrossEntropyLoss,3.1203270431147865,3.0186474323272705
1395.19728499605,2659.710686803277,2.3992399464303262,0.4108756136102185,0.38110236549251153,1.6582292299794072e-06,0.9993184244091522,False,starcoder,eval/smollm_val/CrossEntropyLoss,2.792265585580322,2.685784101486206
7753.623416617022,4189.620600067792,0.862588056825541,0.5468235899467293,0.4386530199108323,3.216395808976347e-06,0.9976050432223044,False,starcoder,eval/starcoder_val/CrossEntropyLoss,0.9955403044364688,0.9477230906486512
282.65820724981165,1042.6749179666494,3.372302113726752,0.2974100142331653,0.31771628453981565,8.843009398902162e-06,0.9855937936522574,False,starcoder,olmo_suite_ce_loss,4.204041354009618,4.055205651691982
159.88047517275226,372.13942392234026,2.9263151249168318,0.26590814328067225,0.2669819651488968,7.859692914057738e-06,0.9845434379364392,False,starcoder,mmlu_suite_ce_loss,3.9136000546095335,3.8356754183769226
7753.623416617022,4189.620600067792,0.862588056825541,0.5468235899467293,0.4386530199108323,3.216395808976347e-06,0.9976050432223044,False,starcoder,val_loss,0.9955403044364688,0.9477230906486512
67955624.43230093,877066819.3057203,2.4487268185984825,0.39219190785748054,0.45471546486195363,6.2090099345780365e-06,0.9924742295521556,True,smollm-corpus,eval/c4_val/CrossEntropyLoss,2.710451269591022,2.642553806304932
3571816731743.0337,20616361911921.996,0.2060613093296746,0.1315044220010421,0.1441791137247402,1.60348656137009e-05,0.9687393437905287,True,smollm-corpus,eval/downstream_ce_loss/arc_challenge_test_ce_loss,2.9108074519485916,2.836977481842041
2902418043284.887,12289284168883.883,0.02086961235319179,0.14263418397401006,0.16285425245093268,2.2978579855823614e-05,0.9551722785597943,True,smollm-corpus,eval/downstream_ce_loss/arc_easy_test_ce_loss,2.8718385834750872,2.706071615219116
39085283.610017404,663429093.9720404,1.9348237673592712,1.2604293367954185,1.5258449413270716,9.353794032631033e-05,0.8345279523135226,True,smollm-corpus,eval/downstream_ce_loss/boolq_test_ce_loss,1.9418324894431527,4.256070613861084
59300979.93654105,764179605.3982116,2.0974939878226286,0.4382234065027514,0.4989821982819206,7.588797262373426e-06,0.9919822095959012,True,smollm-corpus,eval/downstream_ce_loss/hellaswag_test_ce_loss,2.3089080393968877,2.2464566230773926
292188208.9540376,2299613329.357051,2.5149461393596524,0.2693618505648065,0.36622034238314977,1.2124696050414358e-05,0.972355628136764,True,smollm-corpus,eval/downstream_ce_loss/mmlu_humanities_test_ce_loss,3.0829442554571505,2.9045870304107666
5744948142.933578,35944657125.43383,1.2190672710745059,0.22499603761191048,0.27112081948494,1.3011764141803915e-05,0.986763176866769,True,smollm-corpus,eval/downstream_ce_loss/mmlu_other_test_ce_loss,2.4715425810455116,2.452460289001465
1219423360040.471,7605897610867.386,0.06778255386812782,0.14467780633707403,0.15715872925702498,1.412159071695079e-05,0.9803319869570133,True,smollm-corpus,eval/downstream_ce_loss/mmlu_social_sciences_test_ce_loss,2.6128515016555065,2.526923418045044
3196145396.816285,54444582077.208244,1.2030312568300925,0.22018403283627938,0.20762400995744887,1.3461976577888676e-05,0.9763446450093786,True,smollm-corpus,eval/downstream_ce_loss/mmlu_stem_test_ce_loss,2.3631431256421696,2.2851953506469727
66118145.16609939,805591521.0753248,4.417539281571303,0.37911554689544114,0.4567760288662979,5.5249912160808535e-06,0.9846389504552338,True,smollm-corpus,eval/downstream_ce_loss/openbook_qa_test_ce_loss,4.683060422814227,4.527759552001953
129436474.49660209,1476581098.846242,2.374553933365009,0.3693656905997433,0.436349383967292,8.782910867855308e-06,0.9892727336306548,True,smollm-corpus,eval/downstream_ce_loss/piqa_test_ce_loss,2.7303528640409946,2.720210075378418
25826597380875.297,73396950133379.36,0.28552836148481053,0.11493859795254122,0.13421383901587844,3.1427614590279584e-05,0.8309814565014539,True,smollm-corpus,eval/downstream_ce_loss/sciq_test_ce_loss,3.288104814125388,2.641565322875977
29898226.319913708,405196685.0695847,2.9166918978728447,0.5914230693505818,0.7626909781892034,6.225608872266386e-06,0.9904212131909759,True,smollm-corpus,eval/downstream_ce_loss/winogrande_test_ce_loss,2.9926130607332584,2.842735767364502
80408017.5394636,1037218520.207934,2.339330337255207,0.3810344809776293,0.42680583460816707,6.657087076911308e-06,0.9922991966162195,True,smollm-corpus,eval/fineweb_100b_val/CrossEntropyLoss,2.6333124599428808,2.5680596828460693
71161363.35979298,888593964.8331876,2.2443689135694513,0.393910842521085,0.45767495454516377,6.978870872068369e-06,0.9924273981745779,True,smollm-corpus,eval/fineweb_1T_val/CrossEntropyLoss,2.5080922895701803,2.444973945617676
69218282.45242012,899990083.3029909,2.01575663202941,0.4112069325397516,0.4562638304412037,7.862151631645025e-06,0.9919630469124603,True,smollm-corpus,eval/fineweb_edu_100b_val/CrossEntropyLoss,2.2671761589712416,2.1882436275482178
49503965.382220864,701399870.3876985,1.7711776130019354,0.4456296764681111,0.5424602180610161,8.5041334160695e-06,0.9925492837294355,True,smollm-corpus,eval/proof_pile_2_val/CrossEntropyLoss,1.9584733171068702,1.892138719558716
73170584.76286955,1003300877.0829608,2.2504151532288637,0.4021642787937537,0.4583595316240187,7.114248007277465e-06,0.9921657140761666,True,smollm-corpus,eval/slimpajama_val/CrossEntropyLoss,2.5154846889478994,2.44992470741272
77882957.0769991,1062913008.532086,1.5340203711959466,0.41811955216504054,0.45490377291583667,9.881331156170213e-06,0.9916910483017789,True,smollm-corpus,eval/smollm_val/CrossEntropyLoss,1.7967845572922085,1.7137517929077148
50884170.8388112,799137343.6486695,1.5260016187197325,0.47241881909022004,0.6324487269581645,1.0399405587281122e-05,0.9921319740588448,True,smollm-corpus,eval/starcoder_val/CrossEntropyLoss,1.6949033343287143,1.6067986488342283
5113042618.752231,61136272128.15745,1.7913370247088662,0.20488353779857105,0.2140053629636453,1.105962535776099e-05,0.9792977459610974,True,smollm-corpus,olmo_suite_ce_loss,3.0311581896881683,2.9316823482513428
4426963959.173365,45831066547.03555,1.4284143680012111,0.21929939868148557,0.23400096649541752,1.2103935338169614e-05,0.9834693672062128,True,smollm-corpus,mmlu_suite_ce_loss,2.634018227080216,2.542291522026062
77882957.0769991,1062913008.532086,1.5340203711959466,0.41811955216504054,0.45490377291583667,9.881331156170213e-06,0.9916910483017789,True,smollm-corpus,val_loss,1.7967845572922085,1.7137517929077148
60995161.473565996,797744927.984253,2.480125021666896,0.41019602148502954,0.46858492649143435,5.921428274522159e-06,0.9931287516091925,True,fineweb-edu-100b,eval/c4_val/CrossEntropyLoss,2.716755358632354,2.649285078048706
12050371933061.576,21816673943423.812,0.166649616607525,0.12138279213759344,0.1438451585276501,1.7586677534499537e-05,0.962560230050923,True,fineweb-edu-100b,eval/downstream_ce_loss/arc_challenge_test_ce_loss,3.0146033964409136,2.7264742851257324
10865293147012.008,3340635502321.5093,0.0684417608942679,0.1288234105913347,0.18204764867928655,2.6016053476827912e-05,0.9396801893816614,True,fineweb-edu-100b,eval/downstream_ce_loss/arc_easy_test_ce_loss,3.0088356018987876,2.537889242172241
56453553.28592102,895575204.8275348,1.9678614619308856,0.9135030763678706,1.1762117653636397,6.973206532442664e-05,0.8891116442964955,True,fineweb-edu-100b,eval/downstream_ce_loss/boolq_test_ce_loss,2.004500504841014,5.0046586990356445
57223717.03728889,752379601.6575615,2.1195342815566818,0.44374237538334543,0.5024194170797633,7.534877250229396e-06,0.9919811922479707,True,fineweb-edu-100b,eval/downstream_ce_loss/hellaswag_test_ce_loss,2.3246306615710477,2.261918544769287
95531380.89864033,1087831231.2468183,2.785276586657147,0.37898460177810606,0.4593738295248264,1.0674743615718001e-05,0.9790027302719997,True,fineweb-edu-100b,eval/downstream_ce_loss/mmlu_humanities_test_ce_loss,3.0900451166834593,2.928243398666382
4891183520.938393,31953203983.94658,1.2769212940345798,0.2276141838798767,0.26949501468512144,1.3185399104537864e-05,0.985268441131908,True,fineweb-edu-100b,eval/downstream_ce_loss/mmlu_other_test_ce_loss,2.4863955710327983,2.411628246307373
2962476122229.6826,8034111465743.193,0.0776392708431072,0.1327734094360142,0.15482454363030526,1.4661883127295157e-05,0.9771593560745058,True,fineweb-edu-100b,eval/downstream_ce_loss/mmlu_social_sciences_test_ce_loss,2.6946163146565776,2.5014147758483887
4198179191.4983716,26267145978.991417,1.4129491649517523,0.20627981093408468,0.24709090996310842,1.4287552709814303e-05,0.9781573587475122,True,fineweb-edu-100b,eval/downstream_ce_loss/mmlu_stem_test_ce_loss,2.5598072209591587,2.3934459686279297
63560530.9658419,727056660.1184478,4.441832625256438,0.34401790096034424,0.44214271138657174,5.010136050148018e-06,0.9843170953834239,True,fineweb-edu-100b,eval/downstream_ce_loss/openbook_qa_test_ce_loss,4.731367176220907,4.558957099914551
83050977.59300523,1049544840.2479208,2.584710821208217,0.4371036656190455,0.4770157221458591,9.382194259535716e-06,0.9879303445773391,True,fineweb-edu-100b,eval/downstream_ce_loss/piqa_test_ce_loss,2.835392291738152,2.7313506603240967
1.9859640931106836e+16,2314232173292.545,0.5998841550464954,0.07538796513146592,0.17108331782300795,3.92933213915557e-05,0.6932784249519097,True,fineweb-edu-100b,eval/downstream_ce_loss/sciq_test_ce_loss,3.8689983994962844,2.5908124446868896
28721982.311726596,395243027.674574,2.921729261147032,0.6027338594885658,0.7903756006014487,6.615517789120533e-06,0.9895383395133405,True,fineweb-edu-100b,eval/downstream_ce_loss/winogrande_test_ce_loss,2.9918146032871307,2.8436450958251958
71832694.54914896,953494750.6973826,2.35217369366043,0.39531455569595564,0.43598917609079924,6.542176302204043e-06,0.9924753932086666,True,fineweb-edu-100b,eval/fineweb_100b_val/CrossEntropyLoss,2.622951044360404,2.553007125854492
68181476.20875522,878672800.7479123,2.2605163322740793,0.39632631755333153,0.4553085057593258,6.864895526962829e-06,0.9924703305487304,True,fineweb-edu-100b,eval/fineweb_1T_val/CrossEntropyLoss,2.5196493207818813,2.4554443359375
66798878.45905815,889955656.4320827,1.9669051342679635,0.4128980698285724,0.45558129866811403,7.92450060588217e-06,0.9919788511540093,True,fineweb-edu-100b,eval/fineweb_edu_100b_val/CrossEntropyLoss,2.2146707169008595,2.1262636184692383
78179138.84311847,814169426.137429,4.193169995163371,0.6170357581518119,1.0004868389067645,1.8587741243270126e-05,0.9571360493414999,True,fineweb-edu-100b,eval/proof_pile_2_val/CrossEntropyLoss,4.308469080712668,4.166804313659668
67586551.39060712,882777009.4170644,2.525822274658558,0.4291282538049843,0.5374752091380508,6.67415207401015e-06,0.9930942641385749,True,fineweb-edu-100b,eval/slimpajama_val/CrossEntropyLoss,2.7507100186486717,2.6699390411376958
78066103.0238586,911559160.8951073,2.208138287345257,0.42418708942360017,0.5640964509631566,7.551063450580884e-06,0.9935591359299542,True,fineweb-edu-100b,eval/smollm_val/CrossEntropyLoss,2.445025100222923,2.3667337894439697
107335256.03258653,1003538853.8829664,2.9379295093959006,0.5346040102410448,0.8703557635373541,1.34981333890606e-05,0.9841151224881374,True,fineweb-edu-100b,eval/starcoder_val/CrossEntropyLoss,3.1204236619411896,3.0782690048217773
298978500357.1051,358197171949.9708,1.1267274883593295,0.13681562386989324,0.1862652226679023,1.3452117703735243e-05,0.9705224242604438,True,fineweb-edu-100b,olmo_suite_ce_loss,3.0600409760325835,2.8930067675454274
23961673734.02695,93838346011.99664,1.1138989061436784,0.17831609562702724,0.22051625436269334,1.112927148605599e-05,0.9849199197545254,True,fineweb-edu-100b,mmlu_suite_ce_loss,2.640987670818639,2.5586830973625183
66798878.45905815,889955656.4320827,1.9669051342679635,0.4128980698285724,0.45558129866811403,7.92450060588217e-06,0.9919788511540093,True,fineweb-edu-100b,val_loss,2.2146707169008595,2.1262636184692383
137888999.1990296,1244194203.209628,2.3491159625768128,0.35229015641600003,0.39040037704356423,4.9032063514286425e-06,0.9949762925332424,True,slimpajama-chunk1,eval/c4_val/CrossEntropyLoss,2.724920526771309,2.733860969543457
19440110220.96509,388822846376.66095,1.9338644276273773,0.19342932720500694,0.18899786994180667,1.151739601366482e-05,0.9774380626479888,True,slimpajama-chunk1,eval/downstream_ce_loss/arc_challenge_test_ce_loss,3.577087493797431,3.2937986850738525
40246763951.93413,994763334462.5973,1.8219267332462505,0.1993474341421901,0.19108920003552585,1.5482831411832415e-05,0.9693935320265162,True,slimpajama-chunk1,eval/downstream_ce_loss/arc_easy_test_ce_loss,3.7759324157585086,3.263295412063598
70443790.52851142,1142054206.3021598,1.8658349517573534,0.775734978168021,0.8012892313080392,7.324292297938738e-05,0.8967402724897986,True,slimpajama-chunk1,eval/downstream_ce_loss/boolq_test_ce_loss,1.9517485228357112,5.0746750831604
73284531.03535393,958090516.2152772,2.07867464749712,0.4176635108074853,0.47242858533298465,7.835483807723586e-06,0.9924625240183279,True,slimpajama-chunk1,eval/downstream_ce_loss/hellaswag_test_ce_loss,2.3293429792179605,2.277791976928711
295446428.53397304,3602848647.7107887,2.6313148729672617,0.26600227567859824,0.31031433323481056,8.726460945521826e-06,0.9829176146135392,True,slimpajama-chunk1,eval/downstream_ce_loss/mmlu_humanities_test_ce_loss,3.2359105406822946,2.9913547039031982
1105540017.3159811,13055761752.830374,2.0091299188937164,0.2858196677208613,0.3174625971345329,1.0429027566296386e-05,0.9888096855159498,True,slimpajama-chunk1,eval/downstream_ce_loss/mmlu_other_test_ce_loss,2.873512071969051,2.667079448699951
555913392710.6693,3734402318738.265,1.041130982860954,0.14067251181467605,0.15412150229122973,8.50001016076148e-06,0.9870917668196174,True,slimpajama-chunk1,eval/downstream_ce_loss/mmlu_social_sciences_test_ce_loss,3.2704161572819173,3.005012273788452
41077093555.03586,492816129844.3485,1.1351781065469126,0.1642523345615678,0.17258406228375592,1.1641950016286895e-05,0.978336134256718,True,slimpajama-chunk1,eval/downstream_ce_loss/mmlu_stem_test_ce_loss,2.823255387959117,2.6991677284240723
100631307.9195796,1034696720.5038446,4.546764488075156,0.3377486978075655,0.4611010013803365,4.886981933014752e-06,0.9886499333447926,True,slimpajama-chunk1,eval/downstream_ce_loss/openbook_qa_test_ce_loss,4.889338414215696,4.784531116485596
91237521.60186897,1230318741.217345,2.6216676055775454,0.4608490193871201,0.4758111601552513,8.179143503740106e-06,0.990117530494971,True,slimpajama-chunk1,eval/downstream_ce_loss/piqa_test_ce_loss,2.8738711077948977,2.774200439453125
93057740.9649031,1559782329.9778326,4.0376693051348385,0.5391183743861434,0.6396593442636788,2.8087282545457043e-05,0.9224534067249094,True,slimpajama-chunk1,eval/downstream_ce_loss/sciq_test_ce_loss,4.236857298997212,3.17514443397522
25031015.332093198,371709926.20357746,2.9201543855335244,0.5773337494554118,0.786882420996188,5.781982727949699e-06,0.9915399405580564,True,slimpajama-chunk1,eval/downstream_ce_loss/winogrande_test_ce_loss,2.9919061974524155,2.827159881591797
143681157.99671975,1227962883.1128216,2.4231966994965894,0.3515994112356013,0.3799144910400618,4.86351784120753e-06,0.994652100962476,True,slimpajama-chunk1,eval/fineweb_100b_val/CrossEntropyLoss,2.8048520162576858,2.8753135204315186
145577041.783241,1123491457.4476943,2.382489781802479,0.35271378985183316,0.3946844573780409,5.0810888578295596e-06,0.9944689459432539,True,slimpajama-chunk1,eval/fineweb_1T_val/CrossEntropyLoss,2.7580336233594607,2.835650682449341
100511766.11167307,946826674.1138421,2.3362079981820627,0.386873095500358,0.43373058162521544,5.199151959825913e-06,0.9947099319680616,True,slimpajama-chunk1,eval/fineweb_edu_100b_val/CrossEntropyLoss,2.6378761525955907,2.638636589050293
35963785.58935134,415653430.94153976,1.7425345795321758,0.43484798919152895,0.543480253983897,5.916828923825714e-06,0.9941998134183636,True,slimpajama-chunk1,eval/proof_pile_2_val/CrossEntropyLoss,1.904451832643469,1.840466976165772
74673473.63316469,1064203126.3602942,1.9672370351123856,0.4012666686140908,0.43405859895007043,7.801392065277136e-06,0.9917224468161562,True,slimpajama-chunk1,eval/slimpajama_val/CrossEntropyLoss,2.242448081632321,2.1738879680633545
83657194.51975009,884097513.1469259,2.102370079275651,0.40350416159272023,0.4783382837317898,6.144377381629318e-06,0.9943633999893497,True,slimpajama-chunk1,eval/smollm_val/CrossEntropyLoss,2.36782825396902,2.3053719997406006
46178510.522881486,523467774.61573154,1.3654303257550908,0.4031168858415685,0.6083604659654844,8.16522291595094e-06,0.9932257637694986,True,slimpajama-chunk1,eval/starcoder_val/CrossEntropyLoss,1.5626577658963943,1.547000527381897
112843368.31447105,1672849964.3623738,3.266019915009933,0.41304197461417264,0.45288903628595,1.0056131165463956e-05,0.9839574808326743,True,slimpajama-chunk1,olmo_suite_ce_loss,3.5830036822716544,3.1994174207959856
3382341267.2253957,36717225269.06377,1.9107468575241882,0.21539956622196144,0.2374690814967297,8.602263217367974e-06,0.988547590202876,True,slimpajama-chunk1,mmlu_suite_ce_loss,3.0527193249940625,2.8406535387039185
74673473.63316469,1064203126.3602942,1.9672370351123856,0.4012666686140908,0.43405859895007043,7.801392065277136e-06,0.9917224468161562,True,slimpajama-chunk1,val_loss,2.242448081632321,2.1738879680633545
59089521.50314758,801750654.3194072,2.300641164962535,0.4190149987756831,0.48025587518173957,6.711657346163394e-06,0.992168066852769,True,fineweb-100b,eval/c4_val/CrossEntropyLoss,2.527966781781864,2.4410955905914307
18824637658.583702,67214950603.55362,2.2741824451883073,0.1733080945526979,0.2305047316869474,1.3784829313448474e-05,0.9563280484591324,True,fineweb-100b,eval/downstream_ce_loss/arc_challenge_test_ce_loss,3.7253146908822456,3.312608242034912
15817583608.62449,65280509927.263,2.5069045482456267,0.1932570630605378,0.25331903049986854,1.8224578146797357e-05,0.9380579156485207,True,fineweb-100b,eval/downstream_ce_loss/arc_easy_test_ce_loss,3.978391442225121,3.327625036239624
28911206.26815575,457846552.9442373,2.135854015229285,1.056645499893358,1.7184898376365372,7.4155112934058e-05,0.8195285678105703,True,fineweb-100b,eval/downstream_ce_loss/boolq_test_ce_loss,2.144570410109443,4.323652744293213
54731449.56209313,725153317.8048038,2.082803609318069,0.46096277592868296,0.5286445595826246,7.380894465140982e-06,0.9922426972993615,True,fineweb-100b,eval/downstream_ce_loss/hellaswag_test_ce_loss,2.270798001727451,2.209911346435547
96463419.47509928,1051113546.9219887,2.9756522078734617,0.3441289086145654,0.48115438296001367,7.795303588971676e-06,0.9871611924741388,True,fineweb-100b,eval/downstream_ce_loss/mmlu_humanities_test_ce_loss,3.306920013896252,3.0759177207946777
268591074.6153986,2907411407.629643,2.512633109899071,0.3678720613127759,0.4396225127920718,1.1423085248441004e-05,0.9868911364150853,True,fineweb-100b,eval/downstream_ce_loss/mmlu_other_test_ce_loss,2.9831788104270505,2.654109477996826
1199000550.6617439,9120343193.043156,2.5690713266708127,0.23148536723272606,0.2952430234394613,9.73738366633972e-06,0.981960364759165,True,fineweb-100b,eval/downstream_ce_loss/mmlu_social_sciences_test_ce_loss,3.442445489445811,3.0161561965942383
1031356914.9470661,8295457429.020194,2.269125401804241,0.233997831575901,0.2986567949963926,1.117167650893549e-05,0.978846519813225,True,fineweb-100b,eval/downstream_ce_loss/mmlu_stem_test_ce_loss,3.1127231619123625,2.857668161392212
72291154.17484394,935371396.7929864,4.566296966278244,0.37219341255204846,0.4480195530085106,5.276950689087098e-06,0.9861730363121756,True,fineweb-100b,eval/downstream_ce_loss/openbook_qa_test_ce_loss,4.85044451684839,4.700038909912109
51213398.323639475,658834891.5586916,2.6902272493408512,0.5149103807951799,0.6380289940157686,8.271044871888172e-06,0.9899613678759609,True,fineweb-100b,eval/downstream_ce_loss/piqa_test_ce_loss,2.8336739620979206,2.6879100799560547
61383334.34457362,1216420045.706012,4.3806646266816704,0.554444199654478,0.48456346322734467,2.627824922974279e-05,0.8539336260062558,True,fineweb-100b,eval/downstream_ce_loss/sciq_test_ce_loss,4.576569340424722,3.333494186401367
23280153.160079982,333762690.9060965,2.9120188841923955,0.6158141368476442,0.8364241674868681,5.7239211524233505e-06,0.9912457227786104,True,fineweb-100b,eval/downstream_ce_loss/winogrande_test_ce_loss,2.9691326756756236,2.811347007751465
67873108.54367575,931025251.4924387,2.1700144171656954,0.41072325854631786,0.45308760854630176,7.216911522038606e-06,0.9921282094608158,True,fineweb-100b,eval/fineweb_100b_val/CrossEntropyLoss,2.422604373455911,2.328246593475342
63288861.121307015,829788802.9644985,2.114170725404088,0.4110856014567772,0.48602561046833465,7.358241943603884e-06,0.9918487737237741,True,fineweb-100b,eval/fineweb_1T_val/CrossEntropyLoss,2.3510994649369983,2.268545627593994
65887288.08586585,884276109.7209793,2.12584920266469,0.4187280785519244,0.4774544662491703,7.475509134062658e-06,0.9920638224998389,True,fineweb-100b,eval/fineweb_edu_100b_val/CrossEntropyLoss,2.364593439556432,2.2674803733825684
54440010.82106132,555005420.6229626,4.571479063883392,0.67720134966469,1.3883215909674207,1.173423447196722e-05,0.9789093245216063,True,fineweb-100b,eval/proof_pile_2_val/CrossEntropyLoss,4.640552218709601,4.401041507720947
57841269.348857075,785655869.5680884,2.5012468797886647,0.47642888923773485,0.5775868448213725,6.703023084163407e-06,0.9927063051459062,True,fineweb-100b,eval/slimpajama_val/CrossEntropyLoss,2.680727781170546,2.569021701812744
73781382.43275417,930708285.3104922,2.367379336995069,0.44044890049527735,0.5708936706565115,6.83171593718291e-06,0.9934581311529264,True,fineweb-100b,eval/smollm_val/CrossEntropyLoss,2.589413183800461,2.495053768157959
96662085.56819834,794513364.9094766,3.092995679158599,0.5128093992774492,1.0367504357221322,1.0781000495802239e-05,0.9887656758220262,True,fineweb-100b,eval/starcoder_val/CrossEntropyLoss,3.271688801900016,3.222542524337769
122005445.97528194,1526432017.601084,3.2360367788338316,0.3599845554239177,0.42196922848710966,1.0505660238245767e-05,0.9744388479051959,True,fineweb-100b,olmo_suite_ce_loss,3.5983405347607618,3.197562115533011
274980012.88058513,2656398063.4089093,2.7030055649605975,0.3022020166167004,0.39520921491862393,8.83111253832652e-06,0.9879993135625946,True,fineweb-100b,mmlu_suite_ce_loss,3.234414750063636,2.9009628891944885
67873108.54367575,931025251.4924387,2.1700144171656954,0.41072325854631786,0.45308760854630176,7.216911522038606e-06,0.9921282094608158,True,fineweb-100b,val_loss,2.422604373455911,2.328246593475342
138513480.5108616,2299395309.0307617,2.8132341480874947,0.3423022960361812,0.3612435884011205,6.9141509700470015e-06,0.98983249049662,True,proof-pile-2,eval/c4_val/CrossEntropyLoss,3.2406565131918894,3.1742208003997803
544721856235.95715,2013251730198.0872,1.7190222810578202,0.1392638407888262,0.16748780203774508,1.1107895700204411e-05,0.9734568517313661,True,proof-pile-2,eval/downstream_ce_loss/arc_challenge_test_ce_loss,3.915158245436129,3.498213529586792
21088065361813.684,35264192286004.734,1.0663155265608637,0.12162423911981145,0.14811477673825596,1.4093965812535887e-05,0.9644053693879819,True,proof-pile-2,eval/downstream_ce_loss/arc_easy_test_ce_loss,4.155878788296077,3.4872355461120605
2061842519.9843323,40350498957.985054,1.4616756199085973e-06,0.37748489423199333,0.3770470344663588,0.00010868485599085164,0.8545140719074933,True,proof-pile-2,eval/downstream_ce_loss/boolq_test_ce_loss,1.1425443566616054,3.6646692752838135
223227656.78592813,3129595123.672087,2.3914026552865653,0.3200223742108597,0.3624572621022999,8.37408143958809e-06,0.9898620215568507,True,proof-pile-2,eval/downstream_ce_loss/hellaswag_test_ce_loss,2.8997406858019397,2.825848340988159
583363806.5106186,6952364819.481214,3.1137498955648124,0.23322018497431385,0.2837683992222825,1.0678520971725e-05,0.9709140085334249,True,proof-pile-2,eval/downstream_ce_loss/mmlu_humanities_test_ce_loss,3.8726145358965915,3.576704502105713
1419435268127.4836,4263686082435.4976,0.885284137678414,0.14618487167111693,0.17566776106793983,1.1478534026522116e-05,0.9836190980130324,True,proof-pile-2,eval/downstream_ce_loss/mmlu_other_test_ce_loss,3.503726591451713,3.3571393489837646
549714212472.9917,1325419207825.9463,1.6349924261815045,0.13821064403975075,0.1754823458569089,9.79520183750724e-06,0.98046108745636,True,proof-pile-2,eval/downstream_ce_loss/mmlu_social_sciences_test_ce_loss,3.8037158942547133,3.3494341373443604
1616735565.0159152,10064455018.962612,1.8239652706099638,0.20807741326461923,0.2794497279279439,1.4955272334406723e-05,0.9688936991664104,True,proof-pile-2,eval/downstream_ce_loss/mmlu_stem_test_ce_loss,2.7590247834554837,2.4539883136749268
1044706388.3513554,14193388687.353855,4.356643904402526,0.2462133205530444,0.2721433194462442,5.4306668069245115e-06,0.9887952867273296,True,proof-pile-2,eval/downstream_ce_loss/openbook_qa_test_ce_loss,5.240068757912846,5.145816326141357
19841706785.742367,293506263298.22015,1.8624545798168242,0.18535304678418826,0.19331196566794687,8.687927932635018e-06,0.98539241818602,True,proof-pile-2,eval/downstream_ce_loss/piqa_test_ce_loss,3.4630935714758726,3.46978497505188
516631596.8161927,2867474751.721254,3.2821380870128425,0.3124365073836283,0.5231635302668127,2.8671810009905718e-05,0.9167318930580389,True,proof-pile-2,eval/downstream_ce_loss/sciq_test_ce_loss,3.890503494728606,2.620037078857422
91851438.98707712,1534857578.2516804,3.041142080673029,0.44549496587630555,0.5023368748487901,8.369263518130869e-06,0.9903136876695725,True,proof-pile-2,eval/downstream_ce_loss/winogrande_test_ce_loss,3.3080351899375557,3.211418390274048
147283746.65773377,2486944676.898176,2.8364550811862483,0.34208073500820557,0.3569149820721822,6.947244315205803e-06,0.9899475005888174,True,proof-pile-2,eval/fineweb_100b_val/CrossEntropyLoss,3.276133123996239,3.206017255783081
152729028.73484915,2505856606.591931,2.716489100384104,0.34326474604152085,0.36236434613316093,7.374242973641897e-06,0.9898017355523548,True,proof-pile-2,eval/fineweb_1T_val/CrossEntropyLoss,3.156894750628457,3.090193510055542
135591229.95531034,2301111479.3391886,2.527824082359763,0.36429874504331083,0.3801705728079651,8.055742421777342e-06,0.9896624104094067,True,proof-pile-2,eval/fineweb_edu_100b_val/CrossEntropyLoss,2.9323764250805784,2.857412338256836
21399832.68158203,329014140.4916143,1.3191056383496804,0.45268444525949536,0.45522632660279727,9.712506468078742e-06,0.988135905195859,True,proof-pile-2,eval/proof_pile_2_val/CrossEntropyLoss,1.458484233718733,1.4032412767410278
112712047.39406922,1938086996.7888966,2.637456346987737,0.366125688312068,0.3804221037350639,7.445167869899256e-06,0.989720686749474,True,proof-pile-2,eval/slimpajama_val/CrossEntropyLoss,3.014791339144732,2.9498729705810547
138358258.29546854,2361400072.4246626,2.0396295479405397,0.3680964388895797,0.3759873932677162,9.483625270996103e-06,0.9892569476061331,True,proof-pile-2,eval/smollm_val/CrossEntropyLoss,2.4466776895202917,2.372230291366577
39654719.77084467,671733897.6356823,1.3806255851733507,0.470597833783001,0.5148850284441812,1.1811453367798511e-05,0.9888877464638524,True,proof-pile-2,eval/starcoder_val/CrossEntropyLoss,1.5474574476742773,1.4932903051376345
666000944.0944827,8747915880.06849,3.1225515621941806,0.28189576681453915,0.32124361589421546,9.570530169140312e-06,0.983293367662104,True,proof-pile-2,olmo_suite_ce_loss,3.8785268522952294,3.465479169573103
4820754317.3421955,37530147413.546455,2.318445223557582,0.20211180296288056,0.24588721999105326,1.0376900671636822e-05,0.9809626752281329,True,proof-pile-2,mmlu_suite_ce_loss,3.519627982878811,3.184316575527191
21399832.68158203,329014140.4916143,1.3191056383496804,0.45268444525949536,0.45522632660279727,9.712506468078742e-06,0.988135905195859,True,proof-pile-2,val_loss,1.458484233718733,1.4032412767410278
210866888.38433886,3601348336.017518,2.7963992136884985,0.30875624978780714,0.33051199666171666,6.213811909838861e-06,0.9902013552530924,True,starcoder,eval/c4_val/CrossEntropyLoss,3.328082099908201,3.268944025039673
994620671504.2318,4503756706984.779,1.9003689638291006,0.14161076559140226,0.1666512498482016,1.3976628194535598e-05,0.9544118617469057,True,starcoder,eval/downstream_ce_loss/arc_challenge_test_ce_loss,4.351442461242759,4.323122024536133
95397349542.4947,1040014314291.5278,2.6766078819713286,0.18423122846045398,0.19933849328402087,1.8823301232681203e-05,0.9340911650276833,True,starcoder,eval/downstream_ce_loss/arc_easy_test_ce_loss,4.793029826670621,4.568445682525635
1421663960.8648493,9280951428.104595,7.303589697274224e-15,0.39350591431420684,0.5094251244137664,0.00010478396196625523,0.852227546763987,True,starcoder,eval/downstream_ce_loss/boolq_test_ce_loss,0.8368204699736036,3.229860782623291
234483624.75438005,3279711737.2601867,2.4779558799775443,0.31131296681111953,0.36660164796515354,7.146908243150544e-06,0.9908958744292247,True,starcoder,eval/downstream_ce_loss/hellaswag_test_ce_loss,3.0010399989593948,2.907686233520508
189556916.93543428,3680841926.2566175,3.556724011897614,0.3189303078488688,0.32548868706405726,9.17668275368268e-06,0.9727194338577214,True,starcoder,eval/downstream_ce_loss/mmlu_humanities_test_ce_loss,4.076284346333739,4.123088359832764
1595542230009.7778,49575921555815.15,0.8699357660133265,0.15399187536546405,0.1475230101893063,1.287195596208668e-05,0.9742959885824338,True,starcoder,eval/downstream_ce_loss/mmlu_other_test_ce_loss,3.8439474739550232,3.918388605117798
140751432302.25385,1418263014494.4163,2.2258132055645925,0.1560455917964476,0.17177392164500496,9.640075457750045e-06,0.9737952516507483,True,starcoder,eval/downstream_ce_loss/mmlu_social_sciences_test_ce_loss,4.236461800145138,4.203283786773682
1535469627.2409837,31781329485.11519,2.1343278407544424,0.2526303325186216,0.23846556418439835,1.3740383514022528e-05,0.9628087181891005,True,starcoder,eval/downstream_ce_loss/mmlu_stem_test_ce_loss,3.1515822579865564,3.0979409217834477
554200490.341637,8791443510.10795,4.99007701199184,0.28921243546909964,0.30394888056124236,5.438007439776739e-06,0.9856823729362116,True,starcoder,eval/downstream_ce_loss/openbook_qa_test_ce_loss,5.721388510236805,5.801511764526367
956293198.8590612,13539404462.887524,2.762230227689052,0.2608022257554253,0.2883302879781554,8.911785020491201e-06,0.985657275622541,True,starcoder,eval/downstream_ce_loss/piqa_test_ce_loss,3.622848230993323,3.5099611282348637
632044480073.2175,598696799775.6685,1.3193029950634452,0.16088243545809416,0.23934414208925217,3.410348373593483e-05,0.8977599460576071,True,starcoder,eval/downstream_ce_loss/sciq_test_ce_loss,3.81980024123815,3.817956924438477
90505478.31110281,1525771105.438739,3.3477311204899785,0.42299734420730206,0.4860841546983759,7.493733105601476e-06,0.9874552535076917,True,starcoder,eval/downstream_ce_loss/winogrande_test_ce_loss,3.6296595193369763,3.4577558040618896
217690587.39932892,3716127447.7075863,2.840188120052651,0.3112693671306493,0.3321195046466603,6.300151355837077e-06,0.9901023236473561,True,starcoder,eval/fineweb_100b_val/CrossEntropyLoss,3.374884484525264,3.312097787857056
246980101.21780974,3993194790.026587,2.6882535892818002,0.3072073923865556,0.33560174849136826,6.725637537852539e-06,0.990126170775213,True,starcoder,eval/fineweb_1T_val/CrossEntropyLoss,3.2424311910005272,3.1881885528564453
282844344.78455687,4691489503.463488,2.5698414569266768,0.30920429710059144,0.33332981573847914,7.12631722418596e-06,0.9900686075171425,True,starcoder,eval/fineweb_edu_100b_val/CrossEntropyLoss,3.1500073062188165,3.0918307304382324
58519271.96043026,929212372.7694234,1.778364533065006,0.40332837628688945,0.46222949582026696,9.05288403888163e-06,0.9893368428252073,True,starcoder,eval/proof_pile_2_val/CrossEntropyLoss,2.024126862307185,1.9821343421936035
166187646.46559566,2927389606.049664,2.5997218260483845,0.32575392590786273,0.3441412223576884,6.621423026529886e-06,0.9900409777299359,True,starcoder,eval/slimpajama_val/CrossEntropyLoss,3.0776750381895983,3.0186474323272705
208225133.765411,3623503175.446067,2.2381201698045454,0.3288171244416028,0.34489288229519066,7.823988163868196e-06,0.989805258825122,True,starcoder,eval/smollm_val/CrossEntropyLoss,2.750076386158742,2.685784101486206
22270138.298477136,377544568.1675311,0.8452474567051246,0.4512681118187197,0.4715652585120178,1.2354820912292906e-05,0.9872958824048893,True,starcoder,eval/starcoder_val/CrossEntropyLoss,0.9868583120506553,0.9477230906486512
726951342.3262621,9779959353.60332,3.409406678984418,0.2868074875415726,0.33746061145755596,1.1543468598554422e-05,0.9717264095959518,True,starcoder,olmo_suite_ce_loss,4.184466366399151,4.055205651691982
5224754910.908937,143493940589.5037,2.4807437237385535,0.22037805734073734,0.2076150580519331,1.0520881522676618e-05,0.9749327168961657,True,starcoder,mmlu_suite_ce_loss,3.8455711297403683,3.8356754183769226
22270138.298477136,377544568.1675311,0.8452474567051246,0.4512681118187197,0.4715652585120178,1.2354820912292906e-05,0.9872958824048893,True,starcoder,val_loss,0.9868583120506553,0.9477230906486512
