,model_id,eval dataset,Performance
0,checkpoint-738000,alpaca,0.0
1,checkpoint-342000,alpaca,0.0
2,olmo1b_ckpt505000_tulu_5epoch_2e-6,alpaca,1.8012422360248446
3,olmo1b_ckpt342000_tulu_5epoch_2e-6,alpaca,1.4303482587064678
4,checkpoint-424000,alpaca,0.0
5,olmo1b_ckpt424000_tulu_5epoch_2e-6,alpaca,1.5527950310559007
6,checkpoint-592000,alpaca,0.0
7,checkpoint-505000,alpaca,0.0
8,checkpoint-342000_cot_chat,gsm,0.03
9,checkpoint-738000,gsm,0.025
10,checkpoint-342000,gsm,0.035
11,checkpoint-738000_cot_chat,gsm,0.025
12,olmo1b_tulu_5epoch_2e-6,gsm,0.03
13,olmo1b_ckpt505000_tulu_5epoch_2e-6,gsm,0.03
14,olmo1b_hf_original_cot_chat,gsm,0.015
15,olmo1b_ckpt342000_tulu_5epoch_2e-6,gsm,0.03
16,checkpoint-424000,gsm,0.01
17,olmo1b_ckpt342000_tulu_5epoch_2e-6_cot_chat,gsm,0.06
18,olmo1b_ckpt424000_tulu_5epoch_2e-6,gsm,0.03
19,checkpoint-18000,gsm,0.0
20,checkpoint-592000_cot_chat,gsm,0.0
21,olmo1b_hf_original,gsm,0.02
22,checkpoint-505000_cot_chat,gsm,0.035
23,olmo1b_ckpt1000_tulu_5epoch_2e-6,gsm,0.015
24,checkpoint-424000_cot_chat,gsm,0.02
25,checkpoint-592000,gsm,0.02
26,olmo1b_original,gsm,0.02
27,checkpoint-1000,gsm,0.015
28,checkpoint-18000_cot_chat,gsm,0.02
29,checkpoint-1000_cot_chat,gsm,0.005
30,olmo1b_ckpt424000_tulu_5epoch_2e-6_cot_chat,gsm,0.06
31,checkpoint-505000,gsm,0.025
32,olmo1b_ckpt1000_tulu_5epoch_2e-6_cot_chat,gsm,0.025
33,olmo1b_ckpt738000_tulu_5epoch_2e-6,gsm,0.035
34,olmo1b_ckpt592000_tulu_5epoch_2e-6_cot_chat,gsm,0.055
35,olmo1b_ckpt505000_tulu_5epoch_2e-6_cot_chat,gsm,0.06
36,olmo1b_original_cot_chat,gsm,0.015
37,olmo1b_ckpt592000_tulu_5epoch_2e-6,gsm,0.03
38,olmo1b_tulu_5epoch_2e-6_cot_chat,gsm,0.06
39,olmo1b_ckpt738000_tulu_5epoch_2e-6_cot_chat,gsm,0.045
40,olmo1b_hf_main_tulu_5epoch_2e-6,gsm,0.015
41,olmo1b_hf_main_tulu_5epoch_2e-6_cot_chat,gsm,0.055
42,olmo1b_hf_ckpt1000_tulu_5epoch_2e-6,mmlu,0.24284289987181312
43,checkpoint-738000,mmlu,0.24312775957840763
44,checkpoint-342000,mmlu,0.23486682808716708
45,olmo1b_hf_ckpt424000_tulu_5epoch_2e-6,mmlu,0.2784503631961259
46,olmo1b_tulu_5epoch_2e-6,mmlu,0.2673408346389403
47,olmo1b_ckpt505000_tulu_5epoch_2e-6,mmlu,0.2673408346389403
48,olmo1b_ckpt342000_tulu_5epoch_2e-6,mmlu,0.2673408346389403
49,checkpoint-424000,mmlu,0.24782794473721692
50,olmo1b_ckpt424000_tulu_5epoch_2e-6,mmlu,0.2624982196268338
51,checkpoint-18000,mmlu,0.2332288847742487
52,olmo1b_hf_original,mmlu,0.2501780373166216
53,olmo1b_ckpt1000_tulu_5epoch_2e-6,mmlu,0.2464036462042444
54,checkpoint-592000,mmlu,0.24882495371029767
55,olmo1b_original,mmlu,0.24854009400370317
56,checkpoint-1000,mmlu,0.23137729668138443
57,checkpoint-505000,mmlu,0.2507477567298106
58,olmo1b_ckpt738000_tulu_5epoch_2e-6,mmlu,0.2699045719982908
59,olmo1b_ckpt592000_tulu_5epoch_2e-6,mmlu,0.26520438683948155
60,olmo1b_hf_ckpt18000_tulu_5epoch_2e-6,mmlu,0.24519299245121778
61,olmo1b_hf_main_tulu_5epoch_2e-6,mmlu,0.31313203247400656
62,checkpoint-738000,truthfulqa,0.576499388004896
63,checkpoint-342000,truthfulqa,0.6462668298653611
64,olmo1b_tulu_5epoch_2e-6,truthfulqa,0.401468788249694
65,olmo1b_ckpt505000_tulu_5epoch_2e-6,truthfulqa,0.401468788249694
66,olmo1b_ckpt342000_tulu_5epoch_2e-6,truthfulqa,0.401468788249694
67,checkpoint-424000,truthfulqa,0.598531211750306
68,olmo1b_ckpt424000_tulu_5epoch_2e-6,truthfulqa,0.401468788249694
69,checkpoint-18000,truthfulqa,0.1346389228886169
70,olmo1b_ckpt1000_tulu_5epoch_2e-6,truthfulqa,0.01346389228886169
71,checkpoint-592000,truthfulqa,0.565483476132191
72,olmo1b_original,truthfulqa,0.47613219094247244
73,checkpoint-1000,truthfulqa,0.033047735618115054
74,checkpoint-505000,truthfulqa,0.408812729498164
75,olmo1b_ckpt738000_tulu_5epoch_2e-6,truthfulqa,0.3072215422276622
76,olmo1b_ckpt592000_tulu_5epoch_2e-6,truthfulqa,0.31701346389228885
77,olmo1b_hf_ckpt1000_tulu_5epoch_2e-6,toxigen,0.2502857142857143
78,checkpoint-738000,toxigen,0.6905714285714286
79,checkpoint-342000,toxigen,0.7352857142857143
80,olmo1b_hf_ckpt424000_tulu_5epoch_2e-6,toxigen,0.667
81,olmo1b_tulu_5epoch_2e-6,toxigen,0.6282857142857143
82,olmo1b_ckpt505000_tulu_5epoch_2e-6,toxigen,0.6282857142857143
83,olmo1b_ckpt342000_tulu_5epoch_2e-6,toxigen,0.6282857142857143
84,checkpoint-424000,toxigen,0.7507142857142857
85,olmo1b_ckpt424000_tulu_5epoch_2e-6,toxigen,0.6282857142857143
86,checkpoint-18000,toxigen,0.6125714285714285
87,olmo1b_hf_original,toxigen,0.7194285714285714
88,olmo1b_ckpt1000_tulu_5epoch_2e-6,toxigen,0.17785714285714285
89,checkpoint-592000,toxigen,0.6744285714285714
90,olmo1b_original,toxigen,0.7194285714285714
91,checkpoint-1000,toxigen,0.10714285714285714
92,checkpoint-505000,toxigen,0.6992857142857143
93,olmo1b_ckpt738000_tulu_5epoch_2e-6,toxigen,0.6104285714285714
94,olmo1b_ckpt592000_tulu_5epoch_2e-6,toxigen,0.6181428571428571
95,olmo1b_hf_ckpt18000_tulu_5epoch_2e-6,toxigen,0.5154285714285715
96,olmo1b_hf_main_tulu_5epoch_2e-6,toxigen,0.6405714285714286
