,model_id,eval dataset,Performance,std
0,olmo1b_hf_ckpt424000_socialiqa_3epoch_2e-6_4shots,mnli_matched,0.33560876209882834,0.0047747151036286965
1,olmo1b_hf_ckpt738000_xsum_3epoch_2e-7_4shots,cnn,0.06093113920980165,0.0004070524650884239
2,olmo1b_hf_ckpt18000_paws_3epoch_2e-6_4shots,xsum,0.09632860031547044,0.0006384562167357828
3,olmo1b_checkpoint-1000_original_hf_4shots,cnn_instruct,0.09398842494254464,0.00045255275026395724
4,olmo1b_hf_ckpt505000_socialiqa_3epoch_2e-6_4shots,socialiqa_instruct,0.633125015206684,0.007306285908789524
5,olmo1b_checkpoint-18000_original_hf_4shots,xsum,0.11856305995852336,0.000572316517529117
6,olmo1b_original_hf_4shots,llmbar_Natural,0.575,0.04996749015356188
7,olmo1b_hf_ckpt1000_xsum_3epoch_2e-7_4shots,cnn,0.09711169253851143,0.00048341602628880926
8,olmo1b_hf_ckpt592000_paws_3epoch_2e-6_4shots,mnli_matched,0.3909819154355578,0.004950109564485745
9,olmo1b_checkpoint-738000_original_hf_4shots,llmbar_Adversarial_Neighbor,0.4253731343283582,0.042611859480176525
10,olmo1b_hf_ckpt505000_socialiqa_3epoch_2e-6_4shots,mnli_matched,0.3617435048395313,0.004847220152357857
11,olmo1b_checkpoint-342000_original_hf_4shots,rte_instruct,0.4608433734939759,0.010080668008299801
12,olmo1b_checkpoint-342000_original_hf_4shots,sciq,0.16562103422752658,0.004174279373440377
13,olmo1b_original_hf,socialiqa,0.03343192087546714,0.0008097436203347742
14,olmo1b_hf_ckpt424000_xsum_3epoch_2e-7_4shots,mnli_matched,0.3283749363219562,0.0046995055670160804
15,olmo1b_checkpoint-592000_original_hf_4shots,stsb_instruct,0.9960116026105874,0.001604386030961336
16,olmo1b_hf_ckpt505000_paws_3epoch_2e-6_4shots,stsb,0.7480058013052937,0.011567935594974675
17,olmo1b_checkpoint-18000_original_hf_4shots,mnli_matched_instruct,0.32760952623535405,0.004791419217076594
18,olmo1b_hf_ckpt1000_xsum_3epoch_2e-7_4shots,paws,0.557875,0.005544469094413084
19,olmo1b_checkpoint-342000_original_hf_4shots,sciq_instruct,0.18092232860846386,0.0038575035972397732
20,olmo1b_hf_ckpt738000_mnli_3epoch_2e-6_4shots,socialiqa,0.0,0.0
21,olmo1b_checkpoint-592000_original_hf_4shots,socialiqa,0.06383194409085649,0.0014657117426968398
22,olmo1b_checkpoint-592000_original_hf_4shots,llmbar_Adversarial_Manual,0.42391304347826086,0.07281782015844196
23,olmo1b_checkpoint-505000_original_hf_4shots,xlsum_inputoutput,0.0630541505103424,0.00042571669341279883
24,olmo1b_checkpoint-738000_original_hf_4shots,xsum_inputoutput,0.10855935425902086,0.0005685625076715466
25,olmo1b_checkpoint-592000_original_hf_4shots,xsum_instruct,0.10798788085105183,0.0006244615421032476
26,olmo1b_checkpoint-424000_original_hf_4shots,paws_instruct,0.5578125,0.0055550747376312696
27,olmo1b_hf_ckpt424000_mnli_3epoch_2e-6_4shots,mnli_matched_instruct,0.6827292409577178,0.0046983392991565434
28,olmo1b_checkpoint-592000_original_hf_4shots,xsum_inputoutput,0.11076047791146319,0.000550686782585387
29,olmo1b_hf_ckpt342000_mnli_3epoch_2e-6_4shots,socialiqa,0.0,0.0
30,olmo1b_hf_ckpt505000_xsum_3epoch_2e-7_4shots,xlsum_inputoutput,0.06499337350096171,0.00043878052541033926
31,olmo1b_hf_ckpt505000_tulu_5epoch_2e-6_4shots,mnli_matched_instruct,0.5290894039735099,0.005118605927938221
32,olmo1b_checkpoint-342000_original_hf_4shots,mnli_matched_instruct,0.3967906265919511,0.00492352277710856
33,olmo1b_checkpoint-738000_original_hf_4shots,cnn,0.061994506877563596,0.0003858641403307508
34,olmo1b_hf_ckpt505000_socialiqa_3epoch_2e-6_4shots,xsum,0.1417470758759034,0.0007830557807709427
35,olmo1b_hf_ckpt592000_socialiqa_3epoch_2e-6_4shots,socialiqa_instruct,0.6403904706576848,0.007174596892446927
36,olmo1b_original_hf_4shots,paws_instruct,0.5585609375,0.005626119796408836
37,olmo1b_hf_ckpt505000_mnli_3epoch_2e-6_4shots,mnli_mismatched_instruct,0.7575264442636289,0.004352019461176934
38,olmo1b_hf_ckpt18000_socialiqa_3epoch_2e-6_4shots,socialiqa_inputoutput,0.0810775177120622,0.0040756848150917575
39,olmo1b_checkpoint-592000_original_hf_4shots,mnli_mismatched,0.34321602929210737,0.0047114376078455835
40,olmo1b_checkpoint-1000_original_hf_4shots,llmbar_Adversarial_Manual,1.0,0.0
41,olmo1b_checkpoint-424000_original_hf_4shots,stsb_instruct,1.0,0.0
42,olmo1b_original_hf_4shots,mnli_mismatched_instruct,0.3295362082994304,0.004748184889772986
43,olmo1b_hf_ckpt342000_paws_3epoch_2e-6_4shots,paws_inputoutput,0.8086249999999999,0.004398166241404051
44,olmo1b_checkpoint-424000_original_hf_4shots,qqp_instruct,0.9999910477920018,4.352361823225461e-06
45,olmo1b_hf_ckpt1000_paws_3epoch_2e-6_4shots,paws_inputoutput,0.5580625,0.00550652887002823
46,olmo1b_checkpoint-505000_original_hf_4shots,socialiqa,0.05608307709087419,0.0010507425469394218
47,olmo1b_checkpoint-505000_original_hf_2shots,xlsum,0.0818973391979182,0.0007126574851634795
48,olmo1b_hf_ckpt738000_mnli_3epoch_2e-6_4shots,rte,0.6630522088353414,0.009487328176966979
49,olmo1b_checkpoint-424000_original_hf_4shots,cnn_instruct,0.06071685610705885,0.0003664931456340297
50,olmo1b_checkpoint-342000_original_hf_4shots,gpt3nli_instruct,0.3369,0.00275295890002425
51,olmo1b_hf_ckpt342000_xsum_3epoch_2e-7_4shots,xsum_instruct,0.09613333868533307,0.000828557473386283
52,olmo1b_hf_ckpt424000_paws_3epoch_2e-6_4shots,socialiqa,0.0,0.0
53,olmo1b_checkpoint-342000_original_hf_4shots,llmbar_Adversarial_Manual,0.7391304347826086,0.06559252317078157
54,olmo1b_checkpoint-505000_original_hf_4shots,mnli_mismatched,0.36391375101708706,0.004766258663331827
55,olmo1b_checkpoint-424000_original,socialiqa,0.02316203396161412,0.0007138813026472832
56,olmo1b_checkpoint-18000_original_hf_4shots,mnli_mismatched,0.33182338283157037,0.004826764020069917
57,olmo1b_hf_ckpt1000_paws_3epoch_2e-6_4shots,paws,0.5805,0.005418943442981755
58,olmo1b_hf_main_socialiqa_3epoch_2e-6_4shots,paws,0.5573140625,0.005616642135892505
59,olmo1b_checkpoint-1000_original_hf_4shots,gpt3nli,0.3333166666666667,0.002718528981276353
60,olmo1b_hf_ckpt342000_xsum_3epoch_2e-7_4shots,xlsum_inputoutput,0.05631844812868882,0.000404365882961127
61,olmo1b_checkpoint-592000_original_hf_4shots,mnli_matched_instruct,0.35904355578196634,0.0048682339036073
62,olmo1b_hf_ckpt505000_paws_3epoch_2e-6_4shots,xsum,0.0965969550485806,0.0008290027099024124
63,olmo1b_original_hf_4shots,paws,0.5579984375,0.005583662994021959
64,olmo1b_hf_ckpt592000_tulu_5epoch_2e-6_4shots,mnli_matched_instruct,0.36867167600611306,0.004883188756506808
65,olmo1b_checkpoint-18000_original,mnli_matched,0.35032857870606215,0.004752575231700838
66,olmo1b_hf_ckpt738000_tulu_5epoch_2e-6_4shots,llmbar_Adversarial_Manual,0.32608695652173914,0.06840083623357213
67,olmo1b_original_hf,rte,0.014658634538152611,0.0024061484704830526
68,olmo1b_checkpoint-592000_original_hf_4shots,sciq_instruct,0.15337619259054047,0.0032508791658973158
69,olmo1b_hf_ckpt738000_paws_3epoch_2e-6_4shots,paws_instruct,0.8069375,0.004397390730569816
70,olmo1b_hf_ckpt342000_mnli_3epoch_2e-6_4shots,mnli_mismatched_inputoutput,0.38064356183889336,0.0049195498285666255
71,olmo1b_checkpoint-505000_original_hf_4shots,tweetqa,0.07206423338143936,0.0007548811164635489
72,olmo1b_hf_ckpt592000_mnli_3epoch_2e-6,mnli_matched,0.82496306673459,0.00392633670338375
73,olmo1b_checkpoint-505000_original_hf_4shots,tweetqa_instruct,0.03943179802908133,0.0005188518362312099
74,olmo1b_checkpoint-505000_original_hf_4shots,stsb_instruct,0.998912255257433,0.0007015712374723199
75,olmo1b_hf_ckpt592000_mnli_3epoch_2e-6_4shots,mnli_mismatched_instruct,0.6740235964198535,0.004702863045755788
76,olmo1b_hf_ckpt1000_tulu_5epoch_2e-6_4shots,mnli_matched_instruct,0.35201095262353543,0.004971754954701529
77,olmo1b_hf_ckpt1000_paws_3epoch_2e-6_4shots,qqp,0.9999910477920018,4.336293328772568e-06
78,olmo1b_checkpoint-342000_original_hf_4shots,mnli_mismatched_instruct,0.40261391375101707,0.004957611206887589
79,olmo1b_hf_ckpt342000_socialiqa_3epoch_2e-6_4shots,sciq,0.008034997881936743,0.00124062606937705
80,olmo1b_checkpoint-424000_original_hf_4shots,gpt3nli,0.33743291666666664,0.00269665010352484
81,olmo1b_hf_ckpt505000_mnli_3epoch_2e-6_4shots,mnli_matched,0.7917982679572083,0.004133563586014189
82,olmo1b_checkpoint-592000_original_hf_4shots,rte_instruct,0.4401656626506024,0.009811352744753194
83,olmo1b_hf_ckpt18000_mnli_3epoch_2e-6_4shots,mnli_matched,0.5566492613346918,0.005125952520004886
84,olmo1b_hf_main_xsum_3epoch_2e-7_4shots,mnli_matched,0.3273051451859399,0.004721517036293053
85,olmo1b_original_hf,tweetqa,0.012331073468436031,0.00037843928781424845
86,olmo1b_hf_ckpt342000_tulu_5epoch_2e-6_4shots,socialiqa_instruct,0.2055663701107663,0.006302050124976717
87,olmo1b_checkpoint-505000_original_hf_4shots,mnli_matched_inputoutput,0.3247592969943963,0.004636759827269219
88,olmo1b_original_hf_4shots,xlsum,0.06214973749319069,0.0004610141327740586
89,olmo1b_checkpoint-1000_original_hf_4shots,qqp,0.9999782589234331,7.225806991130099e-06
90,olmo1b_checkpoint-505000_original_hf_4shots,xlsum_instruct,0.061082693482555156,0.0004128546211013354
91,olmo1b_hf_ckpt1000_socialiqa_3epoch_2e-6_4shots,socialiqa_instruct,0.045704815912892105,0.0035271807653718523
92,olmo1b_checkpoint-1000_original_hf_2shots,xlsum,0.08111107477159904,0.0005131014686952578
93,olmo1b_checkpoint-738000_original_hf_4shots,mnli_matched_instruct,0.32801961283749365,0.004797640767701655
94,olmo1b_hf_ckpt18000_xsum_3epoch_2e-7_4shots,xsum_instruct,0.1428248275414602,0.0008269727851555545
95,olmo1b_checkpoint-505000_original_hf_4shots,xsum_instruct,0.10281425414769908,0.0005424832722127429
96,olmo1b_checkpoint-738000_original_hf_4shots,rte_instruct,0.13092369477911647,0.006746387498644161
97,olmo1b_hf_ckpt592000_socialiqa_3epoch_2e-6_4shots,xsum,0.1342043979274437,0.0007263772032747182
98,olmo1b_hf_ckpt424000_paws_3epoch_2e-6_4shots,qqp,0.92730551328124,0.000425341301752299
99,olmo1b_hf_ckpt342000_paws_3epoch_2e-6_4shots,paws_instruct,0.849625,0.003923392685305657
100,olmo1b_hf_main_socialiqa_3epoch_2e-6_4shots,sciq,0.05019131826926338,0.0028928574960328025
101,olmo1b_original_hf_4shots,mnli_matched_instruct,0.33341823739174736,0.004741263737414768
102,olmo1b_hf_main_tulu_5epoch_2e-6_4shots,mnli_matched_instruct,0.5109526235354049,0.0050596666332444135
103,olmo1b_hf_ckpt738000_xsum_3epoch_2e-7_4shots,xlsum,0.06508009448203793,0.0004840772984351829
104,olmo1b_hf_ckpt18000_socialiqa_3epoch_2e-6_4shots,socialiqa_instruct,0.278342711341551,0.0064922467442328526
105,olmo1b_hf_main_socialiqa_3epoch_2e-6,socialiqa,0.7219417773345425,0.006686294591099561
106,olmo1b_hf_ckpt1000_socialiqa_3epoch_2e-6_4shots,tweetqa_instruct,0.003292126960628289,0.00022023958126864628
107,olmo1b_checkpoint-505000_original,socialiqa,0.019687909472967374,0.0006776935047808756
108,olmo1b_hf_ckpt424000_tulu_5epoch_2e-6_4shots,mnli_matched_instruct,0.5174223127865512,0.005144087833743929
109,olmo1b_checkpoint-18000_original_hf_4shots,paws_instruct,0.5581875000000001,0.005547527159636443
110,olmo1b_hf_ckpt738000_mnli_3epoch_2e-6_4shots,xsum,0.005277478691564452,0.00023636962148159646
111,olmo1b_hf_ckpt424000_mnli_3epoch_2e-6_4shots,rte,0.6528112449799197,0.009257076408794062
112,olmo1b_hf_ckpt18000_mnli_3epoch_2e-6_4shots,mnli_mismatched_instruct,0.4392811737184703,0.005156840639232077
113,olmo1b_hf_ckpt505000_paws_3epoch_2e-6_4shots,mnli_matched,0.323331635252165,0.004677667103997451
114,olmo1b_hf_ckpt1000_mnli_3epoch_2e-6_4shots,paws,0.5580624999999999,0.005623153058860454
115,olmo1b_hf_ckpt592000_tulu_5epoch_2e-6_4shots,llmbar_Adversarial_Neighbor,0.5597014925373134,0.04275147550997242
116,olmo1b_hf_main_mnli_3epoch_2e-6,rte,0.6493975903614457,0.009635398512554216
117,olmo1b_hf_ckpt505000_xsum_3epoch_2e-7_4shots,xsum_instruct,0.04781830192535806,0.000757047872037649
118,olmo1b_hf_ckpt592000_xsum_3epoch_2e-7_4shots,cnn,0.062487363922747834,0.0004320260124058127
119,olmo1b_checkpoint-738000_original_hf_4shots,xsum_instruct,0.11245880416092444,0.0005919022197112106
120,olmo1b_checkpoint-424000_original_hf_4shots,paws,0.5577484374999999,0.005589502679676463
121,olmo1b_checkpoint-342000_original_hf_4shots,tweetqa_instruct,0.06306948075101912,0.0005354888262356537
122,olmo1b_hf_ckpt342000_paws_3epoch_2e-6_4shots,paws,0.8738765625,0.0037314483093693184
123,olmo1b_hf_main_xsum_3epoch_2e-7_4shots,socialiqa,0.07033056663808475,0.0011983648524072543
124,olmo1b_hf_ckpt18000_mnli_3epoch_2e-6_4shots,rte,0.5311244979919678,0.010118471449825719
125,olmo1b_hf_ckpt505000_xsum_3epoch_2e-7_4shots,xlsum,0.0626052351117055,0.00043089690545712557
126,olmo1b_hf_ckpt1000_paws_3epoch_2e-6_4shots,stsb,1.0,0.0
127,olmo1b_hf_ckpt592000_mnli_3epoch_2e-6_4shots,gpt3nli,0.7280666666666666,0.0024998044266925277
128,olmo1b_checkpoint-342000_original_hf_4shots,socialiqa_inputoutput,0.09352383874310191,0.0011442159677486278
129,olmo1b_hf_ckpt738000_socialiqa_3epoch_2e-6_4shots,xsum,0.10366034626989593,0.0007073274634339418
130,olmo1b_hf_ckpt424000_socialiqa_3epoch_2e-6_4shots,paws,0.557625,0.005540563458005489
131,olmo1b_hf_main_xsum_3epoch_2e-7_4shots,xsum,0.1825717900396668,0.0011537128057353678
132,olmo1b_hf_ckpt1000_mnli_3epoch_2e-6_4shots,mnli_mismatched,0.35231768714401956,0.004984181387757842
133,olmo1b_checkpoint-592000_original_hf_4shots,tweetqa,0.06301430857328347,0.0007195449500197266
134,olmo1b_original_hf_4shots,llmbar_Adversarial_Manual,0.3369565217391305,0.06969688452868567
135,olmo1b_checkpoint-592000_original_hf_4shots,cnn,0.07531209815083198,0.0004076481226668205
136,olmo1b_hf_ckpt342000_mnli_3epoch_2e-6_4shots,mnli_mismatched_instruct,0.6898393002441009,0.004781733204792543
137,olmo1b_hf_ckpt505000_mnli_3epoch_2e-6_4shots,rte,0.6497991967871486,0.009406560074837526
138,olmo1b_hf_ckpt592000_tulu_5epoch_2e-6_4shots,llmbar_Adversarial_Manual,0.4891304347826087,0.0724360323084059
139,olmo1b_hf_ckpt592000_paws_3epoch_2e-6_4shots,paws_instruct,0.9086859375,0.0031263733216680464
140,olmo1b_hf_main_paws_3epoch_2e-6_4shots,paws,0.878125,0.0036700242539078644
141,olmo1b_hf_ckpt424000_mnli_3epoch_2e-6_4shots,mnli_matched,0.7837493632195619,0.004079905110789591
142,olmo1b_hf_ckpt1000_mnli_3epoch_2e-6_4shots,gpt3nli,0.33316625,0.0027473974854092
143,olmo1b_hf_ckpt592000_mnli_3epoch_2e-6_4shots,mnli_matched_instruct,0.6754444727457973,0.004799491897626491
144,olmo1b_checkpoint-424000_original_hf_4shots,llmbar_Adversarial_Manual,0.6953804347826094,0.07026700769708367
145,olmo1b_hf_ckpt505000_tulu_5epoch_2e-6_4shots,llmbar_Adversarial_Neighbor,0.5335820895522388,0.04336260641125821
146,olmo1b_hf_ckpt342000_mnli_3epoch_2e-6_4shots,xsum,0.035891061993020126,0.0006442960070612443
147,olmo1b_hf_ckpt1000_mnli_3epoch_2e-6_4shots,mnli_matched_instruct,0.354253693326541,0.004851543344186186
148,olmo1b_checkpoint-1000_original_hf_2shots,xsum,0.08443359264203798,0.00047744635238168353
149,olmo1b_checkpoint-738000_original_hf_4shots,qqp,0.9997608481577634,2.424066161476744e-05
150,olmo1b_hf_main_paws_3epoch_2e-6_4shots,paws_inputoutput,0.8790609375,0.003733529090379744
151,olmo1b_hf_ckpt592000_tulu_5epoch_2e-6_4shots,socialiqa_instruct,0.05809437966521558,0.0020752105419664575
152,olmo1b_checkpoint-505000_original_hf_4shots,qqp_instruct,0.9991265202767511,4.708542637525559e-05
153,olmo1b_hf_ckpt592000_tulu_5epoch_2e-6_4shots,llmbar_Natural,0.375,0.04870836240512913
154,olmo1b_checkpoint-592000_original_hf_4shots,gpt3nli_instruct,0.39016666666666666,0.0027985175296619063
155,olmo1b_hf_ckpt424000_paws_3epoch_2e-6_4shots,xsum,0.11174784226669128,0.0007733441745959888
156,olmo1b_hf_ckpt18000_socialiqa_3epoch_2e-6_4shots,socialiqa,0.47129916547969253,0.007908463624727993
157,olmo1b_checkpoint-18000_original_hf_4shots,tweetqa_instruct,0.06536380842769174,0.0005590883930597795
158,olmo1b_checkpoint-424000_original_hf_4shots,xlsum,0.05876357629815298,0.0004213849566475335
159,olmo1b_original_hf_4shots,xsum,0.07100120271384322,0.0007731305758273091
160,olmo1b_hf_ckpt592000_socialiqa_3epoch_2e-6_4shots,socialiqa,0.6543806186705368,0.006715601851343119
161,olmo1b_hf_ckpt738000_xsum_3epoch_2e-7_4shots,mnli_matched,0.3273560876209883,0.004825229195164509
162,olmo1b_hf_main_tulu_5epoch_2e-6_4shots,llmbar_Natural,0.4,0.049275267354727245
163,olmo1b_hf_ckpt592000_socialiqa_3epoch_2e-6_4shots,socialiqa_inputoutput,0.5201589238076154,0.007464081179022355
164,olmo1b_hf_ckpt342000_socialiqa_3epoch_2e-6_4shots,mnli_matched,0.35246943453897095,0.004809605121980458
165,olmo1b_hf_ckpt505000_socialiqa_3epoch_2e-6_4shots,sciq,0.051698675331484825,0.0030220478189191156
166,olmo1b_checkpoint-505000_original_hf_4shots,socialiqa_inputoutput,0.09217414349178425,0.0012901662942528258
167,olmo1b_hf_ckpt18000_socialiqa_3epoch_2e-6_4shots,tweetqa_instruct,0.07775674770796981,0.0008649627503366055
168,olmo1b_hf_ckpt738000_paws_3epoch_2e-6_4shots,qqp,0.4853950801222616,0.0007886160086875557
169,olmo1b_hf_main_mnli_3epoch_2e-6_4shots,mnli_mismatched_instruct,0.7601708706265256,0.004291820294153536
170,olmo1b_hf_ckpt18000_xsum_3epoch_2e-7_4shots,xsum_inputoutput,0.011640148868422216,0.00037518715376418614
171,olmo1b_hf_ckpt505000_socialiqa_3epoch_2e-6_4shots,tweetqa_instruct,0.013125052599488918,0.0005427246770545601
172,olmo1b_hf_ckpt342000_socialiqa_3epoch_2e-6_4shots,socialiqa_inputoutput,0.5578353280065942,0.007240145242535398
173,olmo1b_hf_ckpt342000_mnli_3epoch_2e-6_4shots,rte,0.6367469879518073,0.009491394176229942
174,olmo1b_hf_ckpt738000_mnli_3epoch_2e-6_4shots,mnli_mismatched_instruct,0.6349178702196908,0.00494379830397452
175,olmo1b_checkpoint-18000_original_hf_4shots,cnn,0.05856491299411525,0.0004647076265133681
176,olmo1b_hf_ckpt1000_socialiqa_3epoch_2e-6_4shots,mnli_matched,0.35272542027508913,0.00483538629413983
177,olmo1b_checkpoint-738000_original_hf_4shots,gpt3nli_instruct,0.3558004166666667,0.0027535996006810624
178,olmo1b_checkpoint-1000_original_hf_4shots,mnli_matched_inputoutput,0.33713576158940395,0.004709721622484995
179,olmo1b_hf_ckpt738000_paws_3epoch_2e-6_4shots,paws,0.7320609375,0.005022683308339474
180,olmo1b_hf_ckpt505000_mnli_3epoch_2e-6_4shots,paws,0.4526875,0.005702982670092525
181,olmo1b_hf_ckpt1000_tulu_5epoch_2e-6_4shots,llmbar_Adversarial_Manual,0.5108695652173914,0.07461145222376062
182,olmo1b_hf_main_paws_3epoch_2e-6_4shots,socialiqa,0.0,0.0
183,olmo1b_hf_ckpt1000_paws_3epoch_2e-6_4shots,paws_instruct,0.561375,0.005564052349132745
184,olmo1b_checkpoint-18000_original_hf_4shots,cnn_instruct,0.08013078944281637,0.0004404412575231237
185,olmo1b_checkpoint-18000_original_hf_4shots,sciq,0.06434860798797354,0.002726876766032033
186,olmo1b_hf_ckpt342000_tulu_5epoch_2e-6_4shots,mnli_matched_instruct,0.45175751400916964,0.004991441272088984
187,olmo1b_hf_ckpt342000_paws_3epoch_2e-6_4shots,qqp,0.888596165897203,0.0004971961464410418
188,olmo1b_checkpoint-1000_original_hf_4shots,xlsum,0.0761296961629569,0.0004620868899247433
189,olmo1b_checkpoint-738000_original_hf_4shots,paws_instruct,0.5580624999999999,0.005540672626731141
190,olmo1b_hf_ckpt592000_paws_3epoch_2e-6_4shots,stsb,0.7160986221899928,0.012149783587606516
191,olmo1b_checkpoint-342000_original_hf_4shots,xlsum_inputoutput,0.052753962916298025,0.0003905002311419291
192,olmo1b_hf_ckpt505000_xsum_3epoch_2e-7_4shots,cnn,0.06523965660946826,0.00042267789097219624
193,olmo1b_hf_ckpt1000_mnli_3epoch_2e-6_4shots,socialiqa,0.0,0.0
194,olmo1b_hf_ckpt424000_tulu_5epoch_2e-6_4shots,socialiqa_instruct,0.17323672924023908,0.005606471052220299
195,olmo1b_checkpoint-738000_original_hf_4shots,xlsum_instruct,0.05845174525424927,0.0004327564000300246
196,olmo1b_hf_ckpt738000_socialiqa_3epoch_2e-6_4shots,tweetqa_instruct,0.044658365021256335,0.0009368598594720665
197,olmo1b_hf_ckpt18000_tulu_5epoch_2e-6_4shots,llmbar_Natural,0.72,0.04532892335643002
198,olmo1b_hf_ckpt342000_paws_3epoch_2e-6_4shots,socialiqa,0.00031236057528278633,0.0001566418049595918
199,olmo1b_hf_ckpt342000_socialiqa_3epoch_2e-6_4shots,tweetqa,0.14455132745476618,0.0012390724024162714
200,olmo1b_hf_ckpt424000_xsum_3epoch_2e-7_4shots,cnn,0.05013566314671189,0.0003771989744764467
201,olmo1b_original_hf_4shots,mnli_matched_inputoutput,0.3250624044829343,0.004652894003929162
202,olmo1b_hf_ckpt18000_mnli_3epoch_2e-6_4shots,mnli_mismatched_inputoutput,0.33431651749389746,0.004852503205844264
203,olmo1b_hf_ckpt505000_mnli_3epoch_2e-6_4shots,socialiqa,0.0,0.0
204,olmo1b_hf_ckpt738000_paws_3epoch_2e-6_4shots,paws_inputoutput,0.8788125,0.003652874354884532
205,olmo1b_hf_ckpt424000_tulu_5epoch_2e-6_4shots,llmbar_Natural,0.2848750000000003,0.04481450486857507
206,olmo1b_hf_ckpt505000_paws_3epoch_2e-6_4shots,paws,0.9110015625000001,0.0031900470269866656
207,olmo1b_checkpoint-592000_original_hf_4shots,rte,0.5008082329317269,0.010106456498400309
208,olmo1b_checkpoint-738000_original_hf_4shots,xsum,0.08792711773413073,0.000811639273433372
209,olmo1b_hf_ckpt424000_xsum_3epoch_2e-7_4shots,xsum_inputoutput,0.1092552201281638,0.0006260704040537094
210,olmo1b_hf_ckpt18000_tulu_5epoch_2e-6_4shots,llmbar_Adversarial_Manual,0.3913043478260869,0.07055167002902589
211,olmo1b_checkpoint-592000_original_hf_4shots,paws,0.5440625,0.005607041316964921
212,olmo1b_hf_ckpt505000_xsum_3epoch_2e-7_4shots,mnli_matched,0.3531329597554763,0.004810908774277249
213,olmo1b_checkpoint-592000_original_hf_4shots,llmbar_Natural,0.47,0.049514580322919555
214,olmo1b_checkpoint-592000_original_hf_4shots,mnli_matched_inputoutput,0.32740575649516046,0.004770366818227359
215,olmo1b_hf_ckpt505000_mnli_3epoch_2e-6_4shots,mnli_mismatched,0.8091931956875509,0.004010748635746156
216,olmo1b_hf_ckpt342000_xsum_3epoch_2e-7_4shots,mnli_matched,0.3315830361691289,0.0047819134611244395
217,olmo1b_hf_ckpt1000_socialiqa_3epoch_2e-6_4shots,sciq,0.1152464072598306,0.0020526873046630132
218,olmo1b_hf_ckpt424000_mnli_3epoch_2e-6_4shots,mnli_mismatched_inputoutput,0.3941720911310008,0.004942169307381066
219,olmo1b_checkpoint-738000_original_hf_4shots,sciq_instruct,0.16532885284860444,0.00352079911526251
220,olmo1b_hf_ckpt592000_mnli_3epoch_2e-6_4shots,mnli_mismatched,0.8144845911310008,0.0038851109537640814
221,olmo1b_hf_ckpt1000_xsum_3epoch_2e-7_4shots,xsum_inputoutput,0.004794715641247766,0.0002458226676681828
222,olmo1b_checkpoint-1000_original_hf_4shots,qqp_instruct,0.9997940992160423,2.282619040527496e-05
223,olmo1b_hf_main_tulu_5epoch_2e-6_4shots,llmbar_Adversarial_Manual,0.28260869565217395,0.06645625250611155
224,olmo1b_hf_ckpt18000_tulu_5epoch_2e-6_4shots,socialiqa_instruct,0.1650341335848621,0.003965593841151591
225,olmo1b_hf_ckpt342000_socialiqa_3epoch_2e-6_4shots,socialiqa,0.5664089035608995,0.008236833112600017
226,olmo1b_hf_ckpt738000_socialiqa_3epoch_2e-6_4shots,paws,0.55725,0.005546961815720287
227,olmo1b_hf_ckpt505000_mnli_3epoch_2e-6_4shots,mnli_matched_instruct,0.752572592969944,0.004375122760660764
228,olmo1b_checkpoint-18000_original_hf_4shots,mnli_mismatched_instruct,0.32933279088689993,0.004662968638308979
229,olmo1b_hf_ckpt505000_tulu_5epoch_2e-6_4shots,llmbar_Adversarial_Manual,0.5108695652173914,0.07214544288795141
230,olmo1b_original_hf_4shots,mnli_mismatched,0.3295349369406021,0.004762737436006696
231,olmo1b_hf_ckpt1000_xsum_3epoch_2e-7_4shots,xlsum,0.1252663781291108,0.000612878435685427
232,olmo1b_checkpoint-738000_original_hf_4shots,mnli_mismatched_inputoutput,0.32953747965825875,0.004790618855234367
233,olmo1b_checkpoint-738000_original_hf_4shots,mnli_mismatched,0.3294840825874695,0.004769373958102588
234,olmo1b_checkpoint-1000_original_hf_4shots,rte,0.4853463855421687,0.009924264728852356
235,olmo1b_hf_main_socialiqa_3epoch_2e-6_4shots,socialiqa_inputoutput,0.5202240516083083,0.007428485731409764
236,olmo1b_hf_ckpt424000_socialiqa_3epoch_2e-6_4shots,socialiqa_instruct,0.5430507434271552,0.008275480396817326
237,olmo1b_hf_ckpt342000_xsum_3epoch_2e-7_4shots,socialiqa,0.02376869369105527,0.0008174973162411134
238,olmo1b_hf_ckpt738000_socialiqa_3epoch_2e-6_4shots,socialiqa,0.6401126404205871,0.006618781132940945
239,olmo1b_checkpoint-18000_original,socialiqa,0.030086487363065607,0.0008076809609710034
240,olmo1b_checkpoint-592000_original_hf_4shots,mnli_matched,0.33754457463066734,0.004832676621587194
241,olmo1b_hf_ckpt342000_socialiqa_3epoch_2e-6_4shots,paws,0.5455609375,0.0056260110048178
242,olmo1b_hf_ckpt424000_mnli_3epoch_2e-6_4shots,socialiqa,0.0,0.0
243,olmo1b_hf_ckpt738000_xsum_3epoch_2e-7_4shots,socialiqa,0.08786072420329315,0.0015725164569783418
244,olmo1b_checkpoint-18000_original_hf_4shots,tweetqa,0.05264472689519767,0.0005804998248953572
245,olmo1b_hf_ckpt424000_mnli_3epoch_2e-6_4shots,mnli_mismatched,0.8003966639544345,0.003949239117528729
246,olmo1b_checkpoint-1000_original_hf_4shots,xsum,0.07926452383284513,0.0005123697437224181
247,olmo1b_original_hf_4shots,xsum_inputoutput,0.1079581313788689,0.0005441156012991313
248,olmo1b_hf_ckpt592000_xsum_3epoch_2e-7_4shots,mnli_matched,0.3308698420784514,0.004772949122617091
249,olmo1b_hf_ckpt738000_xsum_3epoch_2e-7_4shots,paws,0.5579375,0.005521315038040434
250,olmo1b_hf_ckpt738000_xsum_3epoch_2e-7_4shots,xsum_instruct,0.11350100145348652,0.0011127289209792617
251,olmo1b_hf_ckpt592000_socialiqa_3epoch_2e-6_4shots,sciq,0.135755833251946,0.003284423623109407
252,olmo1b_checkpoint-505000_original_hf_4shots,llmbar_Adversarial_Neighbor,0.21641791044776118,0.03580545371108068
253,olmo1b_hf_ckpt18000_mnli_3epoch_2e-6_4shots,gpt3nli,0.55526625,0.0028411334589395058
254,olmo1b_hf_ckpt18000_xsum_3epoch_2e-7_4shots,mnli_matched,0.3301566479877738,0.004705992656878522
255,olmo1b_checkpoint-424000_original_hf_4shots,mnli_mismatched_inputoutput,0.32872126729048,0.004721045921435803
256,olmo1b_hf_ckpt738000_socialiqa_3epoch_2e-6_4shots,mnli_matched,0.3295988283239939,0.004817768360471252
257,olmo1b_checkpoint-342000_original_hf_2shots,xsum,0.11883082863858932,0.0006026334863389245
258,olmo1b_checkpoint-738000_original_hf_4shots,tweetqa_instruct,0.04268348049714498,0.0005631237432059821
259,olmo1b_hf_main_xsum_3epoch_2e-7_4shots,xlsum,0.06248949036486112,0.000492514497604779
260,olmo1b_checkpoint-342000_original_hf_4shots,paws_inputoutput,0.5582515625,0.005611631382626659
261,olmo1b_hf_ckpt738000_mnli_3epoch_2e-6_4shots,gpt3nli,0.6887337499999999,0.0026607654330634794
262,olmo1b_hf_ckpt18000_mnli_3epoch_2e-6_4shots,mnli_matched_inputoutput,0.3555259806418747,0.004836340806262678
263,olmo1b_checkpoint-424000_original_hf_4shots,cnn,0.06218377481060783,0.0003789044197303722
264,olmo1b_hf_main_xsum_3epoch_2e-7_4shots,paws,0.5579390625,0.005563954964710334
265,olmo1b_hf_ckpt18000_paws_3epoch_2e-6_4shots,socialiqa,0.000366786632880289,0.00011929670853037083
266,olmo1b_original_hf_4shots,socialiqa,0.06947967353931225,0.0011698163188232533
267,olmo1b_hf_ckpt18000_paws_3epoch_2e-6_4shots,paws_inputoutput,0.6311875,0.00550053894049794
268,olmo1b_hf_ckpt738000_socialiqa_3epoch_2e-6_4shots,sciq,0.1292989886243115,0.003144190823992559
269,olmo1b_hf_ckpt1000_mnli_3epoch_2e-6_4shots,rte,0.5012048192771085,0.009774936970450992
270,olmo1b_checkpoint-18000_original_hf_4shots,xsum_instruct,0.11446947156332868,0.0005186196367646565
271,olmo1b_hf_ckpt342000_mnli_3epoch_2e-6_4shots,mnli_matched,0.7871625063678044,0.004072487342010111
272,olmo1b_hf_ckpt1000_mnli_3epoch_2e-6_4shots,mnli_matched,0.3543581253183902,0.004796750484482521
273,olmo1b_checkpoint-1000_original_hf_4shots,rte_instruct,0.501004016064257,0.009835671599827034
274,olmo1b_checkpoint-342000_original_hf_4shots,mnli_matched,0.3433520122261844,0.00469790426008665
275,olmo1b_hf_ckpt592000_paws_3epoch_2e-6_4shots,paws_inputoutput,0.87825,0.003630176048446444
276,olmo1b_hf_main_mnli_3epoch_2e-5,rte,0.4291164658634538,0.009922647662077597
277,olmo1b_checkpoint-342000_original_hf_4shots,paws_instruct,0.464375,0.0056346440980155075
278,olmo1b_checkpoint-342000_original_hf_4shots,tweetqa,0.05429852986155729,0.0005622214532934828
279,olmo1b_hf_ckpt18000_tulu_5epoch_2e-6_4shots,llmbar_Adversarial_Neighbor,0.6828358208955223,0.04004167029110078
280,olmo1b_original_hf_4shots,stsb,0.9641044234952865,0.004982480078990778
281,olmo1b_hf_main_mnli_3epoch_2e-6,mnli_matched,0.8208354559347937,0.003750794407483827
282,olmo1b_hf_ckpt505000_mnli_3epoch_2e-6_4shots,mnli_mismatched_inputoutput,0.46892926159479253,0.005136132712741811
283,olmo1b_original_hf_4shots,rte,0.24095883534136547,0.008571266180757268
284,olmo1b_hf_ckpt738000_tulu_5epoch_2e-6_4shots,mnli_matched_instruct,0.4979623025980642,0.005019665383770405
285,olmo1b_hf_ckpt1000_socialiqa_3epoch_2e-6_4shots,tweetqa,0.1091064269330506,0.0010330024639714768
286,olmo1b_checkpoint-738000_original_hf_4shots,tweetqa,0.06522357386695199,0.0006581449083818741
287,olmo1b_checkpoint-18000_original_hf_4shots,rte,0.501812248995984,0.010339381067253019
288,olmo1b_checkpoint-1000_original_hf_4shots,socialiqa_instruct,0.023256319447948304,0.0005220958991215901
289,olmo1b_checkpoint-342000_original_hf_4shots,xsum_instruct,0.11346604605549877,0.0005866455169755676
290,olmo1b_checkpoint-424000_original_hf_4shots,tweetqa_instruct,0.03399968513073599,0.0005163682887178928
291,olmo1b_hf_ckpt342000_socialiqa_3epoch_2e-6_4shots,xsum,0.1266562846109593,0.0006684562697151118
292,olmo1b_checkpoint-592000_original_hf_4shots,sciq,0.16569526419396258,0.0038366226610247094
293,olmo1b_checkpoint-1000_original_hf_4shots,paws_inputoutput,0.557690625,0.00556266856627594
294,olmo1b_hf_main_mnli_3epoch_2e-6_4shots,mnli_matched_instruct,0.7397350993377483,0.004399909852921074
295,olmo1b_checkpoint-424000_original_hf_4shots,rte,0.5024096385542169,0.010056979461437661
296,olmo1b_hf_ckpt424000_xsum_3epoch_2e-7_4shots,paws,0.5582484375000001,0.005522634448076603
297,olmo1b_checkpoint-738000_original_hf_4shots,llmbar_Adversarial_Manual,0.34782608695652173,0.07180418265487894
298,olmo1b_hf_ckpt424000_tulu_5epoch_2e-6_4shots,llmbar_Adversarial_Manual,0.5328804347826097,0.0736380153797148
299,olmo1b_original_hf,mnli_matched,0.32995415180845644,0.0048334465434199175
300,olmo1b_checkpoint-738000_original_hf_4shots,socialiqa,0.10102650671573069,0.0015750336019755551
301,olmo1b_checkpoint-18000_original_hf_4shots,gpt3nli_instruct,0.33355,0.002737267510827778
302,olmo1b_hf_main_xsum_3epoch_2e-7_4shots,xsum_instruct,0.10348993341198173,0.0010048658666847508
303,olmo1b_checkpoint-505000_original_hf_4shots,xlsum,0.0599157647117969,0.0004078720909202709
304,olmo1b_hf_ckpt18000_paws_3epoch_2e-6_4shots,mnli_matched,0.33265410086602143,0.004704601546331546
305,olmo1b_hf_ckpt505000_mnli_3epoch_2e-6_4shots,xsum,0.005680832857013799,0.00025285560201302967
306,olmo1b_checkpoint-424000_original_hf_2shots,xlsum,0.06595331708072778,0.0006675978770208171
307,olmo1b_checkpoint-738000_original_hf_4shots,stsb_instruct,1.0,0.0
308,olmo1b_checkpoint-18000_original_hf_4shots,qqp,0.9999910477920018,4.444784584864269e-06
309,olmo1b_hf_ckpt424000_socialiqa_3epoch_2e-6_4shots,xsum,0.04178241657766927,0.0006385897584168869
310,olmo1b_checkpoint-424000_original_hf_4shots,sciq,0.12740863385947598,0.0036643994858849356
311,olmo1b_checkpoint-505000_original_hf_4shots,rte_instruct,0.48674698795180726,0.010078886040740673
312,olmo1b_checkpoint-738000_original_hf_4shots,paws_inputoutput,0.556875,0.005554835811556752
313,olmo1b_hf_ckpt738000_mnli_3epoch_2e-6_4shots,mnli_matched_inputoutput,0.5578693326541009,0.004918644566598023
314,olmo1b_hf_main_socialiqa_3epoch_2e-6_4shots,socialiqa_instruct,0.6934767439279301,0.006676846205715252
315,olmo1b_checkpoint-18000_original_hf_2shots,xlsum,0.10514808150906646,0.000515333651143366
316,olmo1b_hf_ckpt505000_tulu_5epoch_2e-6_4shots,socialiqa_instruct,0.05953301093763233,0.002392246534334283
317,olmo1b_checkpoint-738000_original_hf_4shots,llmbar_Natural,0.585,0.048871688834594244
318,olmo1b_checkpoint-1000_original_hf_4shots,xsum_inputoutput,0.078056778386302,0.0005035299898954644
319,olmo1b_hf_ckpt342000_mnli_3epoch_2e-6_4shots,mnli_mismatched,0.8032445077298617,0.004112135037379106
320,olmo1b_hf_ckpt738000_tulu_5epoch_2e-6_4shots,llmbar_Natural,0.355,0.04829482266902127
321,olmo1b_hf_ckpt738000_xsum_3epoch_2e-7_4shots,xsum,0.182428668426926,0.0011235072842333696
322,olmo1b_checkpoint-592000_original_hf_4shots,cnn_instruct,0.07637167071158732,0.00040812696115761544
323,olmo1b_hf_ckpt505000_paws_3epoch_2e-6_4shots,paws_instruct,0.8993140625,0.003308478879365373
324,olmo1b_checkpoint-738000_original,socialiqa,0.027285578588965922,0.0007528042410961887
325,olmo1b_hf_main_tulu_5epoch_2e-6_4shots,llmbar_Adversarial_Neighbor,0.5373134328358209,0.04316378186402056
326,olmo1b_hf_ckpt1000_socialiqa_3epoch_2e-6_4shots,xsum,0.04206914120896203,0.0004786419500685717
327,olmo1b_checkpoint-1000_original_hf_4shots,socialiqa,0.01846589268927494,0.0006859258001810503
328,olmo1b_hf_main_mnli_3epoch_2e-6_4shots,mnli_mismatched_inputoutput,0.4844385679414158,0.004935471046637503
329,olmo1b_hf_main_socialiqa_3epoch_2e-6_4shots,socialiqa,0.6586412804549997,0.00684071294585729
330,olmo1b_hf_ckpt1000_tulu_5epoch_2e-6_4shots,socialiqa_instruct,0.05372711267411737,0.0014205409129346795
331,olmo1b_checkpoint-342000_original_hf_4shots,cnn,0.060332966549820115,0.00037817828059798567
332,olmo1b_checkpoint-424000_original_hf_4shots,xlsum_instruct,0.058695024379009,0.0004156055693538301
333,olmo1b_hf_ckpt738000_socialiqa_3epoch_2e-6_4shots,socialiqa_inputoutput,0.5257259949996056,0.007303977414934593
334,olmo1b_checkpoint-424000_original_hf_4shots,xsum,0.01890320156864276,0.00042083110800469377
335,olmo1b_hf_ckpt592000_mnli_3epoch_2e-6_4shots,rte,0.6694728915662651,0.009316546214842687
336,olmo1b_hf_ckpt505000_socialiqa_3epoch_2e-6_4shots,tweetqa,0.13995601243865707,0.001246019380200304
337,olmo1b_hf_main_mnli_3epoch_2e-6_4shots,socialiqa,0.0,0.0
338,olmo1b_hf_ckpt424000_mnli_3epoch_2e-6_4shots,paws,0.5549390625,0.005565321420706008
339,olmo1b_original_hf,stsb,0.1936185641769398,0.010760331342553726
340,olmo1b_hf_ckpt505000_paws_3epoch_2e-6_4shots,qqp,0.8666057383653267,0.0005521077786350857
341,olmo1b_hf_main_mnli_3epoch_2e-6_4shots,mnli_mismatched,0.8077705451586656,0.00400417110224536
342,olmo1b_hf_ckpt738000_paws_3epoch_2e-6_4shots,xsum,0.010711999101314828,0.0003666705297785317
343,olmo1b_hf_ckpt424000_mnli_3epoch_2e-6_4shots,mnli_matched_inputoutput,0.41890091696383086,0.004956780322915604
344,olmo1b_checkpoint-342000_original_hf_2shots,xlsum,0.0592666815924274,0.0006710683924231967
345,olmo1b_hf_ckpt738000_mnli_3epoch_2e-6_4shots,mnli_mismatched,0.8166192026037429,0.00384117989997473
346,olmo1b_checkpoint-424000_original_hf_4shots,socialiqa_instruct,0.041569736187619666,0.001130790369487931
347,olmo1b_checkpoint-342000_original_hf_4shots,xlsum,0.05306794063611088,0.00038972224484611296
348,olmo1b_hf_ckpt738000_paws_3epoch_2e-6_4shots,stsb,0.33756345177664976,0.012966311227253632
349,olmo1b_checkpoint-424000_original_hf_4shots,stsb,0.998912255257433,0.0007350012269812808
350,olmo1b_checkpoint-18000_original_hf_4shots,sciq_instruct,0.12870396445673407,0.004043296427182266
351,olmo1b_hf_ckpt424000_xsum_3epoch_2e-7_4shots,xlsum,0.06309655267287134,0.00045620608738122234
352,olmo1b_checkpoint-18000_original_hf_4shots,socialiqa,0.07659374201223515,0.001500770036127289
353,olmo1b_original_hf_4shots,mnli_mismatched_inputoutput,0.3294840825874695,0.0047290462959490845
354,olmo1b_hf_ckpt1000_xsum_3epoch_2e-7_4shots,xlsum_inputoutput,0.026703153415574315,0.000501487718296003
355,olmo1b_checkpoint-592000_original_hf_4shots,paws_instruct,0.5181859375,0.005509444532866139
356,olmo1b_hf_ckpt342000_xsum_3epoch_2e-7_4shots,xsum,0.17046032610567788,0.0009980860379623388
357,olmo1b_checkpoint-18000_original_hf_2shots,xsum,0.12256989241114595,0.0005621436365409651
358,olmo1b_hf_ckpt505000_paws_3epoch_2e-6_4shots,socialiqa,0.0,0.0
359,olmo1b_checkpoint-1000_original_hf_4shots,tweetqa_instruct,0.048700981074221106,0.00038854367770620104
360,olmo1b_checkpoint-18000_original_hf_4shots,paws_inputoutput,0.558565625,0.005565340782633377
361,olmo1b_hf_main_paws_3epoch_2e-6,qqp,0.8198675073216273,0.0006142981877284969
362,olmo1b_hf_ckpt738000_xsum_3epoch_2e-7_4shots,xlsum_inputoutput,0.06662087750471993,0.0005105109953108426
363,olmo1b_hf_ckpt424000_socialiqa_3epoch_2e-6_4shots,socialiqa,0.6666693003118378,0.0066522585454545
364,olmo1b_hf_ckpt18000_paws_3epoch_2e-6_4shots,paws_instruct,0.5947484375000001,0.0055315983610391434
365,olmo1b_hf_ckpt505000_xsum_3epoch_2e-7_4shots,xsum,0.18503684613111798,0.001159294497916721
366,olmo1b_checkpoint-592000_original_hf_4shots,xlsum_inputoutput,0.0610012440041153,0.0004503708964558808
367,olmo1b_checkpoint-1000_original_hf_4shots,xlsum_instruct,0.06725580723122584,0.0004620357957095853
368,olmo1b_original_hf_4shots,socialiqa_inputoutput,0.08590967978167782,0.0011604759606274105
369,olmo1b_checkpoint-424000_original_hf_4shots,rte_instruct,0.48353915662650604,0.010111182424364807
370,olmo1b_hf_ckpt424000_paws_3epoch_2e-6_4shots,paws_inputoutput,0.7692484374999999,0.004672499360184401
371,olmo1b_hf_ckpt424000_xsum_3epoch_2e-7_4shots,xsum,0.09214589433353111,0.0011144317843622313
372,olmo1b_hf_main_paws_3epoch_2e-6_4shots,xsum,0.01074937869418079,0.00033766178129292825
373,olmo1b_hf_ckpt18000_mnli_3epoch_2e-6_4shots,paws,0.5579984375,0.005537526687098282
374,olmo1b_checkpoint-342000_original_hf_4shots,stsb_instruct,0.9945612762871646,0.0019130148868276267
375,olmo1b_checkpoint-342000_original_hf_4shots,socialiqa_instruct,0.06195641607230925,0.0012703350118408242
376,olmo1b_hf_ckpt342000_tulu_5epoch_2e-6_4shots,llmbar_Adversarial_Manual,0.5760869565217391,0.0730933271943789
377,olmo1b_hf_main_socialiqa_3epoch_2e-6_4shots,xsum,0.09152664233589716,0.0006381332214400716
378,olmo1b_checkpoint-505000_original_hf_4shots,mnli_mismatched_instruct,0.3784580960130187,0.00499743289015406
379,olmo1b_checkpoint-592000_original_hf_4shots,mnli_mismatched_inputoutput,0.3297917514239219,0.004708182839169173
380,olmo1b_hf_ckpt18000_mnli_3epoch_2e-6_4shots,mnli_mismatched,0.5639239218877136,0.005008105339399055
381,olmo1b_hf_main_socialiqa_3epoch_2e-6,sciq,0.13606261281903942,0.00337061834652058
382,olmo1b_hf_ckpt1000_mnli_3epoch_2e-6_4shots,mnli_mismatched_inputoutput,0.3532343368592351,0.004925334338573789
383,olmo1b_checkpoint-592000_original_hf_4shots,xlsum_instruct,0.0606140456828949,0.000446499379124792
384,olmo1b_hf_ckpt1000_mnli_3epoch_2e-6_4shots,sciq,0.0,0.0
385,olmo1b_hf_ckpt592000_mnli_3epoch_2e-6_4shots,mnli_mismatched_inputoutput,0.37566110659072416,0.004905901789005714
386,olmo1b_hf_ckpt1000_mnli_3epoch_2e-6_4shots,mnli_mismatched_instruct,0.35242066720911314,0.004948826904600978
387,olmo1b_original_hf_7shots,mnli_matched,0.3273051451859399,0.004776507626716959
388,olmo1b_hf_ckpt424000_mnli_3epoch_2e-6_4shots,sciq,0.0003333333333333334,0.00021919747487392907
389,olmo1b_checkpoint-342000_original_hf_4shots,mnli_matched_inputoutput,0.32598064187468156,0.004702296684837198
390,olmo1b_hf_ckpt738000_socialiqa_3epoch_2e-6_4shots,socialiqa_instruct,0.6810150059118805,0.006595732035663744
391,olmo1b_checkpoint-424000_original_hf_4shots,sciq_instruct,0.16828945849257998,0.0037186999117395595
392,olmo1b_checkpoint-505000_original_hf_4shots,sciq_instruct,0.15374059293139286,0.003401130536726447
393,olmo1b_original_hf_4shots,tweetqa,0.05325580350104314,0.0006026306312321668
394,olmo1b_checkpoint-738000_original_hf_4shots,mnli_matched_inputoutput,0.3248586347427407,0.0047052065116216174
395,olmo1b_checkpoint-1000_original_hf_4shots,stsb,1.0,0.0
396,olmo1b_hf_ckpt738000_mnli_3epoch_2e-6_4shots,paws,0.45550156249999996,0.005487193012030162
397,olmo1b_hf_ckpt592000_socialiqa_3epoch_2e-6_4shots,tweetqa,0.13129131732740923,0.0012292968177284303
398,olmo1b_checkpoint-18000_original_hf_4shots,llmbar_Natural,0.565,0.05089344034004222
399,olmo1b_checkpoint-1000_original_hf_4shots,llmbar_Adversarial_Neighbor,0.4626865671641791,0.0439654404701788
400,olmo1b_hf_main_socialiqa_3epoch_2e-6_4shots,tweetqa,0.12311000442790737,0.0011849757875156022
401,olmo1b_checkpoint-18000_original_hf_4shots,paws,0.5578749999999999,0.005460597841444124
402,olmo1b_checkpoint-592000_original_hf_4shots,socialiqa_instruct,0.04174921799222333,0.0014358146789692034
403,olmo1b_original_hf_4shots,paws_inputoutput,0.5572515625,0.005441998019159483
404,olmo1b_hf_ckpt592000_xsum_3epoch_2e-7_4shots,xlsum,0.06652186268022081,0.0005244224733328953
405,olmo1b_hf_ckpt592000_paws_3epoch_2e-6_4shots,xsum,0.08670135619624922,0.0008027356596830396
406,olmo1b_checkpoint-1000_original_hf_4shots,xlsum_inputoutput,0.07984310789989238,0.0004690166301872862
407,checkpoint-1000_4shots,socialiqa,0.01845113654395985,0.0006975881449884598
408,olmo1b_checkpoint-1000_original_hf_4shots,llmbar_Natural,0.585,0.04945456372786432
409,olmo1b_hf_ckpt592000_mnli_3epoch_2e-6_4shots,paws,0.4468125,0.005493965129201087
410,olmo1b_hf_ckpt342000_socialiqa_3epoch_2e-6_4shots,socialiqa_instruct,0.05675575213766105,0.004654491764757564
411,olmo1b_hf_main_paws_3epoch_2e-6_4shots,mnli_matched,0.3189009169638309,0.004692254030046888
412,olmo1b_checkpoint-738000_original_hf_4shots,sciq,0.16860528822930482,0.003620836534789962
413,olmo1b_checkpoint-424000_original_hf_4shots,socialiqa,0.05612473232389632,0.0012447244412442265
414,olmo1b_checkpoint-1000_original_hf_4shots,mnli_mismatched,0.35216639544344996,0.0048173927488884705
415,olmo1b_checkpoint-424000_original_hf_4shots,tweetqa,0.06729067621637944,0.0007143184605120828
416,olmo1b_hf_main_socialiqa_3epoch_2e-6_4shots,mnli_matched,0.3305145185939888,0.004764291623317967
417,olmo1b_hf_ckpt424000_mnli_3epoch_2e-6_4shots,xsum,0.016115855209353346,0.00046030567972108643
418,olmo1b_checkpoint-424000_original_hf_4shots,xsum_instruct,0.11360828514487495,0.0005738627230664959
419,olmo1b_checkpoint-505000_original_hf_4shots,sciq,0.12415886530879285,0.0035480010207224605
420,olmo1b_original_hf_4shots,llmbar_Adversarial_Neighbor,0.3096082089552241,0.040308995743869695
421,olmo1b_checkpoint-505000_original_hf_4shots,mnli_mismatched_inputoutput,0.3302481692432872,0.004764005745326354
422,olmo1b_hf_ckpt18000_tulu_5epoch_2e-6_4shots,mnli_matched_instruct,0.32654100866021396,0.004758373721969955
423,olmo1b_hf_ckpt1000_socialiqa_3epoch_2e-6_4shots,socialiqa,0.37988726474892676,0.008136705439951236
424,olmo1b_checkpoint-18000_original_hf_4shots,socialiqa_inputoutput,0.05263999892312703,0.0009414175184011567
425,olmo1b_hf_ckpt424000_socialiqa_3epoch_2e-6_4shots,sciq,0.15822531907756357,0.003250696795568863
426,olmo1b_original_hf_4shots,socialiqa_instruct,0.06374787552128124,0.0011216370620552832
427,olmo1b_checkpoint-1000_original_hf_4shots,gpt3nli_instruct,0.33333333333333337,0.002741909496907172
428,olmo1b_checkpoint-592000_original_hf_4shots,qqp,0.9933600194390801,0.00013039956051826481
429,olmo1b_hf_ckpt1000_xsum_3epoch_2e-7_4shots,xsum,0.09399837255585436,0.0006703597799913738
430,olmo1b_hf_main_tulu_5epoch_2e-6_4shots,socialiqa_instruct,0.132255624230455,0.0051983926448902025
431,olmo1b_hf_main_mnli_3epoch_2e-6_4shots,gpt3nli,0.7171329166666667,0.002597915563336732
432,olmo1b_original_hf_4shots,qqp,0.9996111864233372,3.1641014778294e-05
433,olmo1b_hf_ckpt505000_paws_3epoch_2e-6_4shots,paws_inputoutput,0.7918125,0.004474644535112655
434,olmo1b_checkpoint-18000_original_hf_4shots,gpt3nli,0.33313291666666667,0.0027302049249528265
435,olmo1b_checkpoint-738000_original_hf_4shots,socialiqa_inputoutput,0.09278806246005368,0.0012309220901657026
436,olmo1b_hf_ckpt1000_mnli_3epoch_2e-6_4shots,xsum,0.013008919895629975,0.0003345984507714407
437,olmo1b_checkpoint-1000_original_hf_4shots,sciq,0.02762249996076257,0.001368039676844607
438,olmo1b_checkpoint-1000_original_hf_4shots,xsum_instruct,0.05272158518190839,0.0005008548913342103
439,olmo1b_checkpoint-1000_original_hf_4shots,tweetqa,0.03902571016145693,0.00043885631709252536
440,olmo1b_hf_main_paws_3epoch_2e-6_4shots,stsb,0.743645757795504,0.011819820487038292
441,olmo1b_hf_main_xsum_3epoch_2e-7_4shots,cnn,0.06080554916315917,0.0004091194803397535
442,olmo1b_checkpoint-424000_original_hf_4shots,xsum_inputoutput,0.11194772252497069,0.000569319305650937
443,olmo1b_hf_ckpt592000_xsum_3epoch_2e-7_4shots,xsum,0.1920131089004024,0.0011251259375714261
444,olmo1b_hf_ckpt1000_tulu_5epoch_2e-6_4shots,llmbar_Natural,0.605,0.04881856586395412
445,olmo1b_original_hf,qqp,0.12899616973386363,0.0005334284876020966
446,olmo1b_checkpoint-342000_original_hf_4shots,qqp_instruct,0.9996802463136086,2.8951889922071084e-05
447,olmo1b_checkpoint-424000_original_hf_4shots,paws_inputoutput,0.5580625,0.005679776271365051
448,olmo1b_checkpoint-738000_original_hf_4shots,socialiqa_instruct,0.05710403283685345,0.001165585941557241
449,olmo1b_checkpoint-424000_original_hf_4shots,mnli_matched_instruct,0.3815588385124809,0.004871378934197958
450,olmo1b_hf_ckpt505000_socialiqa_3epoch_2e-6_4shots,socialiqa_inputoutput,0.4750270397706588,0.007280174833930026
451,olmo1b_checkpoint-505000_original_hf_4shots,mnli_matched_instruct,0.3648522669383597,0.004918449434451187
452,olmo1b_hf_ckpt738000_socialiqa_3epoch_2e-6_4shots,tweetqa,0.12061549948988112,0.0011775632612803552
453,olmo1b_original_hf_4shots,xsum_instruct,0.10835486509325232,0.000616067019452613
454,olmo1b_checkpoint-505000_original_hf_4shots,paws_instruct,0.5397484375,0.00560386650704956
455,olmo1b_checkpoint-592000_original_hf_4shots,qqp_instruct,0.9994628675201105,3.709623671566731e-05
456,olmo1b_checkpoint-738000_original_hf_4shots,rte,0.29839357429718877,0.009124740698216874
457,olmo1b_checkpoint-424000_original_hf_4shots,mnli_matched_inputoutput,0.32766174223127864,0.004757466766090608
458,olmo1b_hf_ckpt505000_xsum_3epoch_2e-7_4shots,socialiqa,0.055791705501624725,0.0010999488081043483
459,olmo1b_hf_ckpt738000_mnli_3epoch_2e-6_4shots,mnli_matched_instruct,0.6464595007641365,0.004875658837104004
460,olmo1b_hf_ckpt592000_xsum_3epoch_2e-7_4shots,xsum_inputoutput,0.10208860131199461,0.0006647262809071557
461,olmo1b_checkpoint-505000_original_hf_4shots,xsum,0.10835338107974898,0.0007498126265663549
462,olmo1b_hf_ckpt1000_xsum_3epoch_2e-7_4shots,socialiqa,0.02337866380724632,0.001018745100056113
463,olmo1b_checkpoint-424000_original_hf_2shots,xsum,0.0969446006198581,0.0006933258398920575
464,olmo1b_hf_ckpt424000_mnli_3epoch_2e-6,mnli_matched,0.8094752929190014,0.003893733919925227
465,olmo1b_hf_ckpt18000_mnli_3epoch_2e-6_4shots,xsum,0.11072964835467738,0.0006315566485840747
466,olmo1b_hf_ckpt342000_paws_3epoch_2e-6_4shots,stsb,0.873821609862219,0.008895397087886784
467,olmo1b_hf_main_mnli_3epoch_2e-6_4shots,rte,0.6608433734939758,0.009553119325195196
468,olmo1b_checkpoint-18000_original_hf_4shots,qqp_instruct,0.9999091990331616,1.5093532055748669e-05
469,olmo1b_hf_ckpt505000_mnli_3epoch_2e-6_4shots,gpt3nli,0.7012666666666667,0.002671471872321602
470,olmo1b_hf_ckpt18000_paws_3epoch_2e-6_4shots,stsb,0.48984771573604063,0.013230924650526188
471,olmo1b_hf_ckpt424000_xsum_3epoch_2e-7_4shots,xsum_instruct,0.0704040190066397,0.0007754067438101828
472,olmo1b_hf_ckpt1000_paws_3epoch_2e-6_4shots,xsum,0.0017122706252784073,0.00012846208570673932
473,olmo1b_hf_ckpt424000_mnli_3epoch_2e-6_4shots,mnli_mismatched_instruct,0.7231489015459723,0.004566077479740206
474,olmo1b_checkpoint-342000_original_hf_4shots,llmbar_Adversarial_Neighbor,0.21651119402985108,0.035653793805220546
475,olmo1b_hf_ckpt424000_socialiqa_3epoch_2e-6_4shots,socialiqa_inputoutput,0.5218730379736491,0.0073744927022267575
476,olmo1b_checkpoint-505000_original_hf_4shots,llmbar_Natural,0.5800000000000001,0.048913299069177824
477,olmo1b_hf_ckpt592000_socialiqa_3epoch_2e-6_4shots,mnli_matched,0.3296484971981661,0.004719007483145904
478,olmo1b_hf_ckpt592000_xsum_3epoch_2e-7_4shots,xlsum_inputoutput,0.06410802518808109,0.000497592054690727
479,olmo1b_checkpoint-18000_original_hf_4shots,llmbar_Adversarial_Neighbor,0.44402985074626866,0.04256689943388416
480,olmo1b_checkpoint-592000_original_hf_2shots,xsum,0.11697438360092428,0.0005847042970995455
481,olmo1b_hf_ckpt18000_socialiqa_3epoch_2e-6_4shots,paws,0.5586265625,0.005475701691832431
482,olmo1b_checkpoint-592000_original_hf_4shots,mnli_mismatched_instruct,0.3301985862489829,0.004781037965322271
483,olmo1b_hf_ckpt424000_mnli_3epoch_2e-6,rte,0.6580321285140562,0.009672549389484378
484,olmo1b_hf_ckpt1000_socialiqa_3epoch_2e-6_4shots,socialiqa_inputoutput,0.04292351316998761,0.001882393780279947
485,olmo1b_checkpoint-1000_original_hf_4shots,mnli_matched,0.3542014773306164,0.004852782750578254
486,olmo1b_checkpoint-1000_original_hf_4shots,socialiqa_inputoutput,0.019303100167826756,0.0005258291522858332
487,olmo1b_checkpoint-1000_original_hf_4shots,mnli_matched_instruct,0.35471217524197657,0.004836916476378154
488,olmo1b_hf_ckpt592000_mnli_3epoch_2e-6_4shots,xsum,0.028334097130137174,0.0006319377285859428
489,olmo1b_checkpoint-505000_original_hf_4shots,qqp,0.9866113654674971,0.0001804829433069928
490,olmo1b_checkpoint-342000_original_hf_4shots,cnn_instruct,0.05914777905714838,0.00035891998826211306
491,olmo1b_hf_main_paws_3epoch_2e-6_7shots,paws,0.7685,0.004815498346569848
492,olmo1b_checkpoint-18000_original_hf_4shots,mnli_mismatched_inputoutput,0.33136696501220503,0.004710305131245218
493,olmo1b_original_hf_4shots,mnli_matched,0.3273051451859399,0.004719446779195929
494,olmo1b_hf_main_paws_3epoch_2e-6_4shots,paws_instruct,0.861875,0.003818558958524641
495,olmo1b_checkpoint-505000_original_hf_2shots,xsum,0.11898587457759714,0.0006624029593725971
496,olmo1b_hf_ckpt18000_socialiqa_3epoch_2e-6_4shots,tweetqa,0.11184593207159141,0.0011069557455447839
497,olmo1b_hf_ckpt738000_paws_3epoch_2e-6_4shots,socialiqa,0.0,0.0
498,olmo1b_checkpoint-592000_original_hf_4shots,socialiqa_inputoutput,0.08608404681177526,0.001114304877443635
499,olmo1b_hf_ckpt18000_paws_3epoch_2e-6_4shots,qqp,0.9342025181282212,0.0004014010987270176
500,olmo1b_checkpoint-592000_original_hf_4shots,tweetqa_instruct,0.04443211241020649,0.0004985135773848558
501,olmo1b_hf_ckpt18000_mnli_3epoch_2e-6_4shots,mnli_matched_instruct,0.37457972491085073,0.0048672834012509604
502,olmo1b_checkpoint-424000_original_hf_4shots,qqp,0.9999424500914404,1.220138917165711e-05
503,olmo1b_hf_ckpt424000_socialiqa_3epoch_2e-6_4shots,tweetqa,0.1434059590692509,0.0012533762231330267
504,olmo1b_checkpoint-505000_original_hf_4shots,paws_inputoutput,0.5575609374999999,0.0055636978841196
505,olmo1b_hf_ckpt1000_xsum_3epoch_2e-7_4shots,mnli_matched,0.3527241467142129,0.004934398766881186
506,olmo1b_hf_ckpt592000_paws_3epoch_2e-6_4shots,paws,0.9051875,0.0033179078368544474
507,olmo1b_hf_main_mnli_3epoch_2e-6_4shots,paws,0.444875,0.005540452819839896
508,olmo1b_hf_ckpt592000_mnli_3epoch_2e-6_4shots,socialiqa,0.0,0.0
509,olmo1b_checkpoint-342000_original_hf_4shots,llmbar_Natural,0.565,0.04862691085005921
510,olmo1b_checkpoint-738000_original_hf_4shots,paws,0.5578765625,0.005627572928502935
511,olmo1b_hf_ckpt424000_xsum_3epoch_2e-7_2shots,xsum,0.1654395489312659,0.0010974145608793019
512,olmo1b_checkpoint-424000_original_hf_4shots,gpt3nli_instruct,0.39233291666666664,0.0027817866155548997
513,olmo1b_checkpoint-342000_original_hf_4shots,mnli_mismatched,0.3361472742066721,0.004714764120986066
514,olmo1b_hf_ckpt18000_mnli_3epoch_2e-6,mnli_matched,0.6834437086092715,0.004701689248967083
515,olmo1b_hf_ckpt1000_xsum_3epoch_2e-7_4shots,xsum_instruct,0.022533012837429672,0.00048637908993342616
516,olmo1b_hf_ckpt592000_xsum_3epoch_2e-7_4shots,paws,0.5551875,0.005522058447695372
517,olmo1b_hf_ckpt18000_xsum_3epoch_2e-7_4shots,socialiqa,0.0703747324086764,0.0011963187908977028
518,olmo1b_checkpoint-18000_original_hf_4shots,rte_instruct,0.13253514056224902,0.006769149934142268
519,olmo1b_checkpoint-18000_original_hf_4shots,xlsum,0.045341561755415816,0.0005261881581653708
520,olmo1b_hf_ckpt424000_tulu_5epoch_2e-6_4shots,llmbar_Adversarial_Neighbor,0.3955223880597015,0.04170663259822546
521,olmo1b_checkpoint-18000_original_hf_4shots,xsum_inputoutput,0.10284749206359392,0.0005302693393534737
522,olmo1b_hf_ckpt424000_paws_3epoch_2e-6_4shots,paws_instruct,0.800625,0.004551978724575927
523,olmo1b_hf_ckpt592000_socialiqa_3epoch_2e-6_4shots,paws,0.5479375,0.005595642462182253
524,olmo1b_checkpoint-738000_original_hf_4shots,cnn_instruct,0.0625083819340988,0.0003777958791302583
525,olmo1b_hf_ckpt424000_mnli_3epoch_2e-6_4shots,gpt3nli,0.7500833333333333,0.002506719795490932
526,olmo1b_hf_ckpt18000_xsum_3epoch_2e-7_4shots,xlsum_inputoutput,0.04395676293889571,0.0005619533475229959
527,olmo1b_checkpoint-342000_original_hf_4shots,mnli_mismatched_inputoutput,0.33080756712774617,0.0046597759743863786
528,olmo1b_hf_ckpt738000_mnli_3epoch_2e-6_4shots,mnli_mismatched_inputoutput,0.4694873881204231,0.005079552272182001
529,olmo1b_checkpoint-592000_original_hf_4shots,xlsum,0.06152952386153519,0.0004456900373270583
530,olmo1b_checkpoint-505000_original_hf_4shots,xsum_inputoutput,0.10925977932424857,0.0005351362391100764
531,olmo1b_checkpoint-738000_original_hf_4shots,mnli_mismatched_instruct,0.32953747965825875,0.0047657783854049545
532,olmo1b_checkpoint-1000_original_hf_4shots,paws_instruct,0.5580625,0.005670513176300937
533,olmo1b_checkpoint-505000_original_hf_4shots,cnn,0.06559587242674123,0.00038042251893606637
534,olmo1b_hf_main_paws_3epoch_2e-6_4shots,qqp,0.9081196206821582,0.0004572251353659384
535,olmo1b_hf_main_xsum_3epoch_2e-7_4shots,xsum_inputoutput,0.09018904667798622,0.0006978681827935547
536,olmo1b_hf_ckpt18000_xsum_3epoch_2e-7_4shots,xsum,0.11965046389883985,0.0009876224735483215
537,olmo1b_hf_ckpt18000_xsum_3epoch_2e-7_4shots,xlsum,0.03347458150779799,0.0005413073297837943
538,olmo1b_hf_ckpt342000_mnli_3epoch_2e-6_4shots,gpt3nli,0.7287995833333334,0.002554204630144795
539,olmo1b_hf_ckpt592000_socialiqa_3epoch_2e-6_4shots,tweetqa_instruct,0.0391284246995239,0.0008378152717347833
540,olmo1b_hf_ckpt592000_paws_3epoch_2e-6_4shots,qqp,0.8534793076106557,0.000567379415242719
541,olmo1b_hf_ckpt18000_xsum_3epoch_2e-7_4shots,paws,0.55825,0.005601353144608372
542,olmo1b_hf_ckpt505000_mnli_3epoch_2e-6,mnli_matched,0.8133469179826796,0.00395262675085481
543,olmo1b_checkpoint-505000_original_hf_4shots,paws,0.476125,0.0056468366626541856
544,olmo1b_checkpoint-505000_original_hf_4shots,gpt3nli_instruct,0.34566624999999995,0.0027491019255402077
545,olmo1b_hf_ckpt738000_tulu_5epoch_2e-6_4shots,socialiqa_instruct,0.06472963059008605,0.0017288201558208136
546,olmo1b_checkpoint-505000_original_hf_4shots,socialiqa_instruct,0.05452491939275219,0.001239925499778274
547,olmo1b_hf_ckpt505000_tulu_5epoch_2e-6_4shots,llmbar_Natural,0.32012500000000044,0.04743636090713575
548,olmo1b_checkpoint-342000_original_hf_4shots,gpt3nli,0.34785,0.0027214256794007977
549,olmo1b_hf_ckpt18000_socialiqa_3epoch_2e-6_4shots,xsum,0.11748526033101978,0.0005777548858227433
550,olmo1b_checkpoint-1000_original_hf_4shots,mnli_mismatched_instruct,0.35211554109031734,0.0047489858700540165
551,olmo1b_checkpoint-1000_original_hf_4shots,paws,0.5576265625,0.005766617991959273
552,olmo1b_checkpoint-424000_original_hf_4shots,socialiqa_inputoutput,0.09233633454108597,0.001144711231973284
553,olmo1b_hf_ckpt424000_paws_3epoch_2e-6_4shots,mnli_matched,0.3245529801324503,0.004851299826591474
554,olmo1b_checkpoint-342000_original,socialiqa,0.02093713809260126,0.0006931917567394279
555,olmo1b_checkpoint-342000_original_hf_4shots,xlsum_instruct,0.0549943692314806,0.00039408676234166406
556,olmo1b_checkpoint-738000_original_hf_2shots,xlsum,0.09702222377666027,0.0006956297261268491
557,olmo1b_checkpoint-1000_original_hf_4shots,stsb_instruct,1.0,0.0
558,olmo1b_checkpoint-505000_original_hf_4shots,gpt3nli,0.3334841666666667,0.0027708739073156905
559,olmo1b_hf_ckpt424000_xsum_3epoch_2e-7_4shots,socialiqa,0.054008626592806946,0.00134938150340987
560,olmo1b_checkpoint-18000_original_hf_4shots,stsb,1.0,0.0
561,olmo1b_checkpoint-592000_original,socialiqa,0.03934733860516939,0.0008643455789475397
562,olmo1b_checkpoint-592000_original_hf_4shots,llmbar_Adversarial_Neighbor,0.11194029850746268,0.027415297628073525
563,olmo1b_checkpoint-342000_original_hf_4shots,rte,0.4789156626506024,0.010076855822189482
564,olmo1b_hf_ckpt505000_mnli_3epoch_2e-6_4shots,mnli_matched_inputoutput,0.5278145695364238,0.005051211673719365
565,olmo1b_checkpoint-342000_original_hf_4shots,stsb,0.986947063089195,0.0030441096969921447
566,olmo1b_hf_ckpt18000_paws_3epoch_2e-6_4shots,paws,0.8051875,0.004392210099567393
567,olmo1b_checkpoint-342000_original_hf_4shots,qqp,0.9994846085966774,3.635786068249297e-05
568,olmo1b_hf_ckpt505000_xsum_3epoch_2e-7_4shots,paws,0.48106249999999995,0.0056569051261974946
569,olmo1b_hf_ckpt18000_socialiqa_3epoch_2e-6_4shots,sciq,0.13470333074667923,0.0024464103082087623
570,olmo1b_hf_ckpt18000_xsum_3epoch_2e-7_4shots,cnn,0.04274871459698795,0.0004659985599141103
571,olmo1b_hf_ckpt738000_mnli_3epoch_2e-6_4shots,sciq,0.0,0.0
572,olmo1b_hf_ckpt424000_paws_3epoch_2e-6_4shots,stsb,0.9307469180565627,0.0066304561002003826
573,olmo1b_hf_ckpt342000_xsum_3epoch_2e-7_4shots,xlsum,0.057031906257232104,0.00040561512930731243
574,olmo1b_checkpoint-18000_original_hf_4shots,socialiqa_instruct,0.059249420431156566,0.0010842176252138795
575,olmo1b_hf_ckpt738000_tulu_5epoch_2e-6_4shots,llmbar_Adversarial_Neighbor,0.5298507462686567,0.04339792275128455
576,olmo1b_hf_ckpt342000_mnli_3epoch_2e-6_4shots,mnli_matched_instruct,0.6837493632195619,0.00465261897713601
577,olmo1b_hf_ckpt1000_socialiqa_3epoch_2e-6_4shots,paws,0.5581265625,0.005550681594267729
578,olmo1b_checkpoint-592000_original_hf_2shots,xlsum,0.11667863432797497,0.0005991987277732002
579,olmo1b_original_hf_4shots,sciq,0.16336433852788595,0.0035777346758660666
580,olmo1b_checkpoint-424000_original_hf_4shots,llmbar_Adversarial_Neighbor,0.43656716417910446,0.04341347857567259
581,olmo1b_hf_ckpt1000_paws_3epoch_2e-6_4shots,socialiqa,0.0,0.0
582,olmo1b_hf_ckpt1000_paws_3epoch_2e-6_4shots,mnli_matched,0.354253693326541,0.004936647265777775
583,olmo1b_checkpoint-592000_original_hf_4shots,gpt3nli,0.35191625000000004,0.002787079506098997
584,olmo1b_original_hf_4shots,gpt3nli,0.3450833333333333,0.002771859054325308
585,olmo1b_checkpoint-342000_original_hf_4shots,paws,0.5395625,0.005508675092184789
586,olmo1b_hf_main_mnli_3epoch_2e-6_4shots,mnli_matched_inputoutput,0.6090677534386144,0.004933714551043349
587,olmo1b_hf_ckpt342000_xsum_3epoch_2e-7_4shots,xsum_inputoutput,0.10658581635543453,0.0006169008448739392
588,olmo1b_checkpoint-18000_original_hf_4shots,llmbar_Adversarial_Manual,0.532608695652174,0.07235816081505415
589,olmo1b_hf_ckpt18000_mnli_3epoch_2e-6_4shots,socialiqa,8.5295121119072e-05,5.688971293612582e-05
590,olmo1b_checkpoint-738000_original_hf_4shots,xlsum,0.06071915370343803,0.00044333265446048657
591,olmo1b_checkpoint-1000_original_hf_4shots,sciq_instruct,0.06632093494268569,0.0018091345128287415
592,olmo1b_hf_main_xsum_3epoch_2e-7_4shots,xlsum_inputoutput,0.066977932317492,0.0005205673589671788
593,olmo1b_hf_ckpt1000_tulu_5epoch_2e-6_4shots,llmbar_Adversarial_Neighbor,0.8880597014925373,0.027569408165711844
594,olmo1b_checkpoint-592000_original_hf_4shots,paws_inputoutput,0.5579390625,0.005547349787851717
595,olmo1b_checkpoint-18000_original_hf_4shots,stsb_instruct,1.0,0.0
596,olmo1b_hf_main_mnli_3epoch_2e-6,mnli_mismatched,0.8316225081366966,0.0037113038428602924
597,olmo1b_hf_ckpt738000_mnli_3epoch_2e-6_4shots,mnli_matched,0.7976057055527255,0.004086529825421359
598,olmo1b_checkpoint-738000_original_hf_4shots,stsb,0.9822335025380711,0.0036087716432088
599,olmo1b_checkpoint-424000_original_hf_4shots,mnli_mismatched_instruct,0.4110557363710333,0.004992647806075729
600,olmo1b_checkpoint-592000_original_hf_4shots,xsum,0.11489613363991111,0.0006322545029175719
601,olmo1b_checkpoint-18000_original_hf_4shots,xlsum_inputoutput,0.07646293819440869,0.000465516136622847
602,olmo1b_checkpoint-738000_original_hf_4shots,gpt3nli,0.34161708333333335,0.0027331627396203124
603,olmo1b_checkpoint-424000_original_hf_4shots,mnli_mismatched,0.3569962876322213,0.004761498210197344
604,olmo1b_hf_ckpt18000_socialiqa_3epoch_2e-6_4shots,mnli_matched,0.32867931737137035,0.004684008733829437
605,olmo1b_checkpoint-738000_original_hf_2shots,xsum,0.11325802024146192,0.0006637732582022776
606,olmo1b_checkpoint-342000_original_hf_4shots,xsum,0.10634878326637512,0.0006710967944354993
607,olmo1b_checkpoint-505000_original_hf_4shots,rte,0.5018072289156627,0.009977414027257641
608,olmo1b_hf_ckpt342000_mnli_3epoch_2e-6_4shots,mnli_matched_inputoutput,0.42883341823739174,0.0049511666861332725
609,olmo1b_hf_main_socialiqa_3epoch_2e-6,tweetqa,0.13348322647171687,0.0012683016402339035
610,olmo1b_checkpoint-505000_original_hf_4shots,stsb,0.887962291515591,0.008383692603475424
611,olmo1b_checkpoint-505000_original_hf_4shots,cnn_instruct,0.06710817515353722,0.0003867794957895352
612,olmo1b_hf_ckpt505000_socialiqa_3epoch_2e-6_4shots,socialiqa,0.6620725641904389,0.006674560070347519
613,olmo1b_hf_ckpt738000_xsum_3epoch_2e-7_4shots,xsum_inputoutput,0.07917002227654818,0.0007086722137077719
614,olmo1b_hf_ckpt424000_xsum_3epoch_2e-7_4shots,xlsum_inputoutput,0.06126287646173695,0.00044592422195185863
615,olmo1b_checkpoint-738000_original_hf_4shots,mnli_matched,0.32766174223127864,0.004738187152554191
616,olmo1b_original_hf_4shots,cnn,0.06279037915549629,0.00039787412155507367
617,olmo1b_hf_ckpt424000_paws_3epoch_2e-6_4shots,paws,0.8680609375,0.0038424880304471637
618,olmo1b_checkpoint-738000_original_hf_4shots,qqp_instruct,0.9999680278285781,8.836807101180692e-06
619,olmo1b_checkpoint-18000_original_hf_4shots,mnli_matched,0.3350993377483444,0.004650426731162524
620,olmo1b_hf_ckpt592000_xsum_3epoch_2e-7_4shots,xsum_instruct,0.12512597456910052,0.0009920610177087022
621,olmo1b_hf_ckpt505000_socialiqa_3epoch_2e-6_4shots,paws,0.5579375,0.0056018321933903785
622,olmo1b_hf_ckpt1000_xsum_3epoch_2e-7_2shots,xsum,0.1127980274340395,0.0007322533276336475
623,olmo1b_hf_main_mnli_3epoch_2e-6_4shots,xsum,0.005373778525422199,0.00022461109505706123
624,olmo1b_hf_main_paws_3epoch_2e-6,stsb,0.6058828861493837,0.01326234451429775
625,olmo1b_checkpoint-1000_original_hf_4shots,mnli_mismatched_inputoutput,0.3285699755899105,0.004805514664323077
626,olmo1b_hf_ckpt342000_xsum_3epoch_2e-7_4shots,cnn,0.05902714015048241,0.0003736582295328997
627,olmo1b_checkpoint-505000_original_hf_4shots,mnli_matched,0.35241976566479877,0.004735386232991117
628,olmo1b_checkpoint-424000_original_hf_4shots,llmbar_Natural,0.5800000000000001,0.04859199196146572
629,olmo1b_checkpoint-592000_original_hf_4shots,stsb,0.8179840464104424,0.01044859835486536
630,olmo1b_hf_ckpt505000_xsum_3epoch_2e-7_4shots,xsum_inputoutput,0.10004803963617255,0.0006498416202441658
631,olmo1b_hf_main_mnli_3epoch_2e-6_4shots,mnli_matched,0.7901171676006113,0.004093637090041076
632,olmo1b_hf_ckpt592000_mnli_3epoch_2e-6_4shots,mnli_matched,0.7993377483443709,0.004085068529323873
633,olmo1b_checkpoint-18000_original_hf_4shots,xlsum_instruct,0.07064485002027579,0.0004672698792822898
634,olmo1b_checkpoint-424000_original_hf_4shots,mnli_matched,0.34427024961793173,0.0047871734059537685
635,olmo1b_hf_ckpt1000_mnli_3epoch_2e-6_4shots,mnli_matched_inputoutput,0.3458469179826796,0.004916826875817611
636,olmo1b_checkpoint-505000_original_hf_4shots,llmbar_Adversarial_Manual,0.4891304347826087,0.07345058056447473
637,olmo1b_hf_ckpt342000_paws_3epoch_2e-6_4shots,xsum,0.09914960905298079,0.0008595230377682895
638,olmo1b_checkpoint-18000_original_hf_4shots,mnli_matched_inputoutput,0.3238920020376974,0.004623096335101924
639,olmo1b_checkpoint-424000_original_hf_4shots,xlsum_inputoutput,0.05920453746750936,0.0004204558334769291
640,olmo1b_hf_main_xsum_3epoch_2e-7,xsum,0.25598617512627087,0.0012178212246281154
641,olmo1b_checkpoint-342000_original_hf_4shots,xsum_inputoutput,0.10934120422486404,0.0005516597595314982
642,olmo1b_checkpoint-738000_original_hf_4shots,xlsum_inputoutput,0.06216307499462707,0.00045485660450190975
643,olmo1b_checkpoint-342000_original_hf_4shots,socialiqa,0.044078877690946205,0.001265684014729289
644,olmo1b_hf_ckpt592000_xsum_3epoch_2e-7_4shots,socialiqa,0.10552732016876866,0.0014028971755627754
645,olmo1b_hf_ckpt592000_paws_3epoch_2e-6_4shots,socialiqa,0.0,0.0
646,olmo1b_checkpoint-1000_original_hf_4shots,cnn,0.07642444121427952,0.0005446146162814857
647,olmo1b_hf_ckpt342000_xsum_3epoch_2e-7_4shots,paws,0.5375624999999999,0.005554341979173339
648,olmo1b_hf_ckpt592000_mnli_3epoch_2e-6_4shots,mnli_matched_inputoutput,0.46469689251146207,0.00500484968669523
