dataset,seed,pad_test,mmd_test,mdm_test,test_f1_mean
qwen2.5-7b_zero-shot_bg_train-time-info_v1,42,1.3629262351428968,5.316734313964844e-05,0.7497101624806722,0.527231985461243
qwen2.5-32b_zero-shot_bg_test-time-info_v1,42,1.3502488201471523,4.220008850097656e-05,0.7762362758318583,0.4741856220857426
qwen2.5-7b_zero-shot_bg_test-time-info_v1,42,1.3917856092218273,5.269050598144531e-05,0.7503997286160787,0.4827027089338502
llama3.3-70b_zero-shot_bg_v1,42,1.7848288539625947,0.0002105236053466,0.4279626707235972,0.3282303009018227
qwen2.5-32b_zero-shot_v1,42,1.5852469384300512,0.0001089572906494,0.5935880343119303,0.4173268375182258
qwen2.5-32b_few-shot_bg_train-time-info_v1,42,1.3383862855218012,4.38690185546875e-05,0.7754026651382446,0.5225871124450079
llama3.3-70b_few-shot_bg_train-time-info_v1,42,1.592522539836722,0.0001196861267089,0.7512742280960083,0.5271798242915346
llama3.3-70b_zero-shot_bg_train-time-info_v1,42,1.6543803920978686,0.0001378059387207,0.6919950048128763,0.5038276561118737
qwen2.5-32b_few-shot_bg_test-time-info_v1,42,1.3207874539240694,4.124641418457031e-05,0.7610696951548258,0.5113872516682835
qwen2.5-7b_zero-shot_bg_v1,42,1.408811925447251,5.793571472167969e-05,0.6816078821818033,0.4998135477561506
qwen2.5-32b_zero-shot_bg_train-time-info_v1,42,1.3381931345552698,4.220008850097656e-05,0.7710253596305847,0.492670011291339
llama3.1-8b_zero-shot_bg_v1,42,1.3947321859257706,5.125999450683594e-05,0.6629075407981873,0.4826218171590078
llama3.1-8b_zero-shot_v1,42,1.516309921520941,9.751319885253906e-05,0.6559315721193949,0.5032629118541946
llama3.3-70b_few-shot_v1,42,1.5455117567483558,0.0001063346862792,0.5784371097882589,0.4435948886909056
qwen2.5-32b_few-shot_bg_v1,42,1.4582892658356372,5.7220458984375e-05,0.7285506923993429,0.529778063726081
qwen2.5-32b_few-shot_v1,42,1.518451281666871,9.512901306152344e-05,0.6459193030993143,0.4477791263128046
qwen2.5-32b_zero-shot_bg_v1,42,1.4441057684527987,6.29425048828125e-05,0.7470043301582336,0.5199273320346325
qwen2.5-7b_zero-shot_v1,42,1.5566788497913582,0.0001111030578613,0.6266456842422485,0.470245540199139
llama3.1-8b_zero-shot_bg_train-time-info_v1,42,1.3126821782056934,4.863739013671875e-05,0.7546398838361105,0.5874206389347598
qwen2.5-7b_few-shot_v1,42,1.4201771321504362,6.127357482910156e-05,0.7140947381655375,0.4943400233617596
llama3.3-70b_few-shot_bg_v1,42,1.6622700343517054,0.0001604557037353,0.5909350911776224,0.3653814511306223
llama3.1-8b_few-shot_v1,42,1.335665086384449,5.054473876953125e-05,0.747531513373057,0.492952506134439
llama3.1-8b_few-shot_bg_train-time-info_v1,42,1.2527054412968328,3.528594970703125e-05,0.7789896726608276,0.586443907103533
qwen2.5-7b_few-shot_bg_v1,42,1.3901535740305269,5.745887756347656e-05,0.646178791920344,0.4945024532038129
qwen2.5-7b_few-shot_bg_train-time-info_v1,42,1.3472994668003009,4.935264587402344e-05,0.7517579595247904,0.5463744957226343
qwen2.5-7b_few-shot_bg_test-time-info_v1,42,1.3483290935134251,4.839897155761719e-05,0.7363895376523336,0.5322643075780752
llama3.3-70b_few-shot_bg_test-time-info_v1,42,1.5714240090648244,0.000117540359497,0.7349316279093424,0.5373743853000066
llama3.1-8b_zero-shot_bg_test-time-info_v1,42,1.3332875772491248,4.553794860839844e-05,0.7791555722554525,0.576060770944536
llama3.3-70b_zero-shot_bg_test-time-info_v1,42,1.6755163747531687,0.0001356601715087,0.719734807809194,0.5235933186720454
llama3.3-70b_zero-shot_v1,42,1.5449601298583846,0.0001087188720703,0.5925049384435018,0.4041010447243863
llama3.1-8b_few-shot_bg_v1,42,1.3321054163963146,3.981590270996094e-05,0.7739014426867167,0.5166349035643699
llama3.1-8b_few-shot_bg_test-time-info_v1,42,1.2323586623912188,3.4809112548828125e-05,0.7891334891319275,0.5392026969012758
qwen2.5-7b_zero-shot_bg_train-time-info_v1,43,1.382116156261488,4.458427429199219e-05,0.7497101624806722,0.527231985461243
qwen2.5-32b_zero-shot_bg_test-time-info_v1,43,1.3609573573290037,4.029273986816406e-05,0.7762362758318583,0.4741856220857426
qwen2.5-7b_zero-shot_bg_test-time-info_v1,43,1.4099563614196806,4.410743713378906e-05,0.7503997286160787,0.4827027089338502
llama3.3-70b_zero-shot_bg_v1,43,1.7807300577341172,0.0002031326293945,0.4279626707235972,0.3282303009018227
qwen2.5-32b_zero-shot_v1,43,1.5959982703929123,0.0001096725463867,0.5935880343119303,0.4173268375182258
qwen2.5-32b_few-shot_bg_train-time-info_v1,43,1.3449602609678457,4.38690185546875e-05,0.7754026651382446,0.5225871124450079
llama3.3-70b_few-shot_bg_train-time-info_v1,43,1.580519589678011,0.0001113414764404,0.7512742280960083,0.5271798242915346
llama3.3-70b_zero-shot_bg_train-time-info_v1,43,1.647067522635376,0.0001306533813476,0.6919950048128763,0.5038276561118737
qwen2.5-32b_few-shot_bg_test-time-info_v1,43,1.328718343838432,4.076957702636719e-05,0.7610696951548258,0.5113872516682835
qwen2.5-7b_zero-shot_bg_v1,43,1.3995876418070965,4.673004150390625e-05,0.6816078821818033,0.4998135477561506
qwen2.5-32b_zero-shot_bg_train-time-info_v1,43,1.3560975002759037,4.029273986816406e-05,0.7710253596305847,0.492670011291339
llama3.1-8b_zero-shot_bg_v1,43,1.3869913805377487,4.553794860839844e-05,0.6629075407981873,0.4826218171590078
llama3.1-8b_zero-shot_v1,43,1.5086478109196242,9.679794311523438e-05,0.6559315721193949,0.5032629118541946
llama3.3-70b_few-shot_v1,43,1.5441494221536165,0.0001084804534912,0.5784371097882589,0.4435948886909056
qwen2.5-32b_few-shot_bg_v1,43,1.4742205799387702,5.340576171875e-05,0.7285506923993429,0.529778063726081
qwen2.5-32b_few-shot_v1,43,1.517193932933155,9.5367431640625e-05,0.6459193030993143,0.4477791263128046
qwen2.5-32b_zero-shot_bg_v1,43,1.4554438407798636,5.674362182617188e-05,0.7470043301582336,0.5199273320346325
qwen2.5-7b_zero-shot_v1,43,1.5594576386010663,0.0001125335693359,0.6266456842422485,0.470245540199139
llama3.1-8b_zero-shot_bg_train-time-info_v1,43,1.317966136191862,4.9591064453125e-05,0.7546398838361105,0.5874206389347598
qwen2.5-7b_few-shot_v1,43,1.399073928212544,5.412101745605469e-05,0.7140947381655375,0.4943400233617596
llama3.3-70b_few-shot_bg_v1,43,1.6509584501043588,0.0001511573791503,0.5909350911776224,0.3653814511306223
llama3.1-8b_few-shot_v1,43,1.291454255992256,4.267692565917969e-05,0.747531513373057,0.492952506134439
llama3.1-8b_few-shot_bg_train-time-info_v1,43,1.2640195015663738,3.027915954589844e-05,0.7789896726608276,0.586443907103533
qwen2.5-7b_few-shot_bg_v1,43,1.3857180301198893,4.744529724121094e-05,0.646178791920344,0.4945024532038129
qwen2.5-7b_few-shot_bg_train-time-info_v1,43,1.340578409135726,4.1961669921875e-05,0.7517579595247904,0.5463744957226343
qwen2.5-7b_few-shot_bg_test-time-info_v1,43,1.350685630073004,4.029273986816406e-05,0.7363895376523336,0.5322643075780752
llama3.3-70b_few-shot_bg_test-time-info_v1,43,1.5627128527832987,0.0001091957092285,0.7349316279093424,0.5373743853000066
llama3.1-8b_zero-shot_bg_test-time-info_v1,43,1.3342244769146552,4.649162292480469e-05,0.7791555722554525,0.576060770944536
llama3.3-70b_zero-shot_bg_test-time-info_v1,43,1.6672471509201947,0.0001277923583984,0.719734807809194,0.5235933186720454
llama3.3-70b_zero-shot_v1,43,1.5428234166531003,0.0001096725463867,0.5925049384435018,0.4041010447243863
llama3.1-8b_few-shot_bg_v1,43,1.3170453868061442,3.337860107421875e-05,0.7739014426867167,0.5166349035643699
llama3.1-8b_few-shot_bg_test-time-info_v1,43,1.2318571805513945,2.9087066650390625e-05,0.7891334891319275,0.5392026969012758
qwen2.5-7b_zero-shot_bg_train-time-info_v1,44,1.3570726281396754,4.482269287109375e-05,0.7497101624806722,0.527231985461243
qwen2.5-32b_zero-shot_bg_test-time-info_v1,44,1.3445442837072188,3.695487976074219e-05,0.7762362758318583,0.4741856220857426
qwen2.5-7b_zero-shot_bg_test-time-info_v1,44,1.3902552372226844,4.458427429199219e-05,0.7503997286160787,0.4827027089338502
llama3.3-70b_zero-shot_bg_v1,44,1.7780745046219244,0.0002171993255615,0.4279626707235972,0.3282303009018227
qwen2.5-32b_zero-shot_v1,44,1.5808911257211944,9.5367431640625e-05,0.5935880343119303,0.4173268375182258
qwen2.5-32b_few-shot_bg_train-time-info_v1,44,1.3384560231922291,3.838539123535156e-05,0.7754026651382446,0.5225871124450079
llama3.3-70b_few-shot_bg_train-time-info_v1,44,1.5797578818353697,0.0001201629638671,0.7512742280960083,0.5271798242915346
llama3.3-70b_zero-shot_bg_train-time-info_v1,44,1.6412519069171705,0.0001389980316162,0.6919950048128763,0.5038276561118737
qwen2.5-32b_few-shot_bg_test-time-info_v1,44,1.3280399325247902,3.504753112792969e-05,0.7610696951548258,0.5113872516682835
qwen2.5-7b_zero-shot_bg_v1,44,1.38674875183654,5.14984130859375e-05,0.6816078821818033,0.4998135477561506
qwen2.5-32b_zero-shot_bg_train-time-info_v1,44,1.338790593168579,3.671646118164063e-05,0.7710253596305847,0.492670011291339
llama3.1-8b_zero-shot_bg_v1,44,1.3841441549409343,4.482269287109375e-05,0.6629075407981873,0.4826218171590078
llama3.1-8b_zero-shot_v1,44,1.5010073916075344,8.416175842285156e-05,0.6559315721193949,0.5032629118541946
llama3.3-70b_few-shot_v1,44,1.5415498836692194,9.441375732421876e-05,0.5784371097882589,0.4435948886909056
qwen2.5-32b_few-shot_bg_v1,44,1.4601957170811657,5.507469177246094e-05,0.7285506923993429,0.529778063726081
qwen2.5-32b_few-shot_v1,44,1.501450234916648,8.177757263183594e-05,0.6459193030993143,0.4477791263128046
qwen2.5-32b_zero-shot_bg_v1,44,1.4384790523832898,6.127357482910156e-05,0.7470043301582336,0.5199273320346325
qwen2.5-7b_zero-shot_v1,44,1.5469597802190262,9.775161743164062e-05,0.6266456842422485,0.470245540199139
llama3.1-8b_zero-shot_bg_train-time-info_v1,44,1.3042195431547403,4.100799560546875e-05,0.7546398838361105,0.5874206389347598
qwen2.5-7b_few-shot_v1,44,1.3973951008966687,4.9591064453125e-05,0.7140947381655375,0.4943400233617596
llama3.3-70b_few-shot_bg_v1,44,1.6458924944225566,0.0001635551452636,0.5909350911776224,0.3653814511306223
llama3.1-8b_few-shot_v1,44,1.2977580511112272,3.933906555175781e-05,0.747531513373057,0.492952506134439
llama3.1-8b_few-shot_bg_train-time-info_v1,44,1.2484551559071564,2.813339233398437e-05,0.7789896726608276,0.586443907103533
qwen2.5-7b_few-shot_bg_v1,44,1.3810478698135844,5.030632019042969e-05,0.646178791920344,0.4945024532038129
qwen2.5-7b_few-shot_bg_train-time-info_v1,44,1.3401665131417753,4.0531158447265625e-05,0.7517579595247904,0.5463744957226343
qwen2.5-7b_few-shot_bg_test-time-info_v1,44,1.3474463595563733,3.9577484130859375e-05,0.7363895376523336,0.5322643075780752
llama3.3-70b_few-shot_bg_test-time-info_v1,44,1.5534993650227742,0.0001182556152343,0.7349316279093424,0.5373743853000066
llama3.1-8b_zero-shot_bg_test-time-info_v1,44,1.3281318138374458,3.838539123535156e-05,0.7791555722554525,0.576060770944536
llama3.3-70b_zero-shot_bg_test-time-info_v1,44,1.6592578219039964,0.0001366138458251,0.719734807809194,0.5235933186720454
llama3.3-70b_zero-shot_v1,44,1.5224206770306623,9.465217590332033e-05,0.5925049384435018,0.4041010447243863
llama3.1-8b_few-shot_bg_v1,44,1.3088542927416678,3.314018249511719e-05,0.7739014426867167,0.5166349035643699
llama3.1-8b_few-shot_bg_test-time-info_v1,44,1.230231699315305,2.7179718017578125e-05,0.7891334891319275,0.5392026969012758
qwen2.5-7b_zero-shot_bg_train-time-info_v1,45,1.3164310473442464,3.981590270996094e-05,0.7497101624806722,0.527231985461243
qwen2.5-32b_zero-shot_bg_test-time-info_v1,45,1.3200388745889051,3.528594970703125e-05,0.7762362758318583,0.4741856220857426
qwen2.5-7b_zero-shot_bg_test-time-info_v1,45,1.3429102446829724,3.910064697265625e-05,0.7503997286160787,0.4827027089338502
llama3.3-70b_zero-shot_bg_v1,45,1.776826447971744,0.0002055168151855,0.4279626707235972,0.3282303009018227
qwen2.5-32b_zero-shot_v1,45,1.5959914403678503,0.0001029968261718,0.5935880343119303,0.4173268375182258
qwen2.5-32b_few-shot_bg_train-time-info_v1,45,1.3131030444405645,3.767013549804688e-05,0.7754026651382446,0.5225871124450079
llama3.3-70b_few-shot_bg_train-time-info_v1,45,1.555550663450586,0.0001106262207031,0.7512742280960083,0.5271798242915346
llama3.3-70b_zero-shot_bg_train-time-info_v1,45,1.627670183153456,0.0001301765441894,0.6919950048128763,0.5038276561118737
qwen2.5-32b_few-shot_bg_test-time-info_v1,45,1.2877574396737883,3.457069396972656e-05,0.7610696951548258,0.5113872516682835
qwen2.5-7b_zero-shot_bg_v1,45,1.3845225813575197,4.601478576660156e-05,0.6816078821818033,0.4998135477561506
qwen2.5-32b_zero-shot_bg_train-time-info_v1,45,1.3011474644624186,3.504753112792969e-05,0.7710253596305847,0.492670011291339
llama3.1-8b_zero-shot_bg_v1,45,1.3905602163460304,4.38690185546875e-05,0.6629075407981873,0.4826218171590078
llama3.1-8b_zero-shot_v1,45,1.516335397613196,9.322166442871094e-05,0.6559315721193949,0.5032629118541946
llama3.3-70b_few-shot_v1,45,1.5535469379457272,0.0001032352447509,0.5784371097882589,0.4435948886909056
qwen2.5-32b_few-shot_bg_v1,45,1.4634472482451708,5.197525024414063e-05,0.7285506923993429,0.529778063726081
qwen2.5-32b_few-shot_v1,45,1.5067528916592638,9.03606414794922e-05,0.6459193030993143,0.4477791263128046
qwen2.5-32b_zero-shot_bg_v1,45,1.4457238149728455,5.626678466796875e-05,0.7470043301582336,0.5199273320346325
qwen2.5-7b_zero-shot_v1,45,1.5596861957116848,0.000108003616333,0.6266456842422485,0.470245540199139
llama3.1-8b_zero-shot_bg_train-time-info_v1,45,1.2949964311344146,4.363059997558594e-05,0.7546398838361105,0.5874206389347598
qwen2.5-7b_few-shot_v1,45,1.3866788567487722,5.221366882324219e-05,0.7140947381655375,0.4943400233617596
llama3.3-70b_few-shot_bg_v1,45,1.6431077474726286,0.0001540184020996,0.5909350911776224,0.3653814511306223
llama3.1-8b_few-shot_v1,45,1.2903992910899813,4.1961669921875e-05,0.747531513373057,0.492952506134439
llama3.1-8b_few-shot_bg_train-time-info_v1,45,1.2153119936773378,2.574920654296875e-05,0.7789896726608276,0.586443907103533
qwen2.5-7b_few-shot_bg_v1,45,1.368064883501191,4.529953002929688e-05,0.646178791920344,0.4945024532038129
qwen2.5-7b_few-shot_bg_train-time-info_v1,45,1.2881524335275096,3.5762786865234375e-05,0.7517579595247904,0.5463744957226343
qwen2.5-7b_few-shot_bg_test-time-info_v1,45,1.3037901269430885,3.457069396972656e-05,0.7363895376523336,0.5322643075780752
llama3.3-70b_few-shot_bg_test-time-info_v1,45,1.5370507141076477,0.0001089572906494,0.7349316279093424,0.5373743853000066
llama3.1-8b_zero-shot_bg_test-time-info_v1,45,1.2937421032108072,4.100799560546875e-05,0.7791555722554525,0.576060770944536
llama3.3-70b_zero-shot_bg_test-time-info_v1,45,1.6442202267911952,0.0001280307769775,0.719734807809194,0.5235933186720454
llama3.3-70b_zero-shot_v1,45,1.5378013124281988,0.0001049041748046,0.5925049384435018,0.4041010447243863
llama3.1-8b_few-shot_bg_v1,45,1.3174753481440624,3.1948089599609375e-05,0.7739014426867167,0.5166349035643699
llama3.1-8b_few-shot_bg_test-time-info_v1,45,1.1825373550316594,2.431869506835937e-05,0.7891334891319275,0.5392026969012758
qwen2.5-7b_zero-shot_bg_train-time-info_v1,46,1.334575420274959,4.458427429199219e-05,0.7497101624806722,0.527231985461243
qwen2.5-32b_zero-shot_bg_test-time-info_v1,46,1.327527390487933,3.600120544433594e-05,0.7762362758318583,0.4741856220857426
qwen2.5-7b_zero-shot_bg_test-time-info_v1,46,1.3599082004170735,4.363059997558594e-05,0.7503997286160787,0.4827027089338502
llama3.3-70b_zero-shot_bg_v1,46,1.8030573994858217,0.0002195835113525,0.4279626707235972,0.3282303009018227
qwen2.5-32b_zero-shot_v1,46,1.5667577250368183,8.845329284667969e-05,0.5935880343119303,0.4173268375182258
qwen2.5-32b_few-shot_bg_train-time-info_v1,46,1.3204694797287726,3.647804260253906e-05,0.7754026651382446,0.5225871124450079
llama3.3-70b_few-shot_bg_train-time-info_v1,46,1.623895799638966,0.0001235008239746,0.7512742280960083,0.5271798242915346
llama3.3-70b_zero-shot_bg_train-time-info_v1,46,1.6792255227028092,0.0001428127288818,0.6919950048128763,0.5038276561118737
qwen2.5-32b_few-shot_bg_test-time-info_v1,46,1.302090280214178,3.218650817871094e-05,0.7610696951548258,0.5113872516682835
qwen2.5-7b_zero-shot_bg_v1,46,1.3724364580950996,4.982948303222656e-05,0.6816078821818033,0.4998135477561506
qwen2.5-32b_zero-shot_bg_train-time-info_v1,46,1.317700961167671,3.552436828613281e-05,0.7710253596305847,0.492670011291339
llama3.1-8b_zero-shot_bg_v1,46,1.3579164183735095,4.148483276367188e-05,0.6629075407981873,0.4826218171590078
llama3.1-8b_zero-shot_v1,46,1.4958067420424612,7.915496826171875e-05,0.6559315721193949,0.5032629118541946
llama3.3-70b_few-shot_v1,46,1.5338958543371457,8.916854858398438e-05,0.5784371097882589,0.4435948886909056
qwen2.5-32b_few-shot_bg_v1,46,1.461406049995699,5.364418029785156e-05,0.7285506923993429,0.529778063726081
qwen2.5-32b_few-shot_v1,46,1.4900888928948235,7.724761962890625e-05,0.6459193030993143,0.4477791263128046
qwen2.5-32b_zero-shot_bg_v1,46,1.4485830910989674,6.103515625e-05,0.7470043301582336,0.5199273320346325
qwen2.5-7b_zero-shot_v1,46,1.5375068075192804,9.226799011230467e-05,0.6266456842422485,0.470245540199139
llama3.1-8b_zero-shot_bg_train-time-info_v1,46,1.2712040776134987,3.743171691894531e-05,0.7546398838361105,0.5874206389347598
qwen2.5-7b_few-shot_v1,46,1.393143858863267,4.7206878662109375e-05,0.7140947381655375,0.4943400233617596
llama3.3-70b_few-shot_bg_v1,46,1.6827432223728849,0.0001666545867919,0.5909350911776224,0.3653814511306223
llama3.1-8b_few-shot_v1,46,1.2967684354608031,3.9577484130859375e-05,0.747531513373057,0.492952506134439
llama3.1-8b_few-shot_bg_train-time-info_v1,46,1.2182359794311917,2.5987625122070312e-05,0.7789896726608276,0.586443907103533
qwen2.5-7b_few-shot_bg_v1,46,1.3592432560207188,4.863739013671875e-05,0.646178791920344,0.4945024532038129
qwen2.5-7b_few-shot_bg_train-time-info_v1,46,1.3036654972781696,3.838539123535156e-05,0.7517579595247904,0.5463744957226343
qwen2.5-7b_few-shot_bg_test-time-info_v1,46,1.3015933506398212,3.743171691894531e-05,0.7363895376523336,0.5322643075780752
llama3.3-70b_few-shot_bg_test-time-info_v1,46,1.5942866767358763,0.0001215934753417,0.7349316279093424,0.5373743853000066
llama3.1-8b_zero-shot_bg_test-time-info_v1,46,1.285838937684658,3.457069396972656e-05,0.7791555722554525,0.576060770944536
llama3.3-70b_zero-shot_bg_test-time-info_v1,46,1.696374002178019,0.0001404285430908,0.719734807809194,0.5235933186720454
llama3.3-70b_zero-shot_v1,46,1.511861137225624,8.988380432128906e-05,0.5925049384435018,0.4041010447243863
llama3.1-8b_few-shot_bg_v1,46,1.295617464384745,3.1948089599609375e-05,0.7739014426867167,0.5166349035643699
llama3.1-8b_few-shot_bg_test-time-info_v1,46,1.1838288891839153,2.47955322265625e-05,0.7891334891319275,0.5392026969012758
