template_id,split,question_idx,prediction,label
llm_goals_75,test,0,0.5764594674110413,0.0759730864369293
llm_goals_75,test,1,0.5764182806015015,0.8120127334654941
llm_goals_75,test,2,0.5765257477760315,0.114709127325878
llm_goals_75,test,3,0.5764471888542175,0.1496870071208148
llm_goals_75,test,4,0.5764058232307434,0.321632167436288
llm_goals_75,test,5,0.5765001177787781,0.0308084020440725
llm_goals_75,test,6,0.5765475630760193,0.1843175427563358
llm_goals_75,test,7,0.5764830112457275,0.0324508518984932
llm_goals_75,test,8,0.5764293670654297,0.5189439655904322
llm_goals_75,test,9,0.5765437483787537,0.1502023728460554
llm_goals_75,test,10,0.5764468908309937,0.7657257241793485
llm_goals_75,test,11,0.576432466506958,0.8035850534204568
llm_goals_75,test,12,0.5765049457550049,0.7307072346740583
llm_goals_75,test,13,0.5764868855476379,0.048717541539827
llm_goals_75,test,14,0.5764148235321045,0.4316714754134473
llm_goals_75,test,15,0.5764654278755188,0.3935335107130105
llm_goals_75,test,16,0.5764576196670532,0.6144967070154126
llm_goals_75,test,17,0.5764846205711365,0.4488809036296748
llm_goals_75,test,18,0.5764809250831604,0.3920271677291012
llm_goals_75,test,19,0.5765933990478516,0.7810532991351343
llm_goals_75,test,20,0.5764913558959961,0.5443958675997833
llm_goals_75,test,21,0.5764907598495483,0.1604437012338976
llm_goals_75,test,22,0.5764517188072205,0.856696781503626
llm_goals_75,test,23,0.5764756202697754,0.590876283737086
llm_goals_75,test,24,0.5763877630233765,0.4156776627984636
llm_goals_75,test,25,0.5765307545661926,0.5957841371692396
llm_goals_75,test,26,0.576410710811615,0.3752723036244256
llm_goals_75,test,27,0.5765079259872437,0.7866846353759014
llm_goals_75,test,28,0.5764820575714111,0.1838073487826972
llm_goals_75,test,29,0.5765177011489868,0.853944837222159
llm_goals_75,test,30,0.5765255093574524,0.5992601606803653
llm_goals_75,test,31,0.5764937400817871,0.0743179753131944
llm_goals_75,test,32,0.5764700174331665,0.8015013965908157
llm_goals_75,test,33,0.5765354633331299,0.3498422912836759
llm_goals_75,test,34,0.5764445066452026,0.6692496458902099
llm_goals_75,test,35,0.576420783996582,0.134422492314073
llm_goals_75,test,36,0.5763997435569763,0.6683886675406214
llm_goals_75,test,37,0.5765587687492371,0.7419528346018889
llm_goals_75,test,38,0.5765157341957092,0.6995551035614008
llm_goals_75,test,39,0.5765278935432434,0.4983520000991506
llm_goals_75,test,40,0.576399028301239,0.8622382893733471
llm_goals_75,test,41,0.5764420628547668,0.1026909531781266
llm_goals_75,test,42,0.5764702558517456,0.0240063792397545
llm_goals_75,test,43,0.5764671564102173,0.1143049422953708
llm_goals_75,test,44,0.5764235854148865,0.6884653626060131
llm_goals_75,test,45,0.576416552066803,0.1059329319681491
llm_goals_75,test,46,0.5764099359512329,0.405588044943207
llm_goals_75,test,47,0.5764219760894775,0.1393457779753145
llm_goals_75,test,48,0.5763987898826599,0.3670858714001939
llm_goals_75,test,49,0.5764244198799133,0.8052537448889737
llm_goals_78,test,0,0.685570478439331,0.710685793210421
llm_goals_78,test,1,0.7781220078468323,0.4521352090792044
llm_goals_78,test,2,0.6669651865959167,0.6984783435600235
llm_goals_78,test,3,0.3097321093082428,0.2113029164438998
llm_goals_78,test,4,0.23886877298355103,0.59362413330766
llm_goals_78,test,5,0.4119967222213745,0.2973601904856865
llm_goals_78,test,6,0.3826577663421631,0.4911861319335979
llm_goals_78,test,7,0.704980731010437,0.5109292120432304
llm_goals_78,test,8,0.18632961809635162,0.0214058927819057
llm_goals_78,test,9,0.6371378898620605,0.4561729519761929
llm_goals_78,test,10,0.22166089713573456,0.0679189625299845
llm_goals_78,test,11,0.44660264253616333,0.0515682065315942
llm_goals_78,test,12,0.4951256215572357,0.2832565853408079
llm_goals_78,test,13,0.5546984672546387,0.3661356287525266
llm_goals_78,test,14,0.3680918216705322,0.5580731095047592
llm_goals_78,test,15,0.20757539570331573,0.3539146882613382
llm_goals_78,test,16,0.39565908908843994,0.5198975918853231
llm_goals_78,test,17,0.6756800413131714,0.5974518854580513
llm_goals_78,test,18,0.3612592816352844,0.2539582988569515
llm_goals_78,test,19,0.8869765996932983,0.7818671446797811
llm_goals_78,test,20,0.20108766853809357,0.0817902870466741
llm_goals_78,test,21,0.41475048661231995,0.5313736848686219
llm_goals_78,test,22,0.7504453063011169,0.0578457193154468
llm_goals_78,test,23,0.7947657704353333,0.0588067732422271
llm_goals_78,test,24,0.4469620883464813,0.504620490567031
llm_goals_78,test,25,0.2734173536300659,0.1421301439321696
llm_goals_78,test,26,0.2839566469192505,0.4043160008204248
llm_goals_78,test,27,0.78782057762146,0.1341547030792127
llm_goals_78,test,28,0.33753859996795654,0.585613528389559
llm_goals_78,test,29,0.4742134213447571,0.8037635629119642
llm_goals_78,test,30,0.2272164672613144,0.2785354206868534
llm_goals_78,test,31,0.4348613917827606,0.5074585147112473
llm_goals_78,test,32,0.7833913564682007,0.1365325187594412
llm_goals_78,test,33,0.36304613947868347,0.559776178990876
llm_goals_78,test,34,0.20806725323200226,0.3923464392379233
llm_goals_78,test,35,0.7003437876701355,0.8694947336510349
llm_goals_78,test,36,0.31591323018074036,0.3406365028138333
llm_goals_78,test,37,0.5796717405319214,0.7407991807699866
llm_goals_78,test,38,0.5536725521087646,0.5783193820848918
llm_goals_78,test,39,0.5155509114265442,0.2552456607848852
llm_goals_78,test,40,0.3894805908203125,0.0543816822443674
llm_goals_78,test,41,0.39214783906936646,0.5111749720190839
llm_goals_78,test,42,0.7157011032104492,0.6373460300781935
llm_goals_78,test,43,0.450996071100235,0.5824615770869916
llm_goals_78,test,44,0.1822628229856491,0.0725045867582824
llm_goals_78,test,45,0.5055932402610779,0.2055844845637347
llm_goals_78,test,46,0.7897699475288391,0.3443727538373435
llm_goals_78,test,47,0.42954665422439575,0.5137599661573641
llm_goals_78,test,48,0.3249548077583313,0.2386868392211005
llm_goals_78,test,49,0.71882164478302,0.1092249314217729
llm_goals_53,test,0,0.21629486978054047,0.0313037944392233
llm_goals_53,test,1,0.21625912189483643,0.8240056677766623
llm_goals_53,test,2,0.2163943201303482,0.1449565288533016
llm_goals_53,test,3,0.2174430638551712,0.3918474727299789
llm_goals_53,test,4,0.21614214777946472,0.8286818229760559
llm_goals_53,test,5,0.21671460568904877,0.0496580173268359
llm_goals_53,test,6,0.21676750481128693,0.0987357123371914
llm_goals_53,test,7,0.21645858883857727,0.0188377606185046
llm_goals_53,test,8,0.2164820283651352,0.4663370658319237
llm_goals_53,test,9,0.2182505875825882,0.0353320635735392
llm_goals_53,test,10,0.21666614711284637,0.2465247528295183
llm_goals_53,test,11,0.21662692725658417,0.3482216267575191
llm_goals_53,test,12,0.2166503667831421,0.252877033485255
llm_goals_53,test,13,0.21650098264217377,0.1153444147006914
llm_goals_53,test,14,0.23207563161849976,0.3868210483124377
llm_goals_53,test,15,0.21729399263858795,0.3705090883623566
llm_goals_53,test,16,0.21736913919448853,0.3407126088990169
llm_goals_53,test,17,0.216430202126503,0.7543820151744927
llm_goals_53,test,18,0.21819399297237396,0.3322955201354975
llm_goals_53,test,19,0.21673591434955597,0.2261316448538241
llm_goals_53,test,20,0.22701579332351685,0.7343425814116562
llm_goals_53,test,21,0.22546391189098358,0.1601759070107346
llm_goals_53,test,22,0.21665515005588531,0.4190051893758809
llm_goals_53,test,23,0.24210397899150848,0.2354442333363782
llm_goals_53,test,24,0.21831578016281128,0.9719922543639756
llm_goals_53,test,25,0.21710070967674255,0.3289260309913144
llm_goals_53,test,26,0.2164926826953888,0.5781819989075234
llm_goals_53,test,27,0.21671080589294434,0.790876622992061
llm_goals_53,test,28,0.2191752791404724,0.2747068584160578
llm_goals_53,test,29,0.2166440486907959,0.796844478423965
llm_goals_53,test,30,0.21630601584911346,0.3573432990729852
llm_goals_53,test,31,0.2303714007139206,0.1489779797461384
llm_goals_53,test,32,0.21673095226287842,0.4788477745757265
llm_goals_53,test,33,0.2171677201986313,0.967617631882366
llm_goals_53,test,34,0.2169901579618454,0.2252558680565209
llm_goals_53,test,35,0.21885566413402557,0.2300146581986259
llm_goals_53,test,36,0.21725967526435852,0.2311446011459964
llm_goals_53,test,37,0.21655744314193726,0.144409781340655
llm_goals_53,test,38,0.21709708869457245,0.705274211964156
llm_goals_53,test,39,0.24975110590457916,0.5921281985506138
llm_goals_53,test,40,0.24747896194458008,0.2443004460536172
llm_goals_53,test,41,0.21810075640678406,0.061497045516474
llm_goals_53,test,42,0.2162218540906906,0.1570495895565415
llm_goals_53,test,43,0.21629305183887482,0.0641649100604756
llm_goals_53,test,44,0.21895916759967804,0.8074959504603542
llm_goals_53,test,45,0.21760223805904388,0.1813221143349916
llm_goals_53,test,46,0.21655644476413727,0.2352295334667747
llm_goals_53,test,47,0.21740257740020752,0.820244135045574
llm_goals_53,test,48,0.21655991673469543,0.4512817747836695
llm_goals_53,test,49,0.21636109054088593,0.7022675298006026
llm_goals_305,test,0,0.4540817439556122,0.0438240495208372
llm_goals_305,test,1,0.811062753200531,0.1352509650243347
llm_goals_305,test,2,0.44436463713645935,0.1723536643548477
llm_goals_305,test,3,0.7720192074775696,0.250094549889143
llm_goals_305,test,4,0.8135057091712952,0.8419536365764244
llm_goals_305,test,5,0.45779335498809814,0.1913177614094214
llm_goals_305,test,6,0.45099884271621704,0.3723916849874114
llm_goals_305,test,7,0.4403432011604309,0.3337485526549648
llm_goals_305,test,8,0.8250359296798706,0.5824263052609597
llm_goals_305,test,9,0.45702818036079407,0.620184539039364
llm_goals_305,test,10,0.8106756210327148,0.494000537986959
llm_goals_305,test,11,0.442303866147995,0.0482253546984711
llm_goals_305,test,12,0.43594640493392944,0.0565816635108951
llm_goals_305,test,13,0.44924646615982056,0.1136796894182332
llm_goals_305,test,14,0.8033967614173889,0.6102884358627442
llm_goals_305,test,15,0.8205015659332275,0.8302422164176205
llm_goals_305,test,16,0.8157443404197693,0.589511050327059
llm_goals_305,test,17,0.47121310234069824,0.2622011524235305
llm_goals_305,test,18,0.8089243769645691,0.3737828315077605
llm_goals_305,test,19,0.49204593896865845,0.0401465973617238
llm_goals_305,test,20,0.8251268863677979,0.8971526617856483
llm_goals_305,test,21,0.4461861550807953,0.513862561708882
llm_goals_305,test,22,0.47114238142967224,0.6617399492970566
llm_goals_305,test,23,0.44045743346214294,0.3090300849059566
llm_goals_305,test,24,0.8076643347740173,0.4310886044152408
llm_goals_305,test,25,0.782927930355072,0.7346082271960779
llm_goals_305,test,26,0.8177862763404846,0.7784622217319951
llm_goals_305,test,27,0.81167072057724,0.2285268892180965
llm_goals_305,test,28,0.7697176337242126,0.1970890988987535
llm_goals_305,test,29,0.5687539577484131,0.0543679274789402
llm_goals_305,test,30,0.4588971436023712,0.8325365217586828
llm_goals_305,test,31,0.45048147439956665,0.5078524819974076
llm_goals_305,test,32,0.5819061398506165,0.2061603354174771
llm_goals_305,test,33,0.7679848670959473,0.6039954962603894
llm_goals_305,test,34,0.7981875538825989,0.5754865922825898
llm_goals_305,test,35,0.5279922485351562,0.1664152616423
llm_goals_305,test,36,0.8123044371604919,0.7880486291444165
llm_goals_305,test,37,0.4437701106071472,0.03605078558617
llm_goals_305,test,38,0.43729928135871887,0.0575570687729377
llm_goals_305,test,39,0.4348500967025757,0.4297583044645929
llm_goals_305,test,40,0.44593381881713867,0.1915445959084499
llm_goals_305,test,41,0.4548766314983368,0.6536953532364804
llm_goals_305,test,42,0.4572462737560272,0.122527873906398
llm_goals_305,test,43,0.45449915528297424,0.1711906255032107
llm_goals_305,test,44,0.8216221928596497,0.8371975999741996
llm_goals_305,test,45,0.7746105790138245,0.3836914756409174
llm_goals_305,test,46,0.442891389131546,0.0543049670678201
llm_goals_305,test,47,0.7397066950798035,0.5245484205428286
llm_goals_305,test,48,0.7973271012306213,0.5969765977742552
llm_goals_305,test,49,0.47441166639328003,0.2351952691938564
llm_goals_81,test,0,0.527897834777832,0.2060992725380174
llm_goals_81,test,1,0.4301759898662567,0.1581010259271764
llm_goals_81,test,2,0.5819482803344727,0.6899052301996583
llm_goals_81,test,3,0.5916239023208618,0.7805136989347726
llm_goals_81,test,4,0.4963907301425934,0.0972643506873081
llm_goals_81,test,5,0.10734052956104279,0.3089356066572108
llm_goals_81,test,6,0.23369574546813965,0.0383605574042886
llm_goals_81,test,7,0.5380614399909973,0.3161536190910042
llm_goals_81,test,8,0.5601508021354675,0.5126683642741202
llm_goals_81,test,9,0.3937404453754425,0.1286837550045714
llm_goals_81,test,10,0.5098972916603088,0.1256029490894943
llm_goals_81,test,11,0.5514655113220215,0.408772132619307
llm_goals_81,test,12,0.3940281867980957,0.1214277758013974
llm_goals_81,test,13,0.515029788017273,0.0409721836992895
llm_goals_81,test,14,0.33587780594825745,0.0827685330888261
llm_goals_81,test,15,0.3958820104598999,0.224678660518271
llm_goals_81,test,16,0.49649035930633545,0.0872837265533343
llm_goals_81,test,17,0.4937783479690552,0.2837028272857018
llm_goals_81,test,18,0.4553162455558777,0.1193591553103013
llm_goals_81,test,19,0.3059574365615845,0.4952181327203863
llm_goals_81,test,20,0.484934002161026,0.2564665599863756
llm_goals_81,test,21,0.4569931924343109,0.2041083994609424
llm_goals_81,test,22,0.5162301063537598,0.3119739178030907
llm_goals_81,test,23,0.4672936797142029,0.0994638293022036
llm_goals_81,test,24,0.4814397394657135,0.1575467450844306
llm_goals_81,test,25,0.5250041484832764,0.0291125673317165
llm_goals_81,test,26,0.41042348742485046,0.0869071714890548
llm_goals_81,test,27,0.531340479850769,0.1863589895989203
llm_goals_81,test,28,0.5452456474304199,0.119977738469712
llm_goals_81,test,29,0.5249540209770203,0.4410572612602489
llm_goals_81,test,30,0.3000158369541168,0.0555125882985712
llm_goals_81,test,31,0.11245514452457428,0.3486004983652399
llm_goals_81,test,32,0.5585693717002869,0.189941914212834
llm_goals_81,test,33,0.3278777599334717,0.1890390776737304
llm_goals_81,test,34,0.36146280169487,0.1328217427635933
llm_goals_81,test,35,0.5839500427246094,0.1343397356978365
llm_goals_81,test,36,0.31359347701072693,0.0899701336915993
llm_goals_81,test,37,0.23708194494247437,0.6478431799513802
llm_goals_81,test,38,0.4645299017429352,0.7351879424598803
llm_goals_81,test,39,0.5729579925537109,0.0885373562628188
llm_goals_81,test,40,0.5437416434288025,0.3150516869571108
llm_goals_81,test,41,0.45377591252326965,0.0498826197855212
llm_goals_81,test,42,0.4922129511833191,0.1381899328653333
llm_goals_81,test,43,0.36602121591567993,0.3536647169335493
llm_goals_81,test,44,0.5014702677726746,0.4857917971856136
llm_goals_81,test,45,0.5456408262252808,0.3921515914618259
llm_goals_81,test,46,0.5117787718772888,0.1179764264181158
llm_goals_81,test,47,0.4817036986351013,0.2786576050295211
llm_goals_81,test,48,0.46751803159713745,0.0726556891905063
llm_goals_81,test,49,0.5764134526252747,0.3912461690418435
llm_goals_133,test,0,0.5565894246101379,0.4440529323069227
llm_goals_133,test,1,0.5190299153327942,0.3955848959165448
llm_goals_133,test,2,0.5296867489814758,0.5130030466159122
llm_goals_133,test,3,0.5601179599761963,0.220245744335209
llm_goals_133,test,4,0.5391886234283447,0.5879895213269322
llm_goals_133,test,5,0.7836454510688782,0.8588037554744828
llm_goals_133,test,6,0.5637671947479248,0.8322442189227136
llm_goals_133,test,7,0.5139803886413574,0.6588760518065184
llm_goals_133,test,8,0.5205567479133606,0.5253890103378467
llm_goals_133,test,9,0.5164666175842285,0.8170254264656348
llm_goals_133,test,10,0.5110753178596497,0.4608975636274737
llm_goals_133,test,11,0.5238641500473022,0.2002942757447116
llm_goals_133,test,12,0.5392962694168091,0.4022389740133304
llm_goals_133,test,13,0.5340476632118225,0.8516519653230656
llm_goals_133,test,14,0.5150943398475647,0.0158444788418117
llm_goals_133,test,15,0.5231155157089233,0.5978365001716675
llm_goals_133,test,16,0.50153648853302,0.0529535109452945
llm_goals_133,test,17,0.5056248903274536,0.2556263204518722
llm_goals_133,test,18,0.5042186379432678,0.0167466717766424
llm_goals_133,test,19,0.7794337868690491,0.0944244428527934
llm_goals_133,test,20,0.5345067381858826,0.6914976263434149
llm_goals_133,test,21,0.5748486518859863,0.8436048633272258
llm_goals_133,test,22,0.509315550327301,0.2868007235124719
llm_goals_133,test,23,0.5156517624855042,0.1160262834732456
llm_goals_133,test,24,0.5165832042694092,0.0154812210183983
llm_goals_133,test,25,0.5276398062705994,0.3119789757098087
llm_goals_133,test,26,0.5123895406723022,0.7929420521534107
llm_goals_133,test,27,0.5249568223953247,0.2037716603127804
llm_goals_133,test,28,0.4913501739501953,0.3118772754734958
llm_goals_133,test,29,0.7720724940299988,0.1808521166351184
llm_goals_133,test,30,0.5661640167236328,0.5045142070808941
llm_goals_133,test,31,0.7722689509391785,0.8498369503935175
llm_goals_133,test,32,0.5005139708518982,0.2694815541177057
llm_goals_133,test,33,0.5315613150596619,0.0177588567715867
llm_goals_133,test,34,0.5199224948883057,0.445395356923409
llm_goals_133,test,35,0.5176671743392944,0.4995371449295338
llm_goals_133,test,36,0.5761939287185669,0.4823747071618506
llm_goals_133,test,37,0.7857850193977356,0.0266815206082507
llm_goals_133,test,38,0.5714889168739319,0.0874820007739646
llm_goals_133,test,39,0.5077097415924072,0.3366422826024648
llm_goals_133,test,40,0.5695205926895142,0.2329899847029727
llm_goals_133,test,41,0.5613350868225098,0.8571491285285516
llm_goals_133,test,42,0.5122777223587036,0.8193201391590431
llm_goals_133,test,43,0.5682833790779114,0.8422289427227235
llm_goals_133,test,44,0.5269477963447571,0.5294372908894646
llm_goals_133,test,45,0.5199509263038635,0.2588603231187393
llm_goals_133,test,46,0.5559961795806885,0.2337208499272645
llm_goals_133,test,47,0.6060922145843506,0.1166634693988454
llm_goals_133,test,48,0.49915021657943726,0.6138306242825586
llm_goals_133,test,49,0.5710381269454956,0.2806552214021111
llm_goals_157,test,0,0.33147644996643066,0.6355320308190959
llm_goals_157,test,1,0.3315044343471527,0.7424853921670768
llm_goals_157,test,2,0.3315238058567047,0.073314241900373
llm_goals_157,test,3,0.33148977160453796,0.4675850432855137
llm_goals_157,test,4,0.3315032720565796,0.5458627288071527
llm_goals_157,test,5,0.3314991891384125,0.0771889198722071
llm_goals_157,test,6,0.33154579997062683,0.0818153292956077
llm_goals_157,test,7,0.33148935437202454,0.0574362851972565
llm_goals_157,test,8,0.3314805030822754,0.612650525482466
llm_goals_157,test,9,0.3315492868423462,0.025148981997583
llm_goals_157,test,10,0.33150342106819153,0.075973919657683
llm_goals_157,test,11,0.3315151333808899,0.1464687737403695
llm_goals_157,test,12,0.331525981426239,0.7268917489370714
llm_goals_157,test,13,0.331528902053833,0.0531648084509613
llm_goals_157,test,14,0.3315170705318451,0.3299706213737123
llm_goals_157,test,15,0.3315272629261017,0.3043292296967997
llm_goals_157,test,16,0.3315025568008423,0.7463776357566768
llm_goals_157,test,17,0.33150261640548706,0.8521737469617978
llm_goals_157,test,18,0.3314843475818634,0.6376039881194869
llm_goals_157,test,19,0.3315512239933014,0.6077618725185671
llm_goals_157,test,20,0.33152154088020325,0.7242060188576479
llm_goals_157,test,21,0.3315242528915405,0.2051165554994685
llm_goals_157,test,22,0.33151525259017944,0.7995715125449558
llm_goals_157,test,23,0.331510066986084,0.1467023010082007
llm_goals_157,test,24,0.33149537444114685,0.3108830330026099
llm_goals_157,test,25,0.3315162658691406,0.1001087986592319
llm_goals_157,test,26,0.3314961791038513,0.4552169748909065
llm_goals_157,test,27,0.33153775334358215,0.8485816155817194
llm_goals_157,test,28,0.33150729537010193,0.2265082237706469
llm_goals_157,test,29,0.3315073847770691,0.6283476283012512
llm_goals_157,test,30,0.3314768671989441,0.2620283904445711
llm_goals_157,test,31,0.331530898809433,0.167952021094149
llm_goals_157,test,32,0.3314972519874573,0.0898426790762853
llm_goals_157,test,33,0.33151891827583313,0.5043942365008254
llm_goals_157,test,34,0.33151769638061523,0.0725868476993519
llm_goals_157,test,35,0.3314835727214813,0.1237829648198263
llm_goals_157,test,36,0.3315093517303467,0.1925318890375481
llm_goals_157,test,37,0.33151501417160034,0.6842419436738383
llm_goals_157,test,38,0.3314635157585144,0.691649855522258
llm_goals_157,test,39,0.3315393030643463,0.5264401359594252
llm_goals_157,test,40,0.3315134346485138,0.1777385641102212
llm_goals_157,test,41,0.331505686044693,0.0328183379619016
llm_goals_157,test,42,0.33151108026504517,0.1949198629220696
llm_goals_157,test,43,0.3315173089504242,0.135028229661103
llm_goals_157,test,44,0.33149486780166626,0.7258662504199943
llm_goals_157,test,45,0.33150890469551086,0.0935995604675028
llm_goals_157,test,46,0.33154118061065674,0.15418174898882
llm_goals_157,test,47,0.33151182532310486,0.3212815335896988
llm_goals_157,test,48,0.3314886689186096,0.2795304252191649
llm_goals_157,test,49,0.3315074145793915,0.7371862441831876
llm_goals_186,test,0,0.10480993986129761,0.0367510709036243
llm_goals_186,test,1,0.799481213092804,0.9589190483469818
llm_goals_186,test,2,0.10506560653448105,0.0667798014228945
llm_goals_186,test,3,0.8444809913635254,0.0340377111776925
llm_goals_186,test,4,0.4409710466861725,0.4002710909538108
llm_goals_186,test,5,0.7996722459793091,0.8817241785963961
llm_goals_186,test,6,0.8060673475265503,0.8798493094453552
llm_goals_186,test,7,0.10340645164251328,0.0443878504309986
llm_goals_186,test,8,0.429823100566864,0.4415592471567901
llm_goals_186,test,9,0.7829744815826416,0.8847759344734797
llm_goals_186,test,10,0.8317778706550598,0.8905032311770802
llm_goals_186,test,11,0.8763400912284851,0.704948600699243
llm_goals_186,test,12,0.21412284672260284,0.409597002882604
llm_goals_186,test,13,0.8732437491416931,0.8867287662434378
llm_goals_186,test,14,0.8105853199958801,0.8128145923588551
llm_goals_186,test,15,0.8955888152122498,0.4832503599366539
llm_goals_186,test,16,0.7979767322540283,0.8267478910426029
llm_goals_186,test,17,0.6789799332618713,0.957207678971151
llm_goals_186,test,18,0.8167623281478882,0.7610878868091969
llm_goals_186,test,19,0.8829553127288818,0.7872715543385574
llm_goals_186,test,20,0.44315198063850403,0.520514563098938
llm_goals_186,test,21,0.8195391893386841,0.9084513175350956
llm_goals_186,test,22,0.8736503720283508,0.9157385254478472
llm_goals_186,test,23,0.8803927302360535,0.5585242484052028
llm_goals_186,test,24,0.8021672368049622,0.8535531187105866
llm_goals_186,test,25,0.8426520824432373,0.8466653814359101
llm_goals_186,test,26,0.8643771409988403,0.8550637970014335
llm_goals_186,test,27,0.8764208555221558,0.9398175480710378
llm_goals_186,test,28,0.07656551897525787,0.0028363068023126
llm_goals_186,test,29,0.8646645545959473,0.8305997204848772
llm_goals_186,test,30,0.831637442111969,0.8987706923113074
llm_goals_186,test,31,0.7829760313034058,0.8880043621770078
llm_goals_186,test,32,0.8386090397834778,0.4759852559635721
llm_goals_186,test,33,0.8051923513412476,0.8450176079466206
llm_goals_186,test,34,0.8766254782676697,0.9532899841201126
llm_goals_186,test,35,0.7539909482002258,0.0290458993492998
llm_goals_186,test,36,0.8512340188026428,0.9324312223548752
llm_goals_186,test,37,0.5076815485954285,0.7062064442252718
llm_goals_186,test,38,0.3115769922733307,0.8475385890175953
llm_goals_186,test,39,0.8810789585113525,0.2451742937665777
llm_goals_186,test,40,0.8823325037956238,0.7628755019143414
llm_goals_186,test,41,0.7962633371353149,0.9118681525061464
llm_goals_186,test,42,0.10341739654541016,0.0299416334392138
llm_goals_186,test,43,0.531679093837738,0.9088531704037304
llm_goals_186,test,44,0.4440155327320099,0.399223879564458
llm_goals_186,test,45,0.883517861366272,0.0207412737387863
llm_goals_186,test,46,0.8754009008407593,0.504435185656878
llm_goals_186,test,47,0.06812237948179245,0.0426878976387859
llm_goals_186,test,48,0.8079861402511597,0.8374391175680358
llm_goals_186,test,49,0.8865119814872742,0.9286286845568336
llm_goals_401,test,0,0.8296383619308472,0.9617876405899952
llm_goals_401,test,1,0.815834105014801,0.9404490002093446
llm_goals_401,test,2,0.8690575361251831,0.935046673136648
llm_goals_401,test,3,0.9195356965065002,0.9543637645078082
llm_goals_401,test,4,0.2718498110771179,0.9660480618061604
llm_goals_401,test,5,0.8506612777709961,0.9211199313115676
llm_goals_401,test,6,0.8993476033210754,0.902884063341732
llm_goals_401,test,7,0.8962175846099854,0.94729781169548
llm_goals_401,test,8,0.9122962951660156,0.957207800210959
llm_goals_401,test,9,0.904985249042511,0.9145740305533586
llm_goals_401,test,10,0.6523478627204895,0.9267561414021268
llm_goals_401,test,11,0.8861892223358154,0.9242701512661912
llm_goals_401,test,12,0.8622724413871765,0.8435903191618971
llm_goals_401,test,13,0.2248782515525818,0.938192186231483
llm_goals_401,test,14,0.4302520751953125,0.8529976602342201
llm_goals_401,test,15,0.9141342639923096,0.9478884329856586
llm_goals_401,test,16,0.42070773243904114,0.8192354997719525
llm_goals_401,test,17,0.6930392384529114,0.6739075253562599
llm_goals_401,test,18,0.41961804032325745,0.5197439125792754
llm_goals_401,test,19,0.9236536622047424,0.8583977843568281
llm_goals_401,test,20,0.9132837653160095,0.9455993763925636
llm_goals_401,test,21,0.8532243967056274,0.9705763460601288
llm_goals_401,test,22,0.9090998768806458,0.8491748031226035
llm_goals_401,test,23,0.8413888216018677,0.8515620190775874
llm_goals_401,test,24,0.4239988327026367,0.9138265832467732
llm_goals_401,test,25,0.6697307825088501,0.9028179661562172
llm_goals_401,test,26,0.4223632514476776,0.9558187125982178
llm_goals_401,test,27,0.8338127136230469,0.5612720368847214
llm_goals_401,test,28,0.9084793925285339,0.9617025244944858
llm_goals_401,test,29,0.9156500697135925,0.8060652645257104
llm_goals_401,test,30,0.6829843521118164,0.8896843204884566
llm_goals_401,test,31,0.8487648963928223,0.9526923309875832
llm_goals_401,test,32,0.9124109745025635,0.8506568677811456
llm_goals_401,test,33,0.24285075068473816,0.7802594846608962
llm_goals_401,test,34,0.7093304991722107,0.8870658880824575
llm_goals_401,test,35,0.8703480958938599,0.954492271718892
llm_goals_401,test,36,0.6669310927391052,0.562048146808884
llm_goals_401,test,37,0.749434769153595,0.7781126533121301
llm_goals_401,test,38,0.9220514893531799,0.8591165497422784
llm_goals_401,test,39,0.8685519099235535,0.8788934372485709
llm_goals_401,test,40,0.91788250207901,0.8091102829935539
llm_goals_401,test,41,0.8545513153076172,0.8675616408909181
llm_goals_401,test,42,0.8902552127838135,0.949993671778176
llm_goals_401,test,43,0.8898730278015137,0.9467448581700896
llm_goals_401,test,44,0.9122741222381592,0.8687242293966851
llm_goals_401,test,45,0.9133915305137634,0.9587250339741858
llm_goals_401,test,46,0.21939364075660706,0.860452357605835
llm_goals_401,test,47,0.907702624797821,0.9253614550163576
llm_goals_401,test,48,0.42849257588386536,0.8647797053472309
llm_goals_401,test,49,0.8453792929649353,0.9454760570818088
llm_goals_420,test,0,0.3842678368091583,0.9711560599789952
llm_goals_420,test,1,0.3835456669330597,0.5978053926454786
llm_goals_420,test,2,0.38355693221092224,0.0817351976918742
llm_goals_420,test,3,0.38557639718055725,0.4730792485055278
llm_goals_420,test,4,0.3839648365974426,0.8487827178347965
llm_goals_420,test,5,0.3828743100166321,0.1386299974231985
llm_goals_420,test,6,0.38362905383110046,0.9677888754301456
llm_goals_420,test,7,0.38398534059524536,0.9193048504515688
llm_goals_420,test,8,0.383650541305542,0.1105940972856699
llm_goals_420,test,9,0.38624799251556396,0.0770199238560772
llm_goals_420,test,10,0.3832836151123047,0.6569762799970394
llm_goals_420,test,11,0.384727418422699,0.4533228613613927
llm_goals_420,test,12,0.38368090987205505,0.9317349816078236
llm_goals_420,test,13,0.38337472081184387,0.5042190525433249
llm_goals_420,test,14,0.38468363881111145,0.1440102415442949
llm_goals_420,test,15,0.38327252864837646,0.3273024402815145
llm_goals_420,test,16,0.38604050874710083,0.3681827771891491
llm_goals_420,test,17,0.38480105996131897,0.4632337459639504
llm_goals_420,test,18,0.3833167850971222,0.0396023029282074
llm_goals_420,test,19,0.3829302191734314,0.1093558671361818
llm_goals_420,test,20,0.38436993956565857,0.0396252534530884
llm_goals_420,test,21,0.3844459652900696,0.0465700238365722
llm_goals_420,test,22,0.38348904252052307,0.1916998836408959
llm_goals_420,test,23,0.38379308581352234,0.1681177802630551
llm_goals_420,test,24,0.38315659761428833,0.1698145969501656
llm_goals_420,test,25,0.38471853733062744,0.9829779862454848
llm_goals_420,test,26,0.3849673271179199,0.0308922007548325
llm_goals_420,test,27,0.3841836452484131,0.8125830946093627
llm_goals_420,test,28,0.38372600078582764,0.9367654930611784
llm_goals_420,test,29,0.38370251655578613,0.8184437096518178
llm_goals_420,test,30,0.3850241005420685,0.9386603865018432
llm_goals_420,test,31,0.38270503282546997,0.10851579640767
llm_goals_420,test,32,0.38435348868370056,0.9666374926215946
llm_goals_420,test,33,0.3829614520072937,0.8901044715204638
llm_goals_420,test,34,0.38504889607429504,0.9215939624410928
llm_goals_420,test,35,0.3842628598213196,0.5267802921981926
llm_goals_420,test,36,0.38480308651924133,0.092175658510858
llm_goals_420,test,37,0.3848237097263336,0.984362841437198
llm_goals_420,test,38,0.38510286808013916,0.8828047505171253
llm_goals_420,test,39,0.3840239942073822,0.2972862903877061
llm_goals_420,test,40,0.3830673098564148,0.0444444838151291
llm_goals_420,test,41,0.385206013917923,0.5478361768565629
llm_goals_420,test,42,0.38447943329811096,0.0713350995649345
llm_goals_420,test,43,0.38326242566108704,0.5106248261137879
llm_goals_420,test,44,0.38467416167259216,0.0905225225380096
llm_goals_420,test,45,0.38350993394851685,0.1531352912609338
llm_goals_420,test,46,0.3842543363571167,0.1216667665904784
llm_goals_420,test,47,0.38355115056037903,0.1240460201984882
llm_goals_420,test,48,0.3840961754322052,0.0596825371886435
llm_goals_420,test,49,0.38536661863327026,0.8335117138475971
llm_goals_263,test,0,0.8573192954063416,0.5209935935366439
llm_goals_263,test,1,0.5858364105224609,0.7968068746445729
llm_goals_263,test,2,0.86490398645401,0.5467470232144288
llm_goals_263,test,3,0.5535712242126465,0.4733215007319302
llm_goals_263,test,4,0.8644493818283081,0.0057083956844275
llm_goals_263,test,5,0.862160861492157,0.3493473596509424
llm_goals_263,test,6,0.7375895977020264,0.8201550249820808
llm_goals_263,test,7,0.627169132232666,0.6557906816477753
llm_goals_263,test,8,0.5572687387466431,0.7543426271500917
llm_goals_263,test,9,0.5429184436798096,0.2009829696053626
llm_goals_263,test,10,0.8637752532958984,0.8489753664207734
llm_goals_263,test,11,0.5621498227119446,0.652436611134806
llm_goals_263,test,12,0.8654323220252991,0.8143466734577086
llm_goals_263,test,13,0.6591519713401794,0.4245607212842352
llm_goals_263,test,14,0.6225461363792419,0.0036452536113296
llm_goals_263,test,15,0.5610851645469666,0.0558692673280436
llm_goals_263,test,16,0.5941181182861328,0.0066133854077101
llm_goals_263,test,17,0.59520423412323,0.7648655133279786
llm_goals_263,test,18,0.6190994381904602,0.006651634113929
llm_goals_263,test,19,0.6507881879806519,0.0487544569045993
llm_goals_263,test,20,0.8621309399604797,0.0331700030843074
llm_goals_263,test,21,0.8478057384490967,0.2223244924357846
llm_goals_263,test,22,0.5908961892127991,0.8850367693524676
llm_goals_263,test,23,0.58967125415802,0.6136206316393849
llm_goals_263,test,24,0.5786141753196716,0.0103991722036265
llm_goals_263,test,25,0.6024591326713562,0.7490420921699364
llm_goals_263,test,26,0.566685140132904,0.0149267434716544
llm_goals_263,test,27,0.8067132234573364,0.7989164572724189
llm_goals_263,test,28,0.5594255328178406,0.6376088146659757
llm_goals_263,test,29,0.6442697644233704,0.0419343519282845
llm_goals_263,test,30,0.7345257997512817,0.8598701912381717
llm_goals_263,test,31,0.830859363079071,0.168156003178124
llm_goals_263,test,32,0.8629155158996582,0.6210063725022936
llm_goals_263,test,33,0.8598348498344421,0.0062242916440703
llm_goals_263,test,34,0.5840651392936707,0.5691545063377933
llm_goals_263,test,35,0.6739481091499329,0.7982008481236453
llm_goals_263,test,36,0.852290689945221,0.763706617142148
llm_goals_263,test,37,0.5834945440292358,0.0342430752915087
llm_goals_263,test,38,0.6772735118865967,0.1222660265497154
llm_goals_263,test,39,0.6119908094406128,0.8269810089104592
llm_goals_263,test,40,0.5655151009559631,0.5318831885299807
llm_goals_263,test,41,0.5547193288803101,0.3207528198494705
llm_goals_263,test,42,0.86374831199646,0.4801431121790341
llm_goals_263,test,43,0.8616092801094055,0.1981375631842198
llm_goals_263,test,44,0.5678957104682922,0.1080272425734687
llm_goals_263,test,45,0.8645410537719727,0.8354177467829539
llm_goals_263,test,46,0.598348081111908,0.7413273974280716
llm_goals_263,test,47,0.8640948534011841,0.4918597295945824
llm_goals_263,test,48,0.5650440454483032,0.0043793883332247
llm_goals_263,test,49,0.8650387525558472,0.949535011062065
llm_goals_427,test,0,0.521841824054718,0.2826418194423331
llm_goals_427,test,1,0.7605456113815308,0.380977464557011
llm_goals_427,test,2,0.47853752970695496,0.1487359361961987
llm_goals_427,test,3,0.4840945601463318,0.3576580711889061
llm_goals_427,test,4,0.5590776801109314,0.2270212997591984
llm_goals_427,test,5,0.7537922263145447,0.2711625929555684
llm_goals_427,test,6,0.7505221366882324,0.1402530886796432
llm_goals_427,test,7,0.4926000237464905,0.2935996266753878
llm_goals_427,test,8,0.4988381862640381,0.2137670314614922
llm_goals_427,test,9,0.4936088025569916,0.2160041132788326
llm_goals_427,test,10,0.766090452671051,0.3669200145460971
llm_goals_427,test,11,0.5331735610961914,0.1516882402250043
llm_goals_427,test,12,0.4943297803401947,0.1475453757541205
llm_goals_427,test,13,0.49707454442977905,0.1350458405109811
llm_goals_427,test,14,0.4881554841995239,0.2337964230636721
llm_goals_427,test,15,0.7718092799186707,0.3115728193455654
llm_goals_427,test,16,0.4873652458190918,0.131520745523965
llm_goals_427,test,17,0.7643466591835022,0.3278496493117007
llm_goals_427,test,18,0.7610998153686523,0.3527567015929387
llm_goals_427,test,19,0.4822549819946289,0.2313815817463674
llm_goals_427,test,20,0.5349233746528625,0.3221674001172885
llm_goals_427,test,21,0.4960642457008362,0.2494982149574568
llm_goals_427,test,22,0.7681995034217834,0.3779860883941361
llm_goals_427,test,23,0.4845094382762909,0.3509138567308049
llm_goals_427,test,24,0.7522106170654297,0.1946130607095621
llm_goals_427,test,25,0.5082067251205444,0.228222711067827
llm_goals_427,test,26,0.762052595615387,0.2768263295001016
llm_goals_427,test,27,0.4844822287559509,0.2199474495731038
llm_goals_427,test,28,0.4920440912246704,0.1579098434370231
llm_goals_427,test,29,0.4885600507259369,0.069843346359134
llm_goals_427,test,30,0.7618464231491089,0.5359787931933129
llm_goals_427,test,31,0.4860963523387909,0.1293182240484197
llm_goals_427,test,32,0.5061228275299072,0.2396093502744168
llm_goals_427,test,33,0.7487253546714783,0.1913976476875258
llm_goals_427,test,34,0.652533233165741,0.4990200469731286
llm_goals_427,test,35,0.4864262640476227,0.357807030322738
llm_goals_427,test,36,0.668613076210022,0.2580007688260685
llm_goals_427,test,37,0.48176953196525574,0.4031150735356658
llm_goals_427,test,38,0.7421385049819946,0.2737247490124149
llm_goals_427,test,39,0.483761191368103,0.2478362318668904
llm_goals_427,test,40,0.5272771716117859,0.5026721270474518
llm_goals_427,test,41,0.5017876625061035,0.2273186194850299
llm_goals_427,test,42,0.4892980456352234,0.4444071308114509
llm_goals_427,test,43,0.5462906956672668,0.2369720640870139
llm_goals_427,test,44,0.7671980857849121,0.3961639877527012
llm_goals_427,test,45,0.4895385503768921,0.2771055535106792
llm_goals_427,test,46,0.756078839302063,0.3138421601355035
llm_goals_427,test,47,0.4877642095088959,0.3609031182529178
llm_goals_427,test,48,0.5026125907897949,0.1899314535085168
llm_goals_427,test,49,0.5980682373046875,0.4452677872765214
llm_goals_449,test,0,0.3131377100944519,0.0338418398422384
llm_goals_449,test,1,0.3135029077529907,0.5657697432527007
llm_goals_449,test,2,0.3130607306957245,0.0110135668657682
llm_goals_449,test,3,0.3132723569869995,0.2352946829974157
llm_goals_449,test,4,0.3137468695640564,0.4842577903424374
llm_goals_449,test,5,0.3135363459587097,0.3485549555066181
llm_goals_449,test,6,0.31358957290649414,0.0519669241570106
llm_goals_449,test,7,0.3132314682006836,0.0031269987549636
llm_goals_449,test,8,0.31309714913368225,0.2974090419757364
llm_goals_449,test,9,0.31339576840400696,0.0925536782452943
llm_goals_449,test,10,0.3134644031524658,0.4386853694122202
llm_goals_449,test,11,0.3132087290287018,0.0063782741293255
llm_goals_449,test,12,0.31319260597229004,0.0714949969968408
llm_goals_449,test,13,0.31338974833488464,0.0227751515807358
llm_goals_449,test,14,0.313218355178833,0.3195753835685478
llm_goals_449,test,15,0.3132534921169281,0.2420638006790782
llm_goals_449,test,16,0.3133457601070404,0.0953277975239941
llm_goals_449,test,17,0.31347355246543884,0.5835221517250182
llm_goals_449,test,18,0.31364700198173523,0.1603112674891041
llm_goals_449,test,19,0.3131524622440338,0.6833613521345718
llm_goals_449,test,20,0.31342706084251404,0.4549636770044163
llm_goals_449,test,21,0.3137263059616089,0.1579551412473356
llm_goals_449,test,22,0.31325656175613403,0.4400750405221786
llm_goals_449,test,23,0.31371501088142395,0.0251530775884869
llm_goals_449,test,24,0.3132130205631256,0.2695772384712642
llm_goals_449,test,25,0.3133792579174042,0.4519585097229938
llm_goals_449,test,26,0.3135731518268585,0.1123650856049343
llm_goals_449,test,27,0.31314077973365784,0.4873581137262729
llm_goals_449,test,28,0.3132690489292145,0.1751834751469549
llm_goals_449,test,29,0.3133276104927063,0.5804994670396382
llm_goals_449,test,30,0.31334954500198364,0.4626257946707522
llm_goals_449,test,31,0.3137083947658539,0.093699276239858
llm_goals_449,test,32,0.3138068914413452,0.0067638379987912
llm_goals_449,test,33,0.31366363167762756,0.1681834079617312
llm_goals_449,test,34,0.31337231397628784,0.3493466516790004
llm_goals_449,test,35,0.3133028745651245,0.0068633490722964
llm_goals_449,test,36,0.3132506012916565,0.3064552348162913
llm_goals_449,test,37,0.3132505714893341,0.6652150761082033
llm_goals_449,test,38,0.31365373730659485,0.6741024889471738
llm_goals_449,test,39,0.3136662542819977,0.0101379083480811
llm_goals_449,test,40,0.3130955696105957,0.0089588521989727
llm_goals_449,test,41,0.3132261633872986,0.1780122280374072
llm_goals_449,test,42,0.31314516067504883,0.0051090174127486
llm_goals_449,test,43,0.3136025369167328,0.2498444495344637
llm_goals_449,test,44,0.31362104415893555,0.221646438441774
llm_goals_449,test,45,0.31354689598083496,0.2094790944562388
llm_goals_449,test,46,0.3133949339389801,0.0060341568900946
llm_goals_449,test,47,0.3133590519428253,0.1241710666090617
llm_goals_449,test,48,0.31333962082862854,0.1338989075405874
llm_goals_449,test,49,0.3131844997406006,0.5982472498607049
llm_goals_93,test,0,0.3033926784992218,0.0729546648027557
llm_goals_93,test,1,0.4845615029335022,0.6981648167380458
llm_goals_93,test,2,0.5131763815879822,0.3318565449695651
llm_goals_93,test,3,0.48507794737815857,0.5505450337981316
llm_goals_93,test,4,0.9175912141799927,0.9547701486411684
llm_goals_93,test,5,0.5545885562896729,0.8378989133869518
llm_goals_93,test,6,0.9173443913459778,0.8645678173925403
llm_goals_93,test,7,0.24215394258499146,0.8034776985663136
llm_goals_93,test,8,0.9289183020591736,0.9139015687588464
llm_goals_93,test,9,0.8945550918579102,0.84076714934408
llm_goals_93,test,10,0.5592747330665588,0.6739811171843284
llm_goals_93,test,11,0.8312158584594727,0.1039005746210211
llm_goals_93,test,12,0.10269015282392502,0.4267719606730696
llm_goals_93,test,13,0.5579550862312317,0.8366404177501517
llm_goals_93,test,14,0.6353368163108826,0.51043302648597
llm_goals_93,test,15,0.14564745128154755,0.9305120134073452
llm_goals_93,test,16,0.8245664238929749,0.3610424966705102
llm_goals_93,test,17,0.10057750344276428,0.6383014796360676
llm_goals_93,test,18,0.5385660529136658,0.7718638733110297
llm_goals_93,test,19,0.58343905210495,0.6413577201238602
llm_goals_93,test,20,0.9139429330825806,0.9315750038397262
llm_goals_93,test,21,0.4132847785949707,0.959779321458113
llm_goals_93,test,22,0.09665757417678833,0.8212707656396169
llm_goals_93,test,23,0.8865413069725037,0.0620020892855925
llm_goals_93,test,24,0.5165167450904846,0.6707394124648912
llm_goals_93,test,25,0.4187423586845398,0.8513622881275814
llm_goals_93,test,26,0.4719519317150116,0.9295520797235572
llm_goals_93,test,27,0.18644443154335022,0.731125599089208
llm_goals_93,test,28,0.26431816816329956,0.6387209955509132
llm_goals_93,test,29,0.5354630351066589,0.904010833763946
llm_goals_93,test,30,0.5868067741394043,0.6688304494781727
llm_goals_93,test,31,0.8106753826141357,0.7246857956085144
llm_goals_93,test,32,0.1887609362602234,0.8395389979928227
llm_goals_93,test,33,0.9262081384658813,0.4902935277900161
llm_goals_93,test,34,0.09991711378097534,0.5478889802411547
llm_goals_93,test,35,0.6727423667907715,0.8178324059837278
llm_goals_93,test,36,0.5887491703033447,0.8865698576591567
llm_goals_93,test,37,0.5032276511192322,0.9194802654649477
llm_goals_93,test,38,0.6453635096549988,0.8798558779688433
llm_goals_93,test,39,0.7271510362625122,0.2399301633758412
llm_goals_93,test,40,0.8461641073226929,0.946265162703226
llm_goals_93,test,41,0.6802605390548706,0.8435175392494392
llm_goals_93,test,42,0.30292361974716187,0.260949780916787
llm_goals_93,test,43,0.9215409755706787,0.8190814792570137
llm_goals_93,test,44,0.9026283621788025,0.951412738003951
llm_goals_93,test,45,0.9312945008277893,0.5085180485155563
llm_goals_93,test,46,0.5974830389022827,0.946404217454057
llm_goals_93,test,47,0.3719422221183777,0.5778150570910173
llm_goals_93,test,48,0.27526208758354187,0.6859809771933842
llm_goals_93,test,49,0.6229862570762634,0.6187362092588319
llm_goals_358,test,0,0.8332763314247131,0.9198604804287828
llm_goals_358,test,1,0.7632737755775452,0.8623428941495346
llm_goals_358,test,2,0.8299117684364319,0.8146633538972576
llm_goals_358,test,3,0.8242139220237732,0.6635203414505186
llm_goals_358,test,4,0.841041088104248,0.9873765704706148
llm_goals_358,test,5,0.7834556102752686,0.8085697334742067
llm_goals_358,test,6,0.7785527110099792,0.8449593731392603
llm_goals_358,test,7,0.8127280473709106,0.7802023295414534
llm_goals_358,test,8,0.8403149843215942,0.5598487865057791
llm_goals_358,test,9,0.821250855922699,0.7689259718491731
llm_goals_358,test,10,0.7239089608192444,0.4090564458803354
llm_goals_358,test,11,0.20797982811927795,0.7281676519766522
llm_goals_358,test,12,0.693349301815033,0.781706684015145
llm_goals_358,test,13,0.8034588098526001,0.6175350663762746
llm_goals_358,test,14,0.11251509189605713,0.1259366314847969
llm_goals_358,test,15,0.8377938866615295,0.9665391433681092
llm_goals_358,test,16,0.3249461054801941,0.0977212357476949
llm_goals_358,test,17,0.4759245812892914,0.839291034278836
llm_goals_358,test,18,0.7870519757270813,0.0422931239637008
llm_goals_358,test,19,0.6766488552093506,0.3805861792548485
llm_goals_358,test,20,0.8275086879730225,0.972559035375534
llm_goals_358,test,21,0.8094126582145691,0.7844216881478699
llm_goals_358,test,22,0.6654201149940491,0.8432835924874589
llm_goals_358,test,23,0.10629676282405853,0.5831400725193898
llm_goals_358,test,24,0.23457546532154083,0.4717356494812245
llm_goals_358,test,25,0.1044851765036583,0.5591997574513735
llm_goals_358,test,26,0.13832290470600128,0.2416094818565881
llm_goals_358,test,27,0.1360034942626953,0.8557277309389357
llm_goals_358,test,28,0.8136913776397705,0.7784201381020285
llm_goals_358,test,29,0.6209198236465454,0.9848793746115032
llm_goals_358,test,30,0.2308935821056366,0.6211451382223804
llm_goals_358,test,31,0.7768032550811768,0.3507967043222499
llm_goals_358,test,32,0.10471092164516449,0.670993868258675
llm_goals_358,test,33,0.15803493559360504,0.0563775762088827
llm_goals_358,test,34,0.10646612197160721,0.4923522105012786
llm_goals_358,test,35,0.8294684886932373,0.8157534628947803
llm_goals_358,test,36,0.24496059119701385,0.7491294702469764
llm_goals_358,test,37,0.43898507952690125,0.5478990059483921
llm_goals_358,test,38,0.5633208155632019,0.9873113036659013
llm_goals_358,test,39,0.38474372029304504,0.6807296173615788
llm_goals_358,test,40,0.833412766456604,0.6446558925323718
llm_goals_358,test,41,0.7837128639221191,0.3360622093964198
llm_goals_358,test,42,0.8303960561752319,0.6737437200032999
llm_goals_358,test,43,0.8115326166152954,0.453765234769123
llm_goals_358,test,44,0.6302782893180847,0.921309112250278
llm_goals_358,test,45,0.14522512257099152,0.5133269325272689
llm_goals_358,test,46,0.6562010049819946,0.7619134214235583
llm_goals_358,test,47,0.7890066504478455,0.5260478340890856
llm_goals_358,test,48,0.1913922280073166,0.0910570466126295
llm_goals_358,test,49,0.6909488439559937,0.4710935988277175
