template_id,split,question_idx,prediction,label
llm_goals_449,test,0,0.1347680538892746,0.1336325872199141
llm_goals_449,test,1,0.2920703589916229,0.2356014848705813
llm_goals_449,test,2,0.33110105991363525,0.3977331353545913
llm_goals_449,test,3,0.5878740549087524,0.9671817093217177
llm_goals_449,test,4,0.8072128295898438,0.7857460873195503
llm_goals_449,test,5,0.41764363646507263,0.5164481763709956
llm_goals_449,test,6,0.3107980787754059,0.0370459810848937
llm_goals_449,test,7,0.23688124120235443,0.2618660511464079
llm_goals_449,test,8,0.6268746256828308,0.9771937256062914
llm_goals_449,test,9,0.061091016978025436,0.0204302625650166
llm_goals_449,test,10,0.2684206962585449,0.0731782442371117
llm_goals_449,test,11,0.07585360109806061,0.0091003072940505
llm_goals_449,test,12,0.25851646065711975,0.1108627498844831
llm_goals_449,test,13,0.21238143742084503,0.054840421346723
llm_goals_449,test,14,0.5966901779174805,0.8313263696124994
llm_goals_449,test,15,0.5957493782043457,0.8253071431638606
llm_goals_449,test,16,0.5015527606010437,0.831142754565476
llm_goals_449,test,17,0.30338701605796814,0.3065891414116863
llm_goals_449,test,18,0.6170012950897217,0.4235929706341914
llm_goals_449,test,19,0.4636882245540619,0.2342867705548718
llm_goals_449,test,20,0.6971475481987,0.5687148418873526
llm_goals_449,test,21,0.516610324382782,0.4888083146033534
llm_goals_449,test,22,0.1885502189397812,0.0069036151286541
llm_goals_449,test,23,0.7635430693626404,0.0739780745569788
llm_goals_449,test,24,0.4184418320655823,0.9314584192860104
llm_goals_449,test,25,0.20855046808719635,0.1420477852561596
llm_goals_449,test,26,0.48978909850120544,0.7006375316273405
llm_goals_449,test,27,0.20112541317939758,0.0612984362789981
llm_goals_449,test,28,0.23224614560604095,0.9549750406111494
llm_goals_449,test,29,0.4609701633453369,0.2943014776892277
llm_goals_449,test,30,0.2688026428222656,0.0576629803259434
llm_goals_449,test,31,0.5548166632652283,0.2255338162735628
llm_goals_449,test,32,0.27984485030174255,0.0217578202648259
llm_goals_449,test,33,0.7852368354797363,0.5881005329750926
llm_goals_449,test,34,0.15373866260051727,0.0656003360881953
llm_goals_449,test,35,0.41028475761413574,0.1899455989825758
llm_goals_449,test,36,0.13305552303791046,0.1356723510839239
llm_goals_449,test,37,0.29949551820755005,0.1724577552330986
llm_goals_449,test,38,0.58601975440979,0.097576330461044
llm_goals_449,test,39,0.27101507782936096,0.0831443503045603
llm_goals_449,test,40,0.25384649634361267,0.0085659129540734
llm_goals_449,test,41,0.3926771581172943,0.0438119169092525
llm_goals_449,test,42,0.4438744783401489,0.4942733211865646
llm_goals_449,test,43,0.36528730392456055,0.1247723307463423
llm_goals_449,test,44,0.4844537377357483,0.8400723355781036
llm_goals_449,test,45,0.6054046750068665,0.925208223971735
llm_goals_449,test,46,0.617467999458313,0.0740922882863047
llm_goals_449,test,47,0.6024554967880249,0.9373555404045412
llm_goals_449,test,48,0.3366484045982361,0.3920310752556691
llm_goals_449,test,49,0.33751773834228516,0.0277662962683335
llm_goals_75,test,0,0.7646589279174805,0.8642124165137652
llm_goals_75,test,1,0.7118194699287415,0.910359703136258
llm_goals_75,test,2,0.8122325539588928,0.8128286324258283
llm_goals_75,test,3,0.4054138660430908,0.1661489886313124
llm_goals_75,test,4,0.4887351989746094,0.1236451678860016
llm_goals_75,test,5,0.7303045392036438,0.6739037233559247
llm_goals_75,test,6,0.44561198353767395,0.1134434733777058
llm_goals_75,test,7,0.8343794345855713,0.6946055843235053
llm_goals_75,test,8,0.37723538279533386,0.1004496505890182
llm_goals_75,test,9,0.681840181350708,0.1540130266900075
llm_goals_75,test,10,0.6008909344673157,0.3569515830662073
llm_goals_75,test,11,0.6551099419593811,0.4576154291835114
llm_goals_75,test,12,0.5728214979171753,0.1367346048355968
llm_goals_75,test,13,0.3563505709171295,0.0646890805500496
llm_goals_75,test,14,0.4316774904727936,0.1675122689898172
llm_goals_75,test,15,0.37804749608039856,0.069582458224139
llm_goals_75,test,16,0.4401216506958008,0.2354277864496811
llm_goals_75,test,17,0.9180133938789368,0.9447837743973488
llm_goals_75,test,18,0.4376358389854431,0.0781001795812696
llm_goals_75,test,19,0.7357173562049866,0.4868721494751054
llm_goals_75,test,20,0.5084386467933655,0.1033036348939128
llm_goals_75,test,21,0.47110041975975037,0.1276999559430948
llm_goals_75,test,22,0.8814324140548706,0.8431325364742722
llm_goals_75,test,23,0.5405991673469543,0.7460283918906632
llm_goals_75,test,24,0.5400494933128357,0.192573320599938
llm_goals_75,test,25,0.43768739700317383,0.3801085776614793
llm_goals_75,test,26,0.38464727997779846,0.1267870145951939
llm_goals_75,test,27,0.8763846158981323,0.9607504432606712
llm_goals_75,test,28,0.6166085600852966,0.0423146645658729
llm_goals_75,test,29,0.5994177460670471,0.4777054812224888
llm_goals_75,test,30,0.5862857103347778,0.4822637810175876
llm_goals_75,test,31,0.4240284264087677,0.0805248550261055
llm_goals_75,test,32,0.6400626301765442,0.6890841093919252
llm_goals_75,test,33,0.44282764196395874,0.1145962248845397
llm_goals_75,test,34,0.3782923221588135,0.2921261463182895
llm_goals_75,test,35,0.5642354488372803,0.6530562837025381
llm_goals_75,test,36,0.4918196201324463,0.1183304077208745
llm_goals_75,test,37,0.9021989703178406,0.868052766582826
llm_goals_75,test,38,0.7272375226020813,0.6535917953521371
llm_goals_75,test,39,0.3718253970146179,0.0644053193022136
llm_goals_75,test,40,0.6732860207557678,0.6019618834627347
llm_goals_75,test,41,0.41571712493896484,0.230389069862908
llm_goals_75,test,42,0.8621546030044556,0.7894553184375537
llm_goals_75,test,43,0.6203150153160095,0.2977582471382819
llm_goals_75,test,44,0.6020022630691528,0.2211815525008748
llm_goals_75,test,45,0.6841498613357544,0.0745441466493943
llm_goals_75,test,46,0.6372784972190857,0.7127955514788996
llm_goals_75,test,47,0.49845215678215027,0.0161223980025345
llm_goals_75,test,48,0.5408913493156433,0.1382513395050604
llm_goals_75,test,49,0.9000193476676941,0.9815106661613362
llm_goals_305,test,0,0.5222141742706299,0.2062097726301097
llm_goals_305,test,1,0.5161014795303345,0.7603213407742078
llm_goals_305,test,2,0.3930032551288605,0.0406395612652842
llm_goals_305,test,3,0.5796051025390625,0.4422001539134155
llm_goals_305,test,4,0.14753775298595428,0.2689791708049951
llm_goals_305,test,5,0.11193235218524933,0.1412581238797707
llm_goals_305,test,6,0.26312780380249023,0.0992911357773575
llm_goals_305,test,7,0.19606256484985352,0.1984947738037692
llm_goals_305,test,8,0.14071711897850037,0.1577864619647619
llm_goals_305,test,9,0.4560741186141968,0.0621641107275182
llm_goals_305,test,10,0.606491208076477,0.9879174023450592
llm_goals_305,test,11,0.43205246329307556,0.5269669419962362
llm_goals_305,test,12,0.27425143122673035,0.2105593475656798
llm_goals_305,test,13,0.29905131459236145,0.2072161629845012
llm_goals_305,test,14,0.4831866919994354,0.6545892217736428
llm_goals_305,test,15,0.1886085867881775,0.5104040428268571
llm_goals_305,test,16,0.6106811165809631,0.5524398163622298
llm_goals_305,test,17,0.2646772861480713,0.8665614751565875
llm_goals_305,test,18,0.6334723234176636,0.3175479682579358
llm_goals_305,test,19,0.35790497064590454,0.3701558259397783
llm_goals_305,test,20,0.11681553721427917,0.0528314061272507
llm_goals_305,test,21,0.14905685186386108,0.3148726989644924
llm_goals_305,test,22,0.6936976313591003,0.447303668549321
llm_goals_305,test,23,0.2521830201148987,0.1744797562558885
llm_goals_305,test,24,0.46208667755126953,0.8158235756439657
llm_goals_305,test,25,0.6429587006568909,0.9578316364476024
llm_goals_305,test,26,0.5545032024383545,0.5595920193399587
llm_goals_305,test,27,0.4978366792201996,0.5967193097172223
llm_goals_305,test,28,0.4071170687675476,0.9037929343333102
llm_goals_305,test,29,0.3959556221961975,0.278165145436113
llm_goals_305,test,30,0.4972885847091675,0.9532629887346658
llm_goals_305,test,31,0.20011433959007263,0.5121334481158791
llm_goals_305,test,32,0.37627437710762024,0.1605879890433021
llm_goals_305,test,33,0.4431830644607544,0.6222808213139804
llm_goals_305,test,34,0.7595334649085999,0.9709568242405192
llm_goals_305,test,35,0.40833431482315063,0.5513220475450542
llm_goals_305,test,36,0.7486233115196228,0.9876292784574268
llm_goals_305,test,37,0.38094595074653625,0.258566594074644
llm_goals_305,test,38,0.34406355023384094,0.3940893870837836
llm_goals_305,test,39,0.199154332280159,0.6249683049434822
llm_goals_305,test,40,0.5023698210716248,0.036012250509986
llm_goals_305,test,41,0.4362098276615143,0.0238784684017155
llm_goals_305,test,42,0.2420864701271057,0.1015334102504109
llm_goals_305,test,43,0.3040432333946228,0.0682627831684364
llm_goals_305,test,44,0.15679392218589783,0.0901285910304827
llm_goals_305,test,45,0.31767433881759644,0.8520020311890948
llm_goals_305,test,46,0.31506016850471497,0.512946133742762
llm_goals_305,test,47,0.3649091422557831,0.9460715758270588
llm_goals_305,test,48,0.5714491009712219,0.2634284981215168
llm_goals_305,test,49,0.3929027020931244,0.5226278491298435
llm_goals_263,test,0,0.2821843922138214,0.9722113849684072
llm_goals_263,test,1,0.5955796837806702,0.4308884142088059
llm_goals_263,test,2,0.43110373616218567,0.9112282637417588
llm_goals_263,test,3,0.36520108580589294,0.4678322072304764
llm_goals_263,test,4,0.3608059883117676,0.0681171790473069
llm_goals_263,test,5,0.4867634177207947,0.3304750158657515
llm_goals_263,test,6,0.13633568584918976,0.1049596434028026
llm_goals_263,test,7,0.30246835947036743,0.868290478100796
llm_goals_263,test,8,0.05763978883624077,0.0107688258732955
llm_goals_263,test,9,0.4086843729019165,0.5335894362422906
llm_goals_263,test,10,0.4598580300807953,0.4571801332195204
llm_goals_263,test,11,0.3441867232322693,0.4017475232284387
llm_goals_263,test,12,0.2656421661376953,0.7410716651377723
llm_goals_263,test,13,0.3765699565410614,0.2222869461895513
llm_goals_263,test,14,0.1560160219669342,0.0236037417776026
llm_goals_263,test,15,0.3878105878829956,0.0998823008982278
llm_goals_263,test,16,0.18917806446552277,0.0090037236123257
llm_goals_263,test,17,0.43885868787765503,0.3371875979935536
llm_goals_263,test,18,0.46374577283859253,0.2095794100566156
llm_goals_263,test,19,0.3010333478450775,0.1483621477954244
llm_goals_263,test,20,0.31554746627807617,0.2829259876769517
llm_goals_263,test,21,0.23522871732711792,0.1128506991103582
llm_goals_263,test,22,0.6957932114601135,0.8918927085227047
llm_goals_263,test,23,0.13091781735420227,0.1960590879366542
llm_goals_263,test,24,0.2153502255678177,0.0409316032122589
llm_goals_263,test,25,0.1716688722372055,0.1971178454456452
llm_goals_263,test,26,0.33454474806785583,0.290576284754869
llm_goals_263,test,27,0.3704345226287842,0.3914767323316075
llm_goals_263,test,28,0.1598171591758728,0.3629055172172753
llm_goals_263,test,29,0.14081880450248718,0.0312142081028261
llm_goals_263,test,30,0.3304923176765442,0.1928083275379132
llm_goals_263,test,31,0.24605321884155273,0.318621097342132
llm_goals_263,test,32,0.19634392857551575,0.1747653444762649
llm_goals_263,test,33,0.19341066479682922,0.0531218265570888
llm_goals_263,test,34,0.27640897035598755,0.1598154925951403
llm_goals_263,test,35,0.6209198236465454,0.94307484220015
llm_goals_263,test,36,0.3508971035480499,0.5041818847108344
llm_goals_263,test,37,0.2756422162055969,0.1111175217519077
llm_goals_263,test,38,0.4910929501056671,0.1867701836992809
llm_goals_263,test,39,0.5216553211212158,0.9513736387795227
llm_goals_263,test,40,0.4897361397743225,0.726601907835183
llm_goals_263,test,41,0.47348758578300476,0.2793940710071973
llm_goals_263,test,42,0.4578569531440735,0.7591325538288878
llm_goals_263,test,43,0.2767550051212311,0.0599255853483329
llm_goals_263,test,44,0.2672981023788452,0.0248060297565235
llm_goals_263,test,45,0.3873730003833771,0.4429293016127257
llm_goals_263,test,46,0.37674644589424133,0.3978895760863248
llm_goals_263,test,47,0.4670577645301819,0.9291514786865128
llm_goals_263,test,48,0.38581180572509766,0.2313178866490127
llm_goals_263,test,49,0.5343006253242493,0.3779293731936072
llm_goals_157,test,0,0.48963895440101624,0.7943095194508789
llm_goals_157,test,1,0.24309901893138885,0.0516624712053447
llm_goals_157,test,2,0.4334059953689575,0.8020233336204885
llm_goals_157,test,3,0.26258549094200134,0.5904722684734964
llm_goals_157,test,4,0.6343173384666443,0.9619353001797089
llm_goals_157,test,5,0.43645811080932617,0.6767415952202784
llm_goals_157,test,6,0.38931024074554443,0.7612977247981438
llm_goals_157,test,7,0.30892011523246765,0.6294506387377161
llm_goals_157,test,8,0.5173711776733398,0.8937361945430545
llm_goals_157,test,9,0.5315530896186829,0.9788153257749777
llm_goals_157,test,10,0.5397434234619141,0.8824125505492224
llm_goals_157,test,11,0.3310658931732178,0.199986733795525
llm_goals_157,test,12,0.6715931296348572,0.9284577144290443
llm_goals_157,test,13,0.47875842452049255,0.9240516415338534
llm_goals_157,test,14,0.41103774309158325,0.2475658313194042
llm_goals_157,test,15,0.6926302313804626,0.976340679507306
llm_goals_157,test,16,0.3147059977054596,0.3126950781624377
llm_goals_157,test,17,0.2647056579589844,0.046177739393252
llm_goals_157,test,18,0.3036275804042816,0.4250899393616417
llm_goals_157,test,19,0.37607961893081665,0.6637557888114428
llm_goals_157,test,20,0.7105515599250793,0.9747431080575436
llm_goals_157,test,21,0.4207206666469574,0.566102644985123
llm_goals_157,test,22,0.40588346123695374,0.4645121573786108
llm_goals_157,test,23,0.4202769100666046,0.1429816093888423
llm_goals_157,test,24,0.1807357221841812,0.0397822616409018
llm_goals_157,test,25,0.27713191509246826,0.2832629442692311
llm_goals_157,test,26,0.37733525037765503,0.3005834296865567
llm_goals_157,test,27,0.3238706588745117,0.049069242775247
llm_goals_157,test,28,0.24994756281375885,0.2908678434467844
llm_goals_157,test,29,0.22081206738948822,0.1061660413895585
llm_goals_157,test,30,0.25298431515693665,0.5164500368467446
llm_goals_157,test,31,0.5864975452423096,0.9364517603204892
llm_goals_157,test,32,0.20617951452732086,0.0583368009018155
llm_goals_157,test,33,0.19574548304080963,0.102471328139649
llm_goals_157,test,34,0.2658320963382721,0.7754930766823708
llm_goals_157,test,35,0.5392856001853943,0.930098280704675
llm_goals_157,test,36,0.49295318126678467,0.8978537108401503
llm_goals_157,test,37,0.22948716580867767,0.185447729939923
llm_goals_157,test,38,0.2928832173347473,0.4989516423125952
llm_goals_157,test,39,0.43085622787475586,0.8172337053734404
llm_goals_157,test,40,0.3598344624042511,0.3208967259617554
llm_goals_157,test,41,0.35382524132728577,0.9628746978064082
llm_goals_157,test,42,0.5603890419006348,0.9537549409903032
llm_goals_157,test,43,0.7078995108604431,0.9854372229811326
llm_goals_157,test,44,0.6687545776367188,0.9446223273489608
llm_goals_157,test,45,0.4347513020038605,0.917287033427004
llm_goals_157,test,46,0.5174368619918823,0.2958138118998824
llm_goals_157,test,47,0.4485132098197937,0.7644578181201236
llm_goals_157,test,48,0.2847687304019928,0.2919786437406029
llm_goals_157,test,49,0.5172966122627258,0.2435548949640933
llm_goals_358,test,0,0.8435153961181641,0.9710288332808288
llm_goals_358,test,1,0.24647288024425507,0.1090780967183576
llm_goals_358,test,2,0.675241231918335,0.8957724929016077
llm_goals_358,test,3,0.12639369070529938,0.0314742636417454
llm_goals_358,test,4,0.8361120820045471,0.9345866127648594
llm_goals_358,test,5,0.3453962504863739,0.3075908794558576
llm_goals_358,test,6,0.40976130962371826,0.1938894394639401
llm_goals_358,test,7,0.9064338207244873,0.9366675383938772
llm_goals_358,test,8,0.7297821044921875,0.9600018984290702
llm_goals_358,test,9,0.31075721979141235,0.0356972898371527
llm_goals_358,test,10,0.5501209497451782,0.7396882546703099
llm_goals_358,test,11,0.42422258853912354,0.0409023187764638
llm_goals_358,test,12,0.5334850549697876,0.0976624021485058
llm_goals_358,test,13,0.39064574241638184,0.1216232236474993
llm_goals_358,test,14,0.9296093583106995,0.7864359180640846
llm_goals_358,test,15,0.5242433547973633,0.8556484582282166
llm_goals_358,test,16,0.817226767539978,0.7341230488107452
llm_goals_358,test,17,0.47590306401252747,0.2726809060238253
llm_goals_358,test,18,0.5299151539802551,0.7566054280756713
llm_goals_358,test,19,0.4327571988105774,0.0434563732454263
llm_goals_358,test,20,0.8425628542900085,0.8243328886415383
llm_goals_358,test,21,0.5029411315917969,0.1112076480131408
llm_goals_358,test,22,0.18764930963516235,0.0845862458189736
llm_goals_358,test,23,0.5083659887313843,0.0375312043866978
llm_goals_358,test,24,0.8453021049499512,0.8172477900339192
llm_goals_358,test,25,0.8312789797782898,0.9764694049382328
llm_goals_358,test,26,0.6870221495628357,0.7040948478982805
llm_goals_358,test,27,0.4134018123149872,0.125707811898555
llm_goals_358,test,28,0.35879573225975037,0.1566378003751444
llm_goals_358,test,29,0.39072492718696594,0.1707030695577025
llm_goals_358,test,30,0.8535726070404053,0.7917262984233946
llm_goals_358,test,31,0.7023907899856567,0.0448402492326266
llm_goals_358,test,32,0.674769401550293,0.0880149966006802
llm_goals_358,test,33,0.8505005240440369,0.9036261072893176
llm_goals_358,test,34,0.3621433675289154,0.6926631691100236
llm_goals_358,test,35,0.6206309199333191,0.772573980123232
llm_goals_358,test,36,0.4446394443511963,0.443962006250219
llm_goals_358,test,37,0.803581953048706,0.5987374702917627
llm_goals_358,test,38,0.3467610776424408,0.0973650244988885
llm_goals_358,test,39,0.44488587975502014,0.0374517082121219
llm_goals_358,test,40,0.2630167603492737,0.0459188553104236
llm_goals_358,test,41,0.08637019991874695,0.067381376462992
llm_goals_358,test,42,0.7740439176559448,0.9322344143419644
llm_goals_358,test,43,0.37848374247550964,0.0773258781385884
llm_goals_358,test,44,0.9034584164619446,0.9867304662942348
llm_goals_358,test,45,0.5213754773139954,0.0921047575496883
llm_goals_358,test,46,0.3540813624858856,0.0203810451816961
llm_goals_358,test,47,0.3771660029888153,0.0161558697095388
llm_goals_358,test,48,0.8186912536621094,0.7751475880899051
llm_goals_358,test,49,0.5818597674369812,0.4160470958249456
llm_goals_78,test,0,0.40037259459495544,0.2251708162334548
llm_goals_78,test,1,0.5398575067520142,0.0367340336052023
llm_goals_78,test,2,0.5385740399360657,0.0527379912395899
llm_goals_78,test,3,0.2633034586906433,0.0259505842905422
llm_goals_78,test,4,0.3616751432418823,0.0939178372549231
llm_goals_78,test,5,0.4368882477283478,0.6658069720444946
llm_goals_78,test,6,0.30480921268463135,0.0424593855688115
llm_goals_78,test,7,0.5325213074684143,0.3068789421692927
llm_goals_78,test,8,0.36720502376556396,0.0254129292421898
llm_goals_78,test,9,0.20111887156963348,0.0407835069490974
llm_goals_78,test,10,0.7617608308792114,0.9375706694734316
llm_goals_78,test,11,0.7478800415992737,0.7386383432572439
llm_goals_78,test,12,0.4513290822505951,0.3108688169890243
llm_goals_78,test,13,0.3736221492290497,0.0946944663222736
llm_goals_78,test,14,0.40641841292381287,0.1717796546359565
llm_goals_78,test,15,0.3048097789287567,0.1277153625141483
llm_goals_78,test,16,0.4264211058616638,0.0689165589110325
llm_goals_78,test,17,0.6270452737808228,0.0883911059243408
llm_goals_78,test,18,0.3260476887226105,0.0337617130750813
llm_goals_78,test,19,0.6442899107933044,0.7169547264343438
llm_goals_78,test,20,0.3894350230693817,0.1528222000997753
llm_goals_78,test,21,0.31114453077316284,0.0350685532944032
llm_goals_78,test,22,0.5221465826034546,0.0940461009061424
llm_goals_78,test,23,0.9045392870903015,0.9060885414573472
llm_goals_78,test,24,0.4754829406738281,0.0217941677583094
llm_goals_78,test,25,0.6937886476516724,0.9667879364041514
llm_goals_78,test,26,0.3535989820957184,0.0797221269088608
llm_goals_78,test,27,0.6231606006622314,0.0603014558146647
llm_goals_78,test,28,0.3128522038459778,0.019185604671188
llm_goals_78,test,29,0.5113152265548706,0.181847707574815
llm_goals_78,test,30,0.7894316911697388,0.9576328540155246
llm_goals_78,test,31,0.3572993278503418,0.1600616173680257
llm_goals_78,test,32,0.8341837525367737,0.8196381975442921
llm_goals_78,test,33,0.42517805099487305,0.0444875750236097
llm_goals_78,test,34,0.782209575176239,0.9277514916547148
llm_goals_78,test,35,0.5425791144371033,0.1986446962511548
llm_goals_78,test,36,0.626878023147583,0.9359311183925404
llm_goals_78,test,37,0.6506589651107788,0.8760354323987716
llm_goals_78,test,38,0.4852229952812195,0.4610991071324792
llm_goals_78,test,39,0.4189034104347229,0.2972845381897555
llm_goals_78,test,40,0.8396516442298889,0.7716125042657197
llm_goals_78,test,41,0.5354094505310059,0.1328309068036672
llm_goals_78,test,42,0.6121910810470581,0.3354286767519867
llm_goals_78,test,43,0.28914451599121094,0.086342843675339
llm_goals_78,test,44,0.3242572247982025,0.0958279087075554
llm_goals_78,test,45,0.24148951470851898,0.1302156649936615
llm_goals_78,test,46,0.9098809361457825,0.8983792614483327
llm_goals_78,test,47,0.3839553892612457,0.1775307323333411
llm_goals_78,test,48,0.44570985436439514,0.0658487938814022
llm_goals_78,test,49,0.5131084322929382,0.079644576255816
llm_goals_420,test,0,0.6452524065971375,0.7165124795044365
llm_goals_420,test,1,0.5144399404525757,0.7608911842606508
llm_goals_420,test,2,0.5293800830841064,0.799936385630717
llm_goals_420,test,3,0.41272076964378357,0.2211062209422303
llm_goals_420,test,4,0.34208834171295166,0.2926466150380465
llm_goals_420,test,5,0.603537380695343,0.9140651066289046
llm_goals_420,test,6,0.6335048675537109,0.4131756456187543
llm_goals_420,test,7,0.6656029224395752,0.6643261466520904
llm_goals_420,test,8,0.6300139427185059,0.3578404079978603
llm_goals_420,test,9,0.7978590130805969,0.8413468212597344
llm_goals_420,test,10,0.4375379979610443,0.4981438990065536
llm_goals_420,test,11,0.553569495677948,0.2468097596316683
llm_goals_420,test,12,0.32741838693618774,0.1549754451331909
llm_goals_420,test,13,0.5856706500053406,0.8951478256933764
llm_goals_420,test,14,0.25075846910476685,0.0722168451540495
llm_goals_420,test,15,0.4921718239784241,0.7175548720080566
llm_goals_420,test,16,0.24865520000457764,0.0482770035552138
llm_goals_420,test,17,0.5705963969230652,0.9171404716362525
llm_goals_420,test,18,0.4470345079898834,0.2149325509708838
llm_goals_420,test,19,0.43785735964775085,0.9314370105190324
llm_goals_420,test,20,0.4883260726928711,0.6958358689335516
llm_goals_420,test,21,0.5721017718315125,0.6712961637420737
llm_goals_420,test,22,0.7652136087417603,0.9091188630862124
llm_goals_420,test,23,0.2796998620033264,0.080457132763502
llm_goals_420,test,24,0.3383323848247528,0.0910185464003223
llm_goals_420,test,25,0.648379385471344,0.0266374948315803
llm_goals_420,test,26,0.3385462164878845,0.3477530565713536
llm_goals_420,test,27,0.5785107016563416,0.8201931514993901
llm_goals_420,test,28,0.5052183270454407,0.2425383031071764
llm_goals_420,test,29,0.6787643432617188,0.4181605367795475
llm_goals_420,test,30,0.44200369715690613,0.0299293846265948
llm_goals_420,test,31,0.5387336611747742,0.940670822547668
llm_goals_420,test,32,0.4960111379623413,0.0196824789824236
llm_goals_420,test,33,0.25782445073127747,0.1078959069076236
llm_goals_420,test,34,0.6516913771629333,0.2191371931392249
llm_goals_420,test,35,0.5962620377540588,0.9034152914594604
llm_goals_420,test,36,0.35413089394569397,0.548380428606202
llm_goals_420,test,37,0.6883077025413513,0.50388122703708
llm_goals_420,test,38,0.4162202477455139,0.4294891851736946
llm_goals_420,test,39,0.44618499279022217,0.4382416051683284
llm_goals_420,test,40,0.4122951030731201,0.141879820547579
llm_goals_420,test,41,0.6111758947372437,0.6151086701978562
llm_goals_420,test,42,0.4388589560985565,0.7490416767979864
llm_goals_420,test,43,0.47760072350502014,0.6273048386641107
llm_goals_420,test,44,0.4124796986579895,0.8162625944607043
llm_goals_420,test,45,0.34198522567749023,0.3347655055973148
llm_goals_420,test,46,0.24364511668682098,0.2248938224655197
llm_goals_420,test,47,0.2531750798225403,0.7293239346358632
llm_goals_420,test,48,0.3750400245189667,0.1530180333157787
llm_goals_420,test,49,0.6567445993423462,0.9588707841469084
llm_goals_53,test,0,0.7881709933280945,0.976097291167194
llm_goals_53,test,1,0.744315505027771,0.2598265618814769
llm_goals_53,test,2,0.8885679841041565,0.9770793290161108
llm_goals_53,test,3,0.793415904045105,0.4768547215758607
llm_goals_53,test,4,0.710182785987854,0.2361872878274303
llm_goals_53,test,5,0.4589375853538513,0.0986317022184206
llm_goals_53,test,6,0.49087271094322205,0.0477100161960414
llm_goals_53,test,7,0.783103883266449,0.937994454790112
llm_goals_53,test,8,0.606081485748291,0.2701156822033339
llm_goals_53,test,9,0.5260046720504761,0.2989108711032385
llm_goals_53,test,10,0.5016827583312988,0.5544564065686126
llm_goals_53,test,11,0.5718874335289001,0.3953618346843373
llm_goals_53,test,12,0.4773387610912323,0.1335346211746911
llm_goals_53,test,13,0.5113754272460938,0.0585116233918817
llm_goals_53,test,14,0.5338683724403381,0.1376328700771095
llm_goals_53,test,15,0.5516586303710938,0.4350138720043813
llm_goals_53,test,16,0.6194707751274109,0.0717024446401859
llm_goals_53,test,17,0.6987603306770325,0.6563471973646152
llm_goals_53,test,18,0.4935392737388611,0.2165785858914521
llm_goals_53,test,19,0.7222910523414612,0.3746358016952676
llm_goals_53,test,20,0.6385234594345093,0.5337460360023749
llm_goals_53,test,21,0.3873107135295868,0.0467709874601332
llm_goals_53,test,22,0.7144659161567688,0.7796763173252493
llm_goals_53,test,23,0.5050844550132751,0.3205915991497378
llm_goals_53,test,24,0.5015316009521484,0.0640075835802484
llm_goals_53,test,25,0.6610367298126221,0.833565084619613
llm_goals_53,test,26,0.41747888922691345,0.0854875283183964
llm_goals_53,test,27,0.7441323399543762,0.9078949534688217
llm_goals_53,test,28,0.686263918876648,0.3980814368097108
llm_goals_53,test,29,0.5700670480728149,0.0495337152571608
llm_goals_53,test,30,0.7088539600372314,0.49697645864992
llm_goals_53,test,31,0.5728517174720764,0.4685591079414317
llm_goals_53,test,32,0.5635418891906738,0.0870796105685882
llm_goals_53,test,33,0.5689834952354431,0.0188204043218978
llm_goals_53,test,34,0.6332400441169739,0.5566033569957117
llm_goals_53,test,35,0.614516019821167,0.9313365617424948
llm_goals_53,test,36,0.7965514063835144,0.9304982364910718
llm_goals_53,test,37,0.6037588119506836,0.1916887084340081
llm_goals_53,test,38,0.47132954001426697,0.0179322256288976
llm_goals_53,test,39,0.37612488865852356,0.042191597199028
llm_goals_53,test,40,0.5632070302963257,0.4480237505797492
llm_goals_53,test,41,0.5419937372207642,0.199711476764323
llm_goals_53,test,42,0.7761027216911316,0.984673662230153
llm_goals_53,test,43,0.4505281150341034,0.1015360353200591
llm_goals_53,test,44,0.7703145146369934,0.2398468184361071
llm_goals_53,test,45,0.7780094146728516,0.5147270663759912
llm_goals_53,test,46,0.607116162776947,0.6026394622908158
llm_goals_53,test,47,0.6871325969696045,0.7105197565308571
llm_goals_53,test,48,0.6012088656425476,0.1056143884471251
llm_goals_53,test,49,0.6672489047050476,0.2875693652635701
llm_goals_427,test,0,0.9160258173942566,0.9912377865392056
llm_goals_427,test,1,0.4345095753669739,0.1393504988400085
llm_goals_427,test,2,0.8953573107719421,0.9826717605764466
llm_goals_427,test,3,0.8995893597602844,0.9914966630190112
llm_goals_427,test,4,0.2114596962928772,0.0287440390481499
llm_goals_427,test,5,0.4421307146549225,0.1508234315553654
llm_goals_427,test,6,0.2758401036262512,0.0241177394276523
llm_goals_427,test,7,0.8759503960609436,0.9533421090109172
llm_goals_427,test,8,0.5461329221725464,0.1357554081265164
llm_goals_427,test,9,0.6839566826820374,0.0422408742214398
llm_goals_427,test,10,0.686677098274231,0.3560677064830195
llm_goals_427,test,11,0.7014920115470886,0.2848587352361455
llm_goals_427,test,12,0.444533109664917,0.0783011900572995
llm_goals_427,test,13,0.7057621479034424,0.053588048642731
llm_goals_427,test,14,0.7381207942962646,0.1755007793342038
llm_goals_427,test,15,0.35847294330596924,0.03905897487628
llm_goals_427,test,16,0.36880412697792053,0.0861236052612804
llm_goals_427,test,17,0.6688891649246216,0.2907015821684348
llm_goals_427,test,18,0.42318904399871826,0.0468453325440364
llm_goals_427,test,19,0.92348712682724,0.7510010963524218
llm_goals_427,test,20,0.5181393027305603,0.0467429391605962
llm_goals_427,test,21,0.7775433659553528,0.1304366461429133
llm_goals_427,test,22,0.6800128221511841,0.1246622694694481
llm_goals_427,test,23,0.6793652176856995,0.5644455097827197
llm_goals_427,test,24,0.3139178454875946,0.0414160339225927
llm_goals_427,test,25,0.8429326415061951,0.8926284254200217
llm_goals_427,test,26,0.5691421031951904,0.0591453922508401
llm_goals_427,test,27,0.7466055154800415,0.5016671352552752
llm_goals_427,test,28,0.8349202871322632,0.9189449605602464
llm_goals_427,test,29,0.7004192471504211,0.5267252864404038
llm_goals_427,test,30,0.4821053445339203,0.3196802851986922
llm_goals_427,test,31,0.7494990229606628,0.0622742618121558
llm_goals_427,test,32,0.40796297788619995,0.2393552597896355
llm_goals_427,test,33,0.19610340893268585,0.0317774218950805
llm_goals_427,test,34,0.5277693867683411,0.2969897604985466
llm_goals_427,test,35,0.8194776177406311,0.911826685378058
llm_goals_427,test,36,0.8270428776741028,0.5044385498946317
llm_goals_427,test,37,0.9356603622436523,0.961152239017662
llm_goals_427,test,38,0.5930790305137634,0.383885411527425
llm_goals_427,test,39,0.6987460851669312,0.0767754062509803
llm_goals_427,test,40,0.7293986678123474,0.3019915478879681
llm_goals_427,test,41,0.5896251201629639,0.0597208539505394
llm_goals_427,test,42,0.9442416429519653,0.9798415326165176
llm_goals_427,test,43,0.5372920036315918,0.0828234646890313
llm_goals_427,test,44,0.4362129867076874,0.1075404432723246
llm_goals_427,test,45,0.7776950597763062,0.9497563751433828
llm_goals_427,test,46,0.26405271887779236,0.2027056215184361
llm_goals_427,test,47,0.935263454914093,0.9493266919171398
llm_goals_427,test,48,0.507205605506897,0.0344289143960405
llm_goals_427,test,49,0.3486097753047943,0.1952894749193552
llm_goals_186,test,0,0.7757841944694519,0.534925569771277
llm_goals_186,test,1,0.5879901647567749,0.0649953944046378
llm_goals_186,test,2,0.7773895263671875,0.91763923479236
llm_goals_186,test,3,0.5742407441139221,0.3425021516930106
llm_goals_186,test,4,0.557763397693634,0.1153021531057396
llm_goals_186,test,5,0.9086472988128662,0.7928793741281699
llm_goals_186,test,6,0.9166655540466309,0.9038094070559602
llm_goals_186,test,7,0.6408564448356628,0.6367372560980368
llm_goals_186,test,8,0.5579396486282349,0.2126364849604726
llm_goals_186,test,9,0.8727979063987732,0.8694824597543704
llm_goals_186,test,10,0.6229713559150696,0.5802607311846129
llm_goals_186,test,11,0.7149343490600586,0.2225491999116792
llm_goals_186,test,12,0.564714252948761,0.2300986684078339
llm_goals_186,test,13,0.7738999724388123,0.9257638147127129
llm_goals_186,test,14,0.33521461486816406,0.1695718974503099
llm_goals_186,test,15,0.5631950497627258,0.151038541979901
llm_goals_186,test,16,0.3575019836425781,0.0464142133590747
llm_goals_186,test,17,0.6047067642211914,0.0732763222959853
llm_goals_186,test,18,0.411186158657074,0.0482171595539919
llm_goals_186,test,19,0.5220171213150024,0.347547591319228
llm_goals_186,test,20,0.2880305051803589,0.1739822028354879
llm_goals_186,test,21,0.8376463651657104,0.9541198242132036
llm_goals_186,test,22,0.566338062286377,0.049497112506125
llm_goals_186,test,23,0.7756385207176208,0.1810016520865223
llm_goals_186,test,24,0.5147583484649658,0.1851077351512521
llm_goals_186,test,25,0.5263348817825317,0.6713192505556377
llm_goals_186,test,26,0.48296070098876953,0.0711032925420902
llm_goals_186,test,27,0.5519275069236755,0.4081627252904761
llm_goals_186,test,28,0.42180296778678894,0.2745405964802801
llm_goals_186,test,29,0.5357558727264404,0.1854855714896691
llm_goals_186,test,30,0.48940080404281616,0.7354449882906446
llm_goals_186,test,31,0.7564159035682678,0.9912562823764032
llm_goals_186,test,32,0.6565201878547668,0.2618042375153737
llm_goals_186,test,33,0.4638294577598572,0.0337875336716617
llm_goals_186,test,34,0.5675586462020874,0.6428971957222914
llm_goals_186,test,35,0.7088022828102112,0.7900459432200765
llm_goals_186,test,36,0.663368284702301,0.8730032999439788
llm_goals_186,test,37,0.4886744022369385,0.0649927594327202
llm_goals_186,test,38,0.7396020889282227,0.0471132735562561
llm_goals_186,test,39,0.4785209000110626,0.2481877539812294
llm_goals_186,test,40,0.6960668563842773,0.2111860555781098
llm_goals_186,test,41,0.916604220867157,0.8712882416841639
llm_goals_186,test,42,0.7064979672431946,0.6193248853412385
llm_goals_186,test,43,0.8885908126831055,0.8760979623283635
llm_goals_186,test,44,0.43049219250679016,0.0767958111106498
llm_goals_186,test,45,0.5893487930297852,0.0848440049881495
llm_goals_186,test,46,0.8106555938720703,0.6601266976660584
llm_goals_186,test,47,0.3914959728717804,0.4818652455075419
llm_goals_186,test,48,0.31898748874664307,0.1752603358335205
llm_goals_186,test,49,0.5816860198974609,0.027320507820661
llm_goals_133,test,0,0.6726321578025818,0.1030518399074919
llm_goals_133,test,1,0.39086467027664185,0.1009270067527766
llm_goals_133,test,2,0.3489243984222412,0.4669147668825159
llm_goals_133,test,3,0.34787577390670776,0.9886211587587188
llm_goals_133,test,4,0.39868274331092834,0.6589999206031925
llm_goals_133,test,5,0.5002128481864929,0.0478680529217008
llm_goals_133,test,6,0.525651216506958,0.0213367665689539
llm_goals_133,test,7,0.41698428988456726,0.06300162626541
llm_goals_133,test,8,0.42126375436782837,0.8613404237563876
llm_goals_133,test,9,0.3612121641635895,0.0502374017351794
llm_goals_133,test,10,0.3754298686981201,0.0728398857397339
llm_goals_133,test,11,0.3504602313041687,0.0886358546776828
llm_goals_133,test,12,0.47740307450294495,0.4714116383526195
llm_goals_133,test,13,0.1990661323070526,0.0406440348343292
llm_goals_133,test,14,0.5002797842025757,0.8416674339341796
llm_goals_133,test,15,0.5416242480278015,0.5989281494815651
llm_goals_133,test,16,0.2804276943206787,0.9641061261695968
llm_goals_133,test,17,0.6104268431663513,0.0521719884710187
llm_goals_133,test,18,0.6039624214172363,0.9257678218156756
llm_goals_133,test,19,0.315751314163208,0.0409677417198195
llm_goals_133,test,20,0.3963388204574585,0.9201317528016504
llm_goals_133,test,21,0.35757651925086975,0.0611858046803647
llm_goals_133,test,22,0.5709978342056274,0.0749444450486417
llm_goals_133,test,23,0.41719892621040344,0.484676139228053
llm_goals_133,test,24,0.5669886469841003,0.8221724152075577
llm_goals_133,test,25,0.5697988867759705,0.2957602203771531
llm_goals_133,test,26,0.37128952145576477,0.8046360378637115
llm_goals_133,test,27,0.46569475531578064,0.0759761030516968
llm_goals_133,test,28,0.42953988909721375,0.9366340572155704
llm_goals_133,test,29,0.4491743743419647,0.1272708658186502
llm_goals_133,test,30,0.61101233959198,0.1227999985147976
llm_goals_133,test,31,0.3452053666114807,0.0136010194313778
llm_goals_133,test,32,0.26106148958206177,0.3597276462870885
llm_goals_133,test,33,0.5713827610015869,0.8726937465962196
llm_goals_133,test,34,0.2978784739971161,0.2894476365444396
llm_goals_133,test,35,0.4087361991405487,0.0165111879681815
llm_goals_133,test,36,0.5281176567077637,0.0576655273020702
llm_goals_133,test,37,0.36930057406425476,0.2554780098122276
llm_goals_133,test,38,0.5091466903686523,0.032809734462924
llm_goals_133,test,39,0.48308995366096497,0.1481243946785008
llm_goals_133,test,40,0.44207963347435,0.4462128198301662
llm_goals_133,test,41,0.29671627283096313,0.093264815138027
llm_goals_133,test,42,0.50338214635849,0.2383228605626679
llm_goals_133,test,43,0.23491442203521729,0.196654087051587
llm_goals_133,test,44,0.4373337924480438,0.9747002088111656
llm_goals_133,test,45,0.3988085687160492,0.9831163433792584
llm_goals_133,test,46,0.48507562279701233,0.1097011751248294
llm_goals_133,test,47,0.4017598628997803,0.8283354147237331
llm_goals_133,test,48,0.4552188515663147,0.8799294836893965
llm_goals_133,test,49,0.4429728090763092,0.2364671831052382
llm_goals_81,test,0,0.049755536019802094,0.1758977435560022
llm_goals_81,test,1,0.21164250373840332,0.4421252700970533
llm_goals_81,test,2,0.2052825540304184,0.5612444031805882
llm_goals_81,test,3,0.18143488466739655,0.3079533253993743
llm_goals_81,test,4,0.18311506509780884,0.1763212150593861
llm_goals_81,test,5,0.24408148229122162,0.9704465583106344
llm_goals_81,test,6,0.28558605909347534,0.9821312875358365
llm_goals_81,test,7,0.11074880510568619,0.3270685617728195
llm_goals_81,test,8,0.0768393874168396,0.4843756215736013
llm_goals_81,test,9,0.3953613042831421,0.9933654977534896
llm_goals_81,test,10,0.16353677213191986,0.4711097472541105
llm_goals_81,test,11,0.13165608048439026,0.1673212093996636
llm_goals_81,test,12,0.1325407773256302,0.2643530158966523
llm_goals_81,test,13,0.2310355305671692,0.9714035818561728
llm_goals_81,test,14,0.13510587811470032,0.9265405587707818
llm_goals_81,test,15,0.1621706187725067,0.1723195082090535
llm_goals_81,test,16,0.242536723613739,0.9360974516501586
llm_goals_81,test,17,0.1063399389386177,0.4363540220565442
llm_goals_81,test,18,0.25499171018600464,0.9611565911249424
llm_goals_81,test,19,0.14033201336860657,0.0324757314893466
llm_goals_81,test,20,0.17429670691490173,0.6006056520074408
llm_goals_81,test,21,0.24160194396972656,0.9971859123159944
llm_goals_81,test,22,0.12316423654556274,0.0870371051461664
llm_goals_81,test,23,0.11909416317939758,0.2408633212672459
llm_goals_81,test,24,0.21631094813346863,0.9951702226622482
llm_goals_81,test,25,0.12440377473831177,0.6867633490140795
llm_goals_81,test,26,0.44939205050468445,0.9718244652881064
llm_goals_81,test,27,0.21645329892635345,0.4588186617124049
llm_goals_81,test,28,0.1454697549343109,0.5322330497164953
llm_goals_81,test,29,0.11504234373569489,0.2121016556019813
llm_goals_81,test,30,0.15840958058834076,0.6774810136296258
llm_goals_81,test,31,0.5331085920333862,0.9816113605758902
llm_goals_81,test,32,0.14878500998020172,0.2656694493977183
llm_goals_81,test,33,0.2604147791862488,0.942492448912156
llm_goals_81,test,34,0.11002025753259659,0.6429005762747164
llm_goals_81,test,35,0.06471814960241318,0.1528167702888629
llm_goals_81,test,36,0.2204025238752365,0.4512348514816248
llm_goals_81,test,37,0.1307874470949173,0.0605006450173409
llm_goals_81,test,38,0.2543223202228546,0.0274755323758623
llm_goals_81,test,39,0.08523920178413391,0.2279901766778581
llm_goals_81,test,40,0.13110464811325073,0.2775541527352398
llm_goals_81,test,41,0.2642366290092468,0.9829489706314426
llm_goals_81,test,42,0.08057851344347,0.2494339762077133
llm_goals_81,test,43,0.3256494104862213,0.9819424303599844
llm_goals_81,test,44,0.17555077373981476,0.632760349651464
llm_goals_81,test,45,0.1446380913257599,0.090990563553155
llm_goals_81,test,46,0.14922542870044708,0.0910438800719751
llm_goals_81,test,47,0.25794723629951477,0.1292081639058968
llm_goals_81,test,48,0.27201563119888306,0.974747372736112
llm_goals_81,test,49,0.11444169282913208,0.2645580764012665
llm_goals_93,test,0,0.5248690247535706,0.5635269221536086
llm_goals_93,test,1,0.18869346380233765,0.8801163792253427
llm_goals_93,test,2,0.3701620101928711,0.0696003588446044
llm_goals_93,test,3,0.24315208196640015,0.8186048531566104
llm_goals_93,test,4,0.1458340734243393,0.2222011584760154
llm_goals_93,test,5,0.23160216212272644,0.1268359519713418
llm_goals_93,test,6,0.2071285843849182,0.4582622566067956
llm_goals_93,test,7,0.2711535096168518,0.429479690525812
llm_goals_93,test,8,0.12971849739551544,0.1536290973166499
llm_goals_93,test,9,0.23117606341838837,0.6112573193671209
llm_goals_93,test,10,0.5311886072158813,0.7998858649804967
llm_goals_93,test,11,0.27617597579956055,0.5454222135748753
llm_goals_93,test,12,0.3358345031738281,0.2508427619545539
llm_goals_93,test,13,0.2823125422000885,0.0884842413878226
llm_goals_93,test,14,0.5154730081558228,0.6768600290465995
llm_goals_93,test,15,0.2658657133579254,0.0352718405868165
llm_goals_93,test,16,0.534313976764679,0.8009568965617743
llm_goals_93,test,17,0.10344784706830978,0.8092096317591537
llm_goals_93,test,18,0.5348014831542969,0.963804174465268
llm_goals_93,test,19,0.23599036037921906,0.9058854167265824
llm_goals_93,test,20,0.22772976756095886,0.265390180652688
llm_goals_93,test,21,0.10864295810461044,0.1541113602215154
llm_goals_93,test,22,0.4451679587364197,0.9766790566784582
llm_goals_93,test,23,0.35028788447380066,0.7843335253318285
llm_goals_93,test,24,0.1360897719860077,0.548404369330273
llm_goals_93,test,25,0.7341263890266418,0.9813529135147334
llm_goals_93,test,26,0.6267986297607422,0.9397963116499958
llm_goals_93,test,27,0.2092127501964569,0.542547443300257
llm_goals_93,test,28,0.18612399697303772,0.8277938193324692
llm_goals_93,test,29,0.1813982129096985,0.8549419878819347
llm_goals_93,test,30,0.3794100880622864,0.9761371695174176
llm_goals_93,test,31,0.2828183174133301,0.1774015734695253
llm_goals_93,test,32,0.28828057646751404,0.9590755456587298
llm_goals_93,test,33,0.38717958331108093,0.9496314531215296
llm_goals_93,test,34,0.4588729739189148,0.9112802317013324
llm_goals_93,test,35,0.29238829016685486,0.0646469314244137
llm_goals_93,test,36,0.6353264451026917,0.965643964207344
llm_goals_93,test,37,0.3627668619155884,0.963180821243277
llm_goals_93,test,38,0.33384668827056885,0.9411316422109696
llm_goals_93,test,39,0.24595677852630615,0.1075404372822008
llm_goals_93,test,40,0.43416303396224976,0.959466160286712
llm_goals_93,test,41,0.24704663455486298,0.3698215332403156
llm_goals_93,test,42,0.354850709438324,0.1589404653011508
llm_goals_93,test,43,0.24891668558120728,0.2338719832936831
llm_goals_93,test,44,0.30312082171440125,0.1124984808029749
llm_goals_93,test,45,0.34650638699531555,0.866543330684459
llm_goals_93,test,46,0.28076058626174927,0.8519759554788342
llm_goals_93,test,47,0.47020500898361206,0.8737971308795239
llm_goals_93,test,48,0.48156410455703735,0.8916334225039363
llm_goals_93,test,49,0.31506314873695374,0.8758339889840314
llm_goals_401,test,0,0.20804518461227417,0.1359674012107803
llm_goals_401,test,1,0.1152426227927208,0.939276601509621
llm_goals_401,test,2,0.12085001915693283,0.3090277756984529
llm_goals_401,test,3,0.35637763142585754,0.2822982814806737
llm_goals_401,test,4,0.42849957942962646,0.9227317209028506
llm_goals_401,test,5,0.09561430662870407,0.0683562610885711
llm_goals_401,test,6,0.05351186916232109,0.1529393504632372
llm_goals_401,test,7,0.1322915256023407,0.2291880493623187
llm_goals_401,test,8,0.5155174136161804,0.910221445630232
llm_goals_401,test,9,0.11894507706165314,0.0669374443869285
llm_goals_401,test,10,0.17884425818920135,0.2562041584734201
llm_goals_401,test,11,0.321113646030426,0.1977345307962301
llm_goals_401,test,12,0.3388010859489441,0.9351182671047276
llm_goals_401,test,13,0.06684190779924393,0.084239780653768
llm_goals_401,test,14,0.15157705545425415,0.3803121877997557
llm_goals_401,test,15,0.4547504186630249,0.9255856152963072
llm_goals_401,test,16,0.09516318142414093,0.2187242193427731
llm_goals_401,test,17,0.13754725456237793,0.9062633304766484
llm_goals_401,test,18,0.17448250949382782,0.0989866685824555
llm_goals_401,test,19,0.14249689877033234,0.0798642603957049
llm_goals_401,test,20,0.5720216035842896,0.7283447799445965
llm_goals_401,test,21,0.10748705267906189,0.3040102668224276
llm_goals_401,test,22,0.17369993031024933,0.8871123935548803
llm_goals_401,test,23,0.21265622973442078,0.6306703155992205
llm_goals_401,test,24,0.2117665559053421,0.342468541641041
llm_goals_401,test,25,0.2878681719303131,0.0306332962828717
llm_goals_401,test,26,0.1658860296010971,0.1178928014307589
llm_goals_401,test,27,0.16357186436653137,0.8247832937930811
llm_goals_401,test,28,0.15187864005565643,0.3694959007053917
llm_goals_401,test,29,0.13108587265014648,0.0798909574616735
llm_goals_401,test,30,0.2091684192419052,0.3951674202836078
llm_goals_401,test,31,0.0816798135638237,0.466307960095254
llm_goals_401,test,32,0.3239595890045166,0.1971577420397864
llm_goals_401,test,33,0.08412285149097443,0.1760928155683087
llm_goals_401,test,34,0.2867376208305359,0.2047678657851429
llm_goals_401,test,35,0.23615847527980804,0.2637872561841102
llm_goals_401,test,36,0.12153586745262146,0.4724849406439279
llm_goals_401,test,37,0.32062241435050964,0.0192510310263775
llm_goals_401,test,38,0.11334657669067383,0.1812826057265395
llm_goals_401,test,39,0.5812386274337769,0.905821781657468
llm_goals_401,test,40,0.3139228820800781,0.1146947001091413
llm_goals_401,test,41,0.12168773263692856,0.1032293515072512
llm_goals_401,test,42,0.09684433788061142,0.3834835806051595
llm_goals_401,test,43,0.07669899612665176,0.2508475125103526
llm_goals_401,test,44,0.3342953324317932,0.8248843542416243
llm_goals_401,test,45,0.1379929631948471,0.5847666727108293
llm_goals_401,test,46,0.17032653093338013,0.4045147001032884
llm_goals_401,test,47,0.10663807392120361,0.6278339401682421
llm_goals_401,test,48,0.2588934302330017,0.1361878043204594
llm_goals_401,test,49,0.16055771708488464,0.8435596200846341
