template_id,split,question_idx,prediction,label
llm_goals_146,test,0,0.8752235174179077,0.3466493369029843
llm_goals_146,test,1,0.8607310056686401,0.971693336326244
llm_goals_146,test,2,0.8258222341537476,0.6455769508045827
llm_goals_146,test,3,0.8446733951568604,0.5822222843930172
llm_goals_146,test,4,0.8663978576660156,0.6842324614600077
llm_goals_146,test,5,0.8521867990493774,0.3860201177799698
llm_goals_146,test,6,0.71458500623703,0.2341590231269344
llm_goals_146,test,7,0.8711628317832947,0.3643683725782807
llm_goals_146,test,8,0.8703166842460632,0.7055765903024079
llm_goals_146,test,9,0.8733102679252625,0.5084888304786936
llm_goals_146,test,10,0.8429627418518066,0.421220281945486
llm_goals_146,test,11,0.8651421666145325,0.489412697220974
llm_goals_146,test,12,0.8265495300292969,0.601020744296073
llm_goals_146,test,13,0.596027135848999,0.2557097387485441
llm_goals_146,test,14,0.6426455974578857,0.4640750948087747
llm_goals_146,test,15,0.8741177916526794,0.7362086189794756
llm_goals_146,test,16,0.6245300769805908,0.4987515019820814
llm_goals_146,test,17,0.8524100184440613,0.9587866055408388
llm_goals_146,test,18,0.8764442801475525,0.6036783393019786
llm_goals_146,test,19,0.8655403852462769,0.3513374958540852
llm_goals_146,test,20,0.8244304656982422,0.8281412663626416
llm_goals_146,test,21,0.715599000453949,0.34065502012531
llm_goals_146,test,22,0.8758398294448853,0.9907655718703378
llm_goals_146,test,23,0.8553957939147949,0.2061079946990629
llm_goals_146,test,24,0.8719381093978882,0.3446073251785768
llm_goals_146,test,25,0.861444890499115,0.6576184069508046
llm_goals_146,test,26,0.795630156993866,0.4110159240539976
llm_goals_146,test,27,0.8443465232849121,0.984345377326772
llm_goals_146,test,28,0.8701509833335876,0.4903698662236391
llm_goals_146,test,29,0.8764368295669556,0.362068682571018
llm_goals_146,test,30,0.8742534518241882,0.4551468142410695
llm_goals_146,test,31,0.873666524887085,0.3773142189484755
llm_goals_146,test,32,0.8746209144592285,0.4187681470726767
llm_goals_146,test,33,0.8656987547874451,0.3985742178328911
llm_goals_146,test,34,0.6595620512962341,0.5044141394199024
llm_goals_146,test,35,0.8695591688156128,0.4783486718786691
llm_goals_146,test,36,0.8765304088592529,0.5445552456181276
llm_goals_146,test,37,0.8232161402702332,0.2691607872517095
llm_goals_146,test,38,0.8163292407989502,0.5517828611974395
llm_goals_146,test,39,0.8743797540664673,0.4823935111879393
llm_goals_146,test,40,0.846815288066864,0.496646227759238
llm_goals_146,test,41,0.8743780255317688,0.410176485322602
llm_goals_146,test,42,0.851782500743866,0.2603856324469091
llm_goals_146,test,43,0.5987664461135864,0.555094401775619
llm_goals_146,test,44,0.8731963634490967,0.5218179738711017
llm_goals_146,test,45,0.8769446015357971,0.4447090989140364
llm_goals_146,test,46,0.6334222555160522,0.5567532038584041
llm_goals_146,test,47,0.8646948337554932,0.4034445534741869
llm_goals_146,test,48,0.8726329207420349,0.5335126942675598
llm_goals_146,test,49,0.8665247559547424,0.9775228494630558
llm_goals_293,test,0,0.5366582870483398,0.8297409974007197
llm_goals_293,test,1,0.5601091980934143,0.9166843306899098
llm_goals_293,test,2,0.8726476430892944,0.4850136353820626
llm_goals_293,test,3,0.47845396399497986,0.391440800258883
llm_goals_293,test,4,0.9614309668540955,0.9814967512328644
llm_goals_293,test,5,0.9593305587768555,0.9737153059180352
llm_goals_293,test,6,0.41027697920799255,0.985194742988862
llm_goals_293,test,7,0.9616283178329468,0.8402554946174373
llm_goals_293,test,8,0.477565199136734,0.9863079191276464
llm_goals_293,test,9,0.6570836901664734,0.9549403173664608
llm_goals_293,test,10,0.6611650586128235,0.8132036948651367
llm_goals_293,test,11,0.5225949883460999,0.2287407340843056
llm_goals_293,test,12,0.9262728095054626,0.9235533469292896
llm_goals_293,test,13,0.41917023062705994,0.9750785225411348
llm_goals_293,test,14,0.6586171984672546,0.9922545554063744
llm_goals_293,test,15,0.96591717004776,0.9838300936204548
llm_goals_293,test,16,0.560404896736145,0.9858987926415744
llm_goals_293,test,17,0.4137401878833771,0.9556264290993132
llm_goals_293,test,18,0.5942816734313965,0.9458334455175702
llm_goals_293,test,19,0.5615996718406677,0.5736353307249014
llm_goals_293,test,20,0.9419472217559814,0.9812628634528624
llm_goals_293,test,21,0.8960914015769958,0.975720704169484
llm_goals_293,test,22,0.7541842460632324,0.9409119473005016
llm_goals_293,test,23,0.4078892171382904,0.6306895569552333
llm_goals_293,test,24,0.44348931312561035,0.9831594820284292
llm_goals_293,test,25,0.4900757968425751,0.8981066919341846
llm_goals_293,test,26,0.537660539150238,0.9716764680193836
llm_goals_293,test,27,0.8462783694267273,0.8155843585345258
llm_goals_293,test,28,0.6028120517730713,0.9733620875264264
llm_goals_293,test,29,0.427237331867218,0.9051490452062086
llm_goals_293,test,30,0.906252920627594,0.7348576092486574
llm_goals_293,test,31,0.6777085661888123,0.9808893198314528
llm_goals_293,test,32,0.9663673639297485,0.593206056427014
llm_goals_293,test,33,0.4128076732158661,0.9657488455537
llm_goals_293,test,34,0.9053791165351868,0.8851169960520849
llm_goals_293,test,35,0.9156527519226074,0.8555944515949592
llm_goals_293,test,36,0.4886051118373871,0.5855041803376898
llm_goals_293,test,37,0.6555726528167725,0.6409415533832579
llm_goals_293,test,38,0.39346349239349365,0.8743998352900223
llm_goals_293,test,39,0.8486848473548889,0.8182019034258088
llm_goals_293,test,40,0.4381779730319977,0.146751538769175
llm_goals_293,test,41,0.9527953267097473,0.9709715243488852
llm_goals_293,test,42,0.9100712537765503,0.8829771283195118
llm_goals_293,test,43,0.8693251013755798,0.9398047936750782
llm_goals_293,test,44,0.9544104933738708,0.9795592430097252
llm_goals_293,test,45,0.5512887239456177,0.7025255505490043
llm_goals_293,test,46,0.41280385851860046,0.6531552142747035
llm_goals_293,test,47,0.9121909141540527,0.8527514903629965
llm_goals_293,test,48,0.9410086274147034,0.9844985009129076
llm_goals_293,test,49,0.7901478409767151,0.9160653951101896
llm_goals_415,test,0,0.9227730631828308,0.945942102532565
llm_goals_415,test,1,0.9759195446968079,0.8927012108946369
llm_goals_415,test,2,0.9755764007568359,0.9655095349643328
llm_goals_415,test,3,0.8029745817184448,0.5773151635184978
llm_goals_415,test,4,0.9754993915557861,0.9305359112645492
llm_goals_415,test,5,0.9756728410720825,0.2698413337656404
llm_goals_415,test,6,0.4426145851612091,0.1924170964690193
llm_goals_415,test,7,0.9760755300521851,0.964020489204398
llm_goals_415,test,8,0.9752944707870483,0.9764932994994092
llm_goals_415,test,9,0.44401586055755615,0.3491418165522914
llm_goals_415,test,10,0.936942994594574,0.8933741476361363
llm_goals_415,test,11,0.9755223989486694,0.920598685397126
llm_goals_415,test,12,0.6170507073402405,0.6909930118311174
llm_goals_415,test,13,0.8955388069152832,0.1145420932706047
llm_goals_415,test,14,0.977026641368866,0.9423774493447
llm_goals_415,test,15,0.9751455783843994,0.9532362831473804
llm_goals_415,test,16,0.9570826292037964,0.8356775665343158
llm_goals_415,test,17,0.976043164730072,0.9361823813143908
llm_goals_415,test,18,0.9750601053237915,0.9889487026200174
llm_goals_415,test,19,0.975104808807373,0.6368843297568499
llm_goals_415,test,20,0.9758759140968323,0.9870089292806328
llm_goals_415,test,21,0.4441035985946655,0.192645257351692
llm_goals_415,test,22,0.975247859954834,0.9748569718628384
llm_goals_415,test,23,0.9762282371520996,0.8083263035006917
llm_goals_415,test,24,0.9760425090789795,0.931151511592295
llm_goals_415,test,25,0.8530450463294983,0.8863766227722848
llm_goals_415,test,26,0.9756010174751282,0.9640595915946516
llm_goals_415,test,27,0.9599272012710571,0.9081509748500972
llm_goals_415,test,28,0.9120878577232361,0.702391734675441
llm_goals_415,test,29,0.9750262498855591,0.5027195285232966
llm_goals_415,test,30,0.9768929481506348,0.9145678910630844
llm_goals_415,test,31,0.4378572702407837,0.2428483145137429
llm_goals_415,test,32,0.9765426516532898,0.8852615952458959
llm_goals_415,test,33,0.8596280217170715,0.9135139614061104
llm_goals_415,test,34,0.9126490950584412,0.9013304127749656
llm_goals_415,test,35,0.9489849209785461,0.9492589831139694
llm_goals_415,test,36,0.9758769273757935,0.9187255502309962
llm_goals_415,test,37,0.976633608341217,0.5449707044122137
llm_goals_415,test,38,0.976010262966156,0.5046841390470121
llm_goals_415,test,39,0.7659590840339661,0.6393683546149316
llm_goals_415,test,40,0.9748471975326538,0.958167434065704
llm_goals_415,test,41,0.9758384823799133,0.1258629101169009
llm_goals_415,test,42,0.9754239320755005,0.9370320411522076
llm_goals_415,test,43,0.4375711977481842,0.4502328517486405
llm_goals_415,test,44,0.9763917326927185,0.9666150226098564
llm_goals_415,test,45,0.9754500389099121,0.4875853574104955
llm_goals_415,test,46,0.9763100147247314,0.9433028700684638
llm_goals_415,test,47,0.976922869682312,0.6681809390819066
llm_goals_415,test,48,0.9754393696784973,0.9737072586261016
llm_goals_415,test,49,0.9753053188323975,0.9706154467965924
llm_goals_326,test,0,0.8412187099456787,0.7080152699628419
llm_goals_326,test,1,0.6727941632270813,0.1915128822536962
llm_goals_326,test,2,0.8375542163848877,0.7444471737433097
llm_goals_326,test,3,0.849812924861908,0.4392720971297062
llm_goals_326,test,4,0.8840710520744324,0.891602221308631
llm_goals_326,test,5,0.8325470685958862,0.0516888735952717
llm_goals_326,test,6,0.09943483769893646,0.0431335229249402
llm_goals_326,test,7,0.8543372750282288,0.7478485077290631
llm_goals_326,test,8,0.8828296065330505,0.8572844895817008
llm_goals_326,test,9,0.05438879877328873,0.0757120509371778
llm_goals_326,test,10,0.7999597191810608,0.6528841155476689
llm_goals_326,test,11,0.8500512838363647,0.1274303379860786
llm_goals_326,test,12,0.649986743927002,0.2755885259868873
llm_goals_326,test,13,0.054347191005945206,0.0177855343138265
llm_goals_326,test,14,0.8415219187736511,0.6106618044247591
llm_goals_326,test,15,0.8678270578384399,0.7686960692919514
llm_goals_326,test,16,0.841233491897583,0.78510933722289
llm_goals_326,test,17,0.7572131752967834,0.1510306836677899
llm_goals_326,test,18,0.8263117671012878,0.714622257156643
llm_goals_326,test,19,0.8636170625686646,0.7195213393137665
llm_goals_326,test,20,0.8844171166419983,0.7418285539231787
llm_goals_326,test,21,0.05808775871992111,0.0439102190505529
llm_goals_326,test,22,0.798836886882782,0.1806737802484422
llm_goals_326,test,23,0.8565084934234619,0.2841042501811218
llm_goals_326,test,24,0.8249460458755493,0.7647552401874187
llm_goals_326,test,25,0.736990749835968,0.6430861301277235
llm_goals_326,test,26,0.8432255983352661,0.6501767709406938
llm_goals_326,test,27,0.15498337149620056,0.1962727505123375
llm_goals_326,test,28,0.462542861700058,0.454183079329165
llm_goals_326,test,29,0.8704301714897156,0.4340101008130515
llm_goals_326,test,30,0.7405219674110413,0.7979558074116693
llm_goals_326,test,31,0.05605292320251465,0.0525898871260873
llm_goals_326,test,32,0.8683379888534546,0.1343880762883031
llm_goals_326,test,33,0.8381484746932983,0.7783031325129874
llm_goals_326,test,34,0.7405420541763306,0.6498840537412343
llm_goals_326,test,35,0.8713814616203308,0.8262182481972753
llm_goals_326,test,36,0.74770188331604,0.5329381900017961
llm_goals_326,test,37,0.7999116778373718,0.7623008977444935
llm_goals_326,test,38,0.8909384608268738,0.4787708612720514
llm_goals_326,test,39,0.4271612763404846,0.3940936731364524
llm_goals_326,test,40,0.8731791973114014,0.1554473532877406
llm_goals_326,test,41,0.057775065302848816,0.0835985162339854
llm_goals_326,test,42,0.8550592064857483,0.6165240441256894
llm_goals_326,test,43,0.06569015979766846,0.0224093895342384
llm_goals_326,test,44,0.8821815848350525,0.9028450273437764
llm_goals_326,test,45,0.3843979239463806,0.4414288558565103
llm_goals_326,test,46,0.881867527961731,0.3316083963903907
llm_goals_326,test,47,0.625856339931488,0.5242446701777307
llm_goals_326,test,48,0.8385510444641113,0.6202593994623453
llm_goals_326,test,49,0.7219077348709106,0.1810686795284525
llm_goals_401,test,0,0.9486632347106934,0.995337188206034
llm_goals_401,test,1,0.9401397109031677,0.9877679942135538
llm_goals_401,test,2,0.9392310976982117,0.9922930596197688
llm_goals_401,test,3,0.9422348737716675,0.944885851250094
llm_goals_401,test,4,0.9436570405960083,0.9994499087491504
llm_goals_401,test,5,0.7651200294494629,0.9963147043479246
llm_goals_401,test,6,0.9417749643325806,0.9937722087866844
llm_goals_401,test,7,0.9418618679046631,0.9823669190452718
llm_goals_401,test,8,0.9391269683837891,0.999488890156012
llm_goals_401,test,9,0.9415950179100037,0.9958814380530916
llm_goals_401,test,10,0.94106125831604,0.9936545489877364
llm_goals_401,test,11,0.9404705762863159,0.9890559909640764
llm_goals_401,test,12,0.9399333596229553,0.974935352749168
llm_goals_401,test,13,0.1995183676481247,0.9811005000644571
llm_goals_401,test,14,0.6728673577308655,0.9053577183425524
llm_goals_401,test,15,0.9399405121803284,0.999511659155082
llm_goals_401,test,16,0.6653110384941101,0.9069459477271428
llm_goals_401,test,17,0.9393593668937683,0.9880303739527652
llm_goals_401,test,18,0.6707574725151062,0.8849021227503215
llm_goals_401,test,19,0.9417961239814758,0.8461972444129129
llm_goals_401,test,20,0.9391789436340332,0.9985405802478567
llm_goals_401,test,21,0.9426528215408325,0.9979217051338364
llm_goals_401,test,22,0.940018355846405,0.9942128658052918
llm_goals_401,test,23,0.9465133547782898,0.9829282756728752
llm_goals_401,test,24,0.6895575523376465,0.9755103584971708
llm_goals_401,test,25,0.9392849206924438,0.9854062793275654
llm_goals_401,test,26,0.6728101968765259,0.9862527255879427
llm_goals_401,test,27,0.9384196400642395,0.9777193074990224
llm_goals_401,test,28,0.8913965821266174,0.939797105410212
llm_goals_401,test,29,0.9420819878578186,0.924205125757662
llm_goals_401,test,30,0.9406440258026123,0.9887794851583942
llm_goals_401,test,31,0.9415899515151978,0.9974054097423743
llm_goals_401,test,32,0.9415894150733948,0.987442612237454
llm_goals_401,test,33,0.8452345728874207,0.9077114440216438
llm_goals_401,test,34,0.9400277137756348,0.9898619654171366
llm_goals_401,test,35,0.14957787096500397,0.9887833003411676
llm_goals_401,test,36,0.666286826133728,0.9883841875213925
llm_goals_401,test,37,0.682830274105072,0.7709992025246631
llm_goals_401,test,38,0.943580687046051,0.9687479729312614
llm_goals_401,test,39,0.9404500722885132,0.9758216144428412
llm_goals_401,test,40,0.9400593042373657,0.9954323769170262
llm_goals_401,test,41,0.9400182366371155,0.9970808029010324
llm_goals_401,test,42,0.9411196708679199,0.9970337152076278
llm_goals_401,test,43,0.9425095319747925,0.9963160753068976
llm_goals_401,test,44,0.9383707046508789,0.999649882329424
llm_goals_401,test,45,0.9483339190483093,0.9708126775062408
llm_goals_401,test,46,0.1498681902885437,0.9777507190974738
llm_goals_401,test,47,0.9376746416091919,0.879110748685256
llm_goals_401,test,48,0.7814012169837952,0.9452995669117286
llm_goals_401,test,49,0.9399994015693665,0.9904525877618487
llm_goals_397,test,0,0.7333943247795105,0.699619236076976
llm_goals_397,test,1,0.5186901092529297,0.7858562431683396
llm_goals_397,test,2,0.6923584938049316,0.8656621011318779
llm_goals_397,test,3,0.7589691877365112,0.5216834406120067
llm_goals_397,test,4,0.2787601351737976,0.2170933847941593
llm_goals_397,test,5,0.274781197309494,0.6203395193658312
llm_goals_397,test,6,0.47151705622673035,0.7062451920643317
llm_goals_397,test,7,0.6956173181533813,0.7818592804776917
llm_goals_397,test,8,0.3542095720767975,0.3722692260862962
llm_goals_397,test,9,0.25454723834991455,0.7525421946277221
llm_goals_397,test,10,0.32176679372787476,0.55382257151224
llm_goals_397,test,11,0.2590883672237396,0.654531962180046
llm_goals_397,test,12,0.25760629773139954,0.659030860866389
llm_goals_397,test,13,0.370805025100708,0.5080189660267803
llm_goals_397,test,14,0.2755041718482971,0.93949913908347
llm_goals_397,test,15,0.2752211391925812,0.1397603306643144
llm_goals_397,test,16,0.3844895362854004,0.94748652041643
llm_goals_397,test,17,0.5911856293678284,0.8337419557614654
llm_goals_397,test,18,0.898423969745636,0.929486218624307
llm_goals_397,test,19,0.490425705909729,0.6917306210780744
llm_goals_397,test,20,0.2755277156829834,0.3168072461941615
llm_goals_397,test,21,0.6550680994987488,0.7050710313870211
llm_goals_397,test,22,0.2970374524593353,0.9048640732247422
llm_goals_397,test,23,0.2564849853515625,0.6896020664732718
llm_goals_397,test,24,0.8976948261260986,0.9588320250969324
llm_goals_397,test,25,0.681707501411438,0.7992448229336531
llm_goals_397,test,26,0.2546575367450714,0.925966146729264
llm_goals_397,test,27,0.5021007061004639,0.8290411876343606
llm_goals_397,test,28,0.5373938083648682,0.614512804730325
llm_goals_397,test,29,0.5056021809577942,0.8119643351773586
llm_goals_397,test,30,0.6780498623847961,0.6416279718371558
llm_goals_397,test,31,0.6627889275550842,0.6679576084359291
llm_goals_397,test,32,0.4397028982639313,0.7497660034408138
llm_goals_397,test,33,0.28645071387290955,0.9406859295331328
llm_goals_397,test,34,0.6511244773864746,0.6256499877247926
llm_goals_397,test,35,0.7064218521118164,0.6832026805931318
llm_goals_397,test,36,0.47693321108818054,0.688132397136647
llm_goals_397,test,37,0.6498556137084961,0.6918761795541711
llm_goals_397,test,38,0.49661287665367126,0.5633524658577074
llm_goals_397,test,39,0.7043500542640686,0.6565008680077707
llm_goals_397,test,40,0.2953554093837738,0.6958896526078188
llm_goals_397,test,41,0.6435391306877136,0.757736441343734
llm_goals_397,test,42,0.7073044776916504,0.8490422913608605
llm_goals_397,test,43,0.6611385941505432,0.7079678798274159
llm_goals_397,test,44,0.271148145198822,0.1092321450870402
llm_goals_397,test,45,0.27488386631011963,0.5166526556585691
llm_goals_397,test,46,0.2542974650859833,0.5419103560441939
llm_goals_397,test,47,0.5135966539382935,0.4142756611326425
llm_goals_397,test,48,0.901003360748291,0.956440745261376
llm_goals_397,test,49,0.6297324299812317,0.7473642876631974
llm_goals_228,test,0,0.28566303849220276,0.8649997140208154
llm_goals_228,test,1,0.27594608068466187,0.873321951241939
llm_goals_228,test,2,0.7160254716873169,0.8957700764194163
llm_goals_228,test,3,0.2354883849620819,0.1236903018304219
llm_goals_228,test,4,0.7128834128379822,0.7453361748877467
llm_goals_228,test,5,0.1776387244462967,0.1192843072628729
llm_goals_228,test,6,0.13460896909236908,0.1326971908799849
llm_goals_228,test,7,0.19076010584831238,0.871489699671336
llm_goals_228,test,8,0.6986765265464783,0.8243934448117555
llm_goals_228,test,9,0.39329853653907776,0.0467115483491378
llm_goals_228,test,10,0.4003499746322632,0.7659628877266128
llm_goals_228,test,11,0.16187410056591034,0.7679518438112958
llm_goals_228,test,12,0.3122538626194,0.3261574116268692
llm_goals_228,test,13,0.1770496517419815,0.0485964531599145
llm_goals_228,test,14,0.4317813515663147,0.9059895277949974
llm_goals_228,test,15,0.34561458230018616,0.7273862898347316
llm_goals_228,test,16,0.7805408239364624,0.9158116003766568
llm_goals_228,test,17,0.2798219919204712,0.2983101183881344
llm_goals_228,test,18,0.7836850881576538,0.9541860240679364
llm_goals_228,test,19,0.5434136390686035,0.8476448713173524
llm_goals_228,test,20,0.2941892743110657,0.7696132021860423
llm_goals_228,test,21,0.22275809943675995,0.2240634772421697
llm_goals_228,test,22,0.622897207736969,0.5944716948164779
llm_goals_228,test,23,0.15933507680892944,0.8035987641686457
llm_goals_228,test,24,0.7418830394744873,0.8954167374248716
llm_goals_228,test,25,0.15722262859344482,0.7151555300243474
llm_goals_228,test,26,0.2835383117198944,0.933582548003466
llm_goals_228,test,27,0.47466298937797546,0.7197431412077714
llm_goals_228,test,28,0.41915562748908997,0.2110212364220297
llm_goals_228,test,29,0.6826042532920837,0.7475211519371483
llm_goals_228,test,30,0.24317477643489838,0.8439204089849919
llm_goals_228,test,31,0.2754366397857666,0.233187545611893
llm_goals_228,test,32,0.5150302648544312,0.6522564861744167
llm_goals_228,test,33,0.7822955250740051,0.9036141650151318
llm_goals_228,test,34,0.6138200163841248,0.8063092871634532
llm_goals_228,test,35,0.23290809988975525,0.7386503177076705
llm_goals_228,test,36,0.15684874355793,0.4339953557233364
llm_goals_228,test,37,0.3310997188091278,0.7638352005094292
llm_goals_228,test,38,0.550026535987854,0.6360600602425245
llm_goals_228,test,39,0.1839754283428192,0.5582201546461881
llm_goals_228,test,40,0.23862211406230927,0.7919258516091835
llm_goals_228,test,41,0.3947339355945587,0.2382556436276936
llm_goals_228,test,42,0.15899989008903503,0.8180639729723762
llm_goals_228,test,43,0.3650039732456207,0.1744592330547429
llm_goals_228,test,44,0.711092472076416,0.8059483789474448
llm_goals_228,test,45,0.16758668422698975,0.185739159862888
llm_goals_228,test,46,0.3741595447063446,0.6385866390140609
llm_goals_228,test,47,0.2850683033466339,0.181375317343939
llm_goals_228,test,48,0.6139076352119446,0.9407807579284042
llm_goals_228,test,49,0.6166794896125793,0.3930787424904466
llm_goals_236,test,0,0.9143151044845581,0.9502374518284544
llm_goals_236,test,1,0.6388458609580994,0.632901790189933
llm_goals_236,test,2,0.9406206011772156,0.9547987563797488
llm_goals_236,test,3,0.979788601398468,0.8955373773460537
llm_goals_236,test,4,0.9257761836051941,0.604483505999029
llm_goals_236,test,5,0.9122804999351501,0.9645155057204132
llm_goals_236,test,6,0.9153602719306946,0.8400796085543216
llm_goals_236,test,7,0.9230512976646423,0.9809643632475058
llm_goals_236,test,8,0.9113273620605469,0.9551138894715042
llm_goals_236,test,9,0.9101365804672241,0.9323794813876984
llm_goals_236,test,10,0.6669217944145203,0.7794327710983796
llm_goals_236,test,11,0.9490074515342712,0.9780675777326168
llm_goals_236,test,12,0.925508975982666,0.8875326502933057
llm_goals_236,test,13,0.9089944362640381,0.929950178153689
llm_goals_236,test,14,0.6298235058784485,0.926757039035834
llm_goals_236,test,15,0.6316938996315002,0.7910198588237014
llm_goals_236,test,16,0.9082698822021484,0.8930606244937771
llm_goals_236,test,17,0.8403250575065613,0.8600503746114734
llm_goals_236,test,18,0.8772803544998169,0.9478088016198049
llm_goals_236,test,19,0.8839049339294434,0.0557340307108
llm_goals_236,test,20,0.6312896013259888,0.7876626308843195
llm_goals_236,test,21,0.9084720015525818,0.8769842974627694
llm_goals_236,test,22,0.8918876051902771,0.3468135553298878
llm_goals_236,test,23,0.9813251495361328,0.9514921900954876
llm_goals_236,test,24,0.63954758644104,0.9310227652155564
llm_goals_236,test,25,0.6998491883277893,0.6795750924280035
llm_goals_236,test,26,0.8235880136489868,0.7808533270173144
llm_goals_236,test,27,0.899334192276001,0.3548731041307022
llm_goals_236,test,28,0.8646496534347534,0.9628356114282044
llm_goals_236,test,29,0.9087533950805664,0.0678193344934023
llm_goals_236,test,30,0.7082778811454773,0.8669796641519075
llm_goals_236,test,31,0.6390562653541565,0.7901777055635718
llm_goals_236,test,32,0.9086393117904663,0.9761505717506124
llm_goals_236,test,33,0.8844258785247803,0.925212684096036
llm_goals_236,test,34,0.9483343362808228,0.8283609769269727
llm_goals_236,test,35,0.9168275594711304,0.9191289552997411
llm_goals_236,test,36,0.6305878758430481,0.8559995305859318
llm_goals_236,test,37,0.976493239402771,0.0699999500981296
llm_goals_236,test,38,0.8893734812736511,0.1796257560659765
llm_goals_236,test,39,0.9215161800384521,0.9175097319342992
llm_goals_236,test,40,0.8889705538749695,0.9403113739191608
llm_goals_236,test,41,0.9095118641853333,0.8003138232198173
llm_goals_236,test,42,0.913442850112915,0.9656298771148252
llm_goals_236,test,43,0.9087253212928772,0.7679948721037306
llm_goals_236,test,44,0.6424999237060547,0.8397446267621825
llm_goals_236,test,45,0.9255654811859131,0.9383022794052605
llm_goals_236,test,46,0.979975163936615,0.9861661791368868
llm_goals_236,test,47,0.6321889162063599,0.8216616511109771
llm_goals_236,test,48,0.8837860226631165,0.858269332712761
llm_goals_236,test,49,0.6322696805000305,0.4904990431885668
llm_goals_206,test,0,0.7933925986289978,0.7014024312312447
llm_goals_206,test,1,0.10423990339040756,0.09993519180039
llm_goals_206,test,2,0.2688242197036743,0.3805957976908932
llm_goals_206,test,3,0.1050209179520607,0.2506742005767606
llm_goals_206,test,4,0.07852675020694733,0.168981522681462
llm_goals_206,test,5,0.08370473980903625,0.1396780009361202
llm_goals_206,test,6,0.08790596574544907,0.1710164544129008
llm_goals_206,test,7,0.2505711615085602,0.6071092597712212
llm_goals_206,test,8,0.27134740352630615,0.1978085524873241
llm_goals_206,test,9,0.08991888165473938,0.1702414804858378
llm_goals_206,test,10,0.04981281980872154,0.0308920608149318
llm_goals_206,test,11,0.09435845166444778,0.2459778318217196
llm_goals_206,test,12,0.12177321314811707,0.1179569636055102
llm_goals_206,test,13,0.08606582134962082,0.0251233861816931
llm_goals_206,test,14,0.7603884339332581,0.2426781205526189
llm_goals_206,test,15,0.1240106150507927,0.1278309727108198
llm_goals_206,test,16,0.10247839242219925,0.1146704353190439
llm_goals_206,test,17,0.760012686252594,0.3515281137996084
llm_goals_206,test,18,0.047051478177309036,0.2196923742635269
llm_goals_206,test,19,0.06428942829370499,0.1304369288612395
llm_goals_206,test,20,0.7624126672744751,0.4106940669038348
llm_goals_206,test,21,0.7725529074668884,0.3368795798730228
llm_goals_206,test,22,0.05289025232195854,0.2288948502948852
llm_goals_206,test,23,0.04073568433523178,0.074953807513886
llm_goals_206,test,24,0.04093233495950699,0.2441322028739075
llm_goals_206,test,25,0.16271889209747314,0.3918868995222204
llm_goals_206,test,26,0.24868664145469666,0.1723782187624221
llm_goals_206,test,27,0.08823465555906296,0.0391839098867229
llm_goals_206,test,28,0.09781181812286377,0.4274888337765593
llm_goals_206,test,29,0.12473813444375992,0.2322920572824445
llm_goals_206,test,30,0.044558096677064896,0.1908743178919042
llm_goals_206,test,31,0.7210205793380737,0.3263357686111295
llm_goals_206,test,32,0.11007130146026611,0.1979025671613465
llm_goals_206,test,33,0.09750711172819138,0.3702297199267818
llm_goals_206,test,34,0.04141384735703468,0.2357480876238731
llm_goals_206,test,35,0.20598022639751434,0.1482521268698281
llm_goals_206,test,36,0.8275256752967834,0.5821584340939012
llm_goals_206,test,37,0.06573834270238876,0.2650568670113227
llm_goals_206,test,38,0.1672961264848709,0.1360417554406812
llm_goals_206,test,39,0.15361493825912476,0.2119836423042495
llm_goals_206,test,40,0.04732932895421982,0.0289993447458979
llm_goals_206,test,41,0.04291646182537079,0.3521756872657808
llm_goals_206,test,42,0.753074049949646,0.4575015387432249
llm_goals_206,test,43,0.5216361880302429,0.4938333202476994
llm_goals_206,test,44,0.11093956232070923,0.2787682116116408
llm_goals_206,test,45,0.08583663403987885,0.1994537138307505
llm_goals_206,test,46,0.09816666692495346,0.5462074258839572
llm_goals_206,test,47,0.054236751049757004,0.1243126576899177
llm_goals_206,test,48,0.04903406277298927,0.2961273515821391
llm_goals_206,test,49,0.09608809649944305,0.2344178444588115
llm_goals_438,test,0,0.8263804912567139,0.2314562881184323
llm_goals_438,test,1,0.8539483547210693,0.7762565054203492
llm_goals_438,test,2,0.8322731852531433,0.1997924391817541
llm_goals_438,test,3,0.8263435959815979,0.7992019611592787
llm_goals_438,test,4,0.8300032615661621,0.8049000497082406
llm_goals_438,test,5,0.832830011844635,0.7174028107354763
llm_goals_438,test,6,0.8279396891593933,0.5982684312746689
llm_goals_438,test,7,0.8317769765853882,0.3291190451199206
llm_goals_438,test,8,0.8237735629081726,0.7413057021464516
llm_goals_438,test,9,0.8415693044662476,0.5655428700196011
llm_goals_438,test,10,0.8312342166900635,0.3747542837277384
llm_goals_438,test,11,0.8427790403366089,0.8141028872063276
llm_goals_438,test,12,0.8360777497291565,0.7668041079795853
llm_goals_438,test,13,0.873321533203125,0.3989739137138193
llm_goals_438,test,14,0.8251780271530151,0.630963737596379
llm_goals_438,test,15,0.8295823335647583,0.8688319384714368
llm_goals_438,test,16,0.8249242901802063,0.6505714018494433
llm_goals_438,test,17,0.8506530523300171,0.8432750100181929
llm_goals_438,test,18,0.8328948020935059,0.5454092034362268
llm_goals_438,test,19,0.8752042651176453,0.7865512327498919
llm_goals_438,test,20,0.8293439745903015,0.9279490690754252
llm_goals_438,test,21,0.8689284324645996,0.6012074860500735
llm_goals_438,test,22,0.8344907164573669,0.8641822359830585
llm_goals_438,test,23,0.8305966854095459,0.6703505477568762
llm_goals_438,test,24,0.8416149616241455,0.659909363300309
llm_goals_438,test,25,0.8695815801620483,0.345825906210505
llm_goals_438,test,26,0.8515064120292664,0.6342860330821757
llm_goals_438,test,27,0.8505181670188904,0.7595913377648781
llm_goals_438,test,28,0.8414125442504883,0.8379201892332447
llm_goals_438,test,29,0.8658779263496399,0.7401884809786652
llm_goals_438,test,30,0.825181245803833,0.3998648759112044
llm_goals_438,test,31,0.8309702277183533,0.7480624326316926
llm_goals_438,test,32,0.838809609413147,0.7187081505530665
llm_goals_438,test,33,0.8239719271659851,0.4107650384506188
llm_goals_438,test,34,0.8368360996246338,0.5939725433582219
llm_goals_438,test,35,0.829106867313385,0.3302466757601784
llm_goals_438,test,36,0.8547151684761047,0.5187707473385741
llm_goals_438,test,37,0.8520601391792297,0.7520728212475446
llm_goals_438,test,38,0.854703962802887,0.7995393320092329
llm_goals_438,test,39,0.8334459662437439,0.6185694255280791
llm_goals_438,test,40,0.8313468098640442,0.6220749660889419
llm_goals_438,test,41,0.829889178276062,0.6546343601883343
llm_goals_438,test,42,0.8302647471427917,0.2289113591576547
llm_goals_438,test,43,0.845887303352356,0.7896951428928559
llm_goals_438,test,44,0.8730354309082031,0.8209977715013562
llm_goals_438,test,45,0.8761395812034607,0.8177235133069483
llm_goals_438,test,46,0.829160749912262,0.6895343723542676
llm_goals_438,test,47,0.8745988607406616,0.8889681695339486
llm_goals_438,test,48,0.8282347321510315,0.5208110922241418
llm_goals_438,test,49,0.8462479710578918,0.8657228979999158
llm_goals_182,test,0,0.9657437801361084,0.9879327417690024
llm_goals_182,test,1,0.012727722525596619,0.143590152166184
llm_goals_182,test,2,0.9724847674369812,0.975250065576832
llm_goals_182,test,3,0.03179576247930527,0.0053962595772566
llm_goals_182,test,4,0.9480559825897217,0.9395814535027472
llm_goals_182,test,5,0.7119918465614319,0.7582924861360835
llm_goals_182,test,6,0.7169232964515686,0.5490946196305377
llm_goals_182,test,7,0.9643486142158508,0.9749513260519144
llm_goals_182,test,8,0.9656129479408264,0.9392208432034008
llm_goals_182,test,9,0.6840804815292358,0.644100014754752
llm_goals_182,test,10,0.3367210328578949,0.3220700188782243
llm_goals_182,test,11,0.7744138240814209,0.981547533906946
llm_goals_182,test,12,0.6074743866920471,0.7137040988313296
llm_goals_182,test,13,0.7247717380523682,0.7387330490542552
llm_goals_182,test,14,0.009824780747294426,0.0829307635671844
llm_goals_182,test,15,0.9606161117553711,0.9703041905123228
llm_goals_182,test,16,0.009855560027062893,0.0818078392213706
llm_goals_182,test,17,0.06211487576365471,0.1321702105874928
llm_goals_182,test,18,0.009862024337053299,0.0627371233524436
llm_goals_182,test,19,0.8276873230934143,0.9687731848071606
llm_goals_182,test,20,0.9417970776557922,0.9416436555176496
llm_goals_182,test,21,0.7032408118247986,0.6725942479831736
llm_goals_182,test,22,0.01084035076200962,0.0934677665250981
llm_goals_182,test,23,0.7124137878417969,0.9812074898398124
llm_goals_182,test,24,0.01432445552200079,0.0949295295653811
llm_goals_182,test,25,0.11338330060243607,0.44246206774058
llm_goals_182,test,26,0.009870190173387527,0.0774707640077587
llm_goals_182,test,27,0.05052635073661804,0.0713961149448467
llm_goals_182,test,28,0.03116699866950512,0.0522770850978139
llm_goals_182,test,29,0.8266988396644592,0.95314246463025
llm_goals_182,test,30,0.3423384428024292,0.2504357008006138
llm_goals_182,test,31,0.6557250618934631,0.7161425258649188
llm_goals_182,test,32,0.70386803150177,0.9646060453168028
llm_goals_182,test,33,0.010055548511445522,0.0545762932705501
llm_goals_182,test,34,0.29641228914260864,0.3122018653274729
llm_goals_182,test,35,0.958766758441925,0.9860451225539192
llm_goals_182,test,36,0.28040632605552673,0.281834229591876
llm_goals_182,test,37,0.7434348464012146,0.9542467602371212
llm_goals_182,test,38,0.7532491683959961,0.9552662371080364
llm_goals_182,test,39,0.6270128488540649,0.6622170237731956
llm_goals_182,test,40,0.7435576915740967,0.946633816726873
llm_goals_182,test,41,0.692851185798645,0.7134743981048756
llm_goals_182,test,42,0.9574533700942993,0.9907715917291932
llm_goals_182,test,43,0.6852307915687561,0.7427734633378366
llm_goals_182,test,44,0.9584841132164001,0.932817223580216
llm_goals_182,test,45,0.015475737862288952,0.0223475038345677
llm_goals_182,test,46,0.683077335357666,0.9353007113558736
llm_goals_182,test,47,0.03712048754096031,0.0297953783874237
llm_goals_182,test,48,0.00970983225852251,0.0852689569608422
llm_goals_182,test,49,0.018364280462265015,0.1518944682187261
llm_goals_281,test,0,0.9652186036109924,0.6469683041227705
llm_goals_281,test,1,0.5857418775558472,0.9610940798838222
llm_goals_281,test,2,0.448650062084198,0.5031480224374153
llm_goals_281,test,3,0.5775071382522583,0.7670721923110818
llm_goals_281,test,4,0.5200296640396118,0.8573901670428902
llm_goals_281,test,5,0.9491576552391052,0.9842762939932492
llm_goals_281,test,6,0.9868152737617493,0.9899150137032138
llm_goals_281,test,7,0.4481182396411896,0.6044210331647685
llm_goals_281,test,8,0.8814641833305359,0.8588374254971683
llm_goals_281,test,9,0.9604097604751587,0.9818143245858572
llm_goals_281,test,10,0.9000213742256165,0.9566256410829356
llm_goals_281,test,11,0.9866651296615601,0.9833216073807935
llm_goals_281,test,12,0.9868691563606262,0.9046776287625902
llm_goals_281,test,13,0.9509685635566711,0.98515391356728
llm_goals_281,test,14,0.9606149196624756,0.9050783518109468
llm_goals_281,test,15,0.9654021263122559,0.8143656887719262
llm_goals_281,test,16,0.8864850997924805,0.9030392149866112
llm_goals_281,test,17,0.9180409908294678,0.9782413247928606
llm_goals_281,test,18,0.9356961846351624,0.9394087233364008
llm_goals_281,test,19,0.551395833492279,0.013753239211091
llm_goals_281,test,20,0.9868817329406738,0.9155304405450826
llm_goals_281,test,21,0.9851881265640259,0.9849370716395414
llm_goals_281,test,22,0.5608623027801514,0.9678947939625672
llm_goals_281,test,23,0.9813013076782227,0.9923826456520252
llm_goals_281,test,24,0.9867447018623352,0.8888994485286436
llm_goals_281,test,25,0.7662249207496643,0.8670526725158154
llm_goals_281,test,26,0.9257124662399292,0.901592433398574
llm_goals_281,test,27,0.5414153337478638,0.9485982074237508
llm_goals_281,test,28,0.9636890888214111,0.7677171303083985
llm_goals_281,test,29,0.9331504106521606,0.0052734878464236
llm_goals_281,test,30,0.9562925696372986,0.9557743081128488
llm_goals_281,test,31,0.9790119528770447,0.991380810774403
llm_goals_281,test,32,0.9310483932495117,0.9805269843458853
llm_goals_281,test,33,0.9577497243881226,0.972194253937266
llm_goals_281,test,34,0.9484846591949463,0.8780187362883507
llm_goals_281,test,35,0.4585971236228943,0.6544240022055418
llm_goals_281,test,36,0.9874374270439148,0.8261399254388034
llm_goals_281,test,37,0.5592430830001831,0.0237476361601453
llm_goals_281,test,38,0.9865831732749939,0.2014428856002633
llm_goals_281,test,39,0.8319960832595825,0.9075832337909528
llm_goals_281,test,40,0.9845359921455383,0.9579415929114572
llm_goals_281,test,41,0.9865607023239136,0.9618767488068802
llm_goals_281,test,42,0.45579978823661804,0.6901242783749021
llm_goals_281,test,43,0.9873894453048706,0.991684436764246
llm_goals_281,test,44,0.8848865628242493,0.8707976927725176
llm_goals_281,test,45,0.939669668674469,0.855741209208138
llm_goals_281,test,46,0.9749919176101685,0.9874326582385272
llm_goals_281,test,47,0.5248281955718994,0.8582029976458081
llm_goals_281,test,48,0.9364656209945679,0.9463140960540516
llm_goals_281,test,49,0.5461546182632446,0.9857229593669278
llm_goals_115,test,0,0.8177944421768188,0.6878400979912542
llm_goals_115,test,1,0.8113060593605042,0.9839099052127078
llm_goals_115,test,2,0.8109374046325684,0.6264771219588837
llm_goals_115,test,3,0.8093876242637634,0.5101504336920828
llm_goals_115,test,4,0.8170970678329468,0.4749856076325871
llm_goals_115,test,5,0.8113220930099487,0.8824023049105343
llm_goals_115,test,6,0.8170471787452698,0.8532372770314965
llm_goals_115,test,7,0.8149070143699646,0.6852873624151903
llm_goals_115,test,8,0.8151302933692932,0.7160879332925761
llm_goals_115,test,9,0.8114471435546875,0.8154454841155164
llm_goals_115,test,10,0.8170469403266907,0.4039477654904175
llm_goals_115,test,11,0.817540168762207,0.0754019011174093
llm_goals_115,test,12,0.8152996897697449,0.9216642359611203
llm_goals_115,test,13,0.8116932511329651,0.9023613914305204
llm_goals_115,test,14,0.8161258697509766,0.7385692464553043
llm_goals_115,test,15,0.8166444897651672,0.5273332595390108
llm_goals_115,test,16,0.8114182949066162,0.5319983452316508
llm_goals_115,test,17,0.8120772242546082,0.9818465718466136
llm_goals_115,test,18,0.8114188313484192,0.7098198015331896
llm_goals_115,test,19,0.8124094605445862,0.8100224145820547
llm_goals_115,test,20,0.8109081983566284,0.5463347366319413
llm_goals_115,test,21,0.8121658563613892,0.9708728191294718
llm_goals_115,test,22,0.8104583024978638,0.9564870014981148
llm_goals_115,test,23,0.8162593841552734,0.3427940177342707
llm_goals_115,test,24,0.8170841932296753,0.7216819551218165
llm_goals_115,test,25,0.8152366280555725,0.7118023516747564
llm_goals_115,test,26,0.8166545033454895,0.7495779511143832
llm_goals_115,test,27,0.8115450143814087,0.9876421097288872
llm_goals_115,test,28,0.8123869895935059,0.2396916848864135
llm_goals_115,test,29,0.8164737224578857,0.8734944493526076
llm_goals_115,test,30,0.8159114718437195,0.4720948765146447
llm_goals_115,test,31,0.8174195289611816,0.9613435269013808
llm_goals_115,test,32,0.8162065744400024,0.2802482482345895
llm_goals_115,test,33,0.8114726543426514,0.800091024303248
llm_goals_115,test,34,0.8107672929763794,0.4731748202329244
llm_goals_115,test,35,0.8115061521530151,0.6451464331436837
llm_goals_115,test,36,0.8116865754127502,0.5222874947485241
llm_goals_115,test,37,0.8170714378356934,0.9548006059708632
llm_goals_115,test,38,0.8176316618919373,0.8483833674723993
llm_goals_115,test,39,0.8108003735542297,0.9222807287488012
llm_goals_115,test,40,0.8103567361831665,0.2526171639652511
llm_goals_115,test,41,0.8118876814842224,0.8309168152992626
llm_goals_115,test,42,0.8117129802703857,0.58549510672814
llm_goals_115,test,43,0.8127110600471497,0.9288889171841944
llm_goals_115,test,44,0.8155165314674377,0.8696076826330161
llm_goals_115,test,45,0.8102157711982727,0.3370215724740773
llm_goals_115,test,46,0.8125327825546265,0.0614426001598869
llm_goals_115,test,47,0.8108394145965576,0.5311914101507266
llm_goals_115,test,48,0.8168334364891052,0.6260304408434099
llm_goals_115,test,49,0.8116156458854675,0.9732730991067396
llm_goals_230,test,0,0.9731080532073975,0.93636017960289
llm_goals_230,test,1,0.9892380237579346,0.9080467805464864
llm_goals_230,test,2,0.9608802795410156,0.9492415176604246
llm_goals_230,test,3,0.9848667979240417,0.9883888360800804
llm_goals_230,test,4,0.9221495389938354,0.9308136137248874
llm_goals_230,test,5,0.779708206653595,0.7461662385047432
llm_goals_230,test,6,0.7811709642410278,0.717943379234597
llm_goals_230,test,7,0.949508786201477,0.9498506183856972
llm_goals_230,test,8,0.9237001538276672,0.903114856201669
llm_goals_230,test,9,0.7814126014709473,0.82763987275553
llm_goals_230,test,10,0.9052579402923584,0.9176431321128996
llm_goals_230,test,11,0.9893808364868164,0.9351009100066487
llm_goals_230,test,12,0.9879831075668335,0.9397739779762262
llm_goals_230,test,13,0.7784947752952576,0.7663744634625839
llm_goals_230,test,14,0.9806120991706848,0.9587652680933508
llm_goals_230,test,15,0.9183003902435303,0.9205685886203512
llm_goals_230,test,16,0.9807628393173218,0.9548512114624126
llm_goals_230,test,17,0.9892396926879883,0.9177103078422468
llm_goals_230,test,18,0.9862567782402039,0.9694847470185912
llm_goals_230,test,19,0.9892966151237488,0.241616336852366
llm_goals_230,test,20,0.9357587099075317,0.9513741131303785
llm_goals_230,test,21,0.779822826385498,0.7885476346382756
llm_goals_230,test,22,0.9895092248916626,0.956588746723208
llm_goals_230,test,23,0.9893556237220764,0.8796235970540837
llm_goals_230,test,24,0.9850598573684692,0.9499204774153412
llm_goals_230,test,25,0.8909271955490112,0.7955946265549944
llm_goals_230,test,26,0.9860681295394897,0.96990013164722
llm_goals_230,test,27,0.9893175959587097,0.94318586558855
llm_goals_230,test,28,0.9874628186225891,0.9816553586758804
llm_goals_230,test,29,0.9893580675125122,0.2755945052454056
llm_goals_230,test,30,0.8939929604530334,0.8694230354404326
llm_goals_230,test,31,0.7820585370063782,0.7707924321548216
llm_goals_230,test,32,0.9893396496772766,0.90274804670901
llm_goals_230,test,33,0.9804067015647888,0.964968324648223
llm_goals_230,test,34,0.8957765698432922,0.8786148395823622
llm_goals_230,test,35,0.9782944917678833,0.9659460769293176
llm_goals_230,test,36,0.899864137172699,0.9265550381327228
llm_goals_230,test,37,0.9894114136695862,0.2797244301796934
llm_goals_230,test,38,0.9894472360610962,0.3705167516113265
llm_goals_230,test,39,0.9863312244415283,0.9571521884098596
llm_goals_230,test,40,0.989273726940155,0.9050631524069218
llm_goals_230,test,41,0.782590389251709,0.7826731844385996
llm_goals_230,test,42,0.9744055867195129,0.9429872638799112
llm_goals_230,test,43,0.778678834438324,0.7863413109728788
llm_goals_230,test,44,0.9108035564422607,0.925476613788026
llm_goals_230,test,45,0.9861676096916199,0.9835724833108438
llm_goals_230,test,46,0.9892701506614685,0.9324350988748504
llm_goals_230,test,47,0.9882329106330872,0.9918220640686296
llm_goals_230,test,48,0.9835757613182068,0.9632008068532188
llm_goals_230,test,49,0.9894549250602722,0.9196290944353572
llm_goals_86,test,0,0.9118556380271912,0.8172564483303766
llm_goals_86,test,1,0.9117120504379272,0.2845365633956491
llm_goals_86,test,2,0.9118829965591431,0.9608234754131476
llm_goals_86,test,3,0.9117460250854492,0.4905999470752266
llm_goals_86,test,4,0.9117368459701538,0.5223214595126631
llm_goals_86,test,5,0.9117187261581421,0.9513769165530092
llm_goals_86,test,6,0.9117171168327332,0.951058446008376
llm_goals_86,test,7,0.9117382168769836,0.7200026044712197
llm_goals_86,test,8,0.9117380380630493,0.8539416814851118
llm_goals_86,test,9,0.9118892550468445,0.8409760614216922
llm_goals_86,test,10,0.911733090877533,0.8985089093274398
llm_goals_86,test,11,0.9118782877922058,0.960478126988056
llm_goals_86,test,12,0.9119128584861755,0.8489881816956245
llm_goals_86,test,13,0.9118936061859131,0.4634670890641906
llm_goals_86,test,14,0.9117125868797302,0.4018075540357314
llm_goals_86,test,15,0.9117090702056885,0.8179491136946933
llm_goals_86,test,16,0.9117277264595032,0.5277433486870419
llm_goals_86,test,17,0.9117045402526855,0.7712345746838988
llm_goals_86,test,18,0.9118868112564087,0.89457523897237
llm_goals_86,test,19,0.9119040369987488,0.6892489135704485
llm_goals_86,test,20,0.9118796586990356,0.6353825334363962
llm_goals_86,test,21,0.9117016792297363,0.6530961974169966
llm_goals_86,test,22,0.9118949174880981,0.8880043621770078
llm_goals_86,test,23,0.9118732213973999,0.8809145094341392
llm_goals_86,test,24,0.9117180109024048,0.8164918407571773
llm_goals_86,test,25,0.9117222428321838,0.5604555739923536
llm_goals_86,test,26,0.9118813276290894,0.649475864669537
llm_goals_86,test,27,0.9117431640625,0.951709689395591
llm_goals_86,test,28,0.9118179082870483,0.8387165613894679
llm_goals_86,test,29,0.9117258191108704,0.674672778677278
llm_goals_86,test,30,0.9117284417152405,0.4648188591057368
llm_goals_86,test,31,0.9116967916488647,0.663420132120388
llm_goals_86,test,32,0.9117153286933899,0.6545684404871791
llm_goals_86,test,33,0.9117452502250671,0.8998044750116885
llm_goals_86,test,34,0.9117208123207092,0.4550697205389422
llm_goals_86,test,35,0.9119131565093994,0.7006743460528161
llm_goals_86,test,36,0.9116897583007812,0.8041987455834365
llm_goals_86,test,37,0.9117081165313721,0.5563101271844357
llm_goals_86,test,38,0.9117277264595032,0.8432080747815303
llm_goals_86,test,39,0.9117218852043152,0.8362265811977031
llm_goals_86,test,40,0.9117316603660583,0.9223666199627952
llm_goals_86,test,41,0.9117023348808289,0.8662457492250272
llm_goals_86,test,42,0.9117236733436584,0.8608515909392838
llm_goals_86,test,43,0.9117147326469421,0.7603164939120921
llm_goals_86,test,44,0.9117200970649719,0.7861883669269881
llm_goals_86,test,45,0.911738395690918,0.8184625523204602
llm_goals_86,test,46,0.9117278456687927,0.94641274246688
llm_goals_86,test,47,0.9117135405540466,0.5854129271694068
llm_goals_86,test,48,0.9117380380630493,0.920227709127447
llm_goals_86,test,49,0.9117205739021301,0.938532531863768
