template_id,split,question_idx,prediction,label
llm_goals_146,test,0,0.5791512727737427,0.3466493369029843
llm_goals_146,test,1,0.9007602334022522,0.971693336326244
llm_goals_146,test,2,0.5731834769248962,0.6455769508045827
llm_goals_146,test,3,0.5958911180496216,0.5822222843930172
llm_goals_146,test,4,0.9331358671188354,0.6842324614600077
llm_goals_146,test,5,0.7300992608070374,0.3860201177799698
llm_goals_146,test,6,0.8111069798469543,0.2341590231269344
llm_goals_146,test,7,0.5810909271240234,0.3643683725782807
llm_goals_146,test,8,0.9347020387649536,0.7055765903024079
llm_goals_146,test,9,0.8197762370109558,0.5084888304786936
llm_goals_146,test,10,0.811458170413971,0.421220281945486
llm_goals_146,test,11,0.9047059416770935,0.489412697220974
llm_goals_146,test,12,0.5767506957054138,0.601020744296073
llm_goals_146,test,13,0.597033679485321,0.2557097387485441
llm_goals_146,test,14,0.6029108762741089,0.4640750948087747
llm_goals_146,test,15,0.9182132482528687,0.7362086189794756
llm_goals_146,test,16,0.7540239095687866,0.4987515019820814
llm_goals_146,test,17,0.8673863410949707,0.9587866055408388
llm_goals_146,test,18,0.5809923410415649,0.6036783393019786
llm_goals_146,test,19,0.9206916093826294,0.3513374958540852
llm_goals_146,test,20,0.9128636717796326,0.8281412663626416
llm_goals_146,test,21,0.7157033681869507,0.34065502012531
llm_goals_146,test,22,0.9252482652664185,0.9907655718703378
llm_goals_146,test,23,0.742755115032196,0.2061079946990629
llm_goals_146,test,24,0.5736799240112305,0.3446073251785768
llm_goals_146,test,25,0.8410251140594482,0.6576184069508046
llm_goals_146,test,26,0.8462466597557068,0.4110159240539976
llm_goals_146,test,27,0.6090240478515625,0.984345377326772
llm_goals_146,test,28,0.5983500480651855,0.4903698662236391
llm_goals_146,test,29,0.9289256930351257,0.362068682571018
llm_goals_146,test,30,0.792212188243866,0.4551468142410695
llm_goals_146,test,31,0.7433891892433167,0.3773142189484755
llm_goals_146,test,32,0.8699982166290283,0.4187681470726767
llm_goals_146,test,33,0.5758742690086365,0.3985742178328911
llm_goals_146,test,34,0.6305944919586182,0.5044141394199024
llm_goals_146,test,35,0.5760316848754883,0.4783486718786691
llm_goals_146,test,36,0.711500883102417,0.5445552456181276
llm_goals_146,test,37,0.925042450428009,0.2691607872517095
llm_goals_146,test,38,0.9266459345817566,0.5517828611974395
llm_goals_146,test,39,0.5786667466163635,0.4823935111879393
llm_goals_146,test,40,0.9024039506912231,0.496646227759238
llm_goals_146,test,41,0.8322489261627197,0.410176485322602
llm_goals_146,test,42,0.5919616222381592,0.2603856324469091
llm_goals_146,test,43,0.6286025643348694,0.555094401775619
llm_goals_146,test,44,0.9272214770317078,0.5218179738711017
llm_goals_146,test,45,0.5778558254241943,0.4447090989140364
llm_goals_146,test,46,0.784755527973175,0.5567532038584041
llm_goals_146,test,47,0.5713530778884888,0.4034445534741869
llm_goals_146,test,48,0.5749676823616028,0.5335126942675598
llm_goals_146,test,49,0.9389775991439819,0.9775228494630558
llm_goals_293,test,0,0.7618224620819092,0.8297409974007197
llm_goals_293,test,1,0.39512011408805847,0.9166843306899098
llm_goals_293,test,2,0.5724632740020752,0.4850136353820626
llm_goals_293,test,3,0.5443739295005798,0.391440800258883
llm_goals_293,test,4,0.9720616340637207,0.9814967512328644
llm_goals_293,test,5,0.959604024887085,0.9737153059180352
llm_goals_293,test,6,0.3792541027069092,0.985194742988862
llm_goals_293,test,7,0.8842853903770447,0.8402554946174373
llm_goals_293,test,8,0.6023622155189514,0.9863079191276464
llm_goals_293,test,9,0.9607365727424622,0.9549403173664608
llm_goals_293,test,10,0.5571886897087097,0.8132036948651367
llm_goals_293,test,11,0.5216346383094788,0.2287407340843056
llm_goals_293,test,12,0.7883599400520325,0.9235533469292896
llm_goals_293,test,13,0.4342987835407257,0.9750785225411348
llm_goals_293,test,14,0.9724876880645752,0.9922545554063744
llm_goals_293,test,15,0.9715500473976135,0.9838300936204548
llm_goals_293,test,16,0.7006088495254517,0.9858987926415744
llm_goals_293,test,17,0.37389978766441345,0.9556264290993132
llm_goals_293,test,18,0.9153884649276733,0.9458334455175702
llm_goals_293,test,19,0.5682153701782227,0.5736353307249014
llm_goals_293,test,20,0.971141517162323,0.9812628634528624
llm_goals_293,test,21,0.4258383512496948,0.975720704169484
llm_goals_293,test,22,0.918658435344696,0.9409119473005016
llm_goals_293,test,23,0.3689366579055786,0.6306895569552333
llm_goals_293,test,24,0.35519012808799744,0.9831594820284292
llm_goals_293,test,25,0.8478729128837585,0.8981066919341846
llm_goals_293,test,26,0.9532185196876526,0.9716764680193836
llm_goals_293,test,27,0.9365136027336121,0.8155843585345258
llm_goals_293,test,28,0.5843423008918762,0.9733620875264264
llm_goals_293,test,29,0.385082483291626,0.9051490452062086
llm_goals_293,test,30,0.8386556506156921,0.7348576092486574
llm_goals_293,test,31,0.3669436275959015,0.9808893198314528
llm_goals_293,test,32,0.9672676920890808,0.593206056427014
llm_goals_293,test,33,0.5751613974571228,0.9657488455537
llm_goals_293,test,34,0.8288846611976624,0.8851169960520849
llm_goals_293,test,35,0.7877147793769836,0.8555944515949592
llm_goals_293,test,36,0.3824392259120941,0.5855041803376898
llm_goals_293,test,37,0.8168705701828003,0.6409415533832579
llm_goals_293,test,38,0.3570447862148285,0.8743998352900223
llm_goals_293,test,39,0.7801629900932312,0.8182019034258088
llm_goals_293,test,40,0.601982057094574,0.146751538769175
llm_goals_293,test,41,0.9605703353881836,0.9709715243488852
llm_goals_293,test,42,0.9645997285842896,0.8829771283195118
llm_goals_293,test,43,0.5692387819290161,0.9398047936750782
llm_goals_293,test,44,0.9701761603355408,0.9795592430097252
llm_goals_293,test,45,0.9426306486129761,0.7025255505490043
llm_goals_293,test,46,0.35483139753341675,0.6531552142747035
llm_goals_293,test,47,0.9558691382408142,0.8527514903629965
llm_goals_293,test,48,0.9142051935195923,0.9844985009129076
llm_goals_293,test,49,0.5980191826820374,0.9160653951101896
llm_goals_415,test,0,0.9692776799201965,0.945942102532565
llm_goals_415,test,1,0.9669500589370728,0.8927012108946369
llm_goals_415,test,2,0.9648554921150208,0.9655095349643328
llm_goals_415,test,3,0.7077561020851135,0.5773151635184978
llm_goals_415,test,4,0.9667443633079529,0.9305359112645492
llm_goals_415,test,5,0.6305270195007324,0.2698413337656404
llm_goals_415,test,6,0.5788222551345825,0.1924170964690193
llm_goals_415,test,7,0.9672753214836121,0.964020489204398
llm_goals_415,test,8,0.8901318907737732,0.9764932994994092
llm_goals_415,test,9,0.59724360704422,0.3491418165522914
llm_goals_415,test,10,0.856238842010498,0.8933741476361363
llm_goals_415,test,11,0.9577264189720154,0.920598685397126
llm_goals_415,test,12,0.623638927936554,0.6909930118311174
llm_goals_415,test,13,0.6233727931976318,0.1145420932706047
llm_goals_415,test,14,0.9628495573997498,0.9423774493447
llm_goals_415,test,15,0.967877984046936,0.9532362831473804
llm_goals_415,test,16,0.8954781293869019,0.8356775665343158
llm_goals_415,test,17,0.9732385873794556,0.9361823813143908
llm_goals_415,test,18,0.9607845544815063,0.9889487026200174
llm_goals_415,test,19,0.7820702791213989,0.6368843297568499
llm_goals_415,test,20,0.9682222604751587,0.9870089292806328
llm_goals_415,test,21,0.5895153880119324,0.192645257351692
llm_goals_415,test,22,0.9711506366729736,0.9748569718628384
llm_goals_415,test,23,0.9699022769927979,0.8083263035006917
llm_goals_415,test,24,0.9407551288604736,0.931151511592295
llm_goals_415,test,25,0.8686177134513855,0.8863766227722848
llm_goals_415,test,26,0.967965841293335,0.9640595915946516
llm_goals_415,test,27,0.9752669930458069,0.9081509748500972
llm_goals_415,test,28,0.7971638441085815,0.702391734675441
llm_goals_415,test,29,0.8038257360458374,0.5027195285232966
llm_goals_415,test,30,0.9631809592247009,0.9145678910630844
llm_goals_415,test,31,0.5936891436576843,0.2428483145137429
llm_goals_415,test,32,0.8917765021324158,0.8852615952458959
llm_goals_415,test,33,0.7370138764381409,0.9135139614061104
llm_goals_415,test,34,0.8695175647735596,0.9013304127749656
llm_goals_415,test,35,0.9650431871414185,0.9492589831139694
llm_goals_415,test,36,0.9609561562538147,0.9187255502309962
llm_goals_415,test,37,0.8844427466392517,0.5449707044122137
llm_goals_415,test,38,0.8821696639060974,0.5046841390470121
llm_goals_415,test,39,0.6470142006874084,0.6393683546149316
llm_goals_415,test,40,0.9676690697669983,0.958167434065704
llm_goals_415,test,41,0.6343192458152771,0.1258629101169009
llm_goals_415,test,42,0.961961567401886,0.9370320411522076
llm_goals_415,test,43,0.5810494422912598,0.4502328517486405
llm_goals_415,test,44,0.823523998260498,0.9666150226098564
llm_goals_415,test,45,0.8846936821937561,0.4875853574104955
llm_goals_415,test,46,0.9710366725921631,0.9433028700684638
llm_goals_415,test,47,0.9643734097480774,0.6681809390819066
llm_goals_415,test,48,0.9682263731956482,0.9737072586261016
llm_goals_415,test,49,0.974324107170105,0.9706154467965924
llm_goals_326,test,0,0.8316165208816528,0.7080152699628419
llm_goals_326,test,1,0.6297542452812195,0.1915128822536962
llm_goals_326,test,2,0.792914867401123,0.7444471737433097
llm_goals_326,test,3,0.8826253414154053,0.4392720971297062
llm_goals_326,test,4,0.8626738786697388,0.891602221308631
llm_goals_326,test,5,0.2985377013683319,0.0516888735952717
llm_goals_326,test,6,0.2638610601425171,0.0431335229249402
llm_goals_326,test,7,0.7797701954841614,0.7478485077290631
llm_goals_326,test,8,0.8669190406799316,0.8572844895817008
llm_goals_326,test,9,0.16227558255195618,0.0757120509371778
llm_goals_326,test,10,0.636686384677887,0.6528841155476689
llm_goals_326,test,11,0.8756475448608398,0.1274303379860786
llm_goals_326,test,12,0.551184356212616,0.2755885259868873
llm_goals_326,test,13,0.1615188866853714,0.0177855343138265
llm_goals_326,test,14,0.734855055809021,0.6106618044247591
llm_goals_326,test,15,0.7642439007759094,0.7686960692919514
llm_goals_326,test,16,0.7771369814872742,0.78510933722289
llm_goals_326,test,17,0.6399996876716614,0.1510306836677899
llm_goals_326,test,18,0.7908462285995483,0.714622257156643
llm_goals_326,test,19,0.7923464775085449,0.7195213393137665
llm_goals_326,test,20,0.8461105227470398,0.7418285539231787
llm_goals_326,test,21,0.1709783375263214,0.0439102190505529
llm_goals_326,test,22,0.6234515309333801,0.1806737802484422
llm_goals_326,test,23,0.67244553565979,0.2841042501811218
llm_goals_326,test,24,0.771625816822052,0.7647552401874187
llm_goals_326,test,25,0.8018871545791626,0.6430861301277235
llm_goals_326,test,26,0.7791181206703186,0.6501767709406938
llm_goals_326,test,27,0.16789758205413818,0.1962727505123375
llm_goals_326,test,28,0.6821979284286499,0.454183079329165
llm_goals_326,test,29,0.8694461584091187,0.4340101008130515
llm_goals_326,test,30,0.7400705814361572,0.7979558074116693
llm_goals_326,test,31,0.16359661519527435,0.0525898871260873
llm_goals_326,test,32,0.876255452632904,0.1343880762883031
llm_goals_326,test,33,0.8199498653411865,0.7783031325129874
llm_goals_326,test,34,0.7143153548240662,0.6498840537412343
llm_goals_326,test,35,0.8081730008125305,0.8262182481972753
llm_goals_326,test,36,0.7760645151138306,0.5329381900017961
llm_goals_326,test,37,0.8305843472480774,0.7623008977444935
llm_goals_326,test,38,0.8967947363853455,0.4787708612720514
llm_goals_326,test,39,0.49807801842689514,0.3940936731364524
llm_goals_326,test,40,0.7855213284492493,0.1554473532877406
llm_goals_326,test,41,0.16134455800056458,0.0835985162339854
llm_goals_326,test,42,0.8329447507858276,0.6165240441256894
llm_goals_326,test,43,0.2661108672618866,0.0224093895342384
llm_goals_326,test,44,0.8911939263343811,0.9028450273437764
llm_goals_326,test,45,0.6224622130393982,0.4414288558565103
llm_goals_326,test,46,0.8772341012954712,0.3316083963903907
llm_goals_326,test,47,0.4652976393699646,0.5242446701777307
llm_goals_326,test,48,0.805100679397583,0.6202593994623453
llm_goals_326,test,49,0.5488542914390564,0.1810686795284525
llm_goals_401,test,0,0.8314334154129028,0.995337188206034
llm_goals_401,test,1,0.9433030486106873,0.9877679942135538
llm_goals_401,test,2,0.9437536597251892,0.9922930596197688
llm_goals_401,test,3,0.9436647891998291,0.944885851250094
llm_goals_401,test,4,0.13787102699279785,0.9994499087491504
llm_goals_401,test,5,0.9463493227958679,0.9963147043479246
llm_goals_401,test,6,0.9521145224571228,0.9937722087866844
llm_goals_401,test,7,0.9495706558227539,0.9823669190452718
llm_goals_401,test,8,0.9479236602783203,0.999488890156012
llm_goals_401,test,9,0.9401516318321228,0.9958814380530916
llm_goals_401,test,10,0.9461902976036072,0.9936545489877364
llm_goals_401,test,11,0.9443196058273315,0.9890559909640764
llm_goals_401,test,12,0.9467335939407349,0.974935352749168
llm_goals_401,test,13,0.14157508313655853,0.9811005000644571
llm_goals_401,test,14,0.6019748449325562,0.9053577183425524
llm_goals_401,test,15,0.9512952566146851,0.999511659155082
llm_goals_401,test,16,0.6646623611450195,0.9069459477271428
llm_goals_401,test,17,0.9474325776100159,0.9880303739527652
llm_goals_401,test,18,0.6947225332260132,0.8849021227503215
llm_goals_401,test,19,0.9461408853530884,0.8461972444129129
llm_goals_401,test,20,0.9518508315086365,0.9985405802478567
llm_goals_401,test,21,0.9484610557556152,0.9979217051338364
llm_goals_401,test,22,0.9431765079498291,0.9942128658052918
llm_goals_401,test,23,0.9459667205810547,0.9829282756728752
llm_goals_401,test,24,0.6743003129959106,0.9755103584971708
llm_goals_401,test,25,0.9470281004905701,0.9854062793275654
llm_goals_401,test,26,0.6068299412727356,0.9862527255879427
llm_goals_401,test,27,0.9477412104606628,0.9777193074990224
llm_goals_401,test,28,0.9454068541526794,0.939797105410212
llm_goals_401,test,29,0.9404498934745789,0.924205125757662
llm_goals_401,test,30,0.9458824992179871,0.9887794851583942
llm_goals_401,test,31,0.9525798559188843,0.9974054097423743
llm_goals_401,test,32,0.9441376328468323,0.987442612237454
llm_goals_401,test,33,0.13725338876247406,0.9077114440216438
llm_goals_401,test,34,0.9456825256347656,0.9898619654171366
llm_goals_401,test,35,0.9475528597831726,0.9887833003411676
llm_goals_401,test,36,0.9449160099029541,0.9883841875213925
llm_goals_401,test,37,0.8209521174430847,0.7709992025246631
llm_goals_401,test,38,0.9449146389961243,0.9687479729312614
llm_goals_401,test,39,0.9436982274055481,0.9758216144428412
llm_goals_401,test,40,0.9422743916511536,0.9954323769170262
llm_goals_401,test,41,0.9433502554893494,0.9970808029010324
llm_goals_401,test,42,0.945152759552002,0.9970337152076278
llm_goals_401,test,43,0.94745934009552,0.9963160753068976
llm_goals_401,test,44,0.9486061930656433,0.999649882329424
llm_goals_401,test,45,0.9477365016937256,0.9708126775062408
llm_goals_401,test,46,0.14154230058193207,0.9777507190974738
llm_goals_401,test,47,0.9453270435333252,0.879110748685256
llm_goals_401,test,48,0.6883940696716309,0.9452995669117286
llm_goals_401,test,49,0.9534363150596619,0.9904525877618487
llm_goals_397,test,0,0.731791615486145,0.699619236076976
llm_goals_397,test,1,0.37888696789741516,0.7858562431683396
llm_goals_397,test,2,0.6547026634216309,0.8656621011318779
llm_goals_397,test,3,0.6299281120300293,0.5216834406120067
llm_goals_397,test,4,0.2533230185508728,0.2170933847941593
llm_goals_397,test,5,0.6020143032073975,0.6203395193658312
llm_goals_397,test,6,0.6856152415275574,0.7062451920643317
llm_goals_397,test,7,0.6621561050415039,0.7818592804776917
llm_goals_397,test,8,0.2329312562942505,0.3722692260862962
llm_goals_397,test,9,0.6386074423789978,0.7525421946277221
llm_goals_397,test,10,0.6825741529464722,0.55382257151224
llm_goals_397,test,11,0.26994016766548157,0.654531962180046
llm_goals_397,test,12,0.37336742877960205,0.659030860866389
llm_goals_397,test,13,0.5924643278121948,0.5080189660267803
llm_goals_397,test,14,0.7224995493888855,0.93949913908347
llm_goals_397,test,15,0.26196539402008057,0.1397603306643144
llm_goals_397,test,16,0.7611894011497498,0.94748652041643
llm_goals_397,test,17,0.5079134702682495,0.8337419557614654
llm_goals_397,test,18,0.7986015677452087,0.929486218624307
llm_goals_397,test,19,0.2652572989463806,0.6917306210780744
llm_goals_397,test,20,0.26580575108528137,0.3168072461941615
llm_goals_397,test,21,0.6379959583282471,0.7050710313870211
llm_goals_397,test,22,0.3276056945323944,0.9048640732247422
llm_goals_397,test,23,0.22948560118675232,0.6896020664732718
llm_goals_397,test,24,0.7986993193626404,0.9588320250969324
llm_goals_397,test,25,0.7130464911460876,0.7992448229336531
llm_goals_397,test,26,0.7381059527397156,0.925966146729264
llm_goals_397,test,27,0.5437147617340088,0.8290411876343606
llm_goals_397,test,28,0.29708799719810486,0.614512804730325
llm_goals_397,test,29,0.2581796944141388,0.8119643351773586
llm_goals_397,test,30,0.7132539749145508,0.6416279718371558
llm_goals_397,test,31,0.6718745827674866,0.6679576084359291
llm_goals_397,test,32,0.24071930348873138,0.7497660034408138
llm_goals_397,test,33,0.7416021227836609,0.9406859295331328
llm_goals_397,test,34,0.682444155216217,0.6256499877247926
llm_goals_397,test,35,0.7391107678413391,0.6832026805931318
llm_goals_397,test,36,0.7114958167076111,0.688132397136647
llm_goals_397,test,37,0.2508256435394287,0.6918761795541711
llm_goals_397,test,38,0.4278337061405182,0.5633524658577074
llm_goals_397,test,39,0.6126433610916138,0.6565008680077707
llm_goals_397,test,40,0.287859171628952,0.6958896526078188
llm_goals_397,test,41,0.5066888928413391,0.757736441343734
llm_goals_397,test,42,0.7287217974662781,0.8490422913608605
llm_goals_397,test,43,0.6545565724372864,0.7079678798274159
llm_goals_397,test,44,0.26860371232032776,0.1092321450870402
llm_goals_397,test,45,0.28230687975883484,0.5166526556585691
llm_goals_397,test,46,0.23613549768924713,0.5419103560441939
llm_goals_397,test,47,0.4512782096862793,0.4142756611326425
llm_goals_397,test,48,0.7997626662254333,0.956440745261376
llm_goals_397,test,49,0.68086838722229,0.7473642876631974
llm_goals_228,test,0,0.1722424179315567,0.8649997140208154
llm_goals_228,test,1,0.1754782497882843,0.873321951241939
llm_goals_228,test,2,0.6998310089111328,0.8957700764194163
llm_goals_228,test,3,0.16397158801555634,0.1236903018304219
llm_goals_228,test,4,0.706348180770874,0.7453361748877467
llm_goals_228,test,5,0.16157613694667816,0.1192843072628729
llm_goals_228,test,6,0.16602997481822968,0.1326971908799849
llm_goals_228,test,7,0.6686804890632629,0.871489699671336
llm_goals_228,test,8,0.6889762878417969,0.8243934448117555
llm_goals_228,test,9,0.15948866307735443,0.0467115483491378
llm_goals_228,test,10,0.17077353596687317,0.7659628877266128
llm_goals_228,test,11,0.16964608430862427,0.7679518438112958
llm_goals_228,test,12,0.16502517461776733,0.3261574116268692
llm_goals_228,test,13,0.16435077786445618,0.0485964531599145
llm_goals_228,test,14,0.1927804797887802,0.9059895277949974
llm_goals_228,test,15,0.6846508383750916,0.7273862898347316
llm_goals_228,test,16,0.6924918293952942,0.9158116003766568
llm_goals_228,test,17,0.16906136274337769,0.2983101183881344
llm_goals_228,test,18,0.6888924837112427,0.9541860240679364
llm_goals_228,test,19,0.6920989751815796,0.8476448713173524
llm_goals_228,test,20,0.17090384662151337,0.7696132021860423
llm_goals_228,test,21,0.1674729883670807,0.2240634772421697
llm_goals_228,test,22,0.17375066876411438,0.5944716948164779
llm_goals_228,test,23,0.16834722459316254,0.8035987641686457
llm_goals_228,test,24,0.7069722414016724,0.8954167374248716
llm_goals_228,test,25,0.5774574279785156,0.7151555300243474
llm_goals_228,test,26,0.2941727638244629,0.933582548003466
llm_goals_228,test,27,0.1747891753911972,0.7197431412077714
llm_goals_228,test,28,0.16411270201206207,0.2110212364220297
llm_goals_228,test,29,0.7022331357002258,0.7475211519371483
llm_goals_228,test,30,0.17467227578163147,0.8439204089849919
llm_goals_228,test,31,0.16011974215507507,0.233187545611893
llm_goals_228,test,32,0.1920069009065628,0.6522564861744167
llm_goals_228,test,33,0.7023283243179321,0.9036141650151318
llm_goals_228,test,34,0.7020546197891235,0.8063092871634532
llm_goals_228,test,35,0.6809567809104919,0.7386503177076705
llm_goals_228,test,36,0.6041761636734009,0.4339953557233364
llm_goals_228,test,37,0.6330276727676392,0.7638352005094292
llm_goals_228,test,38,0.6825512647628784,0.6360600602425245
llm_goals_228,test,39,0.16661527752876282,0.5582201546461881
llm_goals_228,test,40,0.1724773496389389,0.7919258516091835
llm_goals_228,test,41,0.15964186191558838,0.2382556436276936
llm_goals_228,test,42,0.6647601127624512,0.8180639729723762
llm_goals_228,test,43,0.15943653881549835,0.1744592330547429
llm_goals_228,test,44,0.689095675945282,0.8059483789474448
llm_goals_228,test,45,0.1645621359348297,0.185739159862888
llm_goals_228,test,46,0.1705355942249298,0.6385866390140609
llm_goals_228,test,47,0.1648464947938919,0.181375317343939
llm_goals_228,test,48,0.7089186906814575,0.9407807579284042
llm_goals_228,test,49,0.17474204301834106,0.3930787424904466
llm_goals_236,test,0,0.9268882870674133,0.9502374518284544
llm_goals_236,test,1,0.6938409805297852,0.632901790189933
llm_goals_236,test,2,0.9149590134620667,0.9547987563797488
llm_goals_236,test,3,0.983604371547699,0.8955373773460537
llm_goals_236,test,4,0.8973193764686584,0.604483505999029
llm_goals_236,test,5,0.93982994556427,0.9645155057204132
llm_goals_236,test,6,0.9214068651199341,0.8400796085543216
llm_goals_236,test,7,0.9421794414520264,0.9809643632475058
llm_goals_236,test,8,0.968411922454834,0.9551138894715042
llm_goals_236,test,9,0.9041949510574341,0.9323794813876984
llm_goals_236,test,10,0.6864395141601562,0.7794327710983796
llm_goals_236,test,11,0.6947758793830872,0.9780675777326168
llm_goals_236,test,12,0.8787602186203003,0.8875326502933057
llm_goals_236,test,13,0.9186340570449829,0.929950178153689
llm_goals_236,test,14,0.6933563351631165,0.926757039035834
llm_goals_236,test,15,0.6933254599571228,0.7910198588237014
llm_goals_236,test,16,0.8709459900856018,0.8930606244937771
llm_goals_236,test,17,0.9112777709960938,0.8600503746114734
llm_goals_236,test,18,0.9134700894355774,0.9478088016198049
llm_goals_236,test,19,0.6935579776763916,0.0557340307108
llm_goals_236,test,20,0.6945441961288452,0.7876626308843195
llm_goals_236,test,21,0.9470965266227722,0.8769842974627694
llm_goals_236,test,22,0.8828457593917847,0.3468135553298878
llm_goals_236,test,23,0.9855799674987793,0.9514921900954876
llm_goals_236,test,24,0.6914600729942322,0.9310227652155564
llm_goals_236,test,25,0.8796764612197876,0.6795750924280035
llm_goals_236,test,26,0.6932404041290283,0.7808533270173144
llm_goals_236,test,27,0.871465802192688,0.3548731041307022
llm_goals_236,test,28,0.9144371151924133,0.9628356114282044
llm_goals_236,test,29,0.9612787961959839,0.0678193344934023
llm_goals_236,test,30,0.858697235584259,0.8669796641519075
llm_goals_236,test,31,0.6859533190727234,0.7901777055635718
llm_goals_236,test,32,0.9897261261940002,0.9761505717506124
llm_goals_236,test,33,0.79592365026474,0.925212684096036
llm_goals_236,test,34,0.9862099289894104,0.8283609769269727
llm_goals_236,test,35,0.928114116191864,0.9191289552997411
llm_goals_236,test,36,0.6937963962554932,0.8559995305859318
llm_goals_236,test,37,0.9890640377998352,0.0699999500981296
llm_goals_236,test,38,0.8373544812202454,0.1796257560659765
llm_goals_236,test,39,0.8789856433868408,0.9175097319342992
llm_goals_236,test,40,0.8783242702484131,0.9403113739191608
llm_goals_236,test,41,0.8961793780326843,0.8003138232198173
llm_goals_236,test,42,0.9270021915435791,0.9656298771148252
llm_goals_236,test,43,0.9424220323562622,0.7679948721037306
llm_goals_236,test,44,0.6930720806121826,0.8397446267621825
llm_goals_236,test,45,0.9885503649711609,0.9383022794052605
llm_goals_236,test,46,0.9852803349494934,0.9861661791368868
llm_goals_236,test,47,0.6937865018844604,0.8216616511109771
llm_goals_236,test,48,0.7941617369651794,0.858269332712761
llm_goals_236,test,49,0.6947224140167236,0.4904990431885668
llm_goals_206,test,0,0.8873036503791809,0.7014024312312447
llm_goals_206,test,1,0.2141198068857193,0.09993519180039
llm_goals_206,test,2,0.5690045952796936,0.3805957976908932
llm_goals_206,test,3,0.20318244397640228,0.2506742005767606
llm_goals_206,test,4,0.10118087381124496,0.168981522681462
llm_goals_206,test,5,0.10364209115505219,0.1396780009361202
llm_goals_206,test,6,0.10857616364955902,0.1710164544129008
llm_goals_206,test,7,0.49260827898979187,0.6071092597712212
llm_goals_206,test,8,0.49231770634651184,0.1978085524873241
llm_goals_206,test,9,0.11773581057786942,0.1702414804858378
llm_goals_206,test,10,0.06476196646690369,0.0308920608149318
llm_goals_206,test,11,0.14699965715408325,0.2459778318217196
llm_goals_206,test,12,0.10506699979305267,0.1179569636055102
llm_goals_206,test,13,0.08537864685058594,0.0251233861816931
llm_goals_206,test,14,0.6879762411117554,0.2426781205526189
llm_goals_206,test,15,0.12454940378665924,0.1278309727108198
llm_goals_206,test,16,0.20073896646499634,0.1146704353190439
llm_goals_206,test,17,0.7437596917152405,0.3515281137996084
llm_goals_206,test,18,0.08124148100614548,0.2196923742635269
llm_goals_206,test,19,0.06392870843410492,0.1304369288612395
llm_goals_206,test,20,0.6693929433822632,0.4106940669038348
llm_goals_206,test,21,0.7423301935195923,0.3368795798730228
llm_goals_206,test,22,0.07616857439279556,0.2288948502948852
llm_goals_206,test,23,0.06447580456733704,0.074953807513886
llm_goals_206,test,24,0.06730373948812485,0.2441322028739075
llm_goals_206,test,25,0.1459503173828125,0.3918868995222204
llm_goals_206,test,26,0.30887410044670105,0.1723782187624221
llm_goals_206,test,27,0.08578888326883316,0.0391839098867229
llm_goals_206,test,28,0.1514042764902115,0.4274888337765593
llm_goals_206,test,29,0.12758813798427582,0.2322920572824445
llm_goals_206,test,30,0.06941217929124832,0.1908743178919042
llm_goals_206,test,31,0.6862934827804565,0.3263357686111295
llm_goals_206,test,32,0.12787532806396484,0.1979025671613465
llm_goals_206,test,33,0.27925583720207214,0.3702297199267818
llm_goals_206,test,34,0.06518158316612244,0.2357480876238731
llm_goals_206,test,35,0.23136618733406067,0.1482521268698281
llm_goals_206,test,36,0.907582700252533,0.5821584340939012
llm_goals_206,test,37,0.06822434812784195,0.2650568670113227
llm_goals_206,test,38,0.14390280842781067,0.1360417554406812
llm_goals_206,test,39,0.11510305106639862,0.2119836423042495
llm_goals_206,test,40,0.06226157024502754,0.0289993447458979
llm_goals_206,test,41,0.07224533706903458,0.3521756872657808
llm_goals_206,test,42,0.6703070998191833,0.4575015387432249
llm_goals_206,test,43,0.6784267425537109,0.4938333202476994
llm_goals_206,test,44,0.1810738742351532,0.2787682116116408
llm_goals_206,test,45,0.10889598727226257,0.1994537138307505
llm_goals_206,test,46,0.299317866563797,0.5462074258839572
llm_goals_206,test,47,0.06796830892562866,0.1243126576899177
llm_goals_206,test,48,0.09235689789056778,0.2961273515821391
llm_goals_206,test,49,0.24743084609508514,0.2344178444588115
llm_goals_438,test,0,0.866706132888794,0.2314562881184323
llm_goals_438,test,1,0.872851550579071,0.7762565054203492
llm_goals_438,test,2,0.8731275796890259,0.1997924391817541
llm_goals_438,test,3,0.8671772480010986,0.7992019611592787
llm_goals_438,test,4,0.8731991052627563,0.8049000497082406
llm_goals_438,test,5,0.8669136762619019,0.7174028107354763
llm_goals_438,test,6,0.8726605176925659,0.5982684312746689
llm_goals_438,test,7,0.8728475570678711,0.3291190451199206
llm_goals_438,test,8,0.8730551600456238,0.7413057021464516
llm_goals_438,test,9,0.8672693967819214,0.5655428700196011
llm_goals_438,test,10,0.8731305599212646,0.3747542837277384
llm_goals_438,test,11,0.8670448064804077,0.8141028872063276
llm_goals_438,test,12,0.86631178855896,0.7668041079795853
llm_goals_438,test,13,0.8665211796760559,0.3989739137138193
llm_goals_438,test,14,0.8673950433731079,0.630963737596379
llm_goals_438,test,15,0.8731629252433777,0.8688319384714368
llm_goals_438,test,16,0.8736158609390259,0.6505714018494433
llm_goals_438,test,17,0.8673564791679382,0.8432750100181929
llm_goals_438,test,18,0.8731315732002258,0.5454092034362268
llm_goals_438,test,19,0.866594672203064,0.7865512327498919
llm_goals_438,test,20,0.8729034066200256,0.9279490690754252
llm_goals_438,test,21,0.8670198917388916,0.6012074860500735
llm_goals_438,test,22,0.873103141784668,0.8641822359830585
llm_goals_438,test,23,0.8666567206382751,0.6703505477568762
llm_goals_438,test,24,0.8671430945396423,0.659909363300309
llm_goals_438,test,25,0.8670504093170166,0.345825906210505
llm_goals_438,test,26,0.8664751648902893,0.6342860330821757
llm_goals_438,test,27,0.8673046827316284,0.7595913377648781
llm_goals_438,test,28,0.8729143142700195,0.8379201892332447
llm_goals_438,test,29,0.8666481375694275,0.7401884809786652
llm_goals_438,test,30,0.8665032982826233,0.3998648759112044
llm_goals_438,test,31,0.8666345477104187,0.7480624326316926
llm_goals_438,test,32,0.8665907382965088,0.7187081505530665
llm_goals_438,test,33,0.873161256313324,0.4107650384506188
llm_goals_438,test,34,0.8666658401489258,0.5939725433582219
llm_goals_438,test,35,0.8732230067253113,0.3302466757601784
llm_goals_438,test,36,0.8660848140716553,0.5187707473385741
llm_goals_438,test,37,0.8733291029930115,0.7520728212475446
llm_goals_438,test,38,0.8669460415840149,0.7995393320092329
llm_goals_438,test,39,0.8732538819313049,0.6185694255280791
llm_goals_438,test,40,0.8671366572380066,0.6220749660889419
llm_goals_438,test,41,0.8730482459068298,0.6546343601883343
llm_goals_438,test,42,0.8671610951423645,0.2289113591576547
llm_goals_438,test,43,0.8668202757835388,0.7896951428928559
llm_goals_438,test,44,0.8668801784515381,0.8209977715013562
llm_goals_438,test,45,0.8732249736785889,0.8177235133069483
llm_goals_438,test,46,0.8728706240653992,0.6895343723542676
llm_goals_438,test,47,0.8664179444313049,0.8889681695339486
llm_goals_438,test,48,0.8731792569160461,0.5208110922241418
llm_goals_438,test,49,0.8664475083351135,0.8657228979999158
llm_goals_182,test,0,0.5212439894676208,0.9879327417690024
llm_goals_182,test,1,0.5213844180107117,0.143590152166184
llm_goals_182,test,2,0.5216275453567505,0.975250065576832
llm_goals_182,test,3,0.521467924118042,0.0053962595772566
llm_goals_182,test,4,0.5214536786079407,0.9395814535027472
llm_goals_182,test,5,0.521395742893219,0.7582924861360835
llm_goals_182,test,6,0.5220302939414978,0.5490946196305377
llm_goals_182,test,7,0.5214067697525024,0.9749513260519144
llm_goals_182,test,8,0.5215876698493958,0.9392208432034008
llm_goals_182,test,9,0.5211220383644104,0.644100014754752
llm_goals_182,test,10,0.5218130350112915,0.3220700188782243
llm_goals_182,test,11,0.5209812521934509,0.981547533906946
llm_goals_182,test,12,0.5210322141647339,0.7137040988313296
llm_goals_182,test,13,0.521763801574707,0.7387330490542552
llm_goals_182,test,14,0.5214685201644897,0.0829307635671844
llm_goals_182,test,15,0.5210351347923279,0.9703041905123228
llm_goals_182,test,16,0.52144855260849,0.0818078392213706
llm_goals_182,test,17,0.521041989326477,0.1321702105874928
llm_goals_182,test,18,0.5214546322822571,0.0627371233524436
llm_goals_182,test,19,0.5213384628295898,0.9687731848071606
llm_goals_182,test,20,0.5215604901313782,0.9416436555176496
llm_goals_182,test,21,0.5211618542671204,0.6725942479831736
llm_goals_182,test,22,0.5209833979606628,0.0934677665250981
llm_goals_182,test,23,0.5212288498878479,0.9812074898398124
llm_goals_182,test,24,0.5213205814361572,0.0949295295653811
llm_goals_182,test,25,0.5210714340209961,0.44246206774058
llm_goals_182,test,26,0.5212090015411377,0.0774707640077587
llm_goals_182,test,27,0.5211473107337952,0.0713961149448467
llm_goals_182,test,28,0.5211647748947144,0.0522770850978139
llm_goals_182,test,29,0.5213621854782104,0.95314246463025
llm_goals_182,test,30,0.5215060114860535,0.2504357008006138
llm_goals_182,test,31,0.5214876532554626,0.7161425258649188
llm_goals_182,test,32,0.5215598344802856,0.9646060453168028
llm_goals_182,test,33,0.5218170285224915,0.0545762932705501
llm_goals_182,test,34,0.5211315751075745,0.3122018653274729
llm_goals_182,test,35,0.5214043855667114,0.9860451225539192
llm_goals_182,test,36,0.5212311148643494,0.281834229591876
llm_goals_182,test,37,0.5209442377090454,0.9542467602371212
llm_goals_182,test,38,0.5210053324699402,0.9552662371080364
llm_goals_182,test,39,0.5216251015663147,0.6622170237731956
llm_goals_182,test,40,0.5221669673919678,0.946633816726873
llm_goals_182,test,41,0.521480143070221,0.7134743981048756
llm_goals_182,test,42,0.5212798118591309,0.9907715917291932
llm_goals_182,test,43,0.5215588212013245,0.7427734633378366
llm_goals_182,test,44,0.521625816822052,0.932817223580216
llm_goals_182,test,45,0.5213456749916077,0.0223475038345677
llm_goals_182,test,46,0.5214451551437378,0.9353007113558736
llm_goals_182,test,47,0.5212540626525879,0.0297953783874237
llm_goals_182,test,48,0.5217797756195068,0.0852689569608422
llm_goals_182,test,49,0.5215796828269958,0.1518944682187261
llm_goals_281,test,0,0.5929321050643921,0.6469683041227705
llm_goals_281,test,1,0.628247857093811,0.9610940798838222
llm_goals_281,test,2,0.503231942653656,0.5031480224374153
llm_goals_281,test,3,0.8596112728118896,0.7670721923110818
llm_goals_281,test,4,0.5633025169372559,0.8573901670428902
llm_goals_281,test,5,0.9704369306564331,0.9842762939932492
llm_goals_281,test,6,0.9735203981399536,0.9899150137032138
llm_goals_281,test,7,0.5112797021865845,0.6044210331647685
llm_goals_281,test,8,0.9720726609230042,0.8588374254971683
llm_goals_281,test,9,0.9727731347084045,0.9818143245858572
llm_goals_281,test,10,0.9511508941650391,0.9566256410829356
llm_goals_281,test,11,0.9913430213928223,0.9833216073807935
llm_goals_281,test,12,0.9800090789794922,0.9046776287625902
llm_goals_281,test,13,0.9663691520690918,0.98515391356728
llm_goals_281,test,14,0.9730456471443176,0.9050783518109468
llm_goals_281,test,15,0.870998740196228,0.8143656887719262
llm_goals_281,test,16,0.9007793068885803,0.9030392149866112
llm_goals_281,test,17,0.9867572784423828,0.9782413247928606
llm_goals_281,test,18,0.9715179800987244,0.9394087233364008
llm_goals_281,test,19,0.5343848466873169,0.013753239211091
llm_goals_281,test,20,0.979004979133606,0.9155304405450826
llm_goals_281,test,21,0.9878657460212708,0.9849370716395414
llm_goals_281,test,22,0.6091542840003967,0.9678947939625672
llm_goals_281,test,23,0.9898367524147034,0.9923826456520252
llm_goals_281,test,24,0.9880202412605286,0.8888994485286436
llm_goals_281,test,25,0.5590075254440308,0.8670526725158154
llm_goals_281,test,26,0.9475741982460022,0.901592433398574
llm_goals_281,test,27,0.5292646884918213,0.9485982074237508
llm_goals_281,test,28,0.9865841269493103,0.7677171303083985
llm_goals_281,test,29,0.9862692952156067,0.0052734878464236
llm_goals_281,test,30,0.953300416469574,0.9557743081128488
llm_goals_281,test,31,0.9892643094062805,0.991380810774403
llm_goals_281,test,32,0.9455111622810364,0.9805269843458853
llm_goals_281,test,33,0.9614551663398743,0.972194253937266
llm_goals_281,test,34,0.9438588619232178,0.8780187362883507
llm_goals_281,test,35,0.5089579224586487,0.6544240022055418
llm_goals_281,test,36,0.9906420111656189,0.8261399254388034
llm_goals_281,test,37,0.5355271697044373,0.0237476361601453
llm_goals_281,test,38,0.9828276634216309,0.2014428856002633
llm_goals_281,test,39,0.9453119039535522,0.9075832337909528
llm_goals_281,test,40,0.9864277243614197,0.9579415929114572
llm_goals_281,test,41,0.9860795140266418,0.9618767488068802
llm_goals_281,test,42,0.5137863159179688,0.6901242783749021
llm_goals_281,test,43,0.9914161562919617,0.991684436764246
llm_goals_281,test,44,0.8575574159622192,0.8707976927725176
llm_goals_281,test,45,0.9876030087471008,0.855741209208138
llm_goals_281,test,46,0.9907392263412476,0.9874326582385272
llm_goals_281,test,47,0.5986936688423157,0.8582029976458081
llm_goals_281,test,48,0.9517695903778076,0.9463140960540516
llm_goals_281,test,49,0.5954994559288025,0.9857229593669278
llm_goals_115,test,0,0.8136507868766785,0.6878400979912542
llm_goals_115,test,1,0.8021003603935242,0.9839099052127078
llm_goals_115,test,2,0.8035239577293396,0.6264771219588837
llm_goals_115,test,3,0.7857789397239685,0.5101504336920828
llm_goals_115,test,4,0.8152324557304382,0.4749856076325871
llm_goals_115,test,5,0.8058381676673889,0.8824023049105343
llm_goals_115,test,6,0.8131368160247803,0.8532372770314965
llm_goals_115,test,7,0.7876312732696533,0.6852873624151903
llm_goals_115,test,8,0.7902747392654419,0.7160879332925761
llm_goals_115,test,9,0.8041742444038391,0.8154454841155164
llm_goals_115,test,10,0.8123607039451599,0.4039477654904175
llm_goals_115,test,11,0.8702176809310913,0.0754019011174093
llm_goals_115,test,12,0.7819044589996338,0.9216642359611203
llm_goals_115,test,13,0.789786159992218,0.9023613914305204
llm_goals_115,test,14,0.8049720525741577,0.7385692464553043
llm_goals_115,test,15,0.7968432307243347,0.5273332595390108
llm_goals_115,test,16,0.8088529109954834,0.5319983452316508
llm_goals_115,test,17,0.7973599433898926,0.9818465718466136
llm_goals_115,test,18,0.8051589131355286,0.7098198015331896
llm_goals_115,test,19,0.7951388955116272,0.8100224145820547
llm_goals_115,test,20,0.8011724352836609,0.5463347366319413
llm_goals_115,test,21,0.7871339917182922,0.9708728191294718
llm_goals_115,test,22,0.8030697703361511,0.9564870014981148
llm_goals_115,test,23,0.7923142910003662,0.3427940177342707
llm_goals_115,test,24,0.7956953644752502,0.7216819551218165
llm_goals_115,test,25,0.7983527183532715,0.7118023516747564
llm_goals_115,test,26,0.8122862577438354,0.7495779511143832
llm_goals_115,test,27,0.8057533502578735,0.9876421097288872
llm_goals_115,test,28,0.7940798401832581,0.2396916848864135
llm_goals_115,test,29,0.7916164994239807,0.8734944493526076
llm_goals_115,test,30,0.798896849155426,0.4720948765146447
llm_goals_115,test,31,0.7977340221405029,0.9613435269013808
llm_goals_115,test,32,0.7958024144172668,0.2802482482345895
llm_goals_115,test,33,0.8132099509239197,0.800091024303248
llm_goals_115,test,34,0.7952708601951599,0.4731748202329244
llm_goals_115,test,35,0.7990100383758545,0.6451464331436837
llm_goals_115,test,36,0.8030921220779419,0.5222874947485241
llm_goals_115,test,37,0.8013569712638855,0.9548006059708632
llm_goals_115,test,38,0.8044572472572327,0.8483833674723993
llm_goals_115,test,39,0.7980036735534668,0.9222807287488012
llm_goals_115,test,40,0.7910251617431641,0.2526171639652511
llm_goals_115,test,41,0.7988887429237366,0.8309168152992626
llm_goals_115,test,42,0.7867591977119446,0.58549510672814
llm_goals_115,test,43,0.7925595045089722,0.9288889171841944
llm_goals_115,test,44,0.8059404492378235,0.8696076826330161
llm_goals_115,test,45,0.7825178503990173,0.3370215724740773
llm_goals_115,test,46,0.7980589866638184,0.0614426001598869
llm_goals_115,test,47,0.7930471301078796,0.5311914101507266
llm_goals_115,test,48,0.8083345890045166,0.6260304408434099
llm_goals_115,test,49,0.7918119430541992,0.9732730991067396
llm_goals_230,test,0,0.91219162940979,0.93636017960289
llm_goals_230,test,1,0.9903296232223511,0.9080467805464864
llm_goals_230,test,2,0.925641655921936,0.9492415176604246
llm_goals_230,test,3,0.9766449928283691,0.9883888360800804
llm_goals_230,test,4,0.9196762442588806,0.9308136137248874
llm_goals_230,test,5,0.8322795629501343,0.7461662385047432
llm_goals_230,test,6,0.8296359181404114,0.717943379234597
llm_goals_230,test,7,0.9170296788215637,0.9498506183856972
llm_goals_230,test,8,0.9299734830856323,0.903114856201669
llm_goals_230,test,9,0.8279612064361572,0.82763987275553
llm_goals_230,test,10,0.8727102875709534,0.9176431321128996
llm_goals_230,test,11,0.9883990287780762,0.9351009100066487
llm_goals_230,test,12,0.96576327085495,0.9397739779762262
llm_goals_230,test,13,0.8245070576667786,0.7663744634625839
llm_goals_230,test,14,0.9497990608215332,0.9587652680933508
llm_goals_230,test,15,0.9251430630683899,0.9205685886203512
llm_goals_230,test,16,0.9434024095535278,0.9548512114624126
llm_goals_230,test,17,0.9915536046028137,0.9177103078422468
llm_goals_230,test,18,0.9546169638633728,0.9694847470185912
llm_goals_230,test,19,0.9862104058265686,0.241616336852366
llm_goals_230,test,20,0.9205750226974487,0.9513741131303785
llm_goals_230,test,21,0.8213210105895996,0.7885476346382756
llm_goals_230,test,22,0.991621732711792,0.956588746723208
llm_goals_230,test,23,0.9893342852592468,0.8796235970540837
llm_goals_230,test,24,0.9365435242652893,0.9499204774153412
llm_goals_230,test,25,0.875663697719574,0.7955946265549944
llm_goals_230,test,26,0.9354990720748901,0.96990013164722
llm_goals_230,test,27,0.9910853505134583,0.94318586558855
llm_goals_230,test,28,0.980941891670227,0.9816553586758804
llm_goals_230,test,29,0.9873327612876892,0.2755945052454056
llm_goals_230,test,30,0.8694889545440674,0.8694230354404326
llm_goals_230,test,31,0.8296932578086853,0.7707924321548216
llm_goals_230,test,32,0.9890751242637634,0.90274804670901
llm_goals_230,test,33,0.9528704285621643,0.964968324648223
llm_goals_230,test,34,0.8808702230453491,0.8786148395823622
llm_goals_230,test,35,0.9259980320930481,0.9659460769293176
llm_goals_230,test,36,0.8756468892097473,0.9265550381327228
llm_goals_230,test,37,0.9838347434997559,0.2797244301796934
llm_goals_230,test,38,0.9874847531318665,0.3705167516113265
llm_goals_230,test,39,0.9791825413703918,0.9571521884098596
llm_goals_230,test,40,0.9877525568008423,0.9050631524069218
llm_goals_230,test,41,0.8295361399650574,0.7826731844385996
llm_goals_230,test,42,0.9178322553634644,0.9429872638799112
llm_goals_230,test,43,0.8269023299217224,0.7863413109728788
llm_goals_230,test,44,0.9303556084632874,0.925476613788026
llm_goals_230,test,45,0.9818456172943115,0.9835724833108438
llm_goals_230,test,46,0.9878106117248535,0.9324350988748504
llm_goals_230,test,47,0.9764195680618286,0.9918220640686296
llm_goals_230,test,48,0.9390921592712402,0.9632008068532188
llm_goals_230,test,49,0.9915325045585632,0.9196290944353572
llm_goals_86,test,0,0.9150123596191406,0.8172564483303766
llm_goals_86,test,1,0.9113932847976685,0.2845365633956491
llm_goals_86,test,2,0.9146825671195984,0.9608234754131476
llm_goals_86,test,3,0.9134328365325928,0.4905999470752266
llm_goals_86,test,4,0.9135329723358154,0.5223214595126631
llm_goals_86,test,5,0.9128416180610657,0.9513769165530092
llm_goals_86,test,6,0.9145473837852478,0.951058446008376
llm_goals_86,test,7,0.9140870571136475,0.7200026044712197
llm_goals_86,test,8,0.9136707782745361,0.8539416814851118
llm_goals_86,test,9,0.9124360680580139,0.8409760614216922
llm_goals_86,test,10,0.9132612943649292,0.8985089093274398
llm_goals_86,test,11,0.9127174615859985,0.960478126988056
llm_goals_86,test,12,0.9139571785926819,0.8489881816956245
llm_goals_86,test,13,0.9139754176139832,0.4634670890641906
llm_goals_86,test,14,0.9137089252471924,0.4018075540357314
llm_goals_86,test,15,0.9131455421447754,0.8179491136946933
llm_goals_86,test,16,0.9139589667320251,0.5277433486870419
llm_goals_86,test,17,0.913179337978363,0.7712345746838988
llm_goals_86,test,18,0.9126178622245789,0.89457523897237
llm_goals_86,test,19,0.9132014513015747,0.6892489135704485
llm_goals_86,test,20,0.913277268409729,0.6353825334363962
llm_goals_86,test,21,0.9116100072860718,0.6530961974169966
llm_goals_86,test,22,0.914979100227356,0.8880043621770078
llm_goals_86,test,23,0.9144610166549683,0.8809145094341392
llm_goals_86,test,24,0.9138888120651245,0.8164918407571773
llm_goals_86,test,25,0.9123143553733826,0.5604555739923536
llm_goals_86,test,26,0.9134559631347656,0.649475864669537
llm_goals_86,test,27,0.9116471409797668,0.951709689395591
llm_goals_86,test,28,0.914199948310852,0.8387165613894679
llm_goals_86,test,29,0.9127815961837769,0.674672778677278
llm_goals_86,test,30,0.9135973453521729,0.4648188591057368
llm_goals_86,test,31,0.912720799446106,0.663420132120388
llm_goals_86,test,32,0.911888599395752,0.6545684404871791
llm_goals_86,test,33,0.9211844801902771,0.8998044750116885
llm_goals_86,test,34,0.9143116474151611,0.4550697205389422
llm_goals_86,test,35,0.9126651287078857,0.7006743460528161
llm_goals_86,test,36,0.9135012626647949,0.8041987455834365
llm_goals_86,test,37,0.9145660996437073,0.5563101271844357
llm_goals_86,test,38,0.9140744805335999,0.8432080747815303
llm_goals_86,test,39,0.9127770066261292,0.8362265811977031
llm_goals_86,test,40,0.9146044850349426,0.9223666199627952
llm_goals_86,test,41,0.914162278175354,0.8662457492250272
llm_goals_86,test,42,0.9150428771972656,0.8608515909392838
llm_goals_86,test,43,0.9124600291252136,0.7603164939120921
llm_goals_86,test,44,0.9134668111801147,0.7861883669269881
llm_goals_86,test,45,0.9146044850349426,0.8184625523204602
llm_goals_86,test,46,0.9131292700767517,0.94641274246688
llm_goals_86,test,47,0.917384922504425,0.5854129271694068
llm_goals_86,test,48,0.9129018783569336,0.920227709127447
llm_goals_86,test,49,0.9145534634590149,0.938532531863768
