template_id,split,question_idx,prediction,label
llm_goals_401,test,0,0.8214562529052961,0.9578712581802692
llm_goals_401,test,1,0.9183133038628218,0.9541386350517288
llm_goals_401,test,2,0.8731095180580019,0.957633375223842
llm_goals_401,test,3,0.9031755534275779,0.939051149330178
llm_goals_401,test,4,0.9216204671367109,0.977310418686652
llm_goals_401,test,5,0.9710886875156742,0.9809432028055692
llm_goals_401,test,6,0.9616086488761382,0.9755462400413056
llm_goals_401,test,7,0.9566576880828318,0.9511837371803112
llm_goals_401,test,8,0.9712628134499308,0.9766189453717342
llm_goals_401,test,9,0.942616741480261,0.980273604068012
llm_goals_401,test,10,0.8668729039423293,0.9337562348751304
llm_goals_401,test,11,0.8747116360635667,0.9722627409957164
llm_goals_401,test,12,0.9516276128004293,0.9492781746196448
llm_goals_401,test,13,0.8266644852329375,0.9655717616666432
llm_goals_401,test,14,0.5830465226764079,0.9574255360215488
llm_goals_401,test,15,0.9671019126339315,0.981384516529317
llm_goals_401,test,16,0.8906841496008981,0.9222335225671268
llm_goals_401,test,17,0.9166832382079013,0.9532549975325786
llm_goals_401,test,18,0.5922537900817711,0.9602041260526504
llm_goals_401,test,19,0.8912233331780152,0.6478757906857974
llm_goals_401,test,20,0.9712628134499308,0.9756843455136628
llm_goals_401,test,21,0.9615910849244848,0.990806877474314
llm_goals_401,test,22,0.9164054002531593,0.9781303405092489
llm_goals_401,test,23,0.887990416578074,0.9417706148309914
llm_goals_401,test,24,0.7877199896717927,0.956336258298358
llm_goals_401,test,25,0.9061807993496612,0.9313769351906296
llm_goals_401,test,26,0.7785127222664293,0.979961037598938
llm_goals_401,test,27,0.9215812282015975,0.9393076331919324
llm_goals_401,test,28,0.9133316865481492,0.8926497724419561
llm_goals_401,test,29,0.9312233906526229,0.9039012216839892
llm_goals_401,test,30,0.914331693654821,0.95604890766277
llm_goals_401,test,31,0.9615910849244848,0.9892266395215328
llm_goals_401,test,32,0.942125499097808,0.9539506448444212
llm_goals_401,test,33,0.8413232928985449,0.96642041239244
llm_goals_401,test,34,0.9141979421053983,0.9213858238598618
llm_goals_401,test,35,0.7187272536970307,0.9778974644047042
llm_goals_401,test,36,0.7812867531891864,0.9268431675812392
llm_goals_401,test,37,0.8662648388215594,0.8127340038962743
llm_goals_401,test,38,0.9303868014860533,0.9302681059523448
llm_goals_401,test,39,0.9426015617484055,0.965603948473316
llm_goals_401,test,40,0.9051675571710421,0.9546924251425224
llm_goals_401,test,41,0.9629821385081657,0.9801422355796748
llm_goals_401,test,42,0.9286247707916342,0.942841528380264
llm_goals_401,test,43,0.9256016829180336,0.9624654057275208
llm_goals_401,test,44,0.9737788236838357,0.966766237132969
llm_goals_401,test,45,0.9186061026634317,0.9166605495286292
llm_goals_401,test,46,0.6791297603701844,0.9735371475107132
llm_goals_401,test,47,0.9031019014895968,0.9170851722665576
llm_goals_401,test,48,0.8176974691738469,0.9537843477184356
llm_goals_401,test,49,0.8895878995419325,0.9543297303993916
llm_goals_438,test,0,0.8573114041123828,0.7747630551513792
llm_goals_438,test,1,0.9498173973639337,0.935855152767207
llm_goals_438,test,2,0.9658494389516136,0.7147502234092127
llm_goals_438,test,3,0.9678214385759535,0.8761475031821572
llm_goals_438,test,4,0.9132841226821159,0.8370855426053896
llm_goals_438,test,5,0.9513514434341529,0.9507074946857058
llm_goals_438,test,6,0.8971093873428143,0.9429353481406656
llm_goals_438,test,7,0.9615105785886997,0.7990903812700391
llm_goals_438,test,8,0.9193712266278782,0.8538515570307835
llm_goals_438,test,9,0.950339576185887,0.9386380912818448
llm_goals_438,test,10,0.966236709725465,0.6740432265841833
llm_goals_438,test,11,0.9600534833886597,0.923251629590114
llm_goals_438,test,12,0.9594717818470259,0.9507690082509844
llm_goals_438,test,13,0.957554619750411,0.9289195559017176
llm_goals_438,test,14,0.9438473375838642,0.9291442660965707
llm_goals_438,test,15,0.8873731687113366,0.7999125079987789
llm_goals_438,test,16,0.940380453935723,0.88095736898606
llm_goals_438,test,17,0.9600144624110668,0.9601117963812904
llm_goals_438,test,18,0.9505217266785223,0.9583113794533262
llm_goals_438,test,19,0.9604017544672647,0.95757842004367
llm_goals_438,test,20,0.9547446569275779,0.8569924879179522
llm_goals_438,test,21,0.9619238978457757,0.925142229626847
llm_goals_438,test,22,0.9687252040147171,0.969126404096814
llm_goals_438,test,23,0.9683167340967649,0.9270654345691632
llm_goals_438,test,24,0.9539733535245057,0.9447244425458496
llm_goals_438,test,25,0.9195592785965444,0.8921203628029744
llm_goals_438,test,26,0.9458278205343894,0.8840255122167773
llm_goals_438,test,27,0.9600144624110668,0.9518933904854712
llm_goals_438,test,28,0.9482242671449583,0.9239946006518056
llm_goals_438,test,29,0.9639467003242704,0.967243014785835
llm_goals_438,test,30,0.9010446862341684,0.6513131257391838
llm_goals_438,test,31,0.9596493056620488,0.9452766773393708
llm_goals_438,test,32,0.9668027360345514,0.8986559507515572
llm_goals_438,test,33,0.9076430822576155,0.9178363106363248
llm_goals_438,test,34,0.9587392215366545,0.58431126605279
llm_goals_438,test,35,0.9143568060448518,0.8549510299218779
llm_goals_438,test,36,0.9511895368323436,0.7795077576161488
llm_goals_438,test,37,0.967160324356418,0.942087231678633
llm_goals_438,test,38,0.9424871806186926,0.9569196102046018
llm_goals_438,test,39,0.9574014142228785,0.6285572658291573
llm_goals_438,test,40,0.9708081680844388,0.9166060095399312
llm_goals_438,test,41,0.955841322097236,0.9583812359561668
llm_goals_438,test,42,0.9498546913666512,0.7214403769582053
llm_goals_438,test,43,0.9677076735164706,0.953284323393524
llm_goals_438,test,44,0.9552493096751246,0.8806080817389553
llm_goals_438,test,45,0.9663930333442997,0.92897343737164
llm_goals_438,test,46,0.9555734195327542,0.9584044214292112
llm_goals_438,test,47,0.9498173973639337,0.8794537151734262
llm_goals_438,test,48,0.9461325200979909,0.9488244649817108
llm_goals_438,test,49,0.9710415398403764,0.96985775201818
llm_goals_293,test,0,0.7744215271425355,0.8120835453586716
llm_goals_293,test,1,0.8176076403732098,0.6480853571933485
llm_goals_293,test,2,0.9200846335577779,0.6079345474456403
llm_goals_293,test,3,0.8829360817909899,0.6129396608114358
llm_goals_293,test,4,0.9874996738509068,0.98210900952278
llm_goals_293,test,5,0.9576698134115569,0.8658899081858322
llm_goals_293,test,6,0.8959161446497094,0.8882423636795116
llm_goals_293,test,7,0.9565311663615835,0.8509290855710241
llm_goals_293,test,8,0.8577252443564257,0.9446962492710368
llm_goals_293,test,9,0.8914862264053388,0.7513049230182386
llm_goals_293,test,10,0.9418662403857762,0.9177243110970909
llm_goals_293,test,11,0.7700312322075241,0.745101023404262
llm_goals_293,test,12,0.9265824357412583,0.938155530986403
llm_goals_293,test,13,0.8679240372367953,0.93102258486241
llm_goals_293,test,14,0.9150975380097078,0.9835613372030028
llm_goals_293,test,15,0.9920480051504833,0.9863449336337948
llm_goals_293,test,16,0.9228887962069386,0.9708489179126024
llm_goals_293,test,17,0.8437893213855792,0.6973515732177806
llm_goals_293,test,18,0.8414066603731819,0.9506214255847638
llm_goals_293,test,19,0.9144945739019571,0.8167264479797811
llm_goals_293,test,20,0.9905116161706223,0.9863548279811404
llm_goals_293,test,21,0.9183380807386147,0.7980274506980439
llm_goals_293,test,22,0.9124726846087411,0.6721615182655347
llm_goals_293,test,23,0.7800967253351155,0.3710465401115394
llm_goals_293,test,24,0.860907496309229,0.9595301158883638
llm_goals_293,test,25,0.8097101403807652,0.958163618329278
llm_goals_293,test,26,0.8352247506021677,0.966849388190172
llm_goals_293,test,27,0.9551631215633737,0.8260024784538619
llm_goals_293,test,28,0.855019465832088,0.8388491865678236
llm_goals_293,test,29,0.8744164128533881,0.8884818588260758
llm_goals_293,test,30,0.9246136337374264,0.918233934226988
llm_goals_293,test,31,0.8371796418906078,0.7707225792789547
llm_goals_293,test,32,0.9629731580344657,0.8156692405787661
llm_goals_293,test,33,0.862338604565581,0.9365220048324068
llm_goals_293,test,34,0.960345089224707,0.9379748130481748
llm_goals_293,test,35,0.9265431550590595,0.8901683114693926
llm_goals_293,test,36,0.6949545917561265,0.8493608200921786
llm_goals_293,test,37,0.8231221010168613,0.861633780857663
llm_goals_293,test,38,0.8964858278761921,0.9021887179891658
llm_goals_293,test,39,0.860217869014575,0.9140409207490428
llm_goals_293,test,40,0.7623449506809757,0.7293724502897094
llm_goals_293,test,41,0.9640895323010699,0.9162639417701852
llm_goals_293,test,42,0.9558912707087007,0.8334788131423683
llm_goals_293,test,43,0.9397305244764208,0.7597774188725597
llm_goals_293,test,44,0.9869245288656132,0.9659911986912778
llm_goals_293,test,45,0.8479401231287526,0.931854425798201
llm_goals_293,test,46,0.7986035938109651,0.5591331662726297
llm_goals_293,test,47,0.9538321891301011,0.8162434715058863
llm_goals_293,test,48,0.8893271668422725,0.9539650091821064
llm_goals_293,test,49,0.8850920611648839,0.7483454415724036
llm_goals_397,test,0,0.8213622768133361,0.6336495795213035
llm_goals_397,test,1,0.18426160597567062,0.5588590528022535
llm_goals_397,test,2,0.4132067177214726,0.8967757860893129
llm_goals_397,test,3,0.5212463328629702,0.5944806240686361
llm_goals_397,test,4,0.2095098001844943,0.2252168546192252
llm_goals_397,test,5,0.3345462923363119,0.4122469495302511
llm_goals_397,test,6,0.20512849959153176,0.6366335121435647
llm_goals_397,test,7,0.6226395989604033,0.8096849879343126
llm_goals_397,test,8,0.18008116069863145,0.2219286497366863
llm_goals_397,test,9,0.21299735361810215,0.3790244200754085
llm_goals_397,test,10,0.24422179312191875,0.5739754985147579
llm_goals_397,test,11,0.20545796842003794,0.5146662099973958
llm_goals_397,test,12,0.29547356852425305,0.7724721370318186
llm_goals_397,test,13,0.19768326012109175,0.5128249643851043
llm_goals_397,test,14,0.3224377824220991,0.7029672903171433
llm_goals_397,test,15,0.21641549589129228,0.1755089749954058
llm_goals_397,test,16,0.17312067139513274,0.5780643909290929
llm_goals_397,test,17,0.4540562806767052,0.5651405953323156
llm_goals_397,test,18,0.6948946278944433,0.6692897169839361
llm_goals_397,test,19,0.2169581402506554,0.3010834864311782
llm_goals_397,test,20,0.24094888607236645,0.1723383376025849
llm_goals_397,test,21,0.5587726664579424,0.5552848455448852
llm_goals_397,test,22,0.21190772120102555,0.6247249816867365
llm_goals_397,test,23,0.2777856805653069,0.6892307964539947
llm_goals_397,test,24,0.629306289287581,0.707193144679384
llm_goals_397,test,25,0.26606916926154883,0.4728503384141321
llm_goals_397,test,26,0.27499159339669205,0.6125340479102231
llm_goals_397,test,27,0.3134295424431823,0.638109206443543
llm_goals_397,test,28,0.35376371407720764,0.6131846340287811
llm_goals_397,test,29,0.21040390002290554,0.3070471542438216
llm_goals_397,test,30,0.3219451367942934,0.5305219388850028
llm_goals_397,test,31,0.5587726664579424,0.5467284805801325
llm_goals_397,test,32,0.196094423144227,0.6052179554793606
llm_goals_397,test,33,0.20266970551977656,0.6898052709387655
llm_goals_397,test,34,0.4088568179056011,0.4488721547034506
llm_goals_397,test,35,0.831891677260819,0.8279244269125856
llm_goals_397,test,36,0.18733182186226785,0.4830040624014946
llm_goals_397,test,37,0.3646559701243713,0.25459376065388
llm_goals_397,test,38,0.3655107398903059,0.2830348916920439
llm_goals_397,test,39,0.49372831888276486,0.6957841399255769
llm_goals_397,test,40,0.24786703221989026,0.4595589778278475
llm_goals_397,test,41,0.45005270441335243,0.5333267612417034
llm_goals_397,test,42,0.6218595849645714,0.8577821893356561
llm_goals_397,test,43,0.42868694271466684,0.5300958828918404
llm_goals_397,test,44,0.21385974252042972,0.2841438783476365
llm_goals_397,test,45,0.1818478548111997,0.5478091730108666
llm_goals_397,test,46,0.19404554209720837,0.623584984691235
llm_goals_397,test,47,0.49231787455574044,0.5396113255522375
llm_goals_397,test,48,0.591432487926857,0.6615871853452404
llm_goals_397,test,49,0.37898768439909786,0.4776676395182983
llm_goals_281,test,0,0.8392421262480901,0.7595571331426817
llm_goals_281,test,1,0.9001152531953579,0.9579733612090848
llm_goals_281,test,2,0.6821143830669723,0.7309832638356816
llm_goals_281,test,3,0.8971640085072671,0.909583748095366
llm_goals_281,test,4,0.743183263751309,0.8390255026542476
llm_goals_281,test,5,0.9787055653481774,0.9811318521618418
llm_goals_281,test,6,0.9681720145411764,0.9764572976382064
llm_goals_281,test,7,0.6299486793149108,0.7324330753461098
llm_goals_281,test,8,0.8936867121688699,0.7994783462392235
llm_goals_281,test,9,0.9775423609875332,0.9815838934803736
llm_goals_281,test,10,0.9568201303490599,0.9535347237148352
llm_goals_281,test,11,0.9833410583603627,0.9162909963751452
llm_goals_281,test,12,0.8589886453738469,0.3257945040794002
llm_goals_281,test,13,0.9852106176864038,0.9844478965422564
llm_goals_281,test,14,0.9664341606738259,0.927134453638218
llm_goals_281,test,15,0.9128287814770869,0.8836817175051781
llm_goals_281,test,16,0.9183405638492635,0.9699068666000064
llm_goals_281,test,17,0.9108720820642456,0.9397605064907452
llm_goals_281,test,18,0.9651591772514498,0.9597848062242011
llm_goals_281,test,19,0.7733090713324896,0.0774181964240286
llm_goals_281,test,20,0.9607553683513844,0.7915892634199072
llm_goals_281,test,21,0.9657528193686682,0.971953988882578
llm_goals_281,test,22,0.852391539483205,0.9602834573260726
llm_goals_281,test,23,0.9772749745897963,0.9122855662185028
llm_goals_281,test,24,0.9686210151680328,0.8979990468804274
llm_goals_281,test,25,0.7845381983628327,0.9241587506219392
llm_goals_281,test,26,0.9664270282626926,0.9170165050266824
llm_goals_281,test,27,0.8318265694763709,0.9611372957336312
llm_goals_281,test,28,0.9843826495356893,0.87756150859893
llm_goals_281,test,29,0.7923356388932268,0.0434288112667957
llm_goals_281,test,30,0.9650731673753677,0.9342891559515047
llm_goals_281,test,31,0.9805685081045503,0.9743182057709278
llm_goals_281,test,32,0.9749271278179759,0.9397307039659378
llm_goals_281,test,33,0.9767070022073386,0.9512294521408392
llm_goals_281,test,34,0.9556480639497975,0.9536682955508284
llm_goals_281,test,35,0.7267520594700881,0.7535714456504239
llm_goals_281,test,36,0.9753932350256065,0.923856973167726
llm_goals_281,test,37,0.827479840847607,0.0418935402602879
llm_goals_281,test,38,0.9604255154395306,0.7059133681203931
llm_goals_281,test,39,0.41947342703509666,0.3500755803223985
llm_goals_281,test,40,0.9800937576644276,0.9247009790185582
llm_goals_281,test,41,0.9812620873841752,0.9782058597610022
llm_goals_281,test,42,0.6861054151304081,0.7983229743904063
llm_goals_281,test,43,0.9774000049298662,0.975883662329788
llm_goals_281,test,44,0.7955350462243369,0.8750632399664175
llm_goals_281,test,45,0.9497693594692259,0.8910572538163689
llm_goals_281,test,46,0.9705727103354279,0.8908228852175125
llm_goals_281,test,47,0.8778619390095984,0.9549922359097318
llm_goals_281,test,48,0.9600238801333312,0.953490375608375
llm_goals_281,test,49,0.8840155382234013,0.9719706773426644
llm_goals_415,test,0,0.7812577068460534,0.8577433203043316
llm_goals_415,test,1,0.9195365124736199,0.9021974832977488
llm_goals_415,test,2,0.8340919368161934,0.8694165318475334
llm_goals_415,test,3,0.6904416828917288,0.82278495857052
llm_goals_415,test,4,0.8702763337436088,0.9068547493871278
llm_goals_415,test,5,0.8560318117506068,0.4606612308159887
llm_goals_415,test,6,0.5562051905897859,0.5138990722916633
llm_goals_415,test,7,0.8183795408816845,0.7904330429356278
llm_goals_415,test,8,0.7788870753841285,0.8709577911266214
llm_goals_415,test,9,0.705602943531812,0.4953228711784456
llm_goals_415,test,10,0.8525478049186511,0.8458883220009739
llm_goals_415,test,11,0.9214862188424652,0.7530689318495781
llm_goals_415,test,12,0.5622220346055048,0.5374074070351774
llm_goals_415,test,13,0.8649904493893422,0.4543814478672013
llm_goals_415,test,14,0.8479359960245821,0.895305392435367
llm_goals_415,test,15,0.907496608975749,0.8949400812124582
llm_goals_415,test,16,0.8216898188650706,0.8238564098543186
llm_goals_415,test,17,0.8489414683371476,0.7003946285795368
llm_goals_415,test,18,0.8894896905683524,0.8541047516491939
llm_goals_415,test,19,0.7814249752476824,0.4356960950136901
llm_goals_415,test,20,0.8991910183680322,0.8781125499856784
llm_goals_415,test,21,0.529099837953014,0.4662951793594435
llm_goals_415,test,22,0.9588007743797613,0.9133476600123868
llm_goals_415,test,23,0.841998060345165,0.729661283414631
llm_goals_415,test,24,0.9306613416412293,0.8203684726547874
llm_goals_415,test,25,0.8333516328748115,0.8415742531339464
llm_goals_415,test,26,0.9128011859728499,0.9206447035427088
llm_goals_415,test,27,0.8114018655783438,0.8224443724423657
llm_goals_415,test,28,0.7792744190638894,0.7688508117653093
llm_goals_415,test,29,0.8000753930869959,0.4040278781550455
llm_goals_415,test,30,0.9015129412405948,0.8326128082774296
llm_goals_415,test,31,0.5770855879543864,0.5181213462550505
llm_goals_415,test,32,0.810461480365662,0.7754768797937415
llm_goals_415,test,33,0.7799125763284808,0.8137893713459559
llm_goals_415,test,34,0.8712482654536758,0.8569270432280164
llm_goals_415,test,35,0.8015479015608017,0.7699967690808278
llm_goals_415,test,36,0.9361931301709365,0.921674433414038
llm_goals_415,test,37,0.7983407545817598,0.4660833031516516
llm_goals_415,test,38,0.9095088832082651,0.65152560988785
llm_goals_415,test,39,0.6835504775631364,0.5898309243469293
llm_goals_415,test,40,0.9407808179874202,0.8091818666854369
llm_goals_415,test,41,0.8530874051851866,0.6069718569678856
llm_goals_415,test,42,0.9253835675476833,0.801905695679187
llm_goals_415,test,43,0.40407734168657883,0.7026985943405766
llm_goals_415,test,44,0.8267714172713468,0.8893336073229712
llm_goals_415,test,45,0.9381879965479474,0.7374717538757577
llm_goals_415,test,46,0.7665206618426463,0.7627238714563013
llm_goals_415,test,47,0.9017292265023306,0.8843601962700689
llm_goals_415,test,48,0.933475235461072,0.8760370596393947
llm_goals_415,test,49,0.8676365828573381,0.934119643218544
llm_goals_228,test,0,0.5104535554778796,0.5536142490727757
llm_goals_228,test,1,0.4363172935378017,0.5970781946787781
llm_goals_228,test,2,0.4048350931530316,0.6661069439112477
llm_goals_228,test,3,0.510242926499045,0.1621815926046939
llm_goals_228,test,4,0.3370388510693921,0.576352949396635
llm_goals_228,test,5,0.21198124235987106,0.3840882428737504
llm_goals_228,test,6,0.29847172196103283,0.3586276503372383
llm_goals_228,test,7,0.39518562100707316,0.6176623614140859
llm_goals_228,test,8,0.5712684746236006,0.6954425570040618
llm_goals_228,test,9,0.4403701842379019,0.259004657946923
llm_goals_228,test,10,0.547003761277803,0.7034691604166908
llm_goals_228,test,11,0.35537719847633387,0.3667773177860855
llm_goals_228,test,12,0.4745542332783535,0.5441412387152404
llm_goals_228,test,13,0.4732536196391404,0.1798966425933617
llm_goals_228,test,14,0.655874941143005,0.6634600123551451
llm_goals_228,test,15,0.5281238883655054,0.5674040322938352
llm_goals_228,test,16,0.6862328002493441,0.7765365728525085
llm_goals_228,test,17,0.44293080167750015,0.4222217460157005
llm_goals_228,test,18,0.8047832454840002,0.7662538791291613
llm_goals_228,test,19,0.5681585622441985,0.551409534979895
llm_goals_228,test,20,0.48007273976210635,0.6118689802554753
llm_goals_228,test,21,0.37346961768247616,0.4397631653224428
llm_goals_228,test,22,0.4587209910128485,0.4973987580331913
llm_goals_228,test,23,0.34979001892839784,0.4235300043058759
llm_goals_228,test,24,0.45750376628975803,0.7459660834986607
llm_goals_228,test,25,0.34500869651498417,0.7296489537631492
llm_goals_228,test,26,0.3113947734719911,0.7553963575905828
llm_goals_228,test,27,0.6839247262025063,0.4245395914745872
llm_goals_228,test,28,0.4391924202444393,0.2113718455283501
llm_goals_228,test,29,0.47639991828565886,0.5839822372879856
llm_goals_228,test,30,0.28933118268789815,0.7882484823843936
llm_goals_228,test,31,0.5676727771952558,0.3978078935012989
llm_goals_228,test,32,0.4584554503861697,0.2741357451446661
llm_goals_228,test,33,0.7469808880327463,0.7841096477395639
llm_goals_228,test,34,0.3649838134158916,0.7149417896157899
llm_goals_228,test,35,0.4461127728598006,0.4510400759046654
llm_goals_228,test,36,0.36649369691028494,0.5606685093118066
llm_goals_228,test,37,0.4894531458666734,0.4949700429354497
llm_goals_228,test,38,0.35996019241471,0.5294251416140277
llm_goals_228,test,39,0.3817335024372522,0.5303236770643239
llm_goals_228,test,40,0.4357628160590054,0.3614923254884982
llm_goals_228,test,41,0.4815233083907442,0.5566797369456766
llm_goals_228,test,42,0.4777776375681029,0.5874469302943671
llm_goals_228,test,43,0.4714673847928448,0.4047568932904402
llm_goals_228,test,44,0.572478648728458,0.5769778999083791
llm_goals_228,test,45,0.2593854451339948,0.381513250802113
llm_goals_228,test,46,0.4347055506976673,0.3717434970639749
llm_goals_228,test,47,0.3315010915328978,0.1147154724199779
llm_goals_228,test,48,0.47850576352465185,0.8181026530840192
llm_goals_228,test,49,0.5659503504794104,0.3646229970196732
llm_goals_236,test,0,0.8161078300650213,0.7150391262778099
llm_goals_236,test,1,0.8100488386952209,0.6861406620593683
llm_goals_236,test,2,0.8553019119016101,0.871267676778662
llm_goals_236,test,3,0.8092031685310074,0.8072499568065914
llm_goals_236,test,4,0.6679300513873301,0.7988170942289826
llm_goals_236,test,5,0.9311650779242986,0.9525319342810132
llm_goals_236,test,6,0.9179261533668169,0.922369719315248
llm_goals_236,test,7,0.8238741756000789,0.8816325671024458
llm_goals_236,test,8,0.8165307499419853,0.850525379453481
llm_goals_236,test,9,0.9352259243856252,0.8905094280701752
llm_goals_236,test,10,0.8463498524133298,0.8517140753999074
llm_goals_236,test,11,0.8776091339764224,0.568158988781905
llm_goals_236,test,12,0.9020422691821546,0.81539887388713
llm_goals_236,test,13,0.9312603461307921,0.9457094685792072
llm_goals_236,test,14,0.6855512227521386,0.926884118217816
llm_goals_236,test,15,0.7567650470938935,0.6732607492268958
llm_goals_236,test,16,0.9076589724005849,0.9227134615696562
llm_goals_236,test,17,0.81036179881491,0.6862386995801422
llm_goals_236,test,18,0.8551667161685595,0.9588107473600692
llm_goals_236,test,19,0.8053570998925091,0.4410424607670248
llm_goals_236,test,20,0.811198472304976,0.7869937367368668
llm_goals_236,test,21,0.9294453464415732,0.9367554776085516
llm_goals_236,test,22,0.8182539543468031,0.6817940522161587
llm_goals_236,test,23,0.9636995396968212,0.6579733541579077
llm_goals_236,test,24,0.7716090251406009,0.9454425582899262
llm_goals_236,test,25,0.8439883761563364,0.8055338192842599
llm_goals_236,test,26,0.8883665786386993,0.9246634247941125
llm_goals_236,test,27,0.8341487013878023,0.5396068227032322
llm_goals_236,test,28,0.8270453780845207,0.7982258737433797
llm_goals_236,test,29,0.9086776190009367,0.3578559150563034
llm_goals_236,test,30,0.7973359408518422,0.831663663699925
llm_goals_236,test,31,0.7678572383704235,0.8995406617766696
llm_goals_236,test,32,0.7167012752230194,0.7293236306375612
llm_goals_236,test,33,0.9356209624262396,0.9457866545397724
llm_goals_236,test,34,0.8699631272106997,0.8364083160174887
llm_goals_236,test,35,0.8255265539400359,0.7920782594120231
llm_goals_236,test,36,0.8174372532310952,0.8063141051706565
llm_goals_236,test,37,0.9032312055815891,0.5136290105021059
llm_goals_236,test,38,0.8075216192678013,0.4514706416931497
llm_goals_236,test,39,0.8970810787560864,0.8644759660227708
llm_goals_236,test,40,0.8363205830775029,0.6572924853656194
llm_goals_236,test,41,0.9268717371473763,0.9311078819261616
llm_goals_236,test,42,0.8255265539400359,0.7421276499016344
llm_goals_236,test,43,0.9311650779242986,0.9182581940665174
llm_goals_236,test,44,0.6945069666052182,0.78992920712052
llm_goals_236,test,45,0.8714529494735657,0.7690354024834042
llm_goals_236,test,46,0.9430307360940816,0.7972039589337531
llm_goals_236,test,47,0.766954265696532,0.7979662948734613
llm_goals_236,test,48,0.9356209624262397,0.9401202207915952
llm_goals_236,test,49,0.7528963482216383,0.6403300609260552
llm_goals_86,test,0,0.7658902588151951,0.848978225488033
llm_goals_86,test,1,0.7145340489466993,0.5667421908216284
llm_goals_86,test,2,0.7185027623086682,0.9666773097569414
llm_goals_86,test,3,0.8263316909599094,0.6816530119880609
llm_goals_86,test,4,0.7587362730857411,0.7288792743080739
llm_goals_86,test,5,0.7596623465840652,0.5964895998637825
llm_goals_86,test,6,0.7403757958765903,0.8656080931523872
llm_goals_86,test,7,0.8849015648491063,0.847071824665876
llm_goals_86,test,8,0.8029774301972838,0.8023766851830423
llm_goals_86,test,9,0.7877013657888904,0.8243871920658875
llm_goals_86,test,10,0.8019975405492805,0.8687054464050711
llm_goals_86,test,11,0.8458505646617643,0.8881319769937132
llm_goals_86,test,12,0.8011499065684901,0.6590020093828339
llm_goals_86,test,13,0.7141973561661293,0.6733084849210463
llm_goals_86,test,14,0.6943388409609145,0.3116476330682434
llm_goals_86,test,15,0.8029883912125841,0.663392413111642
llm_goals_86,test,16,0.7469460991196254,0.6387773046544254
llm_goals_86,test,17,0.74679108660296,0.8044010404371023
llm_goals_86,test,18,0.8503475993670694,0.8337891051244346
llm_goals_86,test,19,0.740204336624144,0.6396502923711483
llm_goals_86,test,20,0.7192265809022179,0.788879332405017
llm_goals_86,test,21,0.7687297474007505,0.6394004244150501
llm_goals_86,test,22,0.7439380534811516,0.8626416934726503
llm_goals_86,test,23,0.7763555957029298,0.7951470614108611
llm_goals_86,test,24,0.7735546222942489,0.7989944850297322
llm_goals_86,test,25,0.7533744179060763,0.5568233615310513
llm_goals_86,test,26,0.7729133164964046,0.7084484057432834
llm_goals_86,test,27,0.815090042305351,0.752342212713773
llm_goals_86,test,28,0.7021188106519567,0.7815325258308541
llm_goals_86,test,29,0.836848538394149,0.5424953082979187
llm_goals_86,test,30,0.7748943385305425,0.623081015497586
llm_goals_86,test,31,0.719239397671083,0.7577194596401322
llm_goals_86,test,32,0.6968374996195212,0.6601579792051269
llm_goals_86,test,33,0.8711025914595205,0.8711616389038969
llm_goals_86,test,34,0.8583848669718496,0.5900015643097076
llm_goals_86,test,35,0.8065821114294902,0.7298998281488869
llm_goals_86,test,36,0.6573182757546309,0.8123467602894
llm_goals_86,test,37,0.7950759740760288,0.6271477319447354
llm_goals_86,test,38,0.7849204972177019,0.7261717872389034
llm_goals_86,test,39,0.8441678878675846,0.6599378021515605
llm_goals_86,test,40,0.8509555678312849,0.8521513981089313
llm_goals_86,test,41,0.8528925340436558,0.7704036243710474
llm_goals_86,test,42,0.8158382610068614,0.8350187493970602
llm_goals_86,test,43,0.7508767423681529,0.7481739555208251
llm_goals_86,test,44,0.6242566875350877,0.6095754460637204
llm_goals_86,test,45,0.7795698085199207,0.7827780278235656
llm_goals_86,test,46,0.8069878841419548,0.7216219938712807
llm_goals_86,test,47,0.7696774951214747,0.5942280347054933
llm_goals_86,test,48,0.8516883045241544,0.9115690572813184
llm_goals_86,test,49,0.7246297821761832,0.8632650981365884
llm_goals_230,test,0,0.7871338731139926,0.8179351702935589
llm_goals_230,test,1,0.9440602647511449,0.8188182051872285
llm_goals_230,test,2,0.7870665371584358,0.826086635244689
llm_goals_230,test,3,0.8982946477400184,0.8927791723378472
llm_goals_230,test,4,0.6090617868561528,0.5912138130658398
llm_goals_230,test,5,0.5607923070647115,0.5729324801938974
llm_goals_230,test,6,0.5695311065381016,0.5531325232730563
llm_goals_230,test,7,0.7830524604027208,0.8021489969603882
llm_goals_230,test,8,0.6185417823206797,0.581443858272074
llm_goals_230,test,9,0.5709966515745464,0.6239073379929715
llm_goals_230,test,10,0.8280830948400655,0.8046756982507505
llm_goals_230,test,11,0.8618885099572209,0.8313074111593964
llm_goals_230,test,12,0.8685647478988899,0.7681956263676517
llm_goals_230,test,13,0.5992874016919684,0.5988426118672923
llm_goals_230,test,14,0.8336671606903043,0.8599042234104695
llm_goals_230,test,15,0.6069233684627925,0.5936393834712063
llm_goals_230,test,16,0.8561663217336474,0.8206661307032131
llm_goals_230,test,17,0.8834371364635416,0.8436086722555165
llm_goals_230,test,18,0.8525316339868185,0.8499366057691331
llm_goals_230,test,19,0.8084138264835773,0.6149675790828681
llm_goals_230,test,20,0.6069233684627925,0.6266825308309105
llm_goals_230,test,21,0.5954785102736323,0.6164414775805864
llm_goals_230,test,22,0.9438636105569155,0.8343809289495249
llm_goals_230,test,23,0.7251944391771664,0.7837306823394206
llm_goals_230,test,24,0.840081417018455,0.846372128553261
llm_goals_230,test,25,0.8146330298350745,0.7390370859690565
llm_goals_230,test,26,0.8451327925226725,0.8494265096725975
llm_goals_230,test,27,0.9384396072432505,0.8455606124040723
llm_goals_230,test,28,0.9049128906133596,0.8579674965057287
llm_goals_230,test,29,0.7731658625965406,0.5880964942153284
llm_goals_230,test,30,0.836508472950117,0.8358535771844652
llm_goals_230,test,31,0.5695311065381016,0.5805894006126754
llm_goals_230,test,32,0.7251944391771664,0.7934818259785719
llm_goals_230,test,33,0.8564681629531462,0.834267023337033
llm_goals_230,test,34,0.8307760332332189,0.7892429815800577
llm_goals_230,test,35,0.7703023548196284,0.8120109426774391
llm_goals_230,test,36,0.8326559633503742,0.8267144565859352
llm_goals_230,test,37,0.7417287467958898,0.675275568637887
llm_goals_230,test,38,0.7688271963221434,0.601560060077995
llm_goals_230,test,39,0.8580372190884763,0.7968490736396183
llm_goals_230,test,40,0.8192031014359725,0.8216474607704832
llm_goals_230,test,41,0.5847582112325739,0.5197035792893621
llm_goals_230,test,42,0.7805045247118886,0.8097020966346316
llm_goals_230,test,43,0.6069891328833861,0.5719336953223904
llm_goals_230,test,44,0.6151485647303913,0.5981191789043642
llm_goals_230,test,45,0.9057828586258152,0.8736265335179677
llm_goals_230,test,46,0.8192031014359725,0.8487543997753076
llm_goals_230,test,47,0.9050985987239296,0.862138026666209
llm_goals_230,test,48,0.8470069389199498,0.8476904574354545
llm_goals_230,test,49,0.9419213522353841,0.8333969706647004
llm_goals_206,test,0,0.8814285206735887,0.8559101199930208
llm_goals_206,test,1,0.7898609048463107,0.7879802535556255
llm_goals_206,test,2,0.76889677914887,0.8428787031752032
llm_goals_206,test,3,0.7898609048463107,0.8372089275473125
llm_goals_206,test,4,0.7193188273931254,0.7926284648989284
llm_goals_206,test,5,0.554888444450801,0.5153650037350492
llm_goals_206,test,6,0.7898609048463107,0.3343649675741765
llm_goals_206,test,7,0.7688967791488701,0.9070251599505484
llm_goals_206,test,8,0.76889677914887,0.8455585838330733
llm_goals_206,test,9,0.7898609048463107,0.6389058176485727
llm_goals_206,test,10,0.26720566943225516,0.1182260710030537
llm_goals_206,test,11,0.7898609048463107,0.7467083406482149
llm_goals_206,test,12,0.4914919033006003,0.4574706710312913
llm_goals_206,test,13,0.7898609048463107,0.0931690538173042
llm_goals_206,test,14,0.8814285206735887,0.6621974660772205
llm_goals_206,test,15,0.475019968139524,0.494185809495628
llm_goals_206,test,16,0.7898609048463107,0.7829698941017894
llm_goals_206,test,17,0.8814285206735887,0.8449901332087761
llm_goals_206,test,18,0.5594336623523949,0.5445193157731812
llm_goals_206,test,19,0.157108957913308,0.1852277467567061
llm_goals_206,test,20,0.8814285206735887,0.779411075601452
llm_goals_206,test,21,0.8814285206735887,0.8588447585962018
llm_goals_206,test,22,0.6171391085371308,0.6656724760322136
llm_goals_206,test,23,0.6107121349624048,0.7493727269767156
llm_goals_206,test,24,0.6107121349624048,0.9005759943815377
llm_goals_206,test,25,0.45208651049468146,0.6107508047041867
llm_goals_206,test,26,0.76889677914887,0.6818652741926582
llm_goals_206,test,27,0.7898609048463107,0.101753575918842
llm_goals_206,test,28,0.7898609048463107,0.7895615709538879
llm_goals_206,test,29,0.475019968139524,0.6033396275372427
llm_goals_206,test,30,0.5635064646707343,0.5204197590718647
llm_goals_206,test,31,0.8701178644021893,0.552048008783336
llm_goals_206,test,32,0.5004752969004098,0.6983551428844045
llm_goals_206,test,33,0.478837657499427,0.7920455896978277
llm_goals_206,test,34,0.6107121349624048,0.8229582905649624
llm_goals_206,test,35,0.7688967791488701,0.7513486755861682
llm_goals_206,test,36,0.8647048101930709,0.8837921634473136
llm_goals_206,test,37,0.157108957913308,0.2323070941836299
llm_goals_206,test,38,0.45208651049468146,0.6051056320867053
llm_goals_206,test,39,0.45208651049468146,0.3355604905336211
llm_goals_206,test,40,0.289622044936852,0.0412251261013626
llm_goals_206,test,41,0.6107121349624048,0.8154974559604902
llm_goals_206,test,42,0.8814285206735887,0.650940780779249
llm_goals_206,test,43,0.7741360072488358,0.8136300459617928
llm_goals_206,test,44,0.5834866780165955,0.7293123065101965
llm_goals_206,test,45,0.554888444450801,0.7633236130088205
llm_goals_206,test,46,0.478837657499427,0.7359003505062409
llm_goals_206,test,47,0.12323954610504563,0.1849221570121216
llm_goals_206,test,48,0.5594336623523949,0.5419546416667619
llm_goals_206,test,49,0.554888444450801,0.7725634685769378
llm_goals_146,test,0,0.4444755843801664,0.4053194295707414
llm_goals_146,test,1,0.44833124036560784,0.77863227297152
llm_goals_146,test,2,0.38471039322095063,0.4040155724720515
llm_goals_146,test,3,0.6068574968206952,0.5871794799134783
llm_goals_146,test,4,0.763946457054451,0.560196939517166
llm_goals_146,test,5,0.7471916492289291,0.3825763215338447
llm_goals_146,test,6,0.4508613353129513,0.4487256154760436
llm_goals_146,test,7,0.779372454136008,0.4411791015965565
llm_goals_146,test,8,0.5648425924510992,0.6130303084936348
llm_goals_146,test,9,0.636477152715674,0.416171525001919
llm_goals_146,test,10,0.6583219198506414,0.8501744885482464
llm_goals_146,test,11,0.3945933740670649,0.4275203540944987
llm_goals_146,test,12,0.4771158856071584,0.1433368859863564
llm_goals_146,test,13,0.48151978810958757,0.3454179305560879
llm_goals_146,test,14,0.39305115761207005,0.3984231289972209
llm_goals_146,test,15,0.7452596616290273,0.7779127849429681
llm_goals_146,test,16,0.4887469557074769,0.3923375995541924
llm_goals_146,test,17,0.5755614127668571,0.7787704480980863
llm_goals_146,test,18,0.5202172391267131,0.4485241820972409
llm_goals_146,test,19,0.6875393981195902,0.3670787385820063
llm_goals_146,test,20,0.6540030964368687,0.5808513911957404
llm_goals_146,test,21,0.3609573741059083,0.348566839291943
llm_goals_146,test,22,0.6311293809835541,0.8201150868695501
llm_goals_146,test,23,0.663317021942542,0.4340105664556233
llm_goals_146,test,24,0.7999926963775721,0.3779777975457017
llm_goals_146,test,25,0.4411902315048801,0.7064739339171033
llm_goals_146,test,26,0.4774539527022496,0.5068406448496142
llm_goals_146,test,27,0.5810157120353128,0.8190644408840766
llm_goals_146,test,28,0.5800425116055075,0.5180591218776336
llm_goals_146,test,29,0.6059730205484891,0.333344294156815
llm_goals_146,test,30,0.7087480807504386,0.7747827278582075
llm_goals_146,test,31,0.6971446654463365,0.407372660950012
llm_goals_146,test,32,0.7109011784803231,0.410007141610074
llm_goals_146,test,33,0.7529136941911188,0.4871137583409777
llm_goals_146,test,34,0.5216438139294176,0.7239974058432814
llm_goals_146,test,35,0.7804016266937559,0.4717056489178101
llm_goals_146,test,36,0.472672080151447,0.7518023835632467
llm_goals_146,test,37,0.633291625879511,0.4299980307715084
llm_goals_146,test,38,0.544988008207789,0.6853837869126352
llm_goals_146,test,39,0.7682083260792806,0.3821269496784033
llm_goals_146,test,40,0.5516595936083483,0.358534933784248
llm_goals_146,test,41,0.6889657952350839,0.3237407407589303
llm_goals_146,test,42,0.6394922417185512,0.465061399698899
llm_goals_146,test,43,0.40348893844766515,0.4315601062358607
llm_goals_146,test,44,0.7520795279829245,0.5556539728312679
llm_goals_146,test,45,0.8139403223922699,0.5920740253668018
llm_goals_146,test,46,0.6180912675732005,0.4871274917312868
llm_goals_146,test,47,0.7123438986501015,0.581843033103631
llm_goals_146,test,48,0.636477152715674,0.3075919216632473
llm_goals_146,test,49,0.5912815051110073,0.7619587682314233
llm_goals_115,test,0,0.8347486446233504,0.4434632286904964
llm_goals_115,test,1,0.7172019873342732,0.6904317046527385
llm_goals_115,test,2,0.4408440739558637,0.6516189539788452
llm_goals_115,test,3,0.2966545485132646,0.4339340266994299
llm_goals_115,test,4,0.8062011414093231,0.3268096695267067
llm_goals_115,test,5,0.8596905281180957,0.5710080860563095
llm_goals_115,test,6,0.7783322288742164,0.5525431491717658
llm_goals_115,test,7,0.6164858170166386,0.4917475457547118
llm_goals_115,test,8,0.49516332024412324,0.5222463726327303
llm_goals_115,test,9,0.7022623176496755,0.7214239291876969
llm_goals_115,test,10,0.6668498682027938,0.1423416680133991
llm_goals_115,test,11,0.6756777376257181,0.0211501267474188
llm_goals_115,test,12,0.7880751052058925,0.7876465277644278
llm_goals_115,test,13,0.6672858547412573,0.7712913027170795
llm_goals_115,test,14,0.8168847574165814,0.8433526700949283
llm_goals_115,test,15,0.6786312651259111,0.4617767241926875
llm_goals_115,test,16,0.8596905281180955,0.751642873633365
llm_goals_115,test,17,0.7867182250663669,0.5750395750632571
llm_goals_115,test,18,0.6816652636077022,0.731190860777953
llm_goals_115,test,19,0.6467218028778062,0.4010090785721757
llm_goals_115,test,20,0.518530374285905,0.4639274084070101
llm_goals_115,test,21,0.7663443823162358,0.8439438875722727
llm_goals_115,test,22,0.5432139740237227,0.4930168575659444
llm_goals_115,test,23,0.6473163174628483,0.1606455731785788
llm_goals_115,test,24,0.663598336509911,0.8225050510480415
llm_goals_115,test,25,0.7092771101677405,0.4682683350736841
llm_goals_115,test,26,0.8169077667258936,0.8534070288351345
llm_goals_115,test,27,0.5343915974933059,0.4742317939252941
llm_goals_115,test,28,0.6732692345806722,0.0572278600376891
llm_goals_115,test,29,0.7582536709101707,0.4907303253909411
llm_goals_115,test,30,0.6078905394914111,0.1910564822051387
llm_goals_115,test,31,0.6642305073204406,0.8779074551007869
llm_goals_115,test,32,0.6660769361834182,0.504762116367231
llm_goals_115,test,33,0.7919192729524838,0.7961870421429237
llm_goals_115,test,34,0.4409181841090319,0.3117413427150098
llm_goals_115,test,35,0.50366956010087,0.6062498233633526
llm_goals_115,test,36,0.6620429489129991,0.5465164093133901
llm_goals_115,test,37,0.7037975233033015,0.5868361038548235
llm_goals_115,test,38,0.8004235817493816,0.3403703059415776
llm_goals_115,test,39,0.6717837730431001,0.8219108606932716
llm_goals_115,test,40,0.5032709284037992,0.1035981058362717
llm_goals_115,test,41,0.5626184945318599,0.8823458588361053
llm_goals_115,test,42,0.28240670314084965,0.2803475549795062
llm_goals_115,test,43,0.8171745374771042,0.8513012565900029
llm_goals_115,test,44,0.508984203748378,0.7679774087348048
llm_goals_115,test,45,0.36764714339539095,0.2698108084459203
llm_goals_115,test,46,0.7356797548147386,0.0357215216493676
llm_goals_115,test,47,0.6670877134814345,0.3874775112245213
llm_goals_115,test,48,0.5077682261883835,0.7725214275580398
llm_goals_115,test,49,0.6862464795367217,0.6720584616591353
llm_goals_182,test,0,0.2506490725574451,0.8652063058172594
llm_goals_182,test,1,0.20114606875642027,0.0826947230522371
llm_goals_182,test,2,0.7601511895575227,0.798587619624287
llm_goals_182,test,3,0.5995683904538984,0.1153333326317589
llm_goals_182,test,4,0.696520998004023,0.8911074983818299
llm_goals_182,test,5,0.4054663605622779,0.322673941983326
llm_goals_182,test,6,0.4017949787740518,0.3873713790307967
llm_goals_182,test,7,0.6149639597438846,0.7954637431338407
llm_goals_182,test,8,0.6389968128817459,0.8713126766724444
llm_goals_182,test,9,0.5921652434459558,0.3727719680238343
llm_goals_182,test,10,0.6982354699109901,0.344259508803394
llm_goals_182,test,11,0.49028060708905247,0.826409100211895
llm_goals_182,test,12,0.6347469985134365,0.6995229144237722
llm_goals_182,test,13,0.49028060708905247,0.2983322739239659
llm_goals_182,test,14,0.1984209756125411,0.1121533235192302
llm_goals_182,test,15,0.4523658179307252,0.8530130402661898
llm_goals_182,test,16,0.1984209756125411,0.140642355581753
llm_goals_182,test,17,0.5782576209682757,0.0750534295482943
llm_goals_182,test,18,0.1984209756125411,0.1021012312203676
llm_goals_182,test,19,0.6441516337550671,0.7930271065360427
llm_goals_182,test,20,0.7882527872994854,0.8727144576853948
llm_goals_182,test,21,0.40564723858643253,0.3883832635395169
llm_goals_182,test,22,0.23296758240551715,0.0858987886275301
llm_goals_182,test,23,0.3074656609024791,0.8587499834361978
llm_goals_182,test,24,0.6698964061491401,0.1529090764042953
llm_goals_182,test,25,0.22358692662507726,0.3907861244891192
llm_goals_182,test,26,0.1984209756125411,0.0898813936513848
llm_goals_182,test,27,0.5690653304252247,0.0749023763424232
llm_goals_182,test,28,0.5566626834888587,0.1190598459155961
llm_goals_182,test,29,0.6093014234830273,0.7016449528375276
llm_goals_182,test,30,0.6475566765812731,0.3981986561532387
llm_goals_182,test,31,0.3943109161328568,0.4033468376136607
llm_goals_182,test,32,0.6109284368679878,0.8175631740353995
llm_goals_182,test,33,0.2798583591115524,0.1448325966795168
llm_goals_182,test,34,0.5667319270004799,0.3547330626609051
llm_goals_182,test,35,0.40594130426306557,0.823988084949945
llm_goals_182,test,36,0.6052316732509015,0.3920453941320438
llm_goals_182,test,37,0.6442566028123252,0.7340161718404824
llm_goals_182,test,38,0.6149639597438846,0.7157883671582869
llm_goals_182,test,39,0.6384446873748391,0.6255065804950025
llm_goals_182,test,40,0.4704078063478363,0.8448494619425109
llm_goals_182,test,41,0.6286935704621647,0.3755363161560536
llm_goals_182,test,42,0.6962090529225565,0.8729538896060706
llm_goals_182,test,43,0.29536726237162075,0.3845820969466132
llm_goals_182,test,44,0.8443303300988667,0.8535886434286102
llm_goals_182,test,45,0.5048370947190394,0.1216040102965252
llm_goals_182,test,46,0.2184212223025385,0.8235719161892086
llm_goals_182,test,47,0.7436891940002419,0.1493568636498045
llm_goals_182,test,48,0.2005773355196745,0.0977336313667494
llm_goals_182,test,49,0.4111231352111722,0.1145731477706443
llm_goals_326,test,0,0.784535546381835,0.7210679537447465
llm_goals_326,test,1,0.6438290063292545,0.3698897290610674
llm_goals_326,test,2,0.784535546381835,0.6763902331902123
llm_goals_326,test,3,0.7228022572349886,0.6144636338527515
llm_goals_326,test,4,0.7388225396241895,0.7226604268728406
llm_goals_326,test,5,0.7713556461028198,0.225010837259175
llm_goals_326,test,6,0.6499115051893732,0.2511202974122448
llm_goals_326,test,7,0.7722448905285941,0.682107850479918
llm_goals_326,test,8,0.7717872456272076,0.7688388280774076
llm_goals_326,test,9,0.2402733845948962,0.2201601423152862
llm_goals_326,test,10,0.6691487698760848,0.5927527654534117
llm_goals_326,test,11,0.727733326994901,0.6402653335664261
llm_goals_326,test,12,0.6300134866494692,0.5219165167657188
llm_goals_326,test,13,0.22366818785933293,0.0912598670014219
llm_goals_326,test,14,0.6577951206540854,0.6237388815231029
llm_goals_326,test,15,0.7514011618916401,0.58422994911281
llm_goals_326,test,16,0.6577951206540854,0.598158895081766
llm_goals_326,test,17,0.6439460503586668,0.3075615988754216
llm_goals_326,test,18,0.7007644133433143,0.5624766832927214
llm_goals_326,test,19,0.8338799667796172,0.5837591950755885
llm_goals_326,test,20,0.7304965473807338,0.7023531773229094
llm_goals_326,test,21,0.28876575410090155,0.2231004774292705
llm_goals_326,test,22,0.7200652290664317,0.2981176158853984
llm_goals_326,test,23,0.779748840091696,0.5947662537107573
llm_goals_326,test,24,0.6730949002587631,0.6704973022514685
llm_goals_326,test,25,0.6801590889871508,0.7548767212573778
llm_goals_326,test,26,0.7327769924905939,0.6431580812900115
llm_goals_326,test,27,0.5410249591494479,0.3327881124528037
llm_goals_326,test,28,0.7065552497462071,0.7641554441675951
llm_goals_326,test,29,0.8063806650602184,0.4778539914575542
llm_goals_326,test,30,0.7147864084158665,0.7097294996625062
llm_goals_326,test,31,0.2887657541009015,0.2337370659971392
llm_goals_326,test,32,0.6587575008667071,0.6377238074883209
llm_goals_326,test,33,0.6766266521519168,0.6003066307923431
llm_goals_326,test,34,0.6715174363678539,0.6119511714458287
llm_goals_326,test,35,0.7505557127793688,0.5639635886155634
llm_goals_326,test,36,0.7007141085083656,0.6540181072858835
llm_goals_326,test,37,0.760773344631295,0.6297099560404894
llm_goals_326,test,38,0.7728800914585126,0.5278265562464118
llm_goals_326,test,39,0.531727633981092,0.5029836143581652
llm_goals_326,test,40,0.7445919696205566,0.5904794288270965
llm_goals_326,test,41,0.40526480502001566,0.2163880989100536
llm_goals_326,test,42,0.7783196196951416,0.6295586948815126
llm_goals_326,test,43,0.4575702785584845,0.2120109108404722
llm_goals_326,test,44,0.7196690051654085,0.6940786874714112
llm_goals_326,test,45,0.6182605590474731,0.6451782543850242
llm_goals_326,test,46,0.732191315046581,0.5818721654926584
llm_goals_326,test,47,0.8011093312118863,0.6937841106268494
llm_goals_326,test,48,0.7093537794576955,0.6520763720762168
llm_goals_326,test,49,0.5969347191535176,0.3675984402598143
