template_id,split,question_idx,prediction,label
llm_goals_75,test,0,0.3982158168044063,0.0759730864369293
llm_goals_75,test,1,0.5853912812377603,0.8120127334654941
llm_goals_75,test,2,0.7395428427858338,0.114709127325878
llm_goals_75,test,3,0.4310166547384904,0.1496870071208148
llm_goals_75,test,4,0.47444800659518854,0.321632167436288
llm_goals_75,test,5,0.49349637240573835,0.0308084020440725
llm_goals_75,test,6,0.6807937952727227,0.1843175427563358
llm_goals_75,test,7,0.5487051863613228,0.0324508518984932
llm_goals_75,test,8,0.44718430102223633,0.5189439655904322
llm_goals_75,test,9,0.37751302194854847,0.1502023728460554
llm_goals_75,test,10,0.5883758915602464,0.7657257241793485
llm_goals_75,test,11,0.6834568397512367,0.8035850534204568
llm_goals_75,test,12,0.3847102799179984,0.7307072346740583
llm_goals_75,test,13,0.8065906699650055,0.048717541539827
llm_goals_75,test,14,0.5230861926482177,0.4316714754134473
llm_goals_75,test,15,0.5030218959191545,0.3935335107130105
llm_goals_75,test,16,0.4115279877675729,0.6144967070154126
llm_goals_75,test,17,0.6164785192906478,0.4488809036296748
llm_goals_75,test,18,0.2625842739557853,0.3920271677291012
llm_goals_75,test,19,0.5808712849849383,0.7810532991351343
llm_goals_75,test,20,0.5021586599217737,0.5443958675997833
llm_goals_75,test,21,0.46767756675121736,0.1604437012338976
llm_goals_75,test,22,0.37280094272245873,0.856696781503626
llm_goals_75,test,23,0.4288816870628658,0.590876283737086
llm_goals_75,test,24,0.5833014369824757,0.4156776627984636
llm_goals_75,test,25,0.3977696189481738,0.5957841371692396
llm_goals_75,test,26,0.3600520250696097,0.3752723036244256
llm_goals_75,test,27,0.48196597691877335,0.7866846353759014
llm_goals_75,test,28,0.4636540768492777,0.1838073487826972
llm_goals_75,test,29,0.23001968144221865,0.853944837222159
llm_goals_75,test,30,0.7108593141172612,0.5992601606803653
llm_goals_75,test,31,0.3486427341061966,0.0743179753131944
llm_goals_75,test,32,0.7887106294949996,0.8015013965908157
llm_goals_75,test,33,0.6374507061423706,0.3498422912836759
llm_goals_75,test,34,0.6940923139634965,0.6692496458902099
llm_goals_75,test,35,0.28795441439636,0.134422492314073
llm_goals_75,test,36,0.5496806713449942,0.6683886675406214
llm_goals_75,test,37,0.6670346022383918,0.7419528346018889
llm_goals_75,test,38,0.4737672429457014,0.6995551035614008
llm_goals_75,test,39,0.42350586761355696,0.4983520000991506
llm_goals_75,test,40,0.2625842739557853,0.8622382893733471
llm_goals_75,test,41,0.47327827636012393,0.1026909531781266
llm_goals_75,test,42,0.5791670067295603,0.0240063792397545
llm_goals_75,test,43,0.6972406709908666,0.1143049422953708
llm_goals_75,test,44,0.3844082737570577,0.6884653626060131
llm_goals_75,test,45,0.7571992274321442,0.1059329319681491
llm_goals_75,test,46,0.4005055346567234,0.405588044943207
llm_goals_75,test,47,0.673961237612549,0.1393457779753145
llm_goals_75,test,48,0.2625842739557853,0.3670858714001939
llm_goals_75,test,49,0.488504812091945,0.8052537448889737
llm_goals_78,test,0,0.7037126207011997,0.710685793210421
llm_goals_78,test,1,0.7169007459141952,0.4521352090792044
llm_goals_78,test,2,0.6684402209973986,0.6984783435600235
llm_goals_78,test,3,0.30460076873481545,0.2113029164438998
llm_goals_78,test,4,0.41399529724101763,0.59362413330766
llm_goals_78,test,5,0.5119399087269922,0.2973601904856865
llm_goals_78,test,6,0.40456687642500294,0.4911861319335979
llm_goals_78,test,7,0.6684402209973986,0.5109292120432304
llm_goals_78,test,8,0.17579042506048048,0.0214058927819057
llm_goals_78,test,9,0.5844632730513843,0.4561729519761929
llm_goals_78,test,10,0.14697308157612504,0.0679189625299845
llm_goals_78,test,11,0.5681229958180082,0.0515682065315942
llm_goals_78,test,12,0.7588584991615321,0.2832565853408079
llm_goals_78,test,13,0.5449051763505985,0.3661356287525266
llm_goals_78,test,14,0.45638413836385966,0.5580731095047592
llm_goals_78,test,15,0.17063554724303845,0.3539146882613382
llm_goals_78,test,16,0.48489037705516425,0.5198975918853231
llm_goals_78,test,17,0.5953170107839536,0.5974518854580513
llm_goals_78,test,18,0.4527371304842996,0.2539582988569515
llm_goals_78,test,19,0.6582887707268953,0.7818671446797811
llm_goals_78,test,20,0.18417520497635054,0.0817902870466741
llm_goals_78,test,21,0.5078858199297253,0.5313736848686219
llm_goals_78,test,22,0.7272226592380013,0.0578457193154468
llm_goals_78,test,23,0.710369128863991,0.0588067732422271
llm_goals_78,test,24,0.5366805904371074,0.504620490567031
llm_goals_78,test,25,0.20805668203959674,0.1421301439321696
llm_goals_78,test,26,0.4619784459815257,0.4043160008204248
llm_goals_78,test,27,0.6253921325550897,0.1341547030792127
llm_goals_78,test,28,0.3629745551752824,0.585613528389559
llm_goals_78,test,29,0.448874436423153,0.8037635629119642
llm_goals_78,test,30,0.2574238042486803,0.2785354206868534
llm_goals_78,test,31,0.4954722426467139,0.5074585147112473
llm_goals_78,test,32,0.7331299583030733,0.1365325187594412
llm_goals_78,test,33,0.4527371304842996,0.559776178990876
llm_goals_78,test,34,0.17698533929011417,0.3923464392379233
llm_goals_78,test,35,0.6931707600810847,0.8694947336510349
llm_goals_78,test,36,0.20727157061906687,0.3406365028138333
llm_goals_78,test,37,0.3966906562342218,0.7407991807699866
llm_goals_78,test,38,0.4391252184596386,0.5783193820848918
llm_goals_78,test,39,0.7588584991615321,0.2552456607848852
llm_goals_78,test,40,0.4042893011701114,0.0543816822443674
llm_goals_78,test,41,0.4054679009268934,0.5111749720190839
llm_goals_78,test,42,0.6684402209973986,0.6373460300781935
llm_goals_78,test,43,0.4954722426467139,0.5824615770869916
llm_goals_78,test,44,0.1988386570411648,0.0725045867582824
llm_goals_78,test,45,0.47194558102524703,0.2055844845637347
llm_goals_78,test,46,0.7695332377600841,0.3443727538373435
llm_goals_78,test,47,0.5717383552006216,0.5137599661573641
llm_goals_78,test,48,0.44291391915449063,0.2386868392211005
llm_goals_78,test,49,0.6570557783490693,0.1092249314217729
llm_goals_53,test,0,0.09560240460363044,0.0313037944392233
llm_goals_53,test,1,0.22005001273667735,0.8240056677766623
llm_goals_53,test,2,0.2026710537763543,0.1449565288533016
llm_goals_53,test,3,0.16063471492016146,0.3918474727299789
llm_goals_53,test,4,0.14230232006241264,0.8286818229760559
llm_goals_53,test,5,0.0776468690830448,0.0496580173268359
llm_goals_53,test,6,0.09186029967338537,0.0987357123371914
llm_goals_53,test,7,0.2684025870795949,0.0188377606185046
llm_goals_53,test,8,0.11470973583318067,0.4663370658319237
llm_goals_53,test,9,0.06464723150786167,0.0353320635735392
llm_goals_53,test,10,0.19362227263400864,0.2465247528295183
llm_goals_53,test,11,0.18516485233260274,0.3482216267575191
llm_goals_53,test,12,0.20413140474585803,0.252877033485255
llm_goals_53,test,13,0.19891753591661168,0.1153444147006914
llm_goals_53,test,14,0.24235495426225742,0.3868210483124377
llm_goals_53,test,15,0.0955067221158646,0.3705090883623566
llm_goals_53,test,16,0.4020883216839626,0.3407126088990169
llm_goals_53,test,17,0.11564673149896554,0.7543820151744927
llm_goals_53,test,18,0.274839738737175,0.3322955201354975
llm_goals_53,test,19,0.13067944201374582,0.2261316448538241
llm_goals_53,test,20,0.24235495426225742,0.7343425814116562
llm_goals_53,test,21,0.3194577027621051,0.1601759070107346
llm_goals_53,test,22,0.11485432273271783,0.4190051893758809
llm_goals_53,test,23,0.12067560585026056,0.2354442333363782
llm_goals_53,test,24,0.23804352350463207,0.9719922543639756
llm_goals_53,test,25,0.14409644598134902,0.3289260309913144
llm_goals_53,test,26,0.23898656889915482,0.5781819989075234
llm_goals_53,test,27,0.16826977898730325,0.790876622992061
llm_goals_53,test,28,0.21502768945998096,0.2747068584160578
llm_goals_53,test,29,0.11036378383268221,0.796844478423965
llm_goals_53,test,30,0.3599820967858085,0.3573432990729852
llm_goals_53,test,31,0.3335472970081423,0.1489779797461384
llm_goals_53,test,32,0.3288135397386216,0.4788477745757265
llm_goals_53,test,33,0.32826362417439875,0.967617631882366
llm_goals_53,test,34,0.28898057227283974,0.2252558680565209
llm_goals_53,test,35,0.19802217968709904,0.2300146581986259
llm_goals_53,test,36,0.13856800923370236,0.2311446011459964
llm_goals_53,test,37,0.16309351068052122,0.144409781340655
llm_goals_53,test,38,0.1860213540859891,0.705274211964156
llm_goals_53,test,39,0.3047665511184185,0.5921281985506138
llm_goals_53,test,40,0.1532810371049799,0.2443004460536172
llm_goals_53,test,41,0.08176324807839763,0.061497045516474
llm_goals_53,test,42,0.10969199884966756,0.1570495895565415
llm_goals_53,test,43,0.10227168123502127,0.0641649100604756
llm_goals_53,test,44,0.04698078042319843,0.8074959504603542
llm_goals_53,test,45,0.38459438988759254,0.1813221143349916
llm_goals_53,test,46,0.1894866249451348,0.2352295334667747
llm_goals_53,test,47,0.3413591629070176,0.820244135045574
llm_goals_53,test,48,0.2026710537763543,0.4512817747836695
llm_goals_53,test,49,0.1050576020926312,0.7022675298006026
llm_goals_305,test,0,0.331525095822173,0.0438240495208372
llm_goals_305,test,1,0.6384172362254575,0.1352509650243347
llm_goals_305,test,2,0.3510251140851767,0.1723536643548477
llm_goals_305,test,3,0.6938923562296337,0.250094549889143
llm_goals_305,test,4,0.8047811392603378,0.8419536365764244
llm_goals_305,test,5,0.3441816051111868,0.1913177614094214
llm_goals_305,test,6,0.33143950867118344,0.3723916849874114
llm_goals_305,test,7,0.3609097686058625,0.3337485526549648
llm_goals_305,test,8,0.8363032956042152,0.5824263052609597
llm_goals_305,test,9,0.5454675953681567,0.620184539039364
llm_goals_305,test,10,0.7868684740982085,0.494000537986959
llm_goals_305,test,11,0.24539288630881698,0.0482253546984711
llm_goals_305,test,12,0.1489010413553231,0.0565816635108951
llm_goals_305,test,13,0.5671104630801705,0.1136796894182332
llm_goals_305,test,14,0.8069753055861595,0.6102884358627442
llm_goals_305,test,15,0.8226534542512849,0.8302422164176205
llm_goals_305,test,16,0.8019838754253793,0.589511050327059
llm_goals_305,test,17,0.7036702388799293,0.2622011524235305
llm_goals_305,test,18,0.8072741064205259,0.3737828315077605
llm_goals_305,test,19,0.5088367193233797,0.0401465973617238
llm_goals_305,test,20,0.7743481383572423,0.8971526617856483
llm_goals_305,test,21,0.4313506450845339,0.513862561708882
llm_goals_305,test,22,0.38964093507225894,0.6617399492970566
llm_goals_305,test,23,0.4181653983108558,0.3090300849059566
llm_goals_305,test,24,0.7940983172493391,0.4310886044152408
llm_goals_305,test,25,0.8177470984236151,0.7346082271960779
llm_goals_305,test,26,0.8282669000052018,0.7784622217319951
llm_goals_305,test,27,0.6835508928999676,0.2285268892180965
llm_goals_305,test,28,0.6938923562296337,0.1970890988987535
llm_goals_305,test,29,0.509107866615194,0.0543679274789402
llm_goals_305,test,30,0.5171950963485282,0.8325365217586828
llm_goals_305,test,31,0.41901187698366976,0.5078524819974076
llm_goals_305,test,32,0.5186095353721174,0.2061603354174771
llm_goals_305,test,33,0.7912771541420036,0.6039954962603894
llm_goals_305,test,34,0.8263641568167825,0.5754865922825898
llm_goals_305,test,35,0.461041994204423,0.1664152616423
llm_goals_305,test,36,0.8806096314407399,0.7880486291444165
llm_goals_305,test,37,0.6810402661662235,0.03605078558617
llm_goals_305,test,38,0.3808967364890354,0.0575570687729377
llm_goals_305,test,39,0.1132816608859643,0.4297583044645929
llm_goals_305,test,40,0.21813707886837208,0.1915445959084499
llm_goals_305,test,41,0.5125787118569868,0.6536953532364804
llm_goals_305,test,42,0.44162321266398025,0.122527873906398
llm_goals_305,test,43,0.6587525395513165,0.1711906255032107
llm_goals_305,test,44,0.8020873256793054,0.8371975999741996
llm_goals_305,test,45,0.6907716006016095,0.3836914756409174
llm_goals_305,test,46,0.34234522385519156,0.0543049670678201
llm_goals_305,test,47,0.6858804370429356,0.5245484205428286
llm_goals_305,test,48,0.8050942446082635,0.5969765977742552
llm_goals_305,test,49,0.5185192362152883,0.2351952691938564
llm_goals_81,test,0,0.3516580812085672,0.2060992725380174
llm_goals_81,test,1,0.6241849125386096,0.1581010259271764
llm_goals_81,test,2,0.5112445686406537,0.6899052301996583
llm_goals_81,test,3,0.6275499835057335,0.7805136989347726
llm_goals_81,test,4,0.40096049210040724,0.0972643506873081
llm_goals_81,test,5,0.24251795139068197,0.3089356066572108
llm_goals_81,test,6,0.6555116020195482,0.0383605574042886
llm_goals_81,test,7,0.5454683150460257,0.3161536190910042
llm_goals_81,test,8,0.601848685911,0.5126683642741202
llm_goals_81,test,9,0.4814144105585521,0.1286837550045714
llm_goals_81,test,10,0.39855441949992226,0.1256029490894943
llm_goals_81,test,11,0.5577554325217209,0.408772132619307
llm_goals_81,test,12,0.3813638397396076,0.1214277758013974
llm_goals_81,test,13,0.49027355691813,0.0409721836992895
llm_goals_81,test,14,0.4258053157012833,0.0827685330888261
llm_goals_81,test,15,0.4474129177002737,0.224678660518271
llm_goals_81,test,16,0.48301633086115153,0.0872837265533343
llm_goals_81,test,17,0.30493899390367485,0.2837028272857018
llm_goals_81,test,18,0.3833527621203869,0.1193591553103013
llm_goals_81,test,19,0.41014138171527864,0.4952181327203863
llm_goals_81,test,20,0.5473976836042783,0.2564665599863756
llm_goals_81,test,21,0.52953100646064,0.2041083994609424
llm_goals_81,test,22,0.5220565247131858,0.3119739178030907
llm_goals_81,test,23,0.46508357297987585,0.0994638293022036
llm_goals_81,test,24,0.46430225832630917,0.1575467450844306
llm_goals_81,test,25,0.6673747731284845,0.0291125673317165
llm_goals_81,test,26,0.6241849125386096,0.0869071714890548
llm_goals_81,test,27,0.4814144105585521,0.1863589895989203
llm_goals_81,test,28,0.6771678176184306,0.119977738469712
llm_goals_81,test,29,0.4977186324675742,0.4410572612602489
llm_goals_81,test,30,0.38823122908247804,0.0555125882985712
llm_goals_81,test,31,0.2011383733167632,0.3486004983652399
llm_goals_81,test,32,0.5577554325217209,0.189941914212834
llm_goals_81,test,33,0.4937787819718363,0.1890390776737304
llm_goals_81,test,34,0.34143581739269185,0.1328217427635933
llm_goals_81,test,35,0.5471312351758705,0.1343397356978365
llm_goals_81,test,36,0.5141122696515069,0.0899701336915993
llm_goals_81,test,37,0.25057956843778273,0.6478431799513802
llm_goals_81,test,38,0.6241849125386096,0.7351879424598803
llm_goals_81,test,39,0.5471312351758705,0.0885373562628188
llm_goals_81,test,40,0.5705665645721942,0.3150516869571108
llm_goals_81,test,41,0.37285798286307775,0.0498826197855212
llm_goals_81,test,42,0.5112445686406537,0.1381899328653333
llm_goals_81,test,43,0.3813638397396076,0.3536647169335493
llm_goals_81,test,44,0.5248300567591224,0.4857917971856136
llm_goals_81,test,45,0.5369694307171987,0.3921515914618259
llm_goals_81,test,46,0.568048765378825,0.1179764264181158
llm_goals_81,test,47,0.5553743436951705,0.2786576050295211
llm_goals_81,test,48,0.5112445686406537,0.0726556891905063
llm_goals_81,test,49,0.5206903625200394,0.3912461690418435
llm_goals_133,test,0,0.6482019324296177,0.4440529323069227
llm_goals_133,test,1,0.543835908345864,0.3955848959165448
llm_goals_133,test,2,0.548817144860181,0.5130030466159122
llm_goals_133,test,3,0.6202499483008838,0.220245744335209
llm_goals_133,test,4,0.5565487603106977,0.5879895213269322
llm_goals_133,test,5,0.630720271154586,0.8588037554744828
llm_goals_133,test,6,0.7919430924478442,0.8322442189227136
llm_goals_133,test,7,0.4122382870753996,0.6588760518065184
llm_goals_133,test,8,0.5080479451871991,0.5253890103378467
llm_goals_133,test,9,0.4574137004161169,0.8170254264656348
llm_goals_133,test,10,0.5145171003228252,0.4608975636274737
llm_goals_133,test,11,0.5456563667351045,0.2002942757447116
llm_goals_133,test,12,0.5901678857846574,0.4022389740133304
llm_goals_133,test,13,0.5643177435991404,0.8516519653230656
llm_goals_133,test,14,0.2971008357763885,0.0158444788418117
llm_goals_133,test,15,0.5456563667351045,0.5978365001716675
llm_goals_133,test,16,0.48376937262690684,0.0529535109452945
llm_goals_133,test,17,0.5085740866706598,0.2556263204518722
llm_goals_133,test,18,0.5354609346961973,0.0167466717766424
llm_goals_133,test,19,0.8297240962173479,0.0944244428527934
llm_goals_133,test,20,0.5755924019146437,0.6914976263434149
llm_goals_133,test,21,0.6731894209991216,0.8436048633272258
llm_goals_133,test,22,0.4466596701426037,0.2868007235124719
llm_goals_133,test,23,0.5363945570218163,0.1160262834732456
llm_goals_133,test,24,0.4527966690115202,0.0154812210183983
llm_goals_133,test,25,0.5390855316449182,0.3119789757098087
llm_goals_133,test,26,0.4421325443212351,0.7929420521534107
llm_goals_133,test,27,0.5725070964575597,0.2037716603127804
llm_goals_133,test,28,0.28020171551747136,0.3118772754734958
llm_goals_133,test,29,0.7007879864765321,0.1808521166351184
llm_goals_133,test,30,0.6944503265364169,0.5045142070808941
llm_goals_133,test,31,0.7483247718531243,0.8498369503935175
llm_goals_133,test,32,0.43816213585893576,0.2694815541177057
llm_goals_133,test,33,0.6060439959321818,0.0177588567715867
llm_goals_133,test,34,0.5599189275175968,0.445395356923409
llm_goals_133,test,35,0.5806827933375809,0.4995371449295338
llm_goals_133,test,36,0.46654041590725637,0.4823747071618506
llm_goals_133,test,37,0.8854866440715815,0.0266815206082507
llm_goals_133,test,38,0.6993258597444031,0.0874820007739646
llm_goals_133,test,39,0.5456563667351045,0.3366422826024648
llm_goals_133,test,40,0.6253725438546921,0.2329899847029727
llm_goals_133,test,41,0.7668995235297392,0.8571491285285516
llm_goals_133,test,42,0.4129857632441191,0.8193201391590431
llm_goals_133,test,43,0.5881931378908482,0.8422289427227235
llm_goals_133,test,44,0.6198944960978435,0.5294372908894646
llm_goals_133,test,45,0.49185453358677456,0.2588603231187393
llm_goals_133,test,46,0.610309838648964,0.2337208499272645
llm_goals_133,test,47,0.6838150828438123,0.1166634693988454
llm_goals_133,test,48,0.23682201959788082,0.6138306242825586
llm_goals_133,test,49,0.6345787126757099,0.2806552214021111
llm_goals_157,test,0,0.07575155092809056,0.6355320308190959
llm_goals_157,test,1,0.22305854894383237,0.7424853921670768
llm_goals_157,test,2,0.3543186489634716,0.073314241900373
llm_goals_157,test,3,0.1826875571136769,0.4675850432855137
llm_goals_157,test,4,0.7106967170380978,0.5458627288071527
llm_goals_157,test,5,0.13627973021243925,0.0771889198722071
llm_goals_157,test,6,0.36569899430273195,0.0818153292956077
llm_goals_157,test,7,0.07687561788846647,0.0574362851972565
llm_goals_157,test,8,0.7106967170380978,0.612650525482466
llm_goals_157,test,9,0.3714272341816626,0.025148981997583
llm_goals_157,test,10,0.2995604246911792,0.075973919657683
llm_goals_157,test,11,0.40460078186478926,0.1464687737403695
llm_goals_157,test,12,0.490784049383298,0.7268917489370714
llm_goals_157,test,13,0.0887633678507027,0.0531648084509613
llm_goals_157,test,14,0.31637962619753185,0.3299706213737123
llm_goals_157,test,15,0.666678595611962,0.3043292296967997
llm_goals_157,test,16,0.4695504725753608,0.7463776357566768
llm_goals_157,test,17,0.2035685411870367,0.8521737469617978
llm_goals_157,test,18,0.5718670415074408,0.6376039881194869
llm_goals_157,test,19,0.9205665019931818,0.6077618725185671
llm_goals_157,test,20,0.6666785956119617,0.7242060188576479
llm_goals_157,test,21,0.07819393356117789,0.2051165554994685
llm_goals_157,test,22,0.4018961010419342,0.7995715125449558
llm_goals_157,test,23,0.1603298750396178,0.1467023010082007
llm_goals_157,test,24,0.2444568735359316,0.3108830330026099
llm_goals_157,test,25,0.29956042469117916,0.1001087986592319
llm_goals_157,test,26,0.29540829299187094,0.4552169748909065
llm_goals_157,test,27,0.4175391575571781,0.8485816155817194
llm_goals_157,test,28,0.14371200935208042,0.2265082237706469
llm_goals_157,test,29,0.4058462237939479,0.6283476283012512
llm_goals_157,test,30,0.30108935984755325,0.2620283904445711
llm_goals_157,test,31,0.6352061477144487,0.167952021094149
llm_goals_157,test,32,0.11389572198721826,0.0898426790762853
llm_goals_157,test,33,0.4695504725753608,0.5043942365008254
llm_goals_157,test,34,0.2879401173002328,0.0725868476993519
llm_goals_157,test,35,0.08969951550606577,0.1237829648198263
llm_goals_157,test,36,0.2995604246911792,0.1925318890375481
llm_goals_157,test,37,0.12224479526739747,0.6842419436738383
llm_goals_157,test,38,0.2877241108675765,0.691649855522258
llm_goals_157,test,39,0.4306417160233386,0.5264401359594252
llm_goals_157,test,40,0.16985220208757554,0.1777385641102212
llm_goals_157,test,41,0.4025726224865686,0.0328183379619016
llm_goals_157,test,42,0.09082358246644168,0.1949198629220696
llm_goals_157,test,43,0.1643572479548233,0.135028229661103
llm_goals_157,test,44,0.7106967170380978,0.7258662504199943
llm_goals_157,test,45,0.21315260073816608,0.0935995604675028
llm_goals_157,test,46,0.6670122185270292,0.15418174898882
llm_goals_157,test,47,0.1662512137895075,0.3212815335896988
llm_goals_157,test,48,0.02188423336092273,0.2795304252191649
llm_goals_157,test,49,0.6936203928839045,0.7371862441831876
llm_goals_186,test,0,0.2682079284928854,0.0367510709036243
llm_goals_186,test,1,0.474599907593422,0.9589190483469818
llm_goals_186,test,2,0.029231960913375665,0.0667798014228945
llm_goals_186,test,3,0.32618136337091413,0.0340377111776925
llm_goals_186,test,4,0.46728953676408436,0.4002710909538108
llm_goals_186,test,5,0.6801595661258256,0.8817241785963961
llm_goals_186,test,6,0.8164327760931235,0.8798493094453552
llm_goals_186,test,7,0.03684163448705357,0.0443878504309986
llm_goals_186,test,8,0.49221718854989227,0.4415592471567901
llm_goals_186,test,9,0.7609491893658502,0.8847759344734797
llm_goals_186,test,10,0.86476776844312,0.8905032311770802
llm_goals_186,test,11,0.7721199188356489,0.704948600699243
llm_goals_186,test,12,0.30582869288721987,0.409597002882604
llm_goals_186,test,13,0.9149637043401868,0.8867287662434378
llm_goals_186,test,14,0.7666297315167592,0.8128145923588551
llm_goals_186,test,15,0.7779721185796274,0.4832503599366539
llm_goals_186,test,16,0.7877604163813031,0.8267478910426029
llm_goals_186,test,17,0.8144538592049692,0.957207678971151
llm_goals_186,test,18,0.7760642149939742,0.7610878868091969
llm_goals_186,test,19,0.9356427194756126,0.7872715543385574
llm_goals_186,test,20,0.4345630535971545,0.520514563098938
llm_goals_186,test,21,0.7263758193002854,0.9084513175350956
llm_goals_186,test,22,0.5055227868702041,0.9157385254478472
llm_goals_186,test,23,0.3392029998679669,0.5585242484052028
llm_goals_186,test,24,0.779130284511966,0.8535531187105866
llm_goals_186,test,25,0.8738818374352614,0.8466653814359101
llm_goals_186,test,26,0.2576264683977954,0.8550637970014335
llm_goals_186,test,27,0.3207436302410992,0.9398175480710378
llm_goals_186,test,28,0.3207436302410992,0.0028363068023126
llm_goals_186,test,29,0.8667001720734381,0.8305997204848772
llm_goals_186,test,30,0.8605783762783857,0.8987706923113074
llm_goals_186,test,31,0.6404150319794356,0.8880043621770078
llm_goals_186,test,32,0.683023891072228,0.4759852559635721
llm_goals_186,test,33,0.7709678597530841,0.8450176079466206
llm_goals_186,test,34,0.86476776844312,0.9532899841201126
llm_goals_186,test,35,0.918362003942359,0.0290458993492998
llm_goals_186,test,36,0.9016770925817722,0.9324312223548752
llm_goals_186,test,37,0.5239941639207056,0.7062064442252718
llm_goals_186,test,38,0.32828099037549463,0.8475385890175953
llm_goals_186,test,39,0.4828459011494008,0.2451742937665777
llm_goals_186,test,40,0.5461696187512545,0.7628755019143414
llm_goals_186,test,41,0.521900434370853,0.9118681525061464
llm_goals_186,test,42,0.2682079284928854,0.0299416334392138
llm_goals_186,test,43,0.601820274000187,0.9088531704037304
llm_goals_186,test,44,0.4519738276228136,0.399223879564458
llm_goals_186,test,45,0.371436122708621,0.0207412737387863
llm_goals_186,test,46,0.5874844341422216,0.504435185656878
llm_goals_186,test,47,0.35968395528560265,0.0426878976387859
llm_goals_186,test,48,0.7561525269377215,0.8374391175680358
llm_goals_186,test,49,0.4954094485960792,0.9286286845568336
llm_goals_401,test,0,0.6184047751639278,0.9617876405899952
llm_goals_401,test,1,0.7230454450891406,0.9404490002093446
llm_goals_401,test,2,0.8507607753530629,0.935046673136648
llm_goals_401,test,3,0.932048320191194,0.9543637645078082
llm_goals_401,test,4,0.6838076451127016,0.9660480618061604
llm_goals_401,test,5,0.6335463690446367,0.9211199313115676
llm_goals_401,test,6,0.8095534066119813,0.902884063341732
llm_goals_401,test,7,0.8971091515986206,0.94729781169548
llm_goals_401,test,8,0.8016563870172292,0.957207800210959
llm_goals_401,test,9,0.8397063621484783,0.9145740305533586
llm_goals_401,test,10,0.5850684162637231,0.9267561414021268
llm_goals_401,test,11,0.9218423575624825,0.9242701512661912
llm_goals_401,test,12,0.8004076093805096,0.8435903191618971
llm_goals_401,test,13,0.5814541206205167,0.938192186231483
llm_goals_401,test,14,0.06556123735139,0.8529976602342201
llm_goals_401,test,15,0.861516217097472,0.9478884329856586
llm_goals_401,test,16,0.35871373598049044,0.8192354997719525
llm_goals_401,test,17,0.7115883810743812,0.6739075253562599
llm_goals_401,test,18,0.2048464237106755,0.5197439125792754
llm_goals_401,test,19,0.9096097574642202,0.8583977843568281
llm_goals_401,test,20,0.8016563870172292,0.9455993763925636
llm_goals_401,test,21,0.7526128300743942,0.9705763460601288
llm_goals_401,test,22,0.7786682196443352,0.8491748031226035
llm_goals_401,test,23,0.5330573627551101,0.8515620190775874
llm_goals_401,test,24,0.22472973319472156,0.9138265832467732
llm_goals_401,test,25,0.7122650696846232,0.9028179661562172
llm_goals_401,test,26,0.0854445468354361,0.9558187125982178
llm_goals_401,test,27,0.8234926022668648,0.5612720368847214
llm_goals_401,test,28,0.940071384725028,0.9617025244944858
llm_goals_401,test,29,0.9492852102359302,0.8060652645257104
llm_goals_401,test,30,0.6696460815069226,0.8896843204884566
llm_goals_401,test,31,0.7526128300743942,0.9526923309875832
llm_goals_401,test,32,0.882507662197619,0.8506568677811456
llm_goals_401,test,33,0.5016789315459752,0.7802594846608962
llm_goals_401,test,34,0.6471056728010888,0.8870658880824575
llm_goals_401,test,35,0.4065702266619395,0.954492271718892
llm_goals_401,test,36,0.38794441691519704,0.562048146808884
llm_goals_401,test,37,0.8102821087939539,0.7781126533121301
llm_goals_401,test,38,0.9538239056570789,0.8591165497422784
llm_goals_401,test,39,0.7035422483994532,0.8788934372485709
llm_goals_401,test,40,0.8438143098736804,0.8091102829935539
llm_goals_401,test,41,0.7042438201880464,0.8675616408909181
llm_goals_401,test,42,0.8748585575746898,0.949993671778176
llm_goals_401,test,43,0.786924202960364,0.9467448581700896
llm_goals_401,test,44,0.8407871129490895,0.8687242293966851
llm_goals_401,test,45,0.9214604320705334,0.9587250339741858
llm_goals_401,test,46,0.13968551238962792,0.860452357605835
llm_goals_401,test,47,0.8897339533704473,0.9253614550163576
llm_goals_401,test,48,0.31831136548164607,0.8647797053472309
llm_goals_401,test,49,0.5266224893547556,0.9454760570818088
llm_goals_420,test,0,0.20679277203850643,0.9711560599789952
llm_goals_420,test,1,0.24138743640254598,0.5978053926454786
llm_goals_420,test,2,0.12875141098340953,0.0817351976918742
llm_goals_420,test,3,0.4422742816360566,0.4730792485055278
llm_goals_420,test,4,0.39267262739391745,0.8487827178347965
llm_goals_420,test,5,0.20700021697859397,0.1386299974231985
llm_goals_420,test,6,0.2091505584667224,0.9677888754301456
llm_goals_420,test,7,0.16899784273043203,0.9193048504515688
llm_goals_420,test,8,0.0783780140245125,0.1105940972856699
llm_goals_420,test,9,0.2898441974484152,0.0770199238560772
llm_goals_420,test,10,0.30040991813268386,0.6569762799970394
llm_goals_420,test,11,0.4654040327308424,0.4533228613613927
llm_goals_420,test,12,0.21995502251454704,0.9317349816078236
llm_goals_420,test,13,0.16459025071140534,0.5042190525433249
llm_goals_420,test,14,0.21380305340277359,0.1440102415442949
llm_goals_420,test,15,0.343366631570232,0.3273024402815145
llm_goals_420,test,16,0.40994696164317007,0.3681827771891491
llm_goals_420,test,17,0.3433418520667261,0.4632337459639504
llm_goals_420,test,18,0.1713907635148005,0.0396023029282074
llm_goals_420,test,19,0.09421353541648107,0.1093558671361818
llm_goals_420,test,20,0.29110630262282267,0.0396252534530884
llm_goals_420,test,21,0.1334011517544384,0.0465700238365722
llm_goals_420,test,22,0.09009939856973698,0.1916998836408959
llm_goals_420,test,23,0.23423841102049567,0.1681177802630551
llm_goals_420,test,24,0.3711415293139013,0.1698145969501656
llm_goals_420,test,25,0.09245089883214913,0.9829779862454848
llm_goals_420,test,26,0.10903818888655586,0.0308922007548325
llm_goals_420,test,27,0.8746483528033142,0.8125830946093627
llm_goals_420,test,28,0.1736980804379371,0.9367654930611784
llm_goals_420,test,29,0.0797836141031536,0.8184437096518178
llm_goals_420,test,30,0.16245480273820626,0.9386603865018432
llm_goals_420,test,31,0.0980395942110835,0.10851579640767
llm_goals_420,test,32,0.32038779393077094,0.9666374926215946
llm_goals_420,test,33,0.3064090096781948,0.8901044715204638
llm_goals_420,test,34,0.3403401615443978,0.9215939624410928
llm_goals_420,test,35,0.24984294407330832,0.5267802921981926
llm_goals_420,test,36,0.20892756636720375,0.092175658510858
llm_goals_420,test,37,0.23427940954736612,0.984362841437198
llm_goals_420,test,38,0.0945536825413016,0.8828047505171253
llm_goals_420,test,39,0.654598614119328,0.2972862903877061
llm_goals_420,test,40,0.24758686818509956,0.0444444838151291
llm_goals_420,test,41,0.5031948009852701,0.5478361768565629
llm_goals_420,test,42,0.1765500719488171,0.0713350995649345
llm_goals_420,test,43,0.20308407304505294,0.5106248261137879
llm_goals_420,test,44,0.12866512015317508,0.0905225225380096
llm_goals_420,test,45,0.2889492971844838,0.1531352912609338
llm_goals_420,test,46,0.2759426215367277,0.1216667665904784
llm_goals_420,test,47,0.16817083044964942,0.1240460201984882
llm_goals_420,test,48,0.2145620093509254,0.0596825371886435
llm_goals_420,test,49,0.3402761439452919,0.8335117138475971
llm_goals_263,test,0,0.7292716310056679,0.5209935935366439
llm_goals_263,test,1,0.6333473482039529,0.7968068746445729
llm_goals_263,test,2,0.700142140590568,0.5467470232144288
llm_goals_263,test,3,0.741649331792461,0.4733215007319302
llm_goals_263,test,4,0.9430514393691878,0.0057083956844275
llm_goals_263,test,5,0.5259285122434637,0.3493473596509424
llm_goals_263,test,6,0.6802368754110416,0.8201550249820808
llm_goals_263,test,7,0.802386403787505,0.6557906816477753
llm_goals_263,test,8,0.19744972661785098,0.7543426271500917
llm_goals_263,test,9,0.2245716167716585,0.2009829696053626
llm_goals_263,test,10,0.6837216465748145,0.8489753664207734
llm_goals_263,test,11,0.8001951398246759,0.652436611134806
llm_goals_263,test,12,0.27799339501654646,0.8143466734577086
llm_goals_263,test,13,0.854845027394079,0.4245607212842352
llm_goals_263,test,14,0.5815215705654925,0.0036452536113296
llm_goals_263,test,15,0.8057167134470918,0.0558692673280436
llm_goals_263,test,16,0.5332805073607757,0.0066133854077101
llm_goals_263,test,17,0.3532501005896947,0.7648655133279786
llm_goals_263,test,18,0.283893703405131,0.006651634113929
llm_goals_263,test,19,0.6897260514253657,0.0487544569045993
llm_goals_263,test,20,0.5580368494309156,0.0331700030843074
llm_goals_263,test,21,0.4901849077058243,0.2223244924357846
llm_goals_263,test,22,0.45874111891380726,0.8850367693524676
llm_goals_263,test,23,0.8580750855868198,0.6136206316393849
llm_goals_263,test,24,0.20004297817440617,0.0103991722036265
llm_goals_263,test,25,0.6494269767058415,0.7490420921699364
llm_goals_263,test,26,0.696986850795238,0.0149267434716544
llm_goals_263,test,27,0.5539716372501868,0.7989164572724189
llm_goals_263,test,28,0.594875398849853,0.6376088146659757
llm_goals_263,test,29,0.5061110841387157,0.0419343519282845
llm_goals_263,test,30,0.9129625966101367,0.8598701912381717
llm_goals_263,test,31,0.46586469664373836,0.168156003178124
llm_goals_263,test,32,0.5663294573935872,0.6210063725022936
llm_goals_263,test,33,0.6181688505196683,0.0062242916440703
llm_goals_263,test,34,0.8756908749831984,0.5691545063377933
llm_goals_263,test,35,0.8001951398246759,0.7982008481236453
llm_goals_263,test,36,0.6883092839021612,0.763706617142148
llm_goals_263,test,37,0.7852905421587647,0.0342430752915087
llm_goals_263,test,38,0.7334012584879431,0.1222660265497154
llm_goals_263,test,39,0.8057167134470918,0.8269810089104592
llm_goals_263,test,40,0.8016535221914655,0.5318831885299807
llm_goals_263,test,41,0.22098743425173425,0.3207528198494705
llm_goals_263,test,42,0.8464826309829826,0.4801431121790341
llm_goals_263,test,43,0.550600322155626,0.1981375631842198
llm_goals_263,test,44,0.3061387965786482,0.1080272425734687
llm_goals_263,test,45,0.500608842075385,0.8354177467829539
llm_goals_263,test,46,0.6490075023212643,0.7413273974280716
llm_goals_263,test,47,0.8690984262657916,0.4918597295945824
llm_goals_263,test,48,0.5099278879824859,0.0043793883332247
llm_goals_263,test,49,0.8623615706052443,0.949535011062065
llm_goals_427,test,0,0.4816905202879946,0.2826418194423331
llm_goals_427,test,1,0.6756636100829422,0.380977464557011
llm_goals_427,test,2,0.4778014644734654,0.1487359361961987
llm_goals_427,test,3,0.4513196059072359,0.3576580711889061
llm_goals_427,test,4,0.7367791141209438,0.2270212997591984
llm_goals_427,test,5,0.6625563558728086,0.2711625929555684
llm_goals_427,test,6,0.6793347562358947,0.1402530886796432
llm_goals_427,test,7,0.6215770670566462,0.2935996266753878
llm_goals_427,test,8,0.5758747983582602,0.2137670314614922
llm_goals_427,test,9,0.6308503736705152,0.2160041132788326
llm_goals_427,test,10,0.6571886859312063,0.3669200145460971
llm_goals_427,test,11,0.6376247284220646,0.1516882402250043
llm_goals_427,test,12,0.6406948790326641,0.1475453757541205
llm_goals_427,test,13,0.6986213324447642,0.1350458405109811
llm_goals_427,test,14,0.5675683424937928,0.2337964230636721
llm_goals_427,test,15,0.6843307253502967,0.3115728193455654
llm_goals_427,test,16,0.6110151382482,0.131520745523965
llm_goals_427,test,17,0.6122339537969727,0.3278496493117007
llm_goals_427,test,18,0.6538442372641378,0.3527567015929387
llm_goals_427,test,19,0.368097413724693,0.2313815817463674
llm_goals_427,test,20,0.669634219664824,0.3221674001172885
llm_goals_427,test,21,0.6308503736705152,0.2494982149574568
llm_goals_427,test,22,0.6122339537969727,0.3779860883941361
llm_goals_427,test,23,0.46966039671823606,0.3509138567308049
llm_goals_427,test,24,0.6315498738358666,0.1946130607095621
llm_goals_427,test,25,0.6157097727924116,0.228222711067827
llm_goals_427,test,26,0.6538442372641378,0.2768263295001016
llm_goals_427,test,27,0.5796200126952048,0.2199474495731038
llm_goals_427,test,28,0.6907409412943187,0.1579098434370231
llm_goals_427,test,29,0.4976619424526219,0.069843346359134
llm_goals_427,test,30,0.6522450569649466,0.5359787931933129
llm_goals_427,test,31,0.5489947708208569,0.1293182240484197
llm_goals_427,test,32,0.6580277685014493,0.2396093502744168
llm_goals_427,test,33,0.6579675226412659,0.1913976476875258
llm_goals_427,test,34,0.6427297320560129,0.4990200469731286
llm_goals_427,test,35,0.5761562892585138,0.357807030322738
llm_goals_427,test,36,0.6993850335904334,0.2580007688260685
llm_goals_427,test,37,0.37776252487565193,0.4031150735356658
llm_goals_427,test,38,0.6393673529339073,0.2737247490124149
llm_goals_427,test,39,0.5119495113312916,0.2478362318668904
llm_goals_427,test,40,0.6308503736705152,0.5026721270474518
llm_goals_427,test,41,0.42539464890038153,0.2273186194850299
llm_goals_427,test,42,0.47235637876992714,0.4444071308114509
llm_goals_427,test,43,0.7759971604326594,0.2369720640870139
llm_goals_427,test,44,0.749218383833477,0.3961639877527012
llm_goals_427,test,45,0.5348543008721084,0.2771055535106792
llm_goals_427,test,46,0.6954383423903171,0.3138421601355035
llm_goals_427,test,47,0.5033608349045474,0.3609031182529178
llm_goals_427,test,48,0.611654663797529,0.1899314535085168
llm_goals_427,test,49,0.7415247965186004,0.4452677872765214
llm_goals_449,test,0,0.0058296449572809,0.0338418398422384
llm_goals_449,test,1,0.1373305085288901,0.5657697432527007
llm_goals_449,test,2,0.019488354366039202,0.0110135668657682
llm_goals_449,test,3,0.11131518043188131,0.2352946829974157
llm_goals_449,test,4,0.3304271875822934,0.4842577903424374
llm_goals_449,test,5,0.04353562947674874,0.3485549555066181
llm_goals_449,test,6,0.04353562947674874,0.0519669241570106
llm_goals_449,test,7,0.006688178506786234,0.0031269987549636
llm_goals_449,test,8,0.4275223521409853,0.2974090419757364
llm_goals_449,test,9,0.02454763027067533,0.0925536782452943
llm_goals_449,test,10,0.12574976490158635,0.4386853694122202
llm_goals_449,test,11,0.051190011806135666,0.0063782741293255
llm_goals_449,test,12,0.04703881407983517,0.0714949969968408
llm_goals_449,test,13,0.04566359811952414,0.0227751515807358
llm_goals_449,test,14,0.08832966885119943,0.3195753835685478
llm_goals_449,test,15,0.19512553171325117,0.2420638006790782
llm_goals_449,test,16,0.0768730180543936,0.0953277975239941
llm_goals_449,test,17,0.11675524867721097,0.5835221517250182
llm_goals_449,test,18,0.05137281778115053,0.1603112674891041
llm_goals_449,test,19,0.04961640753270966,0.6833613521345718
llm_goals_449,test,20,0.2780670968532084,0.4549636770044163
llm_goals_449,test,21,0.09423993706735846,0.1579551412473356
llm_goals_449,test,22,0.038118676234206304,0.4400750405221786
llm_goals_449,test,23,0.12383358401617994,0.0251530775884869
llm_goals_449,test,24,0.09815575299169581,0.2695772384712642
llm_goals_449,test,25,0.12532976560206094,0.4519585097229938
llm_goals_449,test,26,0.026854905664604297,0.1123650856049343
llm_goals_449,test,27,0.0213962743702637,0.4873581137262729
llm_goals_449,test,28,0.047889975449533044,0.1751834751469549
llm_goals_449,test,29,0.0617882604592975,0.5804994670396382
llm_goals_449,test,30,0.12443664409245007,0.4626257946707522
llm_goals_449,test,31,0.03555885925261643,0.093699276239858
llm_goals_449,test,32,0.06316234434582524,0.0067638379987912
llm_goals_449,test,33,0.04845341502990571,0.1681834079617312
llm_goals_449,test,34,0.12209265095701198,0.3493466516790004
llm_goals_449,test,35,0.0669859307854548,0.0068633490722964
llm_goals_449,test,36,0.2124881164554512,0.3064552348162913
llm_goals_449,test,37,0.04670979975706837,0.6652150761082033
llm_goals_449,test,38,0.0893391438301772,0.6741024889471738
llm_goals_449,test,39,0.08622144932707831,0.0101379083480811
llm_goals_449,test,40,0.0644774410758258,0.0089588521989727
llm_goals_449,test,41,0.04353562947674874,0.1780122280374072
llm_goals_449,test,42,0.009248822149883299,0.0051090174127486
llm_goals_449,test,43,0.038769249091604764,0.2498444495344637
llm_goals_449,test,44,0.19622954556796077,0.221646438441774
llm_goals_449,test,45,0.1204263467237453,0.2094790944562388
llm_goals_449,test,46,0.1014830636751884,0.0060341568900946
llm_goals_449,test,47,0.11526271958600749,0.1241710666090617
llm_goals_449,test,48,0.019219154509253464,0.1338989075405874
llm_goals_449,test,49,0.0796148284525581,0.5982472498607049
llm_goals_93,test,0,0.4239589351187447,0.0729546648027557
llm_goals_93,test,1,0.6134298289966332,0.6981648167380458
llm_goals_93,test,2,0.6991384173497249,0.3318565449695651
llm_goals_93,test,3,0.47825719715816567,0.5505450337981316
llm_goals_93,test,4,0.6271416782483175,0.9547701486411684
llm_goals_93,test,5,0.3586001970146606,0.8378989133869518
llm_goals_93,test,6,0.79202292069473,0.8645678173925403
llm_goals_93,test,7,0.06500487156281197,0.8034776985663136
llm_goals_93,test,8,0.9307662641684189,0.9139015687588464
llm_goals_93,test,9,0.7704774524438651,0.84076714934408
llm_goals_93,test,10,0.1521608138026093,0.6739811171843284
llm_goals_93,test,11,0.9561956921398327,0.1039005746210211
llm_goals_93,test,12,0.4415745985612765,0.4267719606730696
llm_goals_93,test,13,0.5477041332733376,0.8366404177501517
llm_goals_93,test,14,0.9045655936338933,0.51043302648597
llm_goals_93,test,15,0.6558786703735542,0.9305120134073452
llm_goals_93,test,16,0.6464092726977028,0.3610424966705102
llm_goals_93,test,17,0.020092045608232134,0.6383014796360676
llm_goals_93,test,18,0.6321255736808035,0.7718638733110297
llm_goals_93,test,19,0.3196832236824733,0.6413577201238602
llm_goals_93,test,20,0.8380047844833238,0.9315750038397262
llm_goals_93,test,21,0.6297797304419422,0.959779321458113
llm_goals_93,test,22,0.5781973539643853,0.8212707656396169
llm_goals_93,test,23,0.6085692834119827,0.0620020892855925
llm_goals_93,test,24,0.8999534655315525,0.6707394124648912
llm_goals_93,test,25,0.474290336878179,0.8513622881275814
llm_goals_93,test,26,0.7474623443496066,0.9295520797235572
llm_goals_93,test,27,0.6205133942698736,0.731125599089208
llm_goals_93,test,28,0.7831752138768927,0.6387209955509132
llm_goals_93,test,29,0.3860096450621753,0.904010833763946
llm_goals_93,test,30,0.5808767167254532,0.6688304494781727
llm_goals_93,test,31,0.7636452586358828,0.7246857956085144
llm_goals_93,test,32,0.6564121346401864,0.8395389979928227
llm_goals_93,test,33,0.9368145464502745,0.4902935277900161
llm_goals_93,test,34,0.07323920640657694,0.5478889802411547
llm_goals_93,test,35,0.5559699304152003,0.8178324059837278
llm_goals_93,test,36,0.6507626488991122,0.8865698576591567
llm_goals_93,test,37,0.3085508750410973,0.9194802654649477
llm_goals_93,test,38,0.6375846729720197,0.8798558779688433
llm_goals_93,test,39,0.8806574365069356,0.2399301633758412
llm_goals_93,test,40,0.6142727877754569,0.946265162703226
llm_goals_93,test,41,0.6248715437394493,0.8435175392494392
llm_goals_93,test,42,0.12715081554097887,0.260949780916787
llm_goals_93,test,43,0.8159667022830247,0.8190814792570137
llm_goals_93,test,44,0.9241313951086644,0.951412738003951
llm_goals_93,test,45,0.8173697815723925,0.5085180485155563
llm_goals_93,test,46,0.8801904902737773,0.946404217454057
llm_goals_93,test,47,0.49415644354967525,0.5778150570910173
llm_goals_93,test,48,0.6483668335942524,0.6859809771933842
llm_goals_93,test,49,0.3186319214277559,0.6187362092588319
llm_goals_358,test,0,0.874163490767265,0.9198604804287828
llm_goals_358,test,1,0.727698379997953,0.8623428941495346
llm_goals_358,test,2,0.8776336145276188,0.8146633538972576
llm_goals_358,test,3,0.927803714219316,0.6635203414505186
llm_goals_358,test,4,0.9423816604696641,0.9873765704706148
llm_goals_358,test,5,0.624495307175923,0.8085697334742067
llm_goals_358,test,6,0.6366296885942168,0.8449593731392603
llm_goals_358,test,7,0.6422476378582064,0.7802023295414534
llm_goals_358,test,8,0.9755596318261966,0.5598487865057791
llm_goals_358,test,9,0.8483941555227182,0.7689259718491731
llm_goals_358,test,10,0.7844029437195056,0.4090564458803354
llm_goals_358,test,11,0.6707484059999421,0.7281676519766522
llm_goals_358,test,12,0.5400435461509258,0.781706684015145
llm_goals_358,test,13,0.8478659583209683,0.6175350663762746
llm_goals_358,test,14,0.11932973515299872,0.1259366314847969
llm_goals_358,test,15,0.9347979026705978,0.9665391433681092
llm_goals_358,test,16,0.6503770866567525,0.0977212357476949
llm_goals_358,test,17,0.36950975800433145,0.839291034278836
llm_goals_358,test,18,0.6271319220487285,0.0422931239637008
llm_goals_358,test,19,0.6175530791673697,0.3805861792548485
llm_goals_358,test,20,0.7572123999005926,0.972559035375534
llm_goals_358,test,21,0.8627439916603485,0.7844216881478699
llm_goals_358,test,22,0.6026046632073662,0.8432835924874589
llm_goals_358,test,23,0.12299989008140048,0.5831400725193898
llm_goals_358,test,24,0.15456825360843784,0.4717356494812245
llm_goals_358,test,25,0.0417321365090794,0.5591997574513735
llm_goals_358,test,26,0.24025076573191698,0.2416094818565881
llm_goals_358,test,27,0.5110918802920906,0.8557277309389357
llm_goals_358,test,28,0.8933634757557023,0.7784201381020285
llm_goals_358,test,29,0.34419461719184663,0.9848793746115032
llm_goals_358,test,30,0.3308779727262206,0.6211451382223804
llm_goals_358,test,31,0.6581246213220521,0.3507967043222499
llm_goals_358,test,32,0.12836131424986294,0.670993868258675
llm_goals_358,test,33,0.49589830808062496,0.0563775762088827
llm_goals_358,test,34,0.09603276048622617,0.4923522105012786
llm_goals_358,test,35,0.8779449893887886,0.8157534628947803
llm_goals_358,test,36,0.4919470862321999,0.7491294702469764
llm_goals_358,test,37,0.4107463429301222,0.5478990059483921
llm_goals_358,test,38,0.27265182555177203,0.9873113036659013
llm_goals_358,test,39,0.2983382889290079,0.6807296173615788
llm_goals_358,test,40,0.8852063192972643,0.6446558925323718
llm_goals_358,test,41,0.1751540571752087,0.3360622093964198
llm_goals_358,test,42,0.8776336145276188,0.6737437200032999
llm_goals_358,test,43,0.7815988207534595,0.453765234769123
llm_goals_358,test,44,0.41479993320019437,0.921309112250278
llm_goals_358,test,45,0.361245759492608,0.5133269325272689
llm_goals_358,test,46,0.7735574665820265,0.7619134214235583
llm_goals_358,test,47,0.6356328026683419,0.5260478340890856
llm_goals_358,test,48,0.20021859464034397,0.0910570466126295
llm_goals_358,test,49,0.516758874318664,0.4710935988277175
