template_id,split,question_idx,prediction,label
llm_goals_75,test,0,0.2484055459499359,0.0759730864369293
llm_goals_75,test,1,0.5204739570617676,0.8120127334654941
llm_goals_75,test,2,0.24704930186271667,0.114709127325878
llm_goals_75,test,3,0.20457357168197632,0.1496870071208148
llm_goals_75,test,4,0.5091306567192078,0.321632167436288
llm_goals_75,test,5,0.10175831615924835,0.0308084020440725
llm_goals_75,test,6,0.10947556048631668,0.1843175427563358
llm_goals_75,test,7,0.2363959401845932,0.0324508518984932
llm_goals_75,test,8,0.5549420714378357,0.5189439655904322
llm_goals_75,test,9,0.10876652598381042,0.1502023728460554
llm_goals_75,test,10,0.5521592497825623,0.7657257241793485
llm_goals_75,test,11,0.5414537191390991,0.8035850534204568
llm_goals_75,test,12,0.5574524998664856,0.7307072346740583
llm_goals_75,test,13,0.10586127638816833,0.048717541539827
llm_goals_75,test,14,0.3325668275356293,0.4316714754134473
llm_goals_75,test,15,0.5208004117012024,0.3935335107130105
llm_goals_75,test,16,0.3355901539325714,0.6144967070154126
llm_goals_75,test,17,0.6510047912597656,0.4488809036296748
llm_goals_75,test,18,0.32697319984436035,0.3920271677291012
llm_goals_75,test,19,0.5691641569137573,0.7810532991351343
llm_goals_75,test,20,0.5389258861541748,0.5443958675997833
llm_goals_75,test,21,0.10353688150644302,0.1604437012338976
llm_goals_75,test,22,0.5030467510223389,0.856696781503626
llm_goals_75,test,23,0.5746419429779053,0.590876283737086
llm_goals_75,test,24,0.3317704498767853,0.4156776627984636
llm_goals_75,test,25,0.5810277462005615,0.5957841371692396
llm_goals_75,test,26,0.32707738876342773,0.3752723036244256
llm_goals_75,test,27,0.6296879053115845,0.7866846353759014
llm_goals_75,test,28,0.19825059175491333,0.1838073487826972
llm_goals_75,test,29,0.6101070046424866,0.853944837222159
llm_goals_75,test,30,0.4882782995700836,0.5992601606803653
llm_goals_75,test,31,0.10315129160881042,0.0743179753131944
llm_goals_75,test,32,0.5252411365509033,0.8015013965908157
llm_goals_75,test,33,0.32673558592796326,0.3498422912836759
llm_goals_75,test,34,0.5474640130996704,0.6692496458902099
llm_goals_75,test,35,0.25724610686302185,0.134422492314073
llm_goals_75,test,36,0.559192419052124,0.6683886675406214
llm_goals_75,test,37,0.5435150265693665,0.7419528346018889
llm_goals_75,test,38,0.4996154308319092,0.6995551035614008
llm_goals_75,test,39,0.49375686049461365,0.4983520000991506
llm_goals_75,test,40,0.5199440717697144,0.8622382893733471
llm_goals_75,test,41,0.10770384222269058,0.1026909531781266
llm_goals_75,test,42,0.27106478810310364,0.0240063792397545
llm_goals_75,test,43,0.12352503836154938,0.1143049422953708
llm_goals_75,test,44,0.5405521988868713,0.6884653626060131
llm_goals_75,test,45,0.17033621668815613,0.1059329319681491
llm_goals_75,test,46,0.5274079442024231,0.405588044943207
llm_goals_75,test,47,0.18104968965053558,0.1393457779753145
llm_goals_75,test,48,0.31610599160194397,0.3670858714001939
llm_goals_75,test,49,0.5100284814834595,0.8052537448889737
llm_goals_78,test,0,0.7549707889556885,0.710685793210421
llm_goals_78,test,1,0.6981112957000732,0.4521352090792044
llm_goals_78,test,2,0.7350003719329834,0.6984783435600235
llm_goals_78,test,3,0.5415565967559814,0.2113029164438998
llm_goals_78,test,4,0.4057596027851105,0.59362413330766
llm_goals_78,test,5,0.4849262535572052,0.2973601904856865
llm_goals_78,test,6,0.49594515562057495,0.4911861319335979
llm_goals_78,test,7,0.7300220727920532,0.5109292120432304
llm_goals_78,test,8,0.3405224084854126,0.0214058927819057
llm_goals_78,test,9,0.5090460181236267,0.4561729519761929
llm_goals_78,test,10,0.4977239668369293,0.0679189625299845
llm_goals_78,test,11,0.5120745897293091,0.0515682065315942
llm_goals_78,test,12,0.4959765076637268,0.2832565853408079
llm_goals_78,test,13,0.48854777216911316,0.3661356287525266
llm_goals_78,test,14,0.4503902494907379,0.5580731095047592
llm_goals_78,test,15,0.37522614002227783,0.3539146882613382
llm_goals_78,test,16,0.47618368268013,0.5198975918853231
llm_goals_78,test,17,0.7201120853424072,0.5974518854580513
llm_goals_78,test,18,0.46504926681518555,0.2539582988569515
llm_goals_78,test,19,0.5645319223403931,0.7818671446797811
llm_goals_78,test,20,0.38690656423568726,0.0817902870466741
llm_goals_78,test,21,0.5487217307090759,0.5313736848686219
llm_goals_78,test,22,0.6835417151451111,0.0578457193154468
llm_goals_78,test,23,0.5234751105308533,0.0588067732422271
llm_goals_78,test,24,0.4277079701423645,0.504620490567031
llm_goals_78,test,25,0.4371093511581421,0.1421301439321696
llm_goals_78,test,26,0.43715211749076843,0.4043160008204248
llm_goals_78,test,27,0.6662269234657288,0.1341547030792127
llm_goals_78,test,28,0.5078949928283691,0.585613528389559
llm_goals_78,test,29,0.547038197517395,0.8037635629119642
llm_goals_78,test,30,0.45693668723106384,0.2785354206868534
llm_goals_78,test,31,0.5308067202568054,0.5074585147112473
llm_goals_78,test,32,0.5631043314933777,0.1365325187594412
llm_goals_78,test,33,0.4514273703098297,0.559776178990876
llm_goals_78,test,34,0.4766260087490082,0.3923464392379233
llm_goals_78,test,35,0.7443811893463135,0.8694947336510349
llm_goals_78,test,36,0.46396979689598083,0.3406365028138333
llm_goals_78,test,37,0.5351241230964661,0.7407991807699866
llm_goals_78,test,38,0.523657500743866,0.5783193820848918
llm_goals_78,test,39,0.4977736473083496,0.2552456607848852
llm_goals_78,test,40,0.5564173460006714,0.0543816822443674
llm_goals_78,test,41,0.5048376321792603,0.5111749720190839
llm_goals_78,test,42,0.710189938545227,0.6373460300781935
llm_goals_78,test,43,0.5564144253730774,0.5824615770869916
llm_goals_78,test,44,0.35248222947120667,0.0725045867582824
llm_goals_78,test,45,0.4626684784889221,0.2055844845637347
llm_goals_78,test,46,0.5854929089546204,0.3443727538373435
llm_goals_78,test,47,0.5274096131324768,0.5137599661573641
llm_goals_78,test,48,0.4496590197086334,0.2386868392211005
llm_goals_78,test,49,0.6885040402412415,0.1092249314217729
llm_goals_53,test,0,0.11783691495656967,0.0313037944392233
llm_goals_53,test,1,0.3380012810230255,0.8240056677766623
llm_goals_53,test,2,0.12018902599811554,0.1449565288533016
llm_goals_53,test,3,0.3812718093395233,0.3918474727299789
llm_goals_53,test,4,0.38771021366119385,0.8286818229760559
llm_goals_53,test,5,0.10295183211565018,0.0496580173268359
llm_goals_53,test,6,0.10144105553627014,0.0987357123371914
llm_goals_53,test,7,0.12257460504770279,0.0188377606185046
llm_goals_53,test,8,0.38887929916381836,0.4663370658319237
llm_goals_53,test,9,0.10211276262998581,0.0353320635735392
llm_goals_53,test,10,0.3830440938472748,0.2465247528295183
llm_goals_53,test,11,0.3679816722869873,0.3482216267575191
llm_goals_53,test,12,0.36031368374824524,0.252877033485255
llm_goals_53,test,13,0.10146792978048325,0.1153444147006914
llm_goals_53,test,14,0.369995653629303,0.3868210483124377
llm_goals_53,test,15,0.3931889235973358,0.3705090883623566
llm_goals_53,test,16,0.3703981041908264,0.3407126088990169
llm_goals_53,test,17,0.33628156781196594,0.7543820151744927
llm_goals_53,test,18,0.3873696029186249,0.3322955201354975
llm_goals_53,test,19,0.35494163632392883,0.2261316448538241
llm_goals_53,test,20,0.3821883201599121,0.7343425814116562
llm_goals_53,test,21,0.10046599060297012,0.1601759070107346
llm_goals_53,test,22,0.3364414870738983,0.4190051893758809
llm_goals_53,test,23,0.35062628984451294,0.2354442333363782
llm_goals_53,test,24,0.39184245467185974,0.9719922543639756
llm_goals_53,test,25,0.3659422695636749,0.3289260309913144
llm_goals_53,test,26,0.3888823688030243,0.5781819989075234
llm_goals_53,test,27,0.3320070803165436,0.790876622992061
llm_goals_53,test,28,0.3855002820491791,0.2747068584160578
llm_goals_53,test,29,0.3873574733734131,0.796844478423965
llm_goals_53,test,30,0.3816085457801819,0.3573432990729852
llm_goals_53,test,31,0.10106674581766129,0.1489779797461384
llm_goals_53,test,32,0.36600518226623535,0.4788477745757265
llm_goals_53,test,33,0.3903069794178009,0.967617631882366
llm_goals_53,test,34,0.3853599429130554,0.2252558680565209
llm_goals_53,test,35,0.12048552185297012,0.2300146581986259
llm_goals_53,test,36,0.3547265827655792,0.2311446011459964
llm_goals_53,test,37,0.36141613125801086,0.144409781340655
llm_goals_53,test,38,0.38491466641426086,0.705274211964156
llm_goals_53,test,39,0.3839503824710846,0.5921281985506138
llm_goals_53,test,40,0.36636924743652344,0.2443004460536172
llm_goals_53,test,41,0.10216300189495087,0.061497045516474
llm_goals_53,test,42,0.12016560137271881,0.1570495895565415
llm_goals_53,test,43,0.10225335508584976,0.0641649100604756
llm_goals_53,test,44,0.3910701870918274,0.8074959504603542
llm_goals_53,test,45,0.36235952377319336,0.1813221143349916
llm_goals_53,test,46,0.35309898853302,0.2352295334667747
llm_goals_53,test,47,0.3613232672214508,0.820244135045574
llm_goals_53,test,48,0.38508841395378113,0.4512817747836695
llm_goals_53,test,49,0.3382495045661926,0.7022675298006026
llm_goals_305,test,0,0.33414483070373535,0.0438240495208372
llm_goals_305,test,1,0.7111316323280334,0.1352509650243347
llm_goals_305,test,2,0.33983346819877625,0.1723536643548477
llm_goals_305,test,3,0.760536789894104,0.250094549889143
llm_goals_305,test,4,0.8036453127861023,0.8419536365764244
llm_goals_305,test,5,0.34576308727264404,0.1913177614094214
llm_goals_305,test,6,0.40240710973739624,0.3723916849874114
llm_goals_305,test,7,0.4147115647792816,0.3337485526549648
llm_goals_305,test,8,0.8257679343223572,0.5824263052609597
llm_goals_305,test,9,0.4824804365634918,0.620184539039364
llm_goals_305,test,10,0.8442078232765198,0.494000537986959
llm_goals_305,test,11,0.5500813722610474,0.0482253546984711
llm_goals_305,test,12,0.3089693486690521,0.0565816635108951
llm_goals_305,test,13,0.34535014629364014,0.1136796894182332
llm_goals_305,test,14,0.8061455488204956,0.6102884358627442
llm_goals_305,test,15,0.8225298523902893,0.8302422164176205
llm_goals_305,test,16,0.82975172996521,0.589511050327059
llm_goals_305,test,17,0.626292884349823,0.2622011524235305
llm_goals_305,test,18,0.7833749651908875,0.3737828315077605
llm_goals_305,test,19,0.3888190686702728,0.0401465973617238
llm_goals_305,test,20,0.818587601184845,0.8971526617856483
llm_goals_305,test,21,0.3599590063095093,0.513862561708882
llm_goals_305,test,22,0.5683661103248596,0.6617399492970566
llm_goals_305,test,23,0.7199605703353882,0.3090300849059566
llm_goals_305,test,24,0.7928738594055176,0.4310886044152408
llm_goals_305,test,25,0.86185622215271,0.7346082271960779
llm_goals_305,test,26,0.8007258176803589,0.7784622217319951
llm_goals_305,test,27,0.6947399377822876,0.2285268892180965
llm_goals_305,test,28,0.7931835651397705,0.1970890988987535
llm_goals_305,test,29,0.37952420115470886,0.0543679274789402
llm_goals_305,test,30,0.844765305519104,0.8325365217586828
llm_goals_305,test,31,0.360045850276947,0.5078524819974076
llm_goals_305,test,32,0.6619743704795837,0.2061603354174771
llm_goals_305,test,33,0.8265666365623474,0.6039954962603894
llm_goals_305,test,34,0.8551191091537476,0.5754865922825898
llm_goals_305,test,35,0.3315120041370392,0.1664152616423
llm_goals_305,test,36,0.8331259489059448,0.7880486291444165
llm_goals_305,test,37,0.38454362750053406,0.03605078558617
llm_goals_305,test,38,0.47371283173561096,0.0575570687729377
llm_goals_305,test,39,0.3115227520465851,0.4297583044645929
llm_goals_305,test,40,0.6014182567596436,0.1915445959084499
llm_goals_305,test,41,0.47707295417785645,0.6536953532364804
llm_goals_305,test,42,0.3350028991699219,0.122527873906398
llm_goals_305,test,43,0.3508877456188202,0.1711906255032107
llm_goals_305,test,44,0.8367544412612915,0.8371975999741996
llm_goals_305,test,45,0.7981429696083069,0.3836914756409174
llm_goals_305,test,46,0.5652751326560974,0.0543049670678201
llm_goals_305,test,47,0.7996965646743774,0.5245484205428286
llm_goals_305,test,48,0.8011797070503235,0.5969765977742552
llm_goals_305,test,49,0.7592543959617615,0.2351952691938564
llm_goals_81,test,0,0.08937902748584747,0.2060992725380174
llm_goals_81,test,1,0.19596879184246063,0.1581010259271764
llm_goals_81,test,2,0.703111469745636,0.6899052301996583
llm_goals_81,test,3,0.8676121830940247,0.7805136989347726
llm_goals_81,test,4,0.1314968317747116,0.0972643506873081
llm_goals_81,test,5,0.047583527863025665,0.3089356066572108
llm_goals_81,test,6,0.058133725076913834,0.0383605574042886
llm_goals_81,test,7,0.4069121479988098,0.3161536190910042
llm_goals_81,test,8,0.25395363569259644,0.5126683642741202
llm_goals_81,test,9,0.03878137469291687,0.1286837550045714
llm_goals_81,test,10,0.05754582956433296,0.1256029490894943
llm_goals_81,test,11,0.4715951383113861,0.408772132619307
llm_goals_81,test,12,0.09426983445882797,0.1214277758013974
llm_goals_81,test,13,0.040232863277196884,0.0409721836992895
llm_goals_81,test,14,0.04087922349572182,0.0827685330888261
llm_goals_81,test,15,0.1393938809633255,0.224678660518271
llm_goals_81,test,16,0.04019201919436455,0.0872837265533343
llm_goals_81,test,17,0.203385591506958,0.2837028272857018
llm_goals_81,test,18,0.06357674300670624,0.1193591553103013
llm_goals_81,test,19,0.17103314399719238,0.4952181327203863
llm_goals_81,test,20,0.09904351830482483,0.2564665599863756
llm_goals_81,test,21,0.04310586303472519,0.2041083994609424
llm_goals_81,test,22,0.3071999251842499,0.3119739178030907
llm_goals_81,test,23,0.21845777332782745,0.0994638293022036
llm_goals_81,test,24,0.05661725252866745,0.1575467450844306
llm_goals_81,test,25,0.04669686406850815,0.0291125673317165
llm_goals_81,test,26,0.04649397358298302,0.0869071714890548
llm_goals_81,test,27,0.43603354692459106,0.1863589895989203
llm_goals_81,test,28,0.46310368180274963,0.119977738469712
llm_goals_81,test,29,0.16278253495693207,0.4410572612602489
llm_goals_81,test,30,0.07374269515275955,0.0555125882985712
llm_goals_81,test,31,0.04254565015435219,0.3486004983652399
llm_goals_81,test,32,0.5478938817977905,0.189941914212834
llm_goals_81,test,33,0.08967627584934235,0.1890390776737304
llm_goals_81,test,34,0.049409814178943634,0.1328217427635933
llm_goals_81,test,35,0.22817477583885193,0.1343397356978365
llm_goals_81,test,36,0.1031421422958374,0.0899701336915993
llm_goals_81,test,37,0.28154268860816956,0.6478431799513802
llm_goals_81,test,38,0.4306720197200775,0.7351879424598803
llm_goals_81,test,39,0.1938932240009308,0.0885373562628188
llm_goals_81,test,40,0.536449670791626,0.3150516869571108
llm_goals_81,test,41,0.03928641974925995,0.0498826197855212
llm_goals_81,test,42,0.08989192545413971,0.1381899328653333
llm_goals_81,test,43,0.04006006568670273,0.3536647169335493
llm_goals_81,test,44,0.5132151246070862,0.4857917971856136
llm_goals_81,test,45,0.6943884491920471,0.3921515914618259
llm_goals_81,test,46,0.18781420588493347,0.1179764264181158
llm_goals_81,test,47,0.33688703179359436,0.2786576050295211
llm_goals_81,test,48,0.047151509672403336,0.0726556891905063
llm_goals_81,test,49,0.6140314340591431,0.3912461690418435
llm_goals_133,test,0,0.6015408039093018,0.4440529323069227
llm_goals_133,test,1,0.5679860711097717,0.3955848959165448
llm_goals_133,test,2,0.4383726418018341,0.5130030466159122
llm_goals_133,test,3,0.08895085006952286,0.220245744335209
llm_goals_133,test,4,0.5979760885238647,0.5879895213269322
llm_goals_133,test,5,0.931549072265625,0.8588037554744828
llm_goals_133,test,6,0.9288586378097534,0.8322442189227136
llm_goals_133,test,7,0.4941211938858032,0.6588760518065184
llm_goals_133,test,8,0.4897541105747223,0.5253890103378467
llm_goals_133,test,9,0.9262037873268127,0.8170254264656348
llm_goals_133,test,10,0.5970313549041748,0.4608975636274737
llm_goals_133,test,11,0.4291622042655945,0.2002942757447116
llm_goals_133,test,12,0.32249170541763306,0.4022389740133304
llm_goals_133,test,13,0.9300497770309448,0.8516519653230656
llm_goals_133,test,14,0.07890865951776505,0.0158444788418117
llm_goals_133,test,15,0.5834012031555176,0.5978365001716675
llm_goals_133,test,16,0.07600321620702744,0.0529535109452945
llm_goals_133,test,17,0.5995543003082275,0.2556263204518722
llm_goals_133,test,18,0.07857786864042282,0.0167466717766424
llm_goals_133,test,19,0.5443277955055237,0.0944244428527934
llm_goals_133,test,20,0.6211073398590088,0.6914976263434149
llm_goals_133,test,21,0.9290881752967834,0.8436048633272258
llm_goals_133,test,22,0.5970823168754578,0.2868007235124719
llm_goals_133,test,23,0.5354812145233154,0.1160262834732456
llm_goals_133,test,24,0.07930518686771393,0.0154812210183983
llm_goals_133,test,25,0.56076580286026,0.3119789757098087
llm_goals_133,test,26,0.09035120159387589,0.7929420521534107
llm_goals_133,test,27,0.6087313890457153,0.2037716603127804
llm_goals_133,test,28,0.1333329975605011,0.3118772754734958
llm_goals_133,test,29,0.46841850876808167,0.1808521166351184
llm_goals_133,test,30,0.5671843886375427,0.5045142070808941
llm_goals_133,test,31,0.9323346018791199,0.8498369503935175
llm_goals_133,test,32,0.42411351203918457,0.2694815541177057
llm_goals_133,test,33,0.07498132437467575,0.0177588567715867
llm_goals_133,test,34,0.5145189762115479,0.445395356923409
llm_goals_133,test,35,0.49762314558029175,0.4995371449295338
llm_goals_133,test,36,0.5789328813552856,0.4823747071618506
llm_goals_133,test,37,0.5386435389518738,0.0266815206082507
llm_goals_133,test,38,0.48635244369506836,0.0874820007739646
llm_goals_133,test,39,0.32507526874542236,0.3366422826024648
llm_goals_133,test,40,0.6315249800682068,0.2329899847029727
llm_goals_133,test,41,0.9241039156913757,0.8571491285285516
llm_goals_133,test,42,0.5870698094367981,0.8193201391590431
llm_goals_133,test,43,0.9293829798698425,0.8422289427227235
llm_goals_133,test,44,0.45903661847114563,0.5294372908894646
llm_goals_133,test,45,0.14456947147846222,0.2588603231187393
llm_goals_133,test,46,0.5458072423934937,0.2337208499272645
llm_goals_133,test,47,0.1999882310628891,0.1166634693988454
llm_goals_133,test,48,0.08909627795219421,0.6138306242825586
llm_goals_133,test,49,0.5464983582496643,0.2806552214021111
llm_goals_157,test,0,0.0930180549621582,0.6355320308190959
llm_goals_157,test,1,0.22642551362514496,0.7424853921670768
llm_goals_157,test,2,0.10543885827064514,0.073314241900373
llm_goals_157,test,3,0.19596125185489655,0.4675850432855137
llm_goals_157,test,4,0.7477324604988098,0.5458627288071527
llm_goals_157,test,5,0.1372409164905548,0.0771889198722071
llm_goals_157,test,6,0.1106635257601738,0.0818153292956077
llm_goals_157,test,7,0.08180024474859238,0.0574362851972565
llm_goals_157,test,8,0.7558386921882629,0.612650525482466
llm_goals_157,test,9,0.1426817774772644,0.025148981997583
llm_goals_157,test,10,0.23249483108520508,0.075973919657683
llm_goals_157,test,11,0.2109745740890503,0.1464687737403695
llm_goals_157,test,12,0.27443885803222656,0.7268917489370714
llm_goals_157,test,13,0.1375473588705063,0.0531648084509613
llm_goals_157,test,14,0.6365475058555603,0.3299706213737123
llm_goals_157,test,15,0.7617060542106628,0.3043292296967997
llm_goals_157,test,16,0.515164852142334,0.7463776357566768
llm_goals_157,test,17,0.23004965484142303,0.8521737469617978
llm_goals_157,test,18,0.623336672782898,0.6376039881194869
llm_goals_157,test,19,0.31661665439605713,0.6077618725185671
llm_goals_157,test,20,0.7295157313346863,0.7242060188576479
llm_goals_157,test,21,0.12761324644088745,0.2051165554994685
llm_goals_157,test,22,0.2383441925048828,0.7995715125449558
llm_goals_157,test,23,0.19797679781913757,0.1467023010082007
llm_goals_157,test,24,0.6863951683044434,0.3108830330026099
llm_goals_157,test,25,0.1627972424030304,0.1001087986592319
llm_goals_157,test,26,0.5832943320274353,0.4552169748909065
llm_goals_157,test,27,0.23589880764484406,0.8485816155817194
llm_goals_157,test,28,0.1878257840871811,0.2265082237706469
llm_goals_157,test,29,0.29872846603393555,0.6283476283012512
llm_goals_157,test,30,0.2020677775144577,0.2620283904445711
llm_goals_157,test,31,0.1391640156507492,0.167952021094149
llm_goals_157,test,32,0.16368819773197174,0.0898426790762853
llm_goals_157,test,33,0.5167278051376343,0.5043942365008254
llm_goals_157,test,34,0.23479261994361877,0.0725868476993519
llm_goals_157,test,35,0.11268346756696701,0.1237829648198263
llm_goals_157,test,36,0.2223137617111206,0.1925318890375481
llm_goals_157,test,37,0.2776467800140381,0.6842419436738383
llm_goals_157,test,38,0.22656430304050446,0.691649855522258
llm_goals_157,test,39,0.28488078713417053,0.5264401359594252
llm_goals_157,test,40,0.1931290626525879,0.1777385641102212
llm_goals_157,test,41,0.14309144020080566,0.0328183379619016
llm_goals_157,test,42,0.08371404558420181,0.1949198629220696
llm_goals_157,test,43,0.11489585787057877,0.135028229661103
llm_goals_157,test,44,0.7919219732284546,0.7258662504199943
llm_goals_157,test,45,0.1922117918729782,0.0935995604675028
llm_goals_157,test,46,0.19340880215168,0.15418174898882
llm_goals_157,test,47,0.1925622671842575,0.3212815335896988
llm_goals_157,test,48,0.6052310466766357,0.2795304252191649
llm_goals_157,test,49,0.34944865107536316,0.7371862441831876
llm_goals_186,test,0,0.07827675342559814,0.0367510709036243
llm_goals_186,test,1,0.3664810061454773,0.9589190483469818
llm_goals_186,test,2,0.10162638127803802,0.0667798014228945
llm_goals_186,test,3,0.09931743144989014,0.0340377111776925
llm_goals_186,test,4,0.3909919559955597,0.4002710909538108
llm_goals_186,test,5,0.9324905872344971,0.8817241785963961
llm_goals_186,test,6,0.9366121888160706,0.8798493094453552
llm_goals_186,test,7,0.07795573770999908,0.0443878504309986
llm_goals_186,test,8,0.43058696389198303,0.4415592471567901
llm_goals_186,test,9,0.9341310262680054,0.8847759344734797
llm_goals_186,test,10,0.9183443188667297,0.8905032311770802
llm_goals_186,test,11,0.5113476514816284,0.704948600699243
llm_goals_186,test,12,0.2141953408718109,0.409597002882604
llm_goals_186,test,13,0.9343966841697693,0.8867287662434378
llm_goals_186,test,14,0.8302830457687378,0.8128145923588551
llm_goals_186,test,15,0.3970239460468292,0.4832503599366539
llm_goals_186,test,16,0.8248865604400635,0.8267478910426029
llm_goals_186,test,17,0.377616822719574,0.957207678971151
llm_goals_186,test,18,0.8285910487174988,0.7610878868091969
llm_goals_186,test,19,0.41746199131011963,0.7872715543385574
llm_goals_186,test,20,0.41726210713386536,0.520514563098938
llm_goals_186,test,21,0.9366167783737183,0.9084513175350956
llm_goals_186,test,22,0.36036986112594604,0.9157385254478472
llm_goals_186,test,23,0.5190339684486389,0.5585242484052028
llm_goals_186,test,24,0.8322042226791382,0.8535531187105866
llm_goals_186,test,25,0.9181088209152222,0.8466653814359101
llm_goals_186,test,26,0.8140113353729248,0.8550637970014335
llm_goals_186,test,27,0.4187026023864746,0.9398175480710378
llm_goals_186,test,28,0.08288838714361191,0.0028363068023126
llm_goals_186,test,29,0.3896961212158203,0.8305997204848772
llm_goals_186,test,30,0.9221408367156982,0.8987706923113074
llm_goals_186,test,31,0.937960684299469,0.8880043621770078
llm_goals_186,test,32,0.5135229825973511,0.4759852559635721
llm_goals_186,test,33,0.8273010849952698,0.8450176079466206
llm_goals_186,test,34,0.9182887077331543,0.9532899841201126
llm_goals_186,test,35,0.07786021381616592,0.0290458993492998
llm_goals_186,test,36,0.9237472414970398,0.9324312223548752
llm_goals_186,test,37,0.39240115880966187,0.7062064442252718
llm_goals_186,test,38,0.3595903217792511,0.8475385890175953
llm_goals_186,test,39,0.21146497130393982,0.2451742937665777
llm_goals_186,test,40,0.5133352279663086,0.7628755019143414
llm_goals_186,test,41,0.9356397986412048,0.9118681525061464
llm_goals_186,test,42,0.08087164908647537,0.0299416334392138
llm_goals_186,test,43,0.9400517344474792,0.9088531704037304
llm_goals_186,test,44,0.42214328050613403,0.399223879564458
llm_goals_186,test,45,0.08569081872701645,0.0207412737387863
llm_goals_186,test,46,0.5145325064659119,0.504435185656878
llm_goals_186,test,47,0.08601415902376175,0.0426878976387859
llm_goals_186,test,48,0.8325508832931519,0.8374391175680358
llm_goals_186,test,49,0.36361008882522583,0.9286286845568336
llm_goals_401,test,0,0.9273388385772705,0.9617876405899952
llm_goals_401,test,1,0.8630486130714417,0.9404490002093446
llm_goals_401,test,2,0.9359673261642456,0.935046673136648
llm_goals_401,test,3,0.9385722875595093,0.9543637645078082
llm_goals_401,test,4,0.9408722519874573,0.9660480618061604
llm_goals_401,test,5,0.8044087290763855,0.9211199313115676
llm_goals_401,test,6,0.8873862028121948,0.902884063341732
llm_goals_401,test,7,0.9403477311134338,0.94729781169548
llm_goals_401,test,8,0.941554844379425,0.957207800210959
llm_goals_401,test,9,0.892612099647522,0.9145740305533586
llm_goals_401,test,10,0.8480529189109802,0.9267561414021268
llm_goals_401,test,11,0.8762943148612976,0.9242701512661912
llm_goals_401,test,12,0.695575475692749,0.8435903191618971
llm_goals_401,test,13,0.9060001969337463,0.938192186231483
llm_goals_401,test,14,0.484732449054718,0.8529976602342201
llm_goals_401,test,15,0.9330561757087708,0.9478884329856586
llm_goals_401,test,16,0.2941240072250366,0.8192354997719525
llm_goals_401,test,17,0.8858498930931091,0.6739075253562599
llm_goals_401,test,18,0.2439263015985489,0.5197439125792754
llm_goals_401,test,19,0.7989872694015503,0.8583977843568281
llm_goals_401,test,20,0.9198403358459473,0.9455993763925636
llm_goals_401,test,21,0.8793575167655945,0.9705763460601288
llm_goals_401,test,22,0.7123693823814392,0.8491748031226035
llm_goals_401,test,23,0.7330996990203857,0.8515620190775874
llm_goals_401,test,24,0.6262026429176331,0.9138265832467732
llm_goals_401,test,25,0.8338279128074646,0.9028179661562172
llm_goals_401,test,26,0.566013753414154,0.9558187125982178
llm_goals_401,test,27,0.8502933382987976,0.5612720368847214
llm_goals_401,test,28,0.9357211589813232,0.9617025244944858
llm_goals_401,test,29,0.8518296480178833,0.8060652645257104
llm_goals_401,test,30,0.8544227480888367,0.8896843204884566
llm_goals_401,test,31,0.6535370945930481,0.9526923309875832
llm_goals_401,test,32,0.857994794845581,0.8506568677811456
llm_goals_401,test,33,0.6458052396774292,0.7802594846608962
llm_goals_401,test,34,0.8303117156028748,0.8870658880824575
llm_goals_401,test,35,0.9401872158050537,0.954492271718892
llm_goals_401,test,36,0.3674314022064209,0.562048146808884
llm_goals_401,test,37,0.8705223798751831,0.7781126533121301
llm_goals_401,test,38,0.868816077709198,0.8591165497422784
llm_goals_401,test,39,0.8898283839225769,0.8788934372485709
llm_goals_401,test,40,0.771589457988739,0.8091102829935539
llm_goals_401,test,41,0.899057924747467,0.8675616408909181
llm_goals_401,test,42,0.9257642030715942,0.949993671778176
llm_goals_401,test,43,0.8632822036743164,0.9467448581700896
llm_goals_401,test,44,0.7856165766716003,0.8687242293966851
llm_goals_401,test,45,0.939160943031311,0.9587250339741858
llm_goals_401,test,46,0.7983530759811401,0.860452357605835
llm_goals_401,test,47,0.8022179007530212,0.9253614550163576
llm_goals_401,test,48,0.47738248109817505,0.8647797053472309
llm_goals_401,test,49,0.792057991027832,0.9454760570818088
llm_goals_420,test,0,0.968278169631958,0.9711560599789952
llm_goals_420,test,1,0.8522422909736633,0.5978053926454786
llm_goals_420,test,2,0.11037475615739822,0.0817351976918742
llm_goals_420,test,3,0.5146240592002869,0.4730792485055278
llm_goals_420,test,4,0.7866017818450928,0.8487827178347965
llm_goals_420,test,5,0.17113003134727478,0.1386299974231985
llm_goals_420,test,6,0.9537301659584045,0.9677888754301456
llm_goals_420,test,7,0.9196698069572449,0.9193048504515688
llm_goals_420,test,8,0.3876410722732544,0.1105940972856699
llm_goals_420,test,9,0.08401741832494736,0.0770199238560772
llm_goals_420,test,10,0.42066290974617004,0.6569762799970394
llm_goals_420,test,11,0.4713105857372284,0.4533228613613927
llm_goals_420,test,12,0.845345675945282,0.9317349816078236
llm_goals_420,test,13,0.402614027261734,0.5042190525433249
llm_goals_420,test,14,0.12925077974796295,0.1440102415442949
llm_goals_420,test,15,0.39214813709259033,0.3273024402815145
llm_goals_420,test,16,0.5267350673675537,0.3681827771891491
llm_goals_420,test,17,0.5852485299110413,0.4632337459639504
llm_goals_420,test,18,0.06790566444396973,0.0396023029282074
llm_goals_420,test,19,0.08531544357538223,0.1093558671361818
llm_goals_420,test,20,0.06864923238754272,0.0396252534530884
llm_goals_420,test,21,0.10432568192481995,0.0465700238365722
llm_goals_420,test,22,0.3756643533706665,0.1916998836408959
llm_goals_420,test,23,0.18055732548236847,0.1681177802630551
llm_goals_420,test,24,0.15574714541435242,0.1698145969501656
llm_goals_420,test,25,0.9673565030097961,0.9829779862454848
llm_goals_420,test,26,0.07782085239887238,0.0308922007548325
llm_goals_420,test,27,0.8329516649246216,0.8125830946093627
llm_goals_420,test,28,0.8174616098403931,0.9367654930611784
llm_goals_420,test,29,0.6964970827102661,0.8184437096518178
llm_goals_420,test,30,0.9248162508010864,0.9386603865018432
llm_goals_420,test,31,0.09648801386356354,0.10851579640767
llm_goals_420,test,32,0.940588653087616,0.9666374926215946
llm_goals_420,test,33,0.7847293019294739,0.8901044715204638
llm_goals_420,test,34,0.8248533606529236,0.9215939624410928
llm_goals_420,test,35,0.6500301361083984,0.5267802921981926
llm_goals_420,test,36,0.09312143176794052,0.092175658510858
llm_goals_420,test,37,0.9604648947715759,0.984362841437198
llm_goals_420,test,38,0.8190804123878479,0.8828047505171253
llm_goals_420,test,39,0.6125361323356628,0.2972862903877061
llm_goals_420,test,40,0.07252731919288635,0.0444444838151291
llm_goals_420,test,41,0.49909958243370056,0.5478361768565629
llm_goals_420,test,42,0.11874286085367203,0.0713350995649345
llm_goals_420,test,43,0.490193635225296,0.5106248261137879
llm_goals_420,test,44,0.08310878276824951,0.0905225225380096
llm_goals_420,test,45,0.23705589771270752,0.1531352912609338
llm_goals_420,test,46,0.1187274232506752,0.1216667665904784
llm_goals_420,test,47,0.111681267619133,0.1240460201984882
llm_goals_420,test,48,0.0724823921918869,0.0596825371886435
llm_goals_420,test,49,0.7314949631690979,0.8335117138475971
llm_goals_263,test,0,0.7470299005508423,0.5209935935366439
llm_goals_263,test,1,0.7063634395599365,0.7968068746445729
llm_goals_263,test,2,0.7480990886688232,0.5467470232144288
llm_goals_263,test,3,0.7459365129470825,0.4733215007319302
llm_goals_263,test,4,0.025524336844682693,0.0057083956844275
llm_goals_263,test,5,0.2255476862192154,0.3493473596509424
llm_goals_263,test,6,0.12960220873355865,0.8201550249820808
llm_goals_263,test,7,0.7729675769805908,0.6557906816477753
llm_goals_263,test,8,0.024474255740642548,0.7543426271500917
llm_goals_263,test,9,0.14812545478343964,0.2009829696053626
llm_goals_263,test,10,0.658078670501709,0.8489753664207734
llm_goals_263,test,11,0.45845600962638855,0.652436611134806
llm_goals_263,test,12,0.7957237958908081,0.8143466734577086
llm_goals_263,test,13,0.1223318874835968,0.4245607212842352
llm_goals_263,test,14,0.024719540029764175,0.0036452536113296
llm_goals_263,test,15,0.025992192327976227,0.0558692673280436
llm_goals_263,test,16,0.024190427735447884,0.0066133854077101
llm_goals_263,test,17,0.6170737743377686,0.7648655133279786
llm_goals_263,test,18,0.02348105050623417,0.006651634113929
llm_goals_263,test,19,0.053486213088035583,0.0487544569045993
llm_goals_263,test,20,0.026272282004356384,0.0331700030843074
llm_goals_263,test,21,0.13475704193115234,0.2223244924357846
llm_goals_263,test,22,0.7241626977920532,0.8850367693524676
llm_goals_263,test,23,0.3369579017162323,0.6136206316393849
llm_goals_263,test,24,0.024967819452285767,0.0103991722036265
llm_goals_263,test,25,0.36073046922683716,0.7490420921699364
llm_goals_263,test,26,0.022812291979789734,0.0149267434716544
llm_goals_263,test,27,0.6857898831367493,0.7989164572724189
llm_goals_263,test,28,0.6670131087303162,0.6376088146659757
llm_goals_263,test,29,0.04709167033433914,0.0419343519282845
llm_goals_263,test,30,0.5468336939811707,0.8598701912381717
llm_goals_263,test,31,0.176772341132164,0.168156003178124
llm_goals_263,test,32,0.36981409788131714,0.6210063725022936
llm_goals_263,test,33,0.024686798453330994,0.0062242916440703
llm_goals_263,test,34,0.7638387680053711,0.5691545063377933
llm_goals_263,test,35,0.8540935516357422,0.7982008481236453
llm_goals_263,test,36,0.6415402293205261,0.763706617142148
llm_goals_263,test,37,0.07753872871398926,0.0342430752915087
llm_goals_263,test,38,0.04822126403450966,0.1222660265497154
llm_goals_263,test,39,0.8449651598930359,0.8269810089104592
llm_goals_263,test,40,0.34024742245674133,0.5318831885299807
llm_goals_263,test,41,0.16025200486183167,0.3207528198494705
llm_goals_263,test,42,0.7327667474746704,0.4801431121790341
llm_goals_263,test,43,0.14734269678592682,0.1981375631842198
llm_goals_263,test,44,0.023691397160291672,0.1080272425734687
llm_goals_263,test,45,0.782777726650238,0.8354177467829539
llm_goals_263,test,46,0.6180311441421509,0.7413273974280716
llm_goals_263,test,47,0.6918649077415466,0.4918597295945824
llm_goals_263,test,48,0.023360226303339005,0.0043793883332247
llm_goals_263,test,49,0.8286684155464172,0.949535011062065
llm_goals_427,test,0,0.4180053472518921,0.2826418194423331
llm_goals_427,test,1,0.4964604675769806,0.380977464557011
llm_goals_427,test,2,0.340614914894104,0.1487359361961987
llm_goals_427,test,3,0.39564675092697144,0.3576580711889061
llm_goals_427,test,4,0.4179474711418152,0.2270212997591984
llm_goals_427,test,5,0.607297956943512,0.2711625929555684
llm_goals_427,test,6,0.5766912698745728,0.1402530886796432
llm_goals_427,test,7,0.38842183351516724,0.2935996266753878
llm_goals_427,test,8,0.3711540400981903,0.2137670314614922
llm_goals_427,test,9,0.4927396774291992,0.2160041132788326
llm_goals_427,test,10,0.5270676016807556,0.3669200145460971
llm_goals_427,test,11,0.4334775507450104,0.1516882402250043
llm_goals_427,test,12,0.5869714021682739,0.1475453757541205
llm_goals_427,test,13,0.39128589630126953,0.1350458405109811
llm_goals_427,test,14,0.4435889422893524,0.2337964230636721
llm_goals_427,test,15,0.6196666955947876,0.3115728193455654
llm_goals_427,test,16,0.4306420683860779,0.131520745523965
llm_goals_427,test,17,0.41687390208244324,0.3278496493117007
llm_goals_427,test,18,0.5009673237800598,0.3527567015929387
llm_goals_427,test,19,0.34174805879592896,0.2313815817463674
llm_goals_427,test,20,0.5068756937980652,0.3221674001172885
llm_goals_427,test,21,0.45655879378318787,0.2494982149574568
llm_goals_427,test,22,0.49860283732414246,0.3779860883941361
llm_goals_427,test,23,0.5080033540725708,0.3509138567308049
llm_goals_427,test,24,0.4548238515853882,0.1946130607095621
llm_goals_427,test,25,0.5110265016555786,0.228222711067827
llm_goals_427,test,26,0.5082368850708008,0.2768263295001016
llm_goals_427,test,27,0.39381492137908936,0.2199474495731038
llm_goals_427,test,28,0.39078736305236816,0.1579098434370231
llm_goals_427,test,29,0.4990987777709961,0.069843346359134
llm_goals_427,test,30,0.6120189428329468,0.5359787931933129
llm_goals_427,test,31,0.3697560131549835,0.1293182240484197
llm_goals_427,test,32,0.4308161735534668,0.2396093502744168
llm_goals_427,test,33,0.3564491271972656,0.1913976476875258
llm_goals_427,test,34,0.6655250787734985,0.4990200469731286
llm_goals_427,test,35,0.5616865754127502,0.357807030322738
llm_goals_427,test,36,0.33716875314712524,0.2580007688260685
llm_goals_427,test,37,0.3301825523376465,0.4031150735356658
llm_goals_427,test,38,0.571054995059967,0.2737247490124149
llm_goals_427,test,39,0.5475444793701172,0.2478362318668904
llm_goals_427,test,40,0.4711059629917145,0.5026721270474518
llm_goals_427,test,41,0.34119006991386414,0.2273186194850299
llm_goals_427,test,42,0.4460216760635376,0.4444071308114509
llm_goals_427,test,43,0.48079946637153625,0.2369720640870139
llm_goals_427,test,44,0.6136931777000427,0.3961639877527012
llm_goals_427,test,45,0.6018888354301453,0.2771055535106792
llm_goals_427,test,46,0.3626440465450287,0.3138421601355035
llm_goals_427,test,47,0.37086474895477295,0.3609031182529178
llm_goals_427,test,48,0.3430732488632202,0.1899314535085168
llm_goals_427,test,49,0.5064296126365662,0.4452677872765214
llm_goals_449,test,0,0.029416635632514954,0.0338418398422384
llm_goals_449,test,1,0.055388398468494415,0.5657697432527007
llm_goals_449,test,2,0.043954700231552124,0.0110135668657682
llm_goals_449,test,3,0.09572489559650421,0.2352946829974157
llm_goals_449,test,4,0.28909915685653687,0.4842577903424374
llm_goals_449,test,5,0.033525511622428894,0.3485549555066181
llm_goals_449,test,6,0.03522295504808426,0.0519669241570106
llm_goals_449,test,7,0.0392766147851944,0.0031269987549636
llm_goals_449,test,8,0.29169654846191406,0.2974090419757364
llm_goals_449,test,9,0.03410954400897026,0.0925536782452943
llm_goals_449,test,10,0.3191129267215729,0.4386853694122202
llm_goals_449,test,11,0.05773087218403816,0.0063782741293255
llm_goals_449,test,12,0.04483040049672127,0.0714949969968408
llm_goals_449,test,13,0.03548561409115791,0.0227751515807358
llm_goals_449,test,14,0.08493071794509888,0.3195753835685478
llm_goals_449,test,15,0.26228663325309753,0.2420638006790782
llm_goals_449,test,16,0.04941559210419655,0.0953277975239941
llm_goals_449,test,17,0.05517209693789482,0.5835221517250182
llm_goals_449,test,18,0.05109186843037605,0.1603112674891041
llm_goals_449,test,19,0.0729505643248558,0.6833613521345718
llm_goals_449,test,20,0.27696412801742554,0.4549636770044163
llm_goals_449,test,21,0.03763455152511597,0.1579551412473356
llm_goals_449,test,22,0.052532464265823364,0.4400750405221786
llm_goals_449,test,23,0.07011788338422775,0.0251530775884869
llm_goals_449,test,24,0.06526791304349899,0.2695772384712642
llm_goals_449,test,25,0.3438134491443634,0.4519585097229938
llm_goals_449,test,26,0.05660908296704292,0.1123650856049343
llm_goals_449,test,27,0.05507470294833183,0.4873581137262729
llm_goals_449,test,28,0.07746824622154236,0.1751834751469549
llm_goals_449,test,29,0.07721680402755737,0.5804994670396382
llm_goals_449,test,30,0.3524460196495056,0.4626257946707522
llm_goals_449,test,31,0.031107719987630844,0.093699276239858
llm_goals_449,test,32,0.08898865431547165,0.0067638379987912
llm_goals_449,test,33,0.06331103295087814,0.1681834079617312
llm_goals_449,test,34,0.3235901892185211,0.3493466516790004
llm_goals_449,test,35,0.03916589170694351,0.0068633490722964
llm_goals_449,test,36,0.3282895088195801,0.3064552348162913
llm_goals_449,test,37,0.05835175886750221,0.6652150761082033
llm_goals_449,test,38,0.07373645156621933,0.6741024889471738
llm_goals_449,test,39,0.044062066823244095,0.0101379083480811
llm_goals_449,test,40,0.07444946467876434,0.0089588521989727
llm_goals_449,test,41,0.03458036109805107,0.1780122280374072
llm_goals_449,test,42,0.03334144130349159,0.0051090174127486
llm_goals_449,test,43,0.03144937381148338,0.2498444495344637
llm_goals_449,test,44,0.248313307762146,0.221646438441774
llm_goals_449,test,45,0.07317293435335159,0.2094790944562388
llm_goals_449,test,46,0.05805516988039017,0.0060341568900946
llm_goals_449,test,47,0.06333476305007935,0.1241710666090617
llm_goals_449,test,48,0.0741734579205513,0.1338989075405874
llm_goals_449,test,49,0.04604104906320572,0.5982472498607049
llm_goals_93,test,0,0.07563725113868713,0.0729546648027557
llm_goals_93,test,1,0.22367022931575775,0.6981648167380458
llm_goals_93,test,2,0.07228881865739822,0.3318565449695651
llm_goals_93,test,3,0.1582014262676239,0.5505450337981316
llm_goals_93,test,4,0.9341947436332703,0.9547701486411684
llm_goals_93,test,5,0.8196579217910767,0.8378989133869518
llm_goals_93,test,6,0.8663662075996399,0.8645678173925403
llm_goals_93,test,7,0.08607523888349533,0.8034776985663136
llm_goals_93,test,8,0.9253911375999451,0.9139015687588464
llm_goals_93,test,9,0.8772943019866943,0.84076714934408
llm_goals_93,test,10,0.14775973558425903,0.6739811171843284
llm_goals_93,test,11,0.10425375401973724,0.1039005746210211
llm_goals_93,test,12,0.2753043472766876,0.4267719606730696
llm_goals_93,test,13,0.8826644420623779,0.8366404177501517
llm_goals_93,test,14,0.07956024259328842,0.51043302648597
llm_goals_93,test,15,0.8829843997955322,0.9305120134073452
llm_goals_93,test,16,0.07012885808944702,0.3610424966705102
llm_goals_93,test,17,0.35858598351478577,0.6383014796360676
llm_goals_93,test,18,0.6995114684104919,0.7718638733110297
llm_goals_93,test,19,0.5446770191192627,0.6413577201238602
llm_goals_93,test,20,0.926918089389801,0.9315750038397262
llm_goals_93,test,21,0.9052295088768005,0.959779321458113
llm_goals_93,test,22,0.7719531059265137,0.8212707656396169
llm_goals_93,test,23,0.11720428615808487,0.0620020892855925
llm_goals_93,test,24,0.07996390014886856,0.6707394124648912
llm_goals_93,test,25,0.15144196152687073,0.8513622881275814
llm_goals_93,test,26,0.7081361413002014,0.9295520797235572
llm_goals_93,test,27,0.48636341094970703,0.731125599089208
llm_goals_93,test,28,0.2347419261932373,0.6387209955509132
llm_goals_93,test,29,0.6731290817260742,0.904010833763946
llm_goals_93,test,30,0.2899768650531769,0.6688304494781727
llm_goals_93,test,31,0.887201189994812,0.7246857956085144
llm_goals_93,test,32,0.6959643959999084,0.8395389979928227
llm_goals_93,test,33,0.1772131472826004,0.4902935277900161
llm_goals_93,test,34,0.09001367539167404,0.5478889802411547
llm_goals_93,test,35,0.06615957617759705,0.8178324059837278
llm_goals_93,test,36,0.6960346102714539,0.8865698576591567
llm_goals_93,test,37,0.4917905628681183,0.9194802654649477
llm_goals_93,test,38,0.7388022541999817,0.8798558779688433
llm_goals_93,test,39,0.34285810589790344,0.2399301633758412
llm_goals_93,test,40,0.7589339017868042,0.946265162703226
llm_goals_93,test,41,0.7801393866539001,0.8435175392494392
llm_goals_93,test,42,0.0743810310959816,0.260949780916787
llm_goals_93,test,43,0.898263692855835,0.8190814792570137
llm_goals_93,test,44,0.9304811954498291,0.951412738003951
llm_goals_93,test,45,0.2028191089630127,0.5085180485155563
llm_goals_93,test,46,0.8575809597969055,0.946404217454057
llm_goals_93,test,47,0.7292388081550598,0.5778150570910173
llm_goals_93,test,48,0.6233948469161987,0.6859809771933842
llm_goals_93,test,49,0.31179073452949524,0.6187362092588319
llm_goals_358,test,0,0.912612795829773,0.9198604804287828
llm_goals_358,test,1,0.8790541291236877,0.8623428941495346
llm_goals_358,test,2,0.9085381627082825,0.8146633538972576
llm_goals_358,test,3,0.8942170143127441,0.6635203414505186
llm_goals_358,test,4,0.918791651725769,0.9873765704706148
llm_goals_358,test,5,0.8507634997367859,0.8085697334742067
llm_goals_358,test,6,0.8310166597366333,0.8449593731392603
llm_goals_358,test,7,0.8989403247833252,0.7802023295414534
llm_goals_358,test,8,0.9197859764099121,0.5598487865057791
llm_goals_358,test,9,0.8724116683006287,0.7689259718491731
llm_goals_358,test,10,0.7979781627655029,0.4090564458803354
llm_goals_358,test,11,0.7738680243492126,0.7281676519766522
llm_goals_358,test,12,0.8944395780563354,0.781706684015145
llm_goals_358,test,13,0.8632904291152954,0.6175350663762746
llm_goals_358,test,14,0.10645922273397446,0.1259366314847969
llm_goals_358,test,15,0.9226832389831543,0.9665391433681092
llm_goals_358,test,16,0.13843271136283875,0.0977212357476949
llm_goals_358,test,17,0.8788741827011108,0.839291034278836
llm_goals_358,test,18,0.16738013923168182,0.0422931239637008
llm_goals_358,test,19,0.6841960549354553,0.3805861792548485
llm_goals_358,test,20,0.9158124923706055,0.972559035375534
llm_goals_358,test,21,0.8463473320007324,0.7844216881478699
llm_goals_358,test,22,0.8924364447593689,0.8432835924874589
llm_goals_358,test,23,0.15868701040744781,0.5831400725193898
llm_goals_358,test,24,0.1535690724849701,0.4717356494812245
llm_goals_358,test,25,0.7719784379005432,0.5591997574513735
llm_goals_358,test,26,0.11566921323537827,0.2416094818565881
llm_goals_358,test,27,0.8809748291969299,0.8557277309389357
llm_goals_358,test,28,0.883971631526947,0.7784201381020285
llm_goals_358,test,29,0.5148969888687134,0.9848793746115032
llm_goals_358,test,30,0.7715247869491577,0.6211451382223804
llm_goals_358,test,31,0.8478240966796875,0.3507967043222499
llm_goals_358,test,32,0.11488399654626846,0.670993868258675
llm_goals_358,test,33,0.12994319200515747,0.0563775762088827
llm_goals_358,test,34,0.7998567223548889,0.4923522105012786
llm_goals_358,test,35,0.9131727814674377,0.8157534628947803
llm_goals_358,test,36,0.7959632873535156,0.7491294702469764
llm_goals_358,test,37,0.6994778513908386,0.5478990059483921
llm_goals_358,test,38,0.5915363430976868,0.9873113036659013
llm_goals_358,test,39,0.903278648853302,0.6807296173615788
llm_goals_358,test,40,0.6248006820678711,0.6446558925323718
llm_goals_358,test,41,0.8623954653739929,0.3360622093964198
llm_goals_358,test,42,0.9027431607246399,0.6737437200032999
llm_goals_358,test,43,0.8259800672531128,0.453765234769123
llm_goals_358,test,44,0.9149014353752136,0.921309112250278
llm_goals_358,test,45,0.8931755423545837,0.5133269325272689
llm_goals_358,test,46,0.6033108830451965,0.7619134214235583
llm_goals_358,test,47,0.890204906463623,0.5260478340890856
llm_goals_358,test,48,0.1289781630039215,0.0910570466126295
llm_goals_358,test,49,0.8601638078689575,0.4710935988277175
