template_id,split,question_idx,prediction,label
llm_goals_75,test,0,0.24446657299995422,0.0759730864369293
llm_goals_75,test,1,0.6968615055084229,0.8120127334654941
llm_goals_75,test,2,0.7012963891029358,0.114709127325878
llm_goals_75,test,3,0.2974395751953125,0.1496870071208148
llm_goals_75,test,4,0.41309675574302673,0.321632167436288
llm_goals_75,test,5,0.4384063482284546,0.0308084020440725
llm_goals_75,test,6,0.7023866772651672,0.1843175427563358
llm_goals_75,test,7,0.5226070880889893,0.0324508518984932
llm_goals_75,test,8,0.2737019658088684,0.5189439655904322
llm_goals_75,test,9,0.323326975107193,0.1502023728460554
llm_goals_75,test,10,0.6609447002410889,0.7657257241793485
llm_goals_75,test,11,0.6883294582366943,0.8035850534204568
llm_goals_75,test,12,0.2597764730453491,0.7307072346740583
llm_goals_75,test,13,0.714043378829956,0.048717541539827
llm_goals_75,test,14,0.5816547274589539,0.4316714754134473
llm_goals_75,test,15,0.3037731647491455,0.3935335107130105
llm_goals_75,test,16,0.2864454686641693,0.6144967070154126
llm_goals_75,test,17,0.6523423790931702,0.4488809036296748
llm_goals_75,test,18,0.1989196240901947,0.3920271677291012
llm_goals_75,test,19,0.6951936483383179,0.7810532991351343
llm_goals_75,test,20,0.5351606011390686,0.5443958675997833
llm_goals_75,test,21,0.5612866282463074,0.1604437012338976
llm_goals_75,test,22,0.2876685857772827,0.856696781503626
llm_goals_75,test,23,0.4003215432167053,0.590876283737086
llm_goals_75,test,24,0.728668749332428,0.4156776627984636
llm_goals_75,test,25,0.2918719947338104,0.5957841371692396
llm_goals_75,test,26,0.3047927916049957,0.3752723036244256
llm_goals_75,test,27,0.49480679631233215,0.7866846353759014
llm_goals_75,test,28,0.28854528069496155,0.1838073487826972
llm_goals_75,test,29,0.19025005400180817,0.853944837222159
llm_goals_75,test,30,0.6750400066375732,0.5992601606803653
llm_goals_75,test,31,0.21255411207675934,0.0743179753131944
llm_goals_75,test,32,0.7641066312789917,0.8015013965908157
llm_goals_75,test,33,0.7064940333366394,0.3498422912836759
llm_goals_75,test,34,0.7412354946136475,0.6692496458902099
llm_goals_75,test,35,0.20064890384674072,0.134422492314073
llm_goals_75,test,36,0.6039677858352661,0.6683886675406214
llm_goals_75,test,37,0.708601176738739,0.7419528346018889
llm_goals_75,test,38,0.4544006586074829,0.6995551035614008
llm_goals_75,test,39,0.4098401665687561,0.4983520000991506
llm_goals_75,test,40,0.21440617740154266,0.8622382893733471
llm_goals_75,test,41,0.44375118613243103,0.1026909531781266
llm_goals_75,test,42,0.4577254056930542,0.0240063792397545
llm_goals_75,test,43,0.7291962504386902,0.1143049422953708
llm_goals_75,test,44,0.38765519857406616,0.6884653626060131
llm_goals_75,test,45,0.7559835910797119,0.1059329319681491
llm_goals_75,test,46,0.4844052791595459,0.405588044943207
llm_goals_75,test,47,0.7029886841773987,0.1393457779753145
llm_goals_75,test,48,0.21556483209133148,0.3670858714001939
llm_goals_75,test,49,0.6532847881317139,0.8052537448889737
llm_goals_78,test,0,0.6588788032531738,0.710685793210421
llm_goals_78,test,1,0.6711418032646179,0.4521352090792044
llm_goals_78,test,2,0.6660574078559875,0.6984783435600235
llm_goals_78,test,3,0.44131526350975037,0.2113029164438998
llm_goals_78,test,4,0.41075801849365234,0.59362413330766
llm_goals_78,test,5,0.5705844163894653,0.2973601904856865
llm_goals_78,test,6,0.5027014017105103,0.4911861319335979
llm_goals_78,test,7,0.6313661932945251,0.5109292120432304
llm_goals_78,test,8,0.4219207167625427,0.0214058927819057
llm_goals_78,test,9,0.5084211826324463,0.4561729519761929
llm_goals_78,test,10,0.41977301239967346,0.0679189625299845
llm_goals_78,test,11,0.4521500766277313,0.0515682065315942
llm_goals_78,test,12,0.4259898066520691,0.2832565853408079
llm_goals_78,test,13,0.5488011837005615,0.3661356287525266
llm_goals_78,test,14,0.4368608593940735,0.5580731095047592
llm_goals_78,test,15,0.4153764545917511,0.3539146882613382
llm_goals_78,test,16,0.43382152915000916,0.5198975918853231
llm_goals_78,test,17,0.5194316506385803,0.5974518854580513
llm_goals_78,test,18,0.43368440866470337,0.2539582988569515
llm_goals_78,test,19,0.8997682929039001,0.7818671446797811
llm_goals_78,test,20,0.4127404987812042,0.0817902870466741
llm_goals_78,test,21,0.595531702041626,0.5313736848686219
llm_goals_78,test,22,0.666378378868103,0.0578457193154468
llm_goals_78,test,23,0.7058931589126587,0.0588067732422271
llm_goals_78,test,24,0.4350641965866089,0.504620490567031
llm_goals_78,test,25,0.4164276719093323,0.1421301439321696
llm_goals_78,test,26,0.4544427990913391,0.4043160008204248
llm_goals_78,test,27,0.6714680194854736,0.1341547030792127
llm_goals_78,test,28,0.4353627860546112,0.585613528389559
llm_goals_78,test,29,0.4333544671535492,0.8037635629119642
llm_goals_78,test,30,0.41982772946357727,0.2785354206868534
llm_goals_78,test,31,0.5805149078369141,0.5074585147112473
llm_goals_78,test,32,0.6944244503974915,0.1365325187594412
llm_goals_78,test,33,0.4336126148700714,0.559776178990876
llm_goals_78,test,34,0.4236532151699066,0.3923464392379233
llm_goals_78,test,35,0.6667897701263428,0.8694947336510349
llm_goals_78,test,36,0.4212353825569153,0.3406365028138333
llm_goals_78,test,37,0.5853989124298096,0.7407991807699866
llm_goals_78,test,38,0.44211217761039734,0.5783193820848918
llm_goals_78,test,39,0.43710699677467346,0.2552456607848852
llm_goals_78,test,40,0.43772298097610474,0.0543816822443674
llm_goals_78,test,41,0.501880407333374,0.5111749720190839
llm_goals_78,test,42,0.6734102964401245,0.6373460300781935
llm_goals_78,test,43,0.5846306681632996,0.5824615770869916
llm_goals_78,test,44,0.412753164768219,0.0725045867582824
llm_goals_78,test,45,0.4850665032863617,0.2055844845637347
llm_goals_78,test,46,0.7143720984458923,0.3443727538373435
llm_goals_78,test,47,0.438845694065094,0.5137599661573641
llm_goals_78,test,48,0.45145437121391296,0.2386868392211005
llm_goals_78,test,49,0.5292727947235107,0.1092249314217729
llm_goals_53,test,0,0.29697176814079285,0.0313037944392233
llm_goals_53,test,1,0.29747602343559265,0.8240056677766623
llm_goals_53,test,2,0.2987511157989502,0.1449565288533016
llm_goals_53,test,3,0.29814544320106506,0.3918474727299789
llm_goals_53,test,4,0.2968994677066803,0.8286818229760559
llm_goals_53,test,5,0.29803240299224854,0.0496580173268359
llm_goals_53,test,6,0.2994159162044525,0.0987357123371914
llm_goals_53,test,7,0.2980165183544159,0.0188377606185046
llm_goals_53,test,8,0.2981800138950348,0.4663370658319237
llm_goals_53,test,9,0.2995292544364929,0.0353320635735392
llm_goals_53,test,10,0.29770758748054504,0.2465247528295183
llm_goals_53,test,11,0.2983439862728119,0.3482216267575191
llm_goals_53,test,12,0.2979666292667389,0.252877033485255
llm_goals_53,test,13,0.2979484498500824,0.1153444147006914
llm_goals_53,test,14,0.29835209250450134,0.3868210483124377
llm_goals_53,test,15,0.299257755279541,0.3705090883623566
llm_goals_53,test,16,0.2993869483470917,0.3407126088990169
llm_goals_53,test,17,0.2979036867618561,0.7543820151744927
llm_goals_53,test,18,0.29929181933403015,0.3322955201354975
llm_goals_53,test,19,0.29795610904693604,0.2261316448538241
llm_goals_53,test,20,0.29843172430992126,0.7343425814116562
llm_goals_53,test,21,0.2970971167087555,0.1601759070107346
llm_goals_53,test,22,0.2988515794277191,0.4190051893758809
llm_goals_53,test,23,0.29855015873908997,0.2354442333363782
llm_goals_53,test,24,0.2988842725753784,0.9719922543639756
llm_goals_53,test,25,0.29885637760162354,0.3289260309913144
llm_goals_53,test,26,0.2991870939731598,0.5781819989075234
llm_goals_53,test,27,0.2985846698284149,0.790876622992061
llm_goals_53,test,28,0.29860296845436096,0.2747068584160578
llm_goals_53,test,29,0.2979934513568878,0.796844478423965
llm_goals_53,test,30,0.29750075936317444,0.3573432990729852
llm_goals_53,test,31,0.2986513078212738,0.1489779797461384
llm_goals_53,test,32,0.29828110337257385,0.4788477745757265
llm_goals_53,test,33,0.2987831234931946,0.967617631882366
llm_goals_53,test,34,0.2984132766723633,0.2252558680565209
llm_goals_53,test,35,0.29821768403053284,0.2300146581986259
llm_goals_53,test,36,0.29875078797340393,0.2311446011459964
llm_goals_53,test,37,0.29754164814949036,0.144409781340655
llm_goals_53,test,38,0.2990749180316925,0.705274211964156
llm_goals_53,test,39,0.29915928840637207,0.5921281985506138
llm_goals_53,test,40,0.2992909252643585,0.2443004460536172
llm_goals_53,test,41,0.2989756166934967,0.061497045516474
llm_goals_53,test,42,0.2983502447605133,0.1570495895565415
llm_goals_53,test,43,0.2966049015522003,0.0641649100604756
llm_goals_53,test,44,0.2995724678039551,0.8074959504603542
llm_goals_53,test,45,0.30003365874290466,0.1813221143349916
llm_goals_53,test,46,0.2987813651561737,0.2352295334667747
llm_goals_53,test,47,0.2989685833454132,0.820244135045574
llm_goals_53,test,48,0.2988664209842682,0.4512817747836695
llm_goals_53,test,49,0.29760873317718506,0.7022675298006026
llm_goals_305,test,0,0.4832049012184143,0.0438240495208372
llm_goals_305,test,1,0.7001737952232361,0.1352509650243347
llm_goals_305,test,2,0.4857150912284851,0.1723536643548477
llm_goals_305,test,3,0.7204851508140564,0.250094549889143
llm_goals_305,test,4,0.7151495814323425,0.8419536365764244
llm_goals_305,test,5,0.4837875962257385,0.1913177614094214
llm_goals_305,test,6,0.48386895656585693,0.3723916849874114
llm_goals_305,test,7,0.4841535687446594,0.3337485526549648
llm_goals_305,test,8,0.7191978096961975,0.5824263052609597
llm_goals_305,test,9,0.4851328730583191,0.620184539039364
llm_goals_305,test,10,0.7019459009170532,0.494000537986959
llm_goals_305,test,11,0.48248717188835144,0.0482253546984711
llm_goals_305,test,12,0.5387787222862244,0.0565816635108951
llm_goals_305,test,13,0.484389066696167,0.1136796894182332
llm_goals_305,test,14,0.714648425579071,0.6102884358627442
llm_goals_305,test,15,0.7169636487960815,0.8302422164176205
llm_goals_305,test,16,0.7216938734054565,0.589511050327059
llm_goals_305,test,17,0.5047934651374817,0.2622011524235305
llm_goals_305,test,18,0.6881442666053772,0.3737828315077605
llm_goals_305,test,19,0.48544222116470337,0.0401465973617238
llm_goals_305,test,20,0.7199074625968933,0.8971526617856483
llm_goals_305,test,21,0.48329633474349976,0.513862561708882
llm_goals_305,test,22,0.5446676015853882,0.6617399492970566
llm_goals_305,test,23,0.48136845231056213,0.3090300849059566
llm_goals_305,test,24,0.7224183678627014,0.4310886044152408
llm_goals_305,test,25,0.711460530757904,0.7346082271960779
llm_goals_305,test,26,0.7219110727310181,0.7784622217319951
llm_goals_305,test,27,0.49187034368515015,0.2285268892180965
llm_goals_305,test,28,0.7216471433639526,0.1970890988987535
llm_goals_305,test,29,0.48585525155067444,0.0543679274789402
llm_goals_305,test,30,0.4842408299446106,0.8325365217586828
llm_goals_305,test,31,0.4840472340583801,0.5078524819974076
llm_goals_305,test,32,0.6058451533317566,0.2061603354174771
llm_goals_305,test,33,0.7050198316574097,0.6039954962603894
llm_goals_305,test,34,0.7232211828231812,0.5754865922825898
llm_goals_305,test,35,0.48765504360198975,0.1664152616423
llm_goals_305,test,36,0.7221419215202332,0.7880486291444165
llm_goals_305,test,37,0.6341304779052734,0.03605078558617
llm_goals_305,test,38,0.7025620937347412,0.0575570687729377
llm_goals_305,test,39,0.4898284673690796,0.4297583044645929
llm_goals_305,test,40,0.4817250669002533,0.1915445959084499
llm_goals_305,test,41,0.4852563142776489,0.6536953532364804
llm_goals_305,test,42,0.48420238494873047,0.122527873906398
llm_goals_305,test,43,0.48406532406806946,0.1711906255032107
llm_goals_305,test,44,0.7170432209968567,0.8371975999741996
llm_goals_305,test,45,0.7219146490097046,0.3836914756409174
llm_goals_305,test,46,0.48266682028770447,0.0543049670678201
llm_goals_305,test,47,0.7171486616134644,0.5245484205428286
llm_goals_305,test,48,0.7151024341583252,0.5969765977742552
llm_goals_305,test,49,0.5600531101226807,0.2351952691938564
llm_goals_81,test,0,0.4336223602294922,0.2060992725380174
llm_goals_81,test,1,0.4341766834259033,0.1581010259271764
llm_goals_81,test,2,0.43414056301116943,0.6899052301996583
llm_goals_81,test,3,0.43487805128097534,0.7805136989347726
llm_goals_81,test,4,0.4343169033527374,0.0972643506873081
llm_goals_81,test,5,0.43183034658432007,0.3089356066572108
llm_goals_81,test,6,0.43369871377944946,0.0383605574042886
llm_goals_81,test,7,0.4336729943752289,0.3161536190910042
llm_goals_81,test,8,0.43459463119506836,0.5126683642741202
llm_goals_81,test,9,0.4342602789402008,0.1286837550045714
llm_goals_81,test,10,0.4338444173336029,0.1256029490894943
llm_goals_81,test,11,0.4352163076400757,0.408772132619307
llm_goals_81,test,12,0.43387001752853394,0.1214277758013974
llm_goals_81,test,13,0.43231305480003357,0.0409721836992895
llm_goals_81,test,14,0.4341091215610504,0.0827685330888261
llm_goals_81,test,15,0.4328451156616211,0.224678660518271
llm_goals_81,test,16,0.4335498809814453,0.0872837265533343
llm_goals_81,test,17,0.43427857756614685,0.2837028272857018
llm_goals_81,test,18,0.43358710408210754,0.1193591553103013
llm_goals_81,test,19,0.4331549406051636,0.4952181327203863
llm_goals_81,test,20,0.43397483229637146,0.2564665599863756
llm_goals_81,test,21,0.4345640540122986,0.2041083994609424
llm_goals_81,test,22,0.4336710274219513,0.3119739178030907
llm_goals_81,test,23,0.4307252764701843,0.0994638293022036
llm_goals_81,test,24,0.4344484806060791,0.1575467450844306
llm_goals_81,test,25,0.43432316184043884,0.0291125673317165
llm_goals_81,test,26,0.4341147840023041,0.0869071714890548
llm_goals_81,test,27,0.434386283159256,0.1863589895989203
llm_goals_81,test,28,0.43506672978401184,0.119977738469712
llm_goals_81,test,29,0.4339120090007782,0.4410572612602489
llm_goals_81,test,30,0.43353480100631714,0.0555125882985712
llm_goals_81,test,31,0.4301912188529968,0.3486004983652399
llm_goals_81,test,32,0.43496212363243103,0.189941914212834
llm_goals_81,test,33,0.43399834632873535,0.1890390776737304
llm_goals_81,test,34,0.4304806888103485,0.1328217427635933
llm_goals_81,test,35,0.434736967086792,0.1343397356978365
llm_goals_81,test,36,0.4336031973361969,0.0899701336915993
llm_goals_81,test,37,0.4293786287307739,0.6478431799513802
llm_goals_81,test,38,0.43425920605659485,0.7351879424598803
llm_goals_81,test,39,0.43469011783599854,0.0885373562628188
llm_goals_81,test,40,0.4343954026699066,0.3150516869571108
llm_goals_81,test,41,0.4336509704589844,0.0498826197855212
llm_goals_81,test,42,0.4339805841445923,0.1381899328653333
llm_goals_81,test,43,0.4338320791721344,0.3536647169335493
llm_goals_81,test,44,0.43373557925224304,0.4857917971856136
llm_goals_81,test,45,0.4348795413970947,0.3921515914618259
llm_goals_81,test,46,0.43406403064727783,0.1179764264181158
llm_goals_81,test,47,0.4338359236717224,0.2786576050295211
llm_goals_81,test,48,0.43387842178344727,0.0726556891905063
llm_goals_81,test,49,0.4345874488353729,0.3912461690418435
llm_goals_133,test,0,0.5668264627456665,0.4440529323069227
llm_goals_133,test,1,0.5667641162872314,0.3955848959165448
llm_goals_133,test,2,0.5667649507522583,0.5130030466159122
llm_goals_133,test,3,0.5667657256126404,0.220245744335209
llm_goals_133,test,4,0.5667990446090698,0.5879895213269322
llm_goals_133,test,5,0.5669783353805542,0.8588037554744828
llm_goals_133,test,6,0.5669776797294617,0.8322442189227136
llm_goals_133,test,7,0.5667083263397217,0.6588760518065184
llm_goals_133,test,8,0.5666915774345398,0.5253890103378467
llm_goals_133,test,9,0.5667969584465027,0.8170254264656348
llm_goals_133,test,10,0.5667202472686768,0.4608975636274737
llm_goals_133,test,11,0.5666453242301941,0.2002942757447116
llm_goals_133,test,12,0.5667641162872314,0.4022389740133304
llm_goals_133,test,13,0.566886305809021,0.8516519653230656
llm_goals_133,test,14,0.566601574420929,0.0158444788418117
llm_goals_133,test,15,0.566665768623352,0.5978365001716675
llm_goals_133,test,16,0.5667177438735962,0.0529535109452945
llm_goals_133,test,17,0.5666903853416443,0.2556263204518722
llm_goals_133,test,18,0.5667277574539185,0.0167466717766424
llm_goals_133,test,19,0.5668047070503235,0.0944244428527934
llm_goals_133,test,20,0.5666868686676025,0.6914976263434149
llm_goals_133,test,21,0.5667549967765808,0.8436048633272258
llm_goals_133,test,22,0.5667251348495483,0.2868007235124719
llm_goals_133,test,23,0.5666791200637817,0.1160262834732456
llm_goals_133,test,24,0.5666028261184692,0.0154812210183983
llm_goals_133,test,25,0.5666574835777283,0.3119789757098087
llm_goals_133,test,26,0.5665786266326904,0.7929420521534107
llm_goals_133,test,27,0.5667530298233032,0.2037716603127804
llm_goals_133,test,28,0.5666957497596741,0.3118772754734958
llm_goals_133,test,29,0.5668941736221313,0.1808521166351184
llm_goals_133,test,30,0.5668596029281616,0.5045142070808941
llm_goals_133,test,31,0.5668221712112427,0.8498369503935175
llm_goals_133,test,32,0.5666929483413696,0.2694815541177057
llm_goals_133,test,33,0.5666893124580383,0.0177588567715867
llm_goals_133,test,34,0.5667046904563904,0.445395356923409
llm_goals_133,test,35,0.566764771938324,0.4995371449295338
llm_goals_133,test,36,0.5667498111724854,0.4823747071618506
llm_goals_133,test,37,0.5668373703956604,0.0266815206082507
llm_goals_133,test,38,0.5668449401855469,0.0874820007739646
llm_goals_133,test,39,0.5666301846504211,0.3366422826024648
llm_goals_133,test,40,0.5667278170585632,0.2329899847029727
llm_goals_133,test,41,0.5667984485626221,0.8571491285285516
llm_goals_133,test,42,0.5667040944099426,0.8193201391590431
llm_goals_133,test,43,0.5668084621429443,0.8422289427227235
llm_goals_133,test,44,0.5667772889137268,0.5294372908894646
llm_goals_133,test,45,0.5667611360549927,0.2588603231187393
llm_goals_133,test,46,0.5666806101799011,0.2337208499272645
llm_goals_133,test,47,0.5667398571968079,0.1166634693988454
llm_goals_133,test,48,0.566627562046051,0.6138306242825586
llm_goals_133,test,49,0.5668948888778687,0.2806552214021111
llm_goals_157,test,0,0.2396055907011032,0.6355320308190959
llm_goals_157,test,1,0.21815624833106995,0.7424853921670768
llm_goals_157,test,2,0.8339670896530151,0.073314241900373
llm_goals_157,test,3,0.15150368213653564,0.4675850432855137
llm_goals_157,test,4,0.8344521522521973,0.5458627288071527
llm_goals_157,test,5,0.1670840084552765,0.0771889198722071
llm_goals_157,test,6,0.17475935816764832,0.0818153292956077
llm_goals_157,test,7,0.17527393996715546,0.0574362851972565
llm_goals_157,test,8,0.8346112370491028,0.612650525482466
llm_goals_157,test,9,0.27337485551834106,0.025148981997583
llm_goals_157,test,10,0.1821008026599884,0.075973919657683
llm_goals_157,test,11,0.3417065441608429,0.1464687737403695
llm_goals_157,test,12,0.8341537714004517,0.7268917489370714
llm_goals_157,test,13,0.187771737575531,0.0531648084509613
llm_goals_157,test,14,0.17946363985538483,0.3299706213737123
llm_goals_157,test,15,0.8345049619674683,0.3043292296967997
llm_goals_157,test,16,0.8339370489120483,0.7463776357566768
llm_goals_157,test,17,0.187745600938797,0.8521737469617978
llm_goals_157,test,18,0.1857236921787262,0.6376039881194869
llm_goals_157,test,19,0.8347147107124329,0.6077618725185671
llm_goals_157,test,20,0.8345901966094971,0.7242060188576479
llm_goals_157,test,21,0.18582889437675476,0.2051165554994685
llm_goals_157,test,22,0.42659905552864075,0.7995715125449558
llm_goals_157,test,23,0.22209392488002777,0.1467023010082007
llm_goals_157,test,24,0.17115692794322968,0.3108830330026099
llm_goals_157,test,25,0.18453098833560944,0.1001087986592319
llm_goals_157,test,26,0.18252409994602203,0.4552169748909065
llm_goals_157,test,27,0.3931126892566681,0.8485816155817194
llm_goals_157,test,28,0.14464370906352997,0.2265082237706469
llm_goals_157,test,29,0.14609487354755402,0.6283476283012512
llm_goals_157,test,30,0.09782743453979492,0.2620283904445711
llm_goals_157,test,31,0.30919113755226135,0.167952021094149
llm_goals_157,test,32,0.06289757788181305,0.0898426790762853
llm_goals_157,test,33,0.814617931842804,0.5043942365008254
llm_goals_157,test,34,0.19760838150978088,0.0725868476993519
llm_goals_157,test,35,0.2601828873157501,0.1237829648198263
llm_goals_157,test,36,0.17473727464675903,0.1925318890375481
llm_goals_157,test,37,0.06337697803974152,0.6842419436738383
llm_goals_157,test,38,0.09615875780582428,0.691649855522258
llm_goals_157,test,39,0.834343671798706,0.5264401359594252
llm_goals_157,test,40,0.18424175679683685,0.1777385641102212
llm_goals_157,test,41,0.15298104286193848,0.0328183379619016
llm_goals_157,test,42,0.19497796893119812,0.1949198629220696
llm_goals_157,test,43,0.17818720638751984,0.135028229661103
llm_goals_157,test,44,0.8345171213150024,0.7258662504199943
llm_goals_157,test,45,0.15103495121002197,0.0935995604675028
llm_goals_157,test,46,0.8345563411712646,0.15418174898882
llm_goals_157,test,47,0.14394095540046692,0.3212815335896988
llm_goals_157,test,48,0.09486837685108185,0.2795304252191649
llm_goals_157,test,49,0.37258100509643555,0.7371862441831876
llm_goals_186,test,0,0.12444496154785156,0.0367510709036243
llm_goals_186,test,1,0.4439559280872345,0.9589190483469818
llm_goals_186,test,2,0.09412642568349838,0.0667798014228945
llm_goals_186,test,3,0.11206120252609253,0.0340377111776925
llm_goals_186,test,4,0.4451729655265808,0.4002710909538108
llm_goals_186,test,5,0.820469856262207,0.8817241785963961
llm_goals_186,test,6,0.9032920598983765,0.8798493094453552
llm_goals_186,test,7,0.09620870649814606,0.0443878504309986
llm_goals_186,test,8,0.4422043263912201,0.4415592471567901
llm_goals_186,test,9,0.9213876724243164,0.8847759344734797
llm_goals_186,test,10,0.8625185489654541,0.8905032311770802
llm_goals_186,test,11,0.9016187787055969,0.704948600699243
llm_goals_186,test,12,0.16393201053142548,0.409597002882604
llm_goals_186,test,13,0.9251735806465149,0.8867287662434378
llm_goals_186,test,14,0.8054285645484924,0.8128145923588551
llm_goals_186,test,15,0.9538971781730652,0.4832503599366539
llm_goals_186,test,16,0.8110562562942505,0.8267478910426029
llm_goals_186,test,17,0.6041462421417236,0.957207678971151
llm_goals_186,test,18,0.816189169883728,0.7610878868091969
llm_goals_186,test,19,0.9569559693336487,0.7872715543385574
llm_goals_186,test,20,0.4321395754814148,0.520514563098938
llm_goals_186,test,21,0.8536897897720337,0.9084513175350956
llm_goals_186,test,22,0.302275151014328,0.9157385254478472
llm_goals_186,test,23,0.9111765027046204,0.5585242484052028
llm_goals_186,test,24,0.8164396286010742,0.8535531187105866
llm_goals_186,test,25,0.8657618165016174,0.8466653814359101
llm_goals_186,test,26,0.2788786292076111,0.8550637970014335
llm_goals_186,test,27,0.11386993527412415,0.9398175480710378
llm_goals_186,test,28,0.10544437170028687,0.0028363068023126
llm_goals_186,test,29,0.9221686124801636,0.8305997204848772
llm_goals_186,test,30,0.8412409424781799,0.8987706923113074
llm_goals_186,test,31,0.7655549049377441,0.8880043621770078
llm_goals_186,test,32,0.8688027858734131,0.4759852559635721
llm_goals_186,test,33,0.8324189782142639,0.8450176079466206
llm_goals_186,test,34,0.924990177154541,0.9532899841201126
llm_goals_186,test,35,0.955620288848877,0.0290458993492998
llm_goals_186,test,36,0.8994510769844055,0.9324312223548752
llm_goals_186,test,37,0.18116030097007751,0.7062064442252718
llm_goals_186,test,38,0.4752627909183502,0.8475385890175953
llm_goals_186,test,39,0.9170067310333252,0.2451742937665777
llm_goals_186,test,40,0.7171481847763062,0.7628755019143414
llm_goals_186,test,41,0.8219947814941406,0.9118681525061464
llm_goals_186,test,42,0.10334347188472748,0.0299416334392138
llm_goals_186,test,43,0.8327187299728394,0.9088531704037304
llm_goals_186,test,44,0.43143996596336365,0.399223879564458
llm_goals_186,test,45,0.1253439337015152,0.0207412737387863
llm_goals_186,test,46,0.1876775026321411,0.504435185656878
llm_goals_186,test,47,0.10808321088552475,0.0426878976387859
llm_goals_186,test,48,0.7897878885269165,0.8374391175680358
llm_goals_186,test,49,0.09070327877998352,0.9286286845568336
llm_goals_401,test,0,0.8715993165969849,0.9617876405899952
llm_goals_401,test,1,0.780176043510437,0.9404490002093446
llm_goals_401,test,2,0.8197088241577148,0.935046673136648
llm_goals_401,test,3,0.8819367289543152,0.9543637645078082
llm_goals_401,test,4,0.8712758421897888,0.9660480618061604
llm_goals_401,test,5,0.8519677519798279,0.9211199313115676
llm_goals_401,test,6,0.850259006023407,0.902884063341732
llm_goals_401,test,7,0.8432173728942871,0.94729781169548
llm_goals_401,test,8,0.8785117268562317,0.957207800210959
llm_goals_401,test,9,0.8559614419937134,0.9145740305533586
llm_goals_401,test,10,0.77599036693573,0.9267561414021268
llm_goals_401,test,11,0.8663089275360107,0.9242701512661912
llm_goals_401,test,12,0.7511382102966309,0.8435903191618971
llm_goals_401,test,13,0.5729135870933533,0.938192186231483
llm_goals_401,test,14,0.357323557138443,0.8529976602342201
llm_goals_401,test,15,0.8797394037246704,0.9478884329856586
llm_goals_401,test,16,0.3638516366481781,0.8192354997719525
llm_goals_401,test,17,0.7738760113716125,0.6739075253562599
llm_goals_401,test,18,0.37092506885528564,0.5197439125792754
llm_goals_401,test,19,0.881118655204773,0.8583977843568281
llm_goals_401,test,20,0.8713549971580505,0.9455993763925636
llm_goals_401,test,21,0.8500611782073975,0.9705763460601288
llm_goals_401,test,22,0.8731840252876282,0.8491748031226035
llm_goals_401,test,23,0.3645932972431183,0.8515620190775874
llm_goals_401,test,24,0.36956435441970825,0.9138265832467732
llm_goals_401,test,25,0.7670369744300842,0.9028179661562172
llm_goals_401,test,26,0.3483969569206238,0.9558187125982178
llm_goals_401,test,27,0.8453706502914429,0.5612720368847214
llm_goals_401,test,28,0.8716347217559814,0.9617025244944858
llm_goals_401,test,29,0.8811307549476624,0.8060652645257104
llm_goals_401,test,30,0.7741659879684448,0.8896843204884566
llm_goals_401,test,31,0.8467873930931091,0.9526923309875832
llm_goals_401,test,32,0.7894012331962585,0.8506568677811456
llm_goals_401,test,33,0.3855968713760376,0.7802594846608962
llm_goals_401,test,34,0.7764875292778015,0.8870658880824575
llm_goals_401,test,35,0.23439940810203552,0.954492271718892
llm_goals_401,test,36,0.7934780716896057,0.562048146808884
llm_goals_401,test,37,0.7440569400787354,0.7781126533121301
llm_goals_401,test,38,0.8785926103591919,0.8591165497422784
llm_goals_401,test,39,0.7488837838172913,0.8788934372485709
llm_goals_401,test,40,0.8719996809959412,0.8091102829935539
llm_goals_401,test,41,0.849281907081604,0.8675616408909181
llm_goals_401,test,42,0.8790556192398071,0.949993671778176
llm_goals_401,test,43,0.8584041595458984,0.9467448581700896
llm_goals_401,test,44,0.8801921606063843,0.8687242293966851
llm_goals_401,test,45,0.8784473538398743,0.9587250339741858
llm_goals_401,test,46,0.2072802037000656,0.860452357605835
llm_goals_401,test,47,0.8689415454864502,0.9253614550163576
llm_goals_401,test,48,0.37966135144233704,0.8647797053472309
llm_goals_401,test,49,0.7793925404548645,0.9454760570818088
llm_goals_420,test,0,0.21099010109901428,0.9711560599789952
llm_goals_420,test,1,0.21809864044189453,0.5978053926454786
llm_goals_420,test,2,0.20730668306350708,0.0817351976918742
llm_goals_420,test,3,0.420096218585968,0.4730792485055278
llm_goals_420,test,4,0.2076462209224701,0.8487827178347965
llm_goals_420,test,5,0.19399318099021912,0.1386299974231985
llm_goals_420,test,6,0.4199082553386688,0.9677888754301456
llm_goals_420,test,7,0.2058870941400528,0.9193048504515688
llm_goals_420,test,8,0.20450176298618317,0.1105940972856699
llm_goals_420,test,9,0.3739891052246094,0.0770199238560772
llm_goals_420,test,10,0.19629910588264465,0.6569762799970394
llm_goals_420,test,11,0.39311617612838745,0.4533228613613927
llm_goals_420,test,12,0.20522448420524597,0.9317349816078236
llm_goals_420,test,13,0.222793847322464,0.5042190525433249
llm_goals_420,test,14,0.20017659664154053,0.1440102415442949
llm_goals_420,test,15,0.21283365786075592,0.3273024402815145
llm_goals_420,test,16,0.4276424050331116,0.3681827771891491
llm_goals_420,test,17,0.22289925813674927,0.4632337459639504
llm_goals_420,test,18,0.20942752063274384,0.0396023029282074
llm_goals_420,test,19,0.1931607574224472,0.1093558671361818
llm_goals_420,test,20,0.2037779986858368,0.0396252534530884
llm_goals_420,test,21,0.20288871228694916,0.0465700238365722
llm_goals_420,test,22,0.21366499364376068,0.1916998836408959
llm_goals_420,test,23,0.33081409335136414,0.1681177802630551
llm_goals_420,test,24,0.23730336129665375,0.1698145969501656
llm_goals_420,test,25,0.1975952535867691,0.9829779862454848
llm_goals_420,test,26,0.21575208008289337,0.0308922007548325
llm_goals_420,test,27,0.4377521872520447,0.8125830946093627
llm_goals_420,test,28,0.199713796377182,0.9367654930611784
llm_goals_420,test,29,0.20057624578475952,0.8184437096518178
llm_goals_420,test,30,0.22565264999866486,0.9386603865018432
llm_goals_420,test,31,0.1964147686958313,0.10851579640767
llm_goals_420,test,32,0.25078120827674866,0.9666374926215946
llm_goals_420,test,33,0.2944084107875824,0.8901044715204638
llm_goals_420,test,34,0.3041853904724121,0.9215939624410928
llm_goals_420,test,35,0.38227614760398865,0.5267802921981926
llm_goals_420,test,36,0.19409926235675812,0.092175658510858
llm_goals_420,test,37,0.38537874817848206,0.984362841437198
llm_goals_420,test,38,0.2090984284877777,0.8828047505171253
llm_goals_420,test,39,0.384512335062027,0.2972862903877061
llm_goals_420,test,40,0.21777546405792236,0.0444444838151291
llm_goals_420,test,41,0.4680308997631073,0.5478361768565629
llm_goals_420,test,42,0.24960261583328247,0.0713350995649345
llm_goals_420,test,43,0.20883409678936005,0.5106248261137879
llm_goals_420,test,44,0.19862419366836548,0.0905225225380096
llm_goals_420,test,45,0.37301015853881836,0.1531352912609338
llm_goals_420,test,46,0.19915811717510223,0.1216667665904784
llm_goals_420,test,47,0.20568345487117767,0.1240460201984882
llm_goals_420,test,48,0.29531413316726685,0.0596825371886435
llm_goals_420,test,49,0.3932242691516876,0.8335117138475971
llm_goals_263,test,0,0.6789988279342651,0.5209935935366439
llm_goals_263,test,1,0.6788538098335266,0.7968068746445729
llm_goals_263,test,2,0.6789608001708984,0.5467470232144288
llm_goals_263,test,3,0.6789520382881165,0.4733215007319302
llm_goals_263,test,4,0.6788744926452637,0.0057083956844275
llm_goals_263,test,5,0.6790695786476135,0.3493473596509424
llm_goals_263,test,6,0.6789231896400452,0.8201550249820808
llm_goals_263,test,7,0.6789250373840332,0.6557906816477753
llm_goals_263,test,8,0.6788535118103027,0.7543426271500917
llm_goals_263,test,9,0.679096519947052,0.2009829696053626
llm_goals_263,test,10,0.679054319858551,0.8489753664207734
llm_goals_263,test,11,0.6789820790290833,0.652436611134806
llm_goals_263,test,12,0.6789785623550415,0.8143466734577086
llm_goals_263,test,13,0.6788881421089172,0.4245607212842352
llm_goals_263,test,14,0.6788685917854309,0.0036452536113296
llm_goals_263,test,15,0.6789518594741821,0.0558692673280436
llm_goals_263,test,16,0.6789295673370361,0.0066133854077101
llm_goals_263,test,17,0.678917646408081,0.7648655133279786
llm_goals_263,test,18,0.6789792776107788,0.006651634113929
llm_goals_263,test,19,0.6788322925567627,0.0487544569045993
llm_goals_263,test,20,0.6790087819099426,0.0331700030843074
llm_goals_263,test,21,0.6790229082107544,0.2223244924357846
llm_goals_263,test,22,0.6790624260902405,0.8850367693524676
llm_goals_263,test,23,0.679031491279602,0.6136206316393849
llm_goals_263,test,24,0.678884744644165,0.0103991722036265
llm_goals_263,test,25,0.6789067387580872,0.7490420921699364
llm_goals_263,test,26,0.6789050698280334,0.0149267434716544
llm_goals_263,test,27,0.6789705753326416,0.7989164572724189
llm_goals_263,test,28,0.6789790987968445,0.6376088146659757
llm_goals_263,test,29,0.6788267493247986,0.0419343519282845
llm_goals_263,test,30,0.6788703203201294,0.8598701912381717
llm_goals_263,test,31,0.6790446043014526,0.168156003178124
llm_goals_263,test,32,0.6788362860679626,0.6210063725022936
llm_goals_263,test,33,0.6787822842597961,0.0062242916440703
llm_goals_263,test,34,0.6790074706077576,0.5691545063377933
llm_goals_263,test,35,0.6789982914924622,0.7982008481236453
llm_goals_263,test,36,0.6789932250976562,0.763706617142148
llm_goals_263,test,37,0.6789905428886414,0.0342430752915087
llm_goals_263,test,38,0.6788877844810486,0.1222660265497154
llm_goals_263,test,39,0.6789953112602234,0.8269810089104592
llm_goals_263,test,40,0.6789829730987549,0.5318831885299807
llm_goals_263,test,41,0.6790953278541565,0.3207528198494705
llm_goals_263,test,42,0.6790015697479248,0.4801431121790341
llm_goals_263,test,43,0.6790041923522949,0.1981375631842198
llm_goals_263,test,44,0.6789634227752686,0.1080272425734687
llm_goals_263,test,45,0.6789903044700623,0.8354177467829539
llm_goals_263,test,46,0.6789515018463135,0.7413273974280716
llm_goals_263,test,47,0.679092288017273,0.4918597295945824
llm_goals_263,test,48,0.6789470911026001,0.0043793883332247
llm_goals_263,test,49,0.6790005564689636,0.949535011062065
llm_goals_427,test,0,0.5343061685562134,0.2826418194423331
llm_goals_427,test,1,0.7325527667999268,0.380977464557011
llm_goals_427,test,2,0.43207818269729614,0.1487359361961987
llm_goals_427,test,3,0.4723886549472809,0.3576580711889061
llm_goals_427,test,4,0.7331050634384155,0.2270212997591984
llm_goals_427,test,5,0.6241561770439148,0.2711625929555684
llm_goals_427,test,6,0.7714909911155701,0.1402530886796432
llm_goals_427,test,7,0.5496393442153931,0.2935996266753878
llm_goals_427,test,8,0.5545151233673096,0.2137670314614922
llm_goals_427,test,9,0.6318550109863281,0.2160041132788326
llm_goals_427,test,10,0.7543596625328064,0.3669200145460971
llm_goals_427,test,11,0.6483660340309143,0.1516882402250043
llm_goals_427,test,12,0.5329795479774475,0.1475453757541205
llm_goals_427,test,13,0.5487964749336243,0.1350458405109811
llm_goals_427,test,14,0.4990411102771759,0.2337964230636721
llm_goals_427,test,15,0.7684957385063171,0.3115728193455654
llm_goals_427,test,16,0.5482310652732849,0.131520745523965
llm_goals_427,test,17,0.7132839560508728,0.3278496493117007
llm_goals_427,test,18,0.7576345801353455,0.3527567015929387
llm_goals_427,test,19,0.3971717357635498,0.2313815817463674
llm_goals_427,test,20,0.6951618790626526,0.3221674001172885
llm_goals_427,test,21,0.6360405683517456,0.2494982149574568
llm_goals_427,test,22,0.7357025146484375,0.3779860883941361
llm_goals_427,test,23,0.5316876769065857,0.3509138567308049
llm_goals_427,test,24,0.6330001354217529,0.1946130607095621
llm_goals_427,test,25,0.5814751982688904,0.228222711067827
llm_goals_427,test,26,0.762302577495575,0.2768263295001016
llm_goals_427,test,27,0.5381453037261963,0.2199474495731038
llm_goals_427,test,28,0.6240439414978027,0.1579098434370231
llm_goals_427,test,29,0.542618453502655,0.069843346359134
llm_goals_427,test,30,0.7323828935623169,0.5359787931933129
llm_goals_427,test,31,0.4458824694156647,0.1293182240484197
llm_goals_427,test,32,0.6330690383911133,0.2396093502744168
llm_goals_427,test,33,0.649744987487793,0.1913976476875258
llm_goals_427,test,34,0.6899052262306213,0.4990200469731286
llm_goals_427,test,35,0.5370741486549377,0.357807030322738
llm_goals_427,test,36,0.5501160621643066,0.2580007688260685
llm_goals_427,test,37,0.4001210331916809,0.4031150735356658
llm_goals_427,test,38,0.6561838984489441,0.2737247490124149
llm_goals_427,test,39,0.47416171431541443,0.2478362318668904
llm_goals_427,test,40,0.637619137763977,0.5026721270474518
llm_goals_427,test,41,0.5322416424751282,0.2273186194850299
llm_goals_427,test,42,0.44294729828834534,0.4444071308114509
llm_goals_427,test,43,0.7662334442138672,0.2369720640870139
llm_goals_427,test,44,0.7676442861557007,0.3961639877527012
llm_goals_427,test,45,0.5415191054344177,0.2771055535106792
llm_goals_427,test,46,0.672573447227478,0.3138421601355035
llm_goals_427,test,47,0.4670843780040741,0.3609031182529178
llm_goals_427,test,48,0.5920025706291199,0.1899314535085168
llm_goals_427,test,49,0.7467164993286133,0.4452677872765214
llm_goals_449,test,0,0.047297894954681396,0.0338418398422384
llm_goals_449,test,1,0.06976495683193207,0.5657697432527007
llm_goals_449,test,2,0.04851813614368439,0.0110135668657682
llm_goals_449,test,3,0.08141845464706421,0.2352946829974157
llm_goals_449,test,4,0.24757234752178192,0.4842577903424374
llm_goals_449,test,5,0.05661991611123085,0.3485549555066181
llm_goals_449,test,6,0.058811672031879425,0.0519669241570106
llm_goals_449,test,7,0.05751414969563484,0.0031269987549636
llm_goals_449,test,8,0.2669842541217804,0.2974090419757364
llm_goals_449,test,9,0.05085839703679085,0.0925536782452943
llm_goals_449,test,10,0.05562124773859978,0.4386853694122202
llm_goals_449,test,11,0.051742617040872574,0.0063782741293255
llm_goals_449,test,12,0.04760237783193588,0.0714949969968408
llm_goals_449,test,13,0.06376513093709946,0.0227751515807358
llm_goals_449,test,14,0.09738780558109283,0.3195753835685478
llm_goals_449,test,15,0.06771636009216309,0.2420638006790782
llm_goals_449,test,16,0.05738139897584915,0.0953277975239941
llm_goals_449,test,17,0.06993599236011505,0.5835221517250182
llm_goals_449,test,18,0.05832551792263985,0.1603112674891041
llm_goals_449,test,19,0.05841855704784393,0.6833613521345718
llm_goals_449,test,20,0.22898665070533752,0.4549636770044163
llm_goals_449,test,21,0.052930332720279694,0.1579551412473356
llm_goals_449,test,22,0.046485841274261475,0.4400750405221786
llm_goals_449,test,23,0.05714951083064079,0.0251530775884869
llm_goals_449,test,24,0.07674705982208252,0.2695772384712642
llm_goals_449,test,25,0.13609065115451813,0.4519585097229938
llm_goals_449,test,26,0.050023507326841354,0.1123650856049343
llm_goals_449,test,27,0.04995671659708023,0.4873581137262729
llm_goals_449,test,28,0.05090576410293579,0.1751834751469549
llm_goals_449,test,29,0.07462119311094284,0.5804994670396382
llm_goals_449,test,30,0.05697615072131157,0.4626257946707522
llm_goals_449,test,31,0.05809856578707695,0.093699276239858
llm_goals_449,test,32,0.05279138684272766,0.0067638379987912
llm_goals_449,test,33,0.055979836732149124,0.1681834079617312
llm_goals_449,test,34,0.13015273213386536,0.3493466516790004
llm_goals_449,test,35,0.05175524577498436,0.0068633490722964
llm_goals_449,test,36,0.05370636284351349,0.3064552348162913
llm_goals_449,test,37,0.053239379078149796,0.6652150761082033
llm_goals_449,test,38,0.05403921380639076,0.6741024889471738
llm_goals_449,test,39,0.05944160372018814,0.0101379083480811
llm_goals_449,test,40,0.05190099775791168,0.0089588521989727
llm_goals_449,test,41,0.05589398369193077,0.1780122280374072
llm_goals_449,test,42,0.061798080801963806,0.0051090174127486
llm_goals_449,test,43,0.054626863449811935,0.2498444495344637
llm_goals_449,test,44,0.06349454075098038,0.221646438441774
llm_goals_449,test,45,0.08542687445878983,0.2094790944562388
llm_goals_449,test,46,0.05615886673331261,0.0060341568900946
llm_goals_449,test,47,0.062359023839235306,0.1241710666090617
llm_goals_449,test,48,0.050583235919475555,0.1338989075405874
llm_goals_449,test,49,0.0778370052576065,0.5982472498607049
llm_goals_93,test,0,0.1305513083934784,0.0729546648027557
llm_goals_93,test,1,0.7989389300346375,0.6981648167380458
llm_goals_93,test,2,0.365701287984848,0.3318565449695651
llm_goals_93,test,3,0.3017832934856415,0.5505450337981316
llm_goals_93,test,4,0.7085962891578674,0.9547701486411684
llm_goals_93,test,5,0.3638007342815399,0.8378989133869518
llm_goals_93,test,6,0.8638978600502014,0.8645678173925403
llm_goals_93,test,7,0.0840148851275444,0.8034776985663136
llm_goals_93,test,8,0.8732705116271973,0.9139015687588464
llm_goals_93,test,9,0.8399631381034851,0.84076714934408
llm_goals_93,test,10,0.14308086037635803,0.6739811171843284
llm_goals_93,test,11,0.8898657560348511,0.1039005746210211
llm_goals_93,test,12,0.5038647055625916,0.4267719606730696
llm_goals_93,test,13,0.48436233401298523,0.8366404177501517
llm_goals_93,test,14,0.6916314959526062,0.51043302648597
llm_goals_93,test,15,0.8087002635002136,0.9305120134073452
llm_goals_93,test,16,0.6406545042991638,0.3610424966705102
llm_goals_93,test,17,0.056752223521471024,0.6383014796360676
llm_goals_93,test,18,0.6384631991386414,0.7718638733110297
llm_goals_93,test,19,0.4980080723762512,0.6413577201238602
llm_goals_93,test,20,0.8530039191246033,0.9315750038397262
llm_goals_93,test,21,0.8138797283172607,0.959779321458113
llm_goals_93,test,22,0.44734737277030945,0.8212707656396169
llm_goals_93,test,23,0.8094322681427002,0.0620020892855925
llm_goals_93,test,24,0.8249814510345459,0.6707394124648912
llm_goals_93,test,25,0.4297100305557251,0.8513622881275814
llm_goals_93,test,26,0.75677090883255,0.9295520797235572
llm_goals_93,test,27,0.8217586278915405,0.731125599089208
llm_goals_93,test,28,0.754844605922699,0.6387209955509132
llm_goals_93,test,29,0.4673038721084595,0.904010833763946
llm_goals_93,test,30,0.5158856511116028,0.6688304494781727
llm_goals_93,test,31,0.7527400851249695,0.7246857956085144
llm_goals_93,test,32,0.8323047161102295,0.8395389979928227
llm_goals_93,test,33,0.8913012146949768,0.4902935277900161
llm_goals_93,test,34,0.06221351772546768,0.5478889802411547
llm_goals_93,test,35,0.16128724813461304,0.8178324059837278
llm_goals_93,test,36,0.5091901421546936,0.8865698576591567
llm_goals_93,test,37,0.5644108653068542,0.9194802654649477
llm_goals_93,test,38,0.6681265234947205,0.8798558779688433
llm_goals_93,test,39,0.8321815729141235,0.2399301633758412
llm_goals_93,test,40,0.8482808470726013,0.946265162703226
llm_goals_93,test,41,0.7681472301483154,0.8435175392494392
llm_goals_93,test,42,0.08226228505373001,0.260949780916787
llm_goals_93,test,43,0.8615394234657288,0.8190814792570137
llm_goals_93,test,44,0.8927982449531555,0.951412738003951
llm_goals_93,test,45,0.8627043962478638,0.5085180485155563
llm_goals_93,test,46,0.8456620573997498,0.946404217454057
llm_goals_93,test,47,0.35237303376197815,0.5778150570910173
llm_goals_93,test,48,0.636094331741333,0.6859809771933842
llm_goals_93,test,49,0.08089473098516464,0.6187362092588319
llm_goals_358,test,0,0.915558934211731,0.9198604804287828
llm_goals_358,test,1,0.5709055662155151,0.8623428941495346
llm_goals_358,test,2,0.9156898260116577,0.8146633538972576
llm_goals_358,test,3,0.9173790216445923,0.6635203414505186
llm_goals_358,test,4,0.919296383857727,0.9873765704706148
llm_goals_358,test,5,0.9156141877174377,0.8085697334742067
llm_goals_358,test,6,0.9145022034645081,0.8449593731392603
llm_goals_358,test,7,0.9100938439369202,0.7802023295414534
llm_goals_358,test,8,0.9193149209022522,0.5598487865057791
llm_goals_358,test,9,0.923234760761261,0.7689259718491731
llm_goals_358,test,10,0.8401479125022888,0.4090564458803354
llm_goals_358,test,11,0.9165669679641724,0.7281676519766522
llm_goals_358,test,12,0.8616359233856201,0.781706684015145
llm_goals_358,test,13,0.909970760345459,0.6175350663762746
llm_goals_358,test,14,0.14839774370193481,0.1259366314847969
llm_goals_358,test,15,0.925348699092865,0.9665391433681092
llm_goals_358,test,16,0.745927631855011,0.0977212357476949
llm_goals_358,test,17,0.5371708869934082,0.839291034278836
llm_goals_358,test,18,0.8420828580856323,0.0422931239637008
llm_goals_358,test,19,0.5593303442001343,0.3805861792548485
llm_goals_358,test,20,0.9131011962890625,0.972559035375534
llm_goals_358,test,21,0.9140401482582092,0.7844216881478699
llm_goals_358,test,22,0.9122382402420044,0.8432835924874589
llm_goals_358,test,23,0.13652221858501434,0.5831400725193898
llm_goals_358,test,24,0.254181444644928,0.4717356494812245
llm_goals_358,test,25,0.0764579102396965,0.5591997574513735
llm_goals_358,test,26,0.2736387252807617,0.2416094818565881
llm_goals_358,test,27,0.8689427971839905,0.8557277309389357
llm_goals_358,test,28,0.9170697927474976,0.7784201381020285
llm_goals_358,test,29,0.48688292503356934,0.9848793746115032
llm_goals_358,test,30,0.18571245670318604,0.6211451382223804
llm_goals_358,test,31,0.9054657816886902,0.3507967043222499
llm_goals_358,test,32,0.13800223171710968,0.670993868258675
llm_goals_358,test,33,0.41658660769462585,0.0563775762088827
llm_goals_358,test,34,0.18488101661205292,0.4923522105012786
llm_goals_358,test,35,0.9169865846633911,0.8157534628947803
llm_goals_358,test,36,0.8581609129905701,0.7491294702469764
llm_goals_358,test,37,0.5402523279190063,0.5478990059483921
llm_goals_358,test,38,0.4449560046195984,0.9873113036659013
llm_goals_358,test,39,0.1585860699415207,0.6807296173615788
llm_goals_358,test,40,0.9303176403045654,0.6446558925323718
llm_goals_358,test,41,0.40969976782798767,0.3360622093964198
llm_goals_358,test,42,0.914837121963501,0.6737437200032999
llm_goals_358,test,43,0.8984264731407166,0.453765234769123
llm_goals_358,test,44,0.6808140277862549,0.921309112250278
llm_goals_358,test,45,0.1955564022064209,0.5133269325272689
llm_goals_358,test,46,0.8706374168395996,0.7619134214235583
llm_goals_358,test,47,0.9132944941520691,0.5260478340890856
llm_goals_358,test,48,0.263114333152771,0.0910570466126295
llm_goals_358,test,49,0.8755224347114563,0.4710935988277175
