template_id,split,question_idx,prediction,label
llm_goals_75,test,0,0.18213005363941193,0.0759730864369293
llm_goals_75,test,1,0.6651593446731567,0.8120127334654941
llm_goals_75,test,2,0.6286173462867737,0.114709127325878
llm_goals_75,test,3,0.16056083142757416,0.1496870071208148
llm_goals_75,test,4,0.5736669301986694,0.321632167436288
llm_goals_75,test,5,0.12516732513904572,0.0308084020440725
llm_goals_75,test,6,0.536592423915863,0.1843175427563358
llm_goals_75,test,7,0.5335884094238281,0.0324508518984932
llm_goals_75,test,8,0.5731348991394043,0.5189439655904322
llm_goals_75,test,9,0.17782573401927948,0.1502023728460554
llm_goals_75,test,10,0.5469636917114258,0.7657257241793485
llm_goals_75,test,11,0.6683199405670166,0.8035850534204568
llm_goals_75,test,12,0.498159259557724,0.7307072346740583
llm_goals_75,test,13,0.5184365510940552,0.048717541539827
llm_goals_75,test,14,0.6559248566627502,0.4316714754134473
llm_goals_75,test,15,0.22307056188583374,0.3935335107130105
llm_goals_75,test,16,0.46504104137420654,0.6144967070154126
llm_goals_75,test,17,0.6676214933395386,0.4488809036296748
llm_goals_75,test,18,0.10281733423471451,0.3920271677291012
llm_goals_75,test,19,0.6752099394798279,0.7810532991351343
llm_goals_75,test,20,0.5702928304672241,0.5443958675997833
llm_goals_75,test,21,0.24788706004619598,0.1604437012338976
llm_goals_75,test,22,0.5032020211219788,0.856696781503626
llm_goals_75,test,23,0.6885164380073547,0.590876283737086
llm_goals_75,test,24,0.6242567896842957,0.4156776627984636
llm_goals_75,test,25,0.47947970032691956,0.5957841371692396
llm_goals_75,test,26,0.4057127833366394,0.3752723036244256
llm_goals_75,test,27,0.6714731454849243,0.7866846353759014
llm_goals_75,test,28,0.26616159081459045,0.1838073487826972
llm_goals_75,test,29,0.6264503002166748,0.853944837222159
llm_goals_75,test,30,0.6257634162902832,0.5992601606803653
llm_goals_75,test,31,0.22524352371692657,0.0743179753131944
llm_goals_75,test,32,0.7402387857437134,0.8015013965908157
llm_goals_75,test,33,0.6843193173408508,0.3498422912836759
llm_goals_75,test,34,0.5570224523544312,0.6692496458902099
llm_goals_75,test,35,0.2688884437084198,0.134422492314073
llm_goals_75,test,36,0.5512853860855103,0.6683886675406214
llm_goals_75,test,37,0.709664523601532,0.7419528346018889
llm_goals_75,test,38,0.11086354404687881,0.6995551035614008
llm_goals_75,test,39,0.5045441389083862,0.4983520000991506
llm_goals_75,test,40,0.10390648990869522,0.8622382893733471
llm_goals_75,test,41,0.2198360413312912,0.1026909531781266
llm_goals_75,test,42,0.6224974989891052,0.0240063792397545
llm_goals_75,test,43,0.30481916666030884,0.1143049422953708
llm_goals_75,test,44,0.47029972076416016,0.6884653626060131
llm_goals_75,test,45,0.6094810962677002,0.1059329319681491
llm_goals_75,test,46,0.5860698819160461,0.405588044943207
llm_goals_75,test,47,0.39909473061561584,0.1393457779753145
llm_goals_75,test,48,0.10289964079856873,0.3670858714001939
llm_goals_75,test,49,0.668726921081543,0.8052537448889737
llm_goals_78,test,0,0.6888633966445923,0.710685793210421
llm_goals_78,test,1,0.6504509449005127,0.4521352090792044
llm_goals_78,test,2,0.676685094833374,0.6984783435600235
llm_goals_78,test,3,0.45970216393470764,0.2113029164438998
llm_goals_78,test,4,0.4582340717315674,0.59362413330766
llm_goals_78,test,5,0.4142317771911621,0.2973601904856865
llm_goals_78,test,6,0.4135803282260895,0.4911861319335979
llm_goals_78,test,7,0.7110369205474854,0.5109292120432304
llm_goals_78,test,8,0.44891029596328735,0.0214058927819057
llm_goals_78,test,9,0.4161677658557892,0.4561729519761929
llm_goals_78,test,10,0.4151800572872162,0.0679189625299845
llm_goals_78,test,11,0.4639187455177307,0.0515682065315942
llm_goals_78,test,12,0.4628197252750397,0.2832565853408079
llm_goals_78,test,13,0.4116126596927643,0.3661356287525266
llm_goals_78,test,14,0.4572699964046478,0.5580731095047592
llm_goals_78,test,15,0.4460585117340088,0.3539146882613382
llm_goals_78,test,16,0.4542461335659027,0.5198975918853231
llm_goals_78,test,17,0.5028482675552368,0.5974518854580513
llm_goals_78,test,18,0.4490497410297394,0.2539582988569515
llm_goals_78,test,19,0.8654664754867554,0.7818671446797811
llm_goals_78,test,20,0.4526905119419098,0.0817902870466741
llm_goals_78,test,21,0.4315071403980255,0.5313736848686219
llm_goals_78,test,22,0.7862756252288818,0.0578457193154468
llm_goals_78,test,23,0.7975402474403381,0.0588067732422271
llm_goals_78,test,24,0.4586635231971741,0.504620490567031
llm_goals_78,test,25,0.4085099399089813,0.1421301439321696
llm_goals_78,test,26,0.45165789127349854,0.4043160008204248
llm_goals_78,test,27,0.8088718056678772,0.1341547030792127
llm_goals_78,test,28,0.46087953448295593,0.585613528389559
llm_goals_78,test,29,0.4564305543899536,0.8037635629119642
llm_goals_78,test,30,0.404889851808548,0.2785354206868534
llm_goals_78,test,31,0.4091472327709198,0.5074585147112473
llm_goals_78,test,32,0.7825906276702881,0.1365325187594412
llm_goals_78,test,33,0.44982200860977173,0.559776178990876
llm_goals_78,test,34,0.4070281386375427,0.3923464392379233
llm_goals_78,test,35,0.6831021904945374,0.8694947336510349
llm_goals_78,test,36,0.41212713718414307,0.3406365028138333
llm_goals_78,test,37,0.5389107465744019,0.7407991807699866
llm_goals_78,test,38,0.46103012561798096,0.5783193820848918
llm_goals_78,test,39,0.46625974774360657,0.2552456607848852
llm_goals_78,test,40,0.4602595269680023,0.0543816822443674
llm_goals_78,test,41,0.4114532470703125,0.5111749720190839
llm_goals_78,test,42,0.6731489896774292,0.6373460300781935
llm_goals_78,test,43,0.41394364833831787,0.5824615770869916
llm_goals_78,test,44,0.45188453793525696,0.0725045867582824
llm_goals_78,test,45,0.46249231696128845,0.2055844845637347
llm_goals_78,test,46,0.7881954312324524,0.3443727538373435
llm_goals_78,test,47,0.4602234959602356,0.5137599661573641
llm_goals_78,test,48,0.4507414698600769,0.2386868392211005
llm_goals_78,test,49,0.5297360420227051,0.1092249314217729
llm_goals_53,test,0,0.3109585642814636,0.0313037944392233
llm_goals_53,test,1,0.30570706725120544,0.8240056677766623
llm_goals_53,test,2,0.30757656693458557,0.1449565288533016
llm_goals_53,test,3,0.30809205770492554,0.3918474727299789
llm_goals_53,test,4,0.3074372410774231,0.8286818229760559
llm_goals_53,test,5,0.3068143129348755,0.0496580173268359
llm_goals_53,test,6,0.3078070282936096,0.0987357123371914
llm_goals_53,test,7,0.3083515465259552,0.0188377606185046
llm_goals_53,test,8,0.3116433024406433,0.4663370658319237
llm_goals_53,test,9,0.30677732825279236,0.0353320635735392
llm_goals_53,test,10,0.3046935200691223,0.2465247528295183
llm_goals_53,test,11,0.308639258146286,0.3482216267575191
llm_goals_53,test,12,0.306004136800766,0.252877033485255
llm_goals_53,test,13,0.30798521637916565,0.1153444147006914
llm_goals_53,test,14,0.30660876631736755,0.3868210483124377
llm_goals_53,test,15,0.3037414848804474,0.3705090883623566
llm_goals_53,test,16,0.30763721466064453,0.3407126088990169
llm_goals_53,test,17,0.310794472694397,0.7543820151744927
llm_goals_53,test,18,0.3064180016517639,0.3322955201354975
llm_goals_53,test,19,0.3062132000923157,0.2261316448538241
llm_goals_53,test,20,0.30639195442199707,0.7343425814116562
llm_goals_53,test,21,0.307632714509964,0.1601759070107346
llm_goals_53,test,22,0.3060174584388733,0.4190051893758809
llm_goals_53,test,23,0.3071014881134033,0.2354442333363782
llm_goals_53,test,24,0.3064804971218109,0.9719922543639756
llm_goals_53,test,25,0.30880579352378845,0.3289260309913144
llm_goals_53,test,26,0.30715587735176086,0.5781819989075234
llm_goals_53,test,27,0.30476680397987366,0.790876622992061
llm_goals_53,test,28,0.3087395429611206,0.2747068584160578
llm_goals_53,test,29,0.30639922618865967,0.796844478423965
llm_goals_53,test,30,0.3095422387123108,0.3573432990729852
llm_goals_53,test,31,0.30752307176589966,0.1489779797461384
llm_goals_53,test,32,0.31473901867866516,0.4788477745757265
llm_goals_53,test,33,0.30790895223617554,0.967617631882366
llm_goals_53,test,34,0.30767104029655457,0.2252558680565209
llm_goals_53,test,35,0.3055424094200134,0.2300146581986259
llm_goals_53,test,36,0.30557334423065186,0.2311446011459964
llm_goals_53,test,37,0.3061560392379761,0.144409781340655
llm_goals_53,test,38,0.30468371510505676,0.705274211964156
llm_goals_53,test,39,0.3095681667327881,0.5921281985506138
llm_goals_53,test,40,0.30840858817100525,0.2443004460536172
llm_goals_53,test,41,0.30588456988334656,0.061497045516474
llm_goals_53,test,42,0.30698156356811523,0.1570495895565415
llm_goals_53,test,43,0.3064036965370178,0.0641649100604756
llm_goals_53,test,44,0.3051421344280243,0.8074959504603542
llm_goals_53,test,45,0.30791133642196655,0.1813221143349916
llm_goals_53,test,46,0.31108880043029785,0.2352295334667747
llm_goals_53,test,47,0.3075317442417145,0.820244135045574
llm_goals_53,test,48,0.3056134581565857,0.4512817747836695
llm_goals_53,test,49,0.3051215410232544,0.7022675298006026
llm_goals_305,test,0,0.3781469464302063,0.0438240495208372
llm_goals_305,test,1,0.8028278946876526,0.1352509650243347
llm_goals_305,test,2,0.4405713975429535,0.1723536643548477
llm_goals_305,test,3,0.7949306964874268,0.250094549889143
llm_goals_305,test,4,0.8088180422782898,0.8419536365764244
llm_goals_305,test,5,0.41160738468170166,0.1913177614094214
llm_goals_305,test,6,0.37788960337638855,0.3723916849874114
llm_goals_305,test,7,0.39522770047187805,0.3337485526549648
llm_goals_305,test,8,0.8152369260787964,0.5824263052609597
llm_goals_305,test,9,0.772060215473175,0.620184539039364
llm_goals_305,test,10,0.7769798040390015,0.494000537986959
llm_goals_305,test,11,0.38734695315361023,0.0482253546984711
llm_goals_305,test,12,0.3910726010799408,0.0565816635108951
llm_goals_305,test,13,0.6025276780128479,0.1136796894182332
llm_goals_305,test,14,0.8096957206726074,0.6102884358627442
llm_goals_305,test,15,0.8003242015838623,0.8302422164176205
llm_goals_305,test,16,0.811034083366394,0.589511050327059
llm_goals_305,test,17,0.6692696213722229,0.2622011524235305
llm_goals_305,test,18,0.7968443632125854,0.3737828315077605
llm_goals_305,test,19,0.5929285883903503,0.0401465973617238
llm_goals_305,test,20,0.8144572973251343,0.8971526617856483
llm_goals_305,test,21,0.38398492336273193,0.513862561708882
llm_goals_305,test,22,0.5488378405570984,0.6617399492970566
llm_goals_305,test,23,0.42538872361183167,0.3090300849059566
llm_goals_305,test,24,0.8092941045761108,0.4310886044152408
llm_goals_305,test,25,0.8113726377487183,0.7346082271960779
llm_goals_305,test,26,0.7942020297050476,0.7784622217319951
llm_goals_305,test,27,0.8089956045150757,0.2285268892180965
llm_goals_305,test,28,0.5331048369407654,0.1970890988987535
llm_goals_305,test,29,0.5950555205345154,0.0543679274789402
llm_goals_305,test,30,0.4180959165096283,0.8325365217586828
llm_goals_305,test,31,0.41067054867744446,0.5078524819974076
llm_goals_305,test,32,0.4233275055885315,0.2061603354174771
llm_goals_305,test,33,0.798490583896637,0.6039954962603894
llm_goals_305,test,34,0.8026878833770752,0.5754865922825898
llm_goals_305,test,35,0.641376793384552,0.1664152616423
llm_goals_305,test,36,0.81670743227005,0.7880486291444165
llm_goals_305,test,37,0.401736319065094,0.03605078558617
llm_goals_305,test,38,0.39163079857826233,0.0575570687729377
llm_goals_305,test,39,0.43582379817962646,0.4297583044645929
llm_goals_305,test,40,0.39934372901916504,0.1915445959084499
llm_goals_305,test,41,0.6088920831680298,0.6536953532364804
llm_goals_305,test,42,0.4137025773525238,0.122527873906398
llm_goals_305,test,43,0.651608943939209,0.1711906255032107
llm_goals_305,test,44,0.8046603202819824,0.8371975999741996
llm_goals_305,test,45,0.513168215751648,0.3836914756409174
llm_goals_305,test,46,0.36846908926963806,0.0543049670678201
llm_goals_305,test,47,0.7707496285438538,0.5245484205428286
llm_goals_305,test,48,0.7995883822441101,0.5969765977742552
llm_goals_305,test,49,0.6264455318450928,0.2351952691938564
llm_goals_81,test,0,0.33586013317108154,0.2060992725380174
llm_goals_81,test,1,0.4653486907482147,0.1581010259271764
llm_goals_81,test,2,0.4734719395637512,0.6899052301996583
llm_goals_81,test,3,0.466547429561615,0.7805136989347726
llm_goals_81,test,4,0.47494593262672424,0.0972643506873081
llm_goals_81,test,5,0.3004654347896576,0.3089356066572108
llm_goals_81,test,6,0.45922043919563293,0.0383605574042886
llm_goals_81,test,7,0.35282251238822937,0.3161536190910042
llm_goals_81,test,8,0.5063422322273254,0.5126683642741202
llm_goals_81,test,9,0.4601188600063324,0.1286837550045714
llm_goals_81,test,10,0.44353699684143066,0.1256029490894943
llm_goals_81,test,11,0.5015520453453064,0.408772132619307
llm_goals_81,test,12,0.3744674623012543,0.1214277758013974
llm_goals_81,test,13,0.4442285895347595,0.0409721836992895
llm_goals_81,test,14,0.4326043725013733,0.0827685330888261
llm_goals_81,test,15,0.39488857984542847,0.224678660518271
llm_goals_81,test,16,0.4889307916164398,0.0872837265533343
llm_goals_81,test,17,0.44879454374313354,0.2837028272857018
llm_goals_81,test,18,0.3743830919265747,0.1193591553103013
llm_goals_81,test,19,0.285736620426178,0.4952181327203863
llm_goals_81,test,20,0.5022264122962952,0.2564665599863756
llm_goals_81,test,21,0.451506644487381,0.2041083994609424
llm_goals_81,test,22,0.4222584366798401,0.3119739178030907
llm_goals_81,test,23,0.2848965525627136,0.0994638293022036
llm_goals_81,test,24,0.4747891128063202,0.1575467450844306
llm_goals_81,test,25,0.4642389416694641,0.0291125673317165
llm_goals_81,test,26,0.4651496112346649,0.0869071714890548
llm_goals_81,test,27,0.46178168058395386,0.1863589895989203
llm_goals_81,test,28,0.5108520984649658,0.119977738469712
llm_goals_81,test,29,0.4070477783679962,0.4410572612602489
llm_goals_81,test,30,0.3676765263080597,0.0555125882985712
llm_goals_81,test,31,0.2888261079788208,0.3486004983652399
llm_goals_81,test,32,0.5003673434257507,0.189941914212834
llm_goals_81,test,33,0.40878933668136597,0.1890390776737304
llm_goals_81,test,34,0.2676036059856415,0.1328217427635933
llm_goals_81,test,35,0.45583340525627136,0.1343397356978365
llm_goals_81,test,36,0.42282262444496155,0.0899701336915993
llm_goals_81,test,37,0.24459443986415863,0.6478431799513802
llm_goals_81,test,38,0.4658063054084778,0.7351879424598803
llm_goals_81,test,39,0.4554332494735718,0.0885373562628188
llm_goals_81,test,40,0.4572749137878418,0.3150516869571108
llm_goals_81,test,41,0.4457017481327057,0.0498826197855212
llm_goals_81,test,42,0.4725190997123718,0.1381899328653333
llm_goals_81,test,43,0.3709631562232971,0.3536647169335493
llm_goals_81,test,44,0.4358276426792145,0.4857917971856136
llm_goals_81,test,45,0.485868901014328,0.3921515914618259
llm_goals_81,test,46,0.4442664682865143,0.1179764264181158
llm_goals_81,test,47,0.4506233334541321,0.2786576050295211
llm_goals_81,test,48,0.47371774911880493,0.0726556891905063
llm_goals_81,test,49,0.5011158585548401,0.3912461690418435
llm_goals_133,test,0,0.6815858483314514,0.4440529323069227
llm_goals_133,test,1,0.5073938369750977,0.3955848959165448
llm_goals_133,test,2,0.6790648698806763,0.5130030466159122
llm_goals_133,test,3,0.6817911863327026,0.220245744335209
llm_goals_133,test,4,0.507601261138916,0.5879895213269322
llm_goals_133,test,5,0.681581974029541,0.8588037554744828
llm_goals_133,test,6,0.5064310431480408,0.8322442189227136
llm_goals_133,test,7,0.5074448585510254,0.6588760518065184
llm_goals_133,test,8,0.5074187517166138,0.5253890103378467
llm_goals_133,test,9,0.5064204931259155,0.8170254264656348
llm_goals_133,test,10,0.5081532001495361,0.4608975636274737
llm_goals_133,test,11,0.5079187154769897,0.2002942757447116
llm_goals_133,test,12,0.5072022676467896,0.4022389740133304
llm_goals_133,test,13,0.6802021265029907,0.8516519653230656
llm_goals_133,test,14,0.5028277039527893,0.0158444788418117
llm_goals_133,test,15,0.5037547945976257,0.5978365001716675
llm_goals_133,test,16,0.5047257542610168,0.0529535109452945
llm_goals_133,test,17,0.5042397975921631,0.2556263204518722
llm_goals_133,test,18,0.5037813782691956,0.0167466717766424
llm_goals_133,test,19,0.6840633749961853,0.0944244428527934
llm_goals_133,test,20,0.5069866180419922,0.6914976263434149
llm_goals_133,test,21,0.5060379505157471,0.8436048633272258
llm_goals_133,test,22,0.5032078623771667,0.2868007235124719
llm_goals_133,test,23,0.506686806678772,0.1160262834732456
llm_goals_133,test,24,0.50456303358078,0.0154812210183983
llm_goals_133,test,25,0.5054448246955872,0.3119789757098087
llm_goals_133,test,26,0.5057482719421387,0.7929420521534107
llm_goals_133,test,27,0.506666362285614,0.2037716603127804
llm_goals_133,test,28,0.5019842982292175,0.3118772754734958
llm_goals_133,test,29,0.6846321821212769,0.1808521166351184
llm_goals_133,test,30,0.6799617409706116,0.5045142070808941
llm_goals_133,test,31,0.686692476272583,0.8498369503935175
llm_goals_133,test,32,0.5045507550239563,0.2694815541177057
llm_goals_133,test,33,0.5073308348655701,0.0177588567715867
llm_goals_133,test,34,0.5035120248794556,0.445395356923409
llm_goals_133,test,35,0.506751537322998,0.4995371449295338
llm_goals_133,test,36,0.6790236830711365,0.4823747071618506
llm_goals_133,test,37,0.6856629848480225,0.0266815206082507
llm_goals_133,test,38,0.6820346117019653,0.0874820007739646
llm_goals_133,test,39,0.5034942626953125,0.3366422826024648
llm_goals_133,test,40,0.5041614770889282,0.2329899847029727
llm_goals_133,test,41,0.6828569769859314,0.8571491285285516
llm_goals_133,test,42,0.505983829498291,0.8193201391590431
llm_goals_133,test,43,0.6818675994873047,0.8422289427227235
llm_goals_133,test,44,0.6854075193405151,0.5294372908894646
llm_goals_133,test,45,0.6781994104385376,0.2588603231187393
llm_goals_133,test,46,0.5222574472427368,0.2337208499272645
llm_goals_133,test,47,0.6836797595024109,0.1166634693988454
llm_goals_133,test,48,0.5024409294128418,0.6138306242825586
llm_goals_133,test,49,0.6820481419563293,0.2806552214021111
llm_goals_157,test,0,0.059976570308208466,0.6355320308190959
llm_goals_157,test,1,0.17726393043994904,0.7424853921670768
llm_goals_157,test,2,0.8333427309989929,0.073314241900373
llm_goals_157,test,3,0.19682055711746216,0.4675850432855137
llm_goals_157,test,4,0.746206521987915,0.5458627288071527
llm_goals_157,test,5,0.09017182141542435,0.0771889198722071
llm_goals_157,test,6,0.17084555327892303,0.0818153292956077
llm_goals_157,test,7,0.053936198353767395,0.0574362851972565
llm_goals_157,test,8,0.7492556571960449,0.612650525482466
llm_goals_157,test,9,0.6820585131645203,0.025148981997583
llm_goals_157,test,10,0.2762869596481323,0.075973919657683
llm_goals_157,test,11,0.5317445993423462,0.1464687737403695
llm_goals_157,test,12,0.5672652721405029,0.7268917489370714
llm_goals_157,test,13,0.08943050354719162,0.0531648084509613
llm_goals_157,test,14,0.7838094234466553,0.3299706213737123
llm_goals_157,test,15,0.6615673899650574,0.3043292296967997
llm_goals_157,test,16,0.3078918159008026,0.7463776357566768
llm_goals_157,test,17,0.16850651800632477,0.8521737469617978
llm_goals_157,test,18,0.6130462288856506,0.6376039881194869
llm_goals_157,test,19,0.8670440912246704,0.6077618725185671
llm_goals_157,test,20,0.7441235780715942,0.7242060188576479
llm_goals_157,test,21,0.0769224613904953,0.2051165554994685
llm_goals_157,test,22,0.8309707045555115,0.7995715125449558
llm_goals_157,test,23,0.1518397480249405,0.1467023010082007
llm_goals_157,test,24,0.36637648940086365,0.3108830330026099
llm_goals_157,test,25,0.2475595623254776,0.1001087986592319
llm_goals_157,test,26,0.831850528717041,0.4552169748909065
llm_goals_157,test,27,0.7254546880722046,0.8485816155817194
llm_goals_157,test,28,0.18201914429664612,0.2265082237706469
llm_goals_157,test,29,0.09073898941278458,0.6283476283012512
llm_goals_157,test,30,0.229563370347023,0.2620283904445711
llm_goals_157,test,31,0.7314267158508301,0.167952021094149
llm_goals_157,test,32,0.06607019156217575,0.0898426790762853
llm_goals_157,test,33,0.49361637234687805,0.5043942365008254
llm_goals_157,test,34,0.24118156731128693,0.0725868476993519
llm_goals_157,test,35,0.059803836047649384,0.1237829648198263
llm_goals_157,test,36,0.26830747723579407,0.1925318890375481
llm_goals_157,test,37,0.07014649361371994,0.6842419436738383
llm_goals_157,test,38,0.7369577884674072,0.691649855522258
llm_goals_157,test,39,0.40673041343688965,0.5264401359594252
llm_goals_157,test,40,0.18810096383094788,0.1777385641102212
llm_goals_157,test,41,0.5415016412734985,0.0328183379619016
llm_goals_157,test,42,0.07050389051437378,0.1949198629220696
llm_goals_157,test,43,0.08911878615617752,0.135028229661103
llm_goals_157,test,44,0.7503442764282227,0.7258662504199943
llm_goals_157,test,45,0.19408227503299713,0.0935995604675028
llm_goals_157,test,46,0.8606125116348267,0.15418174898882
llm_goals_157,test,47,0.18029142916202545,0.3212815335896988
llm_goals_157,test,48,0.0543917752802372,0.2795304252191649
llm_goals_157,test,49,0.6923412084579468,0.7371862441831876
llm_goals_186,test,0,0.12970666587352753,0.0367510709036243
llm_goals_186,test,1,0.48942527174949646,0.9589190483469818
llm_goals_186,test,2,0.13432425260543823,0.0667798014228945
llm_goals_186,test,3,0.14631962776184082,0.0340377111776925
llm_goals_186,test,4,0.3398852050304413,0.4002710909538108
llm_goals_186,test,5,0.8743233680725098,0.8817241785963961
llm_goals_186,test,6,0.8955549001693726,0.8798493094453552
llm_goals_186,test,7,0.12386918812990189,0.0443878504309986
llm_goals_186,test,8,0.3411366939544678,0.4415592471567901
llm_goals_186,test,9,0.8992406725883484,0.8847759344734797
llm_goals_186,test,10,0.8938353657722473,0.8905032311770802
llm_goals_186,test,11,0.8566011190414429,0.704948600699243
llm_goals_186,test,12,0.17281794548034668,0.409597002882604
llm_goals_186,test,13,0.9029480814933777,0.8867287662434378
llm_goals_186,test,14,0.7280864715576172,0.8128145923588551
llm_goals_186,test,15,0.946606457233429,0.4832503599366539
llm_goals_186,test,16,0.7752137184143066,0.8267478910426029
llm_goals_186,test,17,0.6394336223602295,0.957207678971151
llm_goals_186,test,18,0.7748554348945618,0.7610878868091969
llm_goals_186,test,19,0.9466707110404968,0.7872715543385574
llm_goals_186,test,20,0.32686853408813477,0.520514563098938
llm_goals_186,test,21,0.8838757872581482,0.9084513175350956
llm_goals_186,test,22,0.4509865641593933,0.9157385254478472
llm_goals_186,test,23,0.8428546786308289,0.5585242484052028
llm_goals_186,test,24,0.747530460357666,0.8535531187105866
llm_goals_186,test,25,0.8983492255210876,0.8466653814359101
llm_goals_186,test,26,0.35734525322914124,0.8550637970014335
llm_goals_186,test,27,0.8570743203163147,0.9398175480710378
llm_goals_186,test,28,0.1490807980298996,0.0028363068023126
llm_goals_186,test,29,0.8580521941184998,0.8305997204848772
llm_goals_186,test,30,0.897018551826477,0.8987706923113074
llm_goals_186,test,31,0.8421051502227783,0.8880043621770078
llm_goals_186,test,32,0.7508646845817566,0.4759852559635721
llm_goals_186,test,33,0.7453968524932861,0.8450176079466206
llm_goals_186,test,34,0.8991639614105225,0.9532899841201126
llm_goals_186,test,35,0.9404149055480957,0.0290458993492998
llm_goals_186,test,36,0.9062440395355225,0.9324312223548752
llm_goals_186,test,37,0.1828964352607727,0.7062064442252718
llm_goals_186,test,38,0.7257457971572876,0.8475385890175953
llm_goals_186,test,39,0.17914140224456787,0.2451742937665777
llm_goals_186,test,40,0.8537609577178955,0.7628755019143414
llm_goals_186,test,41,0.8410768508911133,0.9118681525061464
llm_goals_186,test,42,0.12078375369310379,0.0299416334392138
llm_goals_186,test,43,0.7885367274284363,0.9088531704037304
llm_goals_186,test,44,0.3299274742603302,0.399223879564458
llm_goals_186,test,45,0.5544095039367676,0.0207412737387863
llm_goals_186,test,46,0.8502131700515747,0.504435185656878
llm_goals_186,test,47,0.14615173637866974,0.0426878976387859
llm_goals_186,test,48,0.7443017363548279,0.8374391175680358
llm_goals_186,test,49,0.8396468758583069,0.9286286845568336
llm_goals_401,test,0,0.8211519122123718,0.9617876405899952
llm_goals_401,test,1,0.836296558380127,0.9404490002093446
llm_goals_401,test,2,0.833615779876709,0.935046673136648
llm_goals_401,test,3,0.8873139023780823,0.9543637645078082
llm_goals_401,test,4,0.8571836948394775,0.9660480618061604
llm_goals_401,test,5,0.7916078567504883,0.9211199313115676
llm_goals_401,test,6,0.8785900473594666,0.902884063341732
llm_goals_401,test,7,0.8864192962646484,0.94729781169548
llm_goals_401,test,8,0.829866886138916,0.957207800210959
llm_goals_401,test,9,0.8478705883026123,0.9145740305533586
llm_goals_401,test,10,0.837413489818573,0.9267561414021268
llm_goals_401,test,11,0.8370048999786377,0.9242701512661912
llm_goals_401,test,12,0.8210302591323853,0.8435903191618971
llm_goals_401,test,13,0.34473589062690735,0.938192186231483
llm_goals_401,test,14,0.28501081466674805,0.8529976602342201
llm_goals_401,test,15,0.8265918493270874,0.9478884329856586
llm_goals_401,test,16,0.278807133436203,0.8192354997719525
llm_goals_401,test,17,0.8363010287284851,0.6739075253562599
llm_goals_401,test,18,0.28801417350769043,0.5197439125792754
llm_goals_401,test,19,0.8912349343299866,0.8583977843568281
llm_goals_401,test,20,0.806387722492218,0.9455993763925636
llm_goals_401,test,21,0.7697662711143494,0.9705763460601288
llm_goals_401,test,22,0.8568733930587769,0.8491748031226035
llm_goals_401,test,23,0.7032168507575989,0.8515620190775874
llm_goals_401,test,24,0.5848453044891357,0.9138265832467732
llm_goals_401,test,25,0.8368738889694214,0.9028179661562172
llm_goals_401,test,26,0.49667179584503174,0.9558187125982178
llm_goals_401,test,27,0.8347898721694946,0.5612720368847214
llm_goals_401,test,28,0.8166528940200806,0.9617025244944858
llm_goals_401,test,29,0.8897502422332764,0.8060652645257104
llm_goals_401,test,30,0.8442996144294739,0.8896843204884566
llm_goals_401,test,31,0.7689225077629089,0.9526923309875832
llm_goals_401,test,32,0.8851990103721619,0.8506568677811456
llm_goals_401,test,33,0.2816685140132904,0.7802594846608962
llm_goals_401,test,34,0.8345659375190735,0.8870658880824575
llm_goals_401,test,35,0.4030866026878357,0.954492271718892
llm_goals_401,test,36,0.2843870222568512,0.562048146808884
llm_goals_401,test,37,0.7960929870605469,0.7781126533121301
llm_goals_401,test,38,0.8345980644226074,0.8591165497422784
llm_goals_401,test,39,0.782030463218689,0.8788934372485709
llm_goals_401,test,40,0.8548465371131897,0.8091102829935539
llm_goals_401,test,41,0.8058022856712341,0.8675616408909181
llm_goals_401,test,42,0.8334419131278992,0.949993671778176
llm_goals_401,test,43,0.8394643068313599,0.9467448581700896
llm_goals_401,test,44,0.8244726061820984,0.8687242293966851
llm_goals_401,test,45,0.827147901058197,0.9587250339741858
llm_goals_401,test,46,0.2092258185148239,0.860452357605835
llm_goals_401,test,47,0.7962934970855713,0.9253614550163576
llm_goals_401,test,48,0.31091392040252686,0.8647797053472309
llm_goals_401,test,49,0.8272743821144104,0.9454760570818088
llm_goals_420,test,0,0.3924897313117981,0.9711560599789952
llm_goals_420,test,1,0.3603971302509308,0.5978053926454786
llm_goals_420,test,2,0.25695493817329407,0.0817351976918742
llm_goals_420,test,3,0.5102593302726746,0.4730792485055278
llm_goals_420,test,4,0.26275911927223206,0.8487827178347965
llm_goals_420,test,5,0.25205734372138977,0.1386299974231985
llm_goals_420,test,6,0.26556238532066345,0.9677888754301456
llm_goals_420,test,7,0.26038116216659546,0.9193048504515688
llm_goals_420,test,8,0.25709494948387146,0.1105940972856699
llm_goals_420,test,9,0.496059387922287,0.0770199238560772
llm_goals_420,test,10,0.2588590085506439,0.6569762799970394
llm_goals_420,test,11,0.4907619059085846,0.4533228613613927
llm_goals_420,test,12,0.2573194205760956,0.9317349816078236
llm_goals_420,test,13,0.25811874866485596,0.5042190525433249
llm_goals_420,test,14,0.25842344760894775,0.1440102415442949
llm_goals_420,test,15,0.2642943561077118,0.3273024402815145
llm_goals_420,test,16,0.49911075830459595,0.3681827771891491
llm_goals_420,test,17,0.26829954981803894,0.4632337459639504
llm_goals_420,test,18,0.26168620586395264,0.0396023029282074
llm_goals_420,test,19,0.25578463077545166,0.1093558671361818
llm_goals_420,test,20,0.2826714515686035,0.0396252534530884
llm_goals_420,test,21,0.2810825705528259,0.0465700238365722
llm_goals_420,test,22,0.26543259620666504,0.1916998836408959
llm_goals_420,test,23,0.26338914036750793,0.1681177802630551
llm_goals_420,test,24,0.41956084966659546,0.1698145969501656
llm_goals_420,test,25,0.26630377769470215,0.9829779862454848
llm_goals_420,test,26,0.266163170337677,0.0308922007548325
llm_goals_420,test,27,0.49411797523498535,0.8125830946093627
llm_goals_420,test,28,0.2595211863517761,0.9367654930611784
llm_goals_420,test,29,0.25869107246398926,0.8184437096518178
llm_goals_420,test,30,0.25943291187286377,0.9386603865018432
llm_goals_420,test,31,0.25458598136901855,0.10851579640767
llm_goals_420,test,32,0.461353063583374,0.9666374926215946
llm_goals_420,test,33,0.436784952878952,0.8901044715204638
llm_goals_420,test,34,0.43224865198135376,0.9215939624410928
llm_goals_420,test,35,0.27474096417427063,0.5267802921981926
llm_goals_420,test,36,0.25801485776901245,0.092175658510858
llm_goals_420,test,37,0.2742716372013092,0.984362841437198
llm_goals_420,test,38,0.26331543922424316,0.8828047505171253
llm_goals_420,test,39,0.41905853152275085,0.2972862903877061
llm_goals_420,test,40,0.26247361302375793,0.0444444838151291
llm_goals_420,test,41,0.4904870390892029,0.5478361768565629
llm_goals_420,test,42,0.2722010314464569,0.0713350995649345
llm_goals_420,test,43,0.25794997811317444,0.5106248261137879
llm_goals_420,test,44,0.26445773243904114,0.0905225225380096
llm_goals_420,test,45,0.47427359223365784,0.1531352912609338
llm_goals_420,test,46,0.26655492186546326,0.1216667665904784
llm_goals_420,test,47,0.29410049319267273,0.1240460201984882
llm_goals_420,test,48,0.27110904455184937,0.0596825371886435
llm_goals_420,test,49,0.2910600006580353,0.8335117138475971
llm_goals_263,test,0,0.7679628729820251,0.5209935935366439
llm_goals_263,test,1,0.5824896097183228,0.7968068746445729
llm_goals_263,test,2,0.7649327516555786,0.5467470232144288
llm_goals_263,test,3,0.4503518342971802,0.4733215007319302
llm_goals_263,test,4,0.8010583519935608,0.0057083956844275
llm_goals_263,test,5,0.4584736227989197,0.3493473596509424
llm_goals_263,test,6,0.7820982933044434,0.8201550249820808
llm_goals_263,test,7,0.5667651891708374,0.6557906816477753
llm_goals_263,test,8,0.43974387645721436,0.7543426271500917
llm_goals_263,test,9,0.45037582516670227,0.2009829696053626
llm_goals_263,test,10,0.8111894130706787,0.8489753664207734
llm_goals_263,test,11,0.7557880282402039,0.652436611134806
llm_goals_263,test,12,0.6117441058158875,0.8143466734577086
llm_goals_263,test,13,0.7236234545707703,0.4245607212842352
llm_goals_263,test,14,0.7708999514579773,0.0036452536113296
llm_goals_263,test,15,0.6819190382957458,0.0558692673280436
llm_goals_263,test,16,0.4820389151573181,0.0066133854077101
llm_goals_263,test,17,0.45912131667137146,0.7648655133279786
llm_goals_263,test,18,0.45144498348236084,0.006651634113929
llm_goals_263,test,19,0.6712696552276611,0.0487544569045993
llm_goals_263,test,20,0.7402887344360352,0.0331700030843074
llm_goals_263,test,21,0.7618305683135986,0.2223244924357846
llm_goals_263,test,22,0.8751500248908997,0.8850367693524676
llm_goals_263,test,23,0.7879300117492676,0.6136206316393849
llm_goals_263,test,24,0.45249220728874207,0.0103991722036265
llm_goals_263,test,25,0.7488517165184021,0.7490420921699364
llm_goals_263,test,26,0.7116979956626892,0.0149267434716544
llm_goals_263,test,27,0.7485409379005432,0.7989164572724189
llm_goals_263,test,28,0.450557678937912,0.6376088146659757
llm_goals_263,test,29,0.4609370231628418,0.0419343519282845
llm_goals_263,test,30,0.8053397536277771,0.8598701912381717
llm_goals_263,test,31,0.7548306584358215,0.168156003178124
llm_goals_263,test,32,0.7560549378395081,0.6210063725022936
llm_goals_263,test,33,0.634270966053009,0.0062242916440703
llm_goals_263,test,34,0.7788557410240173,0.5691545063377933
llm_goals_263,test,35,0.7784920334815979,0.7982008481236453
llm_goals_263,test,36,0.752998948097229,0.763706617142148
llm_goals_263,test,37,0.793616533279419,0.0342430752915087
llm_goals_263,test,38,0.7542886734008789,0.1222660265497154
llm_goals_263,test,39,0.7678821682929993,0.8269810089104592
llm_goals_263,test,40,0.7563211917877197,0.5318831885299807
llm_goals_263,test,41,0.44723087549209595,0.3207528198494705
llm_goals_263,test,42,0.8814297914505005,0.4801431121790341
llm_goals_263,test,43,0.7721433043479919,0.1981375631842198
llm_goals_263,test,44,0.43466559052467346,0.1080272425734687
llm_goals_263,test,45,0.661138653755188,0.8354177467829539
llm_goals_263,test,46,0.66447913646698,0.7413273974280716
llm_goals_263,test,47,0.7540163397789001,0.4918597295945824
llm_goals_263,test,48,0.48512810468673706,0.0043793883332247
llm_goals_263,test,49,0.8175636529922485,0.949535011062065
llm_goals_427,test,0,0.5616303086280823,0.2826418194423331
llm_goals_427,test,1,0.7576030492782593,0.380977464557011
llm_goals_427,test,2,0.5100253820419312,0.1487359361961987
llm_goals_427,test,3,0.48225390911102295,0.3576580711889061
llm_goals_427,test,4,0.7427163124084473,0.2270212997591984
llm_goals_427,test,5,0.6799509525299072,0.2711625929555684
llm_goals_427,test,6,0.7612206935882568,0.1402530886796432
llm_goals_427,test,7,0.6110060214996338,0.2935996266753878
llm_goals_427,test,8,0.7013369202613831,0.2137670314614922
llm_goals_427,test,9,0.6752023696899414,0.2160041132788326
llm_goals_427,test,10,0.7331899404525757,0.3669200145460971
llm_goals_427,test,11,0.6845360398292542,0.1516882402250043
llm_goals_427,test,12,0.5448315739631653,0.1475453757541205
llm_goals_427,test,13,0.591892421245575,0.1350458405109811
llm_goals_427,test,14,0.5162834525108337,0.2337964230636721
llm_goals_427,test,15,0.7561149001121521,0.3115728193455654
llm_goals_427,test,16,0.4943295121192932,0.131520745523965
llm_goals_427,test,17,0.7266172766685486,0.3278496493117007
llm_goals_427,test,18,0.7444018721580505,0.3527567015929387
llm_goals_427,test,19,0.4647007882595062,0.2313815817463674
llm_goals_427,test,20,0.7325294613838196,0.3221674001172885
llm_goals_427,test,21,0.6689926981925964,0.2494982149574568
llm_goals_427,test,22,0.7281269431114197,0.3779860883941361
llm_goals_427,test,23,0.5331547856330872,0.3509138567308049
llm_goals_427,test,24,0.7126919627189636,0.1946130607095621
llm_goals_427,test,25,0.6296139359474182,0.228222711067827
llm_goals_427,test,26,0.7368202209472656,0.2768263295001016
llm_goals_427,test,27,0.5279528498649597,0.2199474495731038
llm_goals_427,test,28,0.650202214717865,0.1579098434370231
llm_goals_427,test,29,0.4824393689632416,0.069843346359134
llm_goals_427,test,30,0.7520275712013245,0.5359787931933129
llm_goals_427,test,31,0.4854195713996887,0.1293182240484197
llm_goals_427,test,32,0.5758863687515259,0.2396093502744168
llm_goals_427,test,33,0.7512494921684265,0.1913976476875258
llm_goals_427,test,34,0.7407684922218323,0.4990200469731286
llm_goals_427,test,35,0.51933354139328,0.357807030322738
llm_goals_427,test,36,0.7509183287620544,0.2580007688260685
llm_goals_427,test,37,0.4866988956928253,0.4031150735356658
llm_goals_427,test,38,0.6893041133880615,0.2737247490124149
llm_goals_427,test,39,0.5126136541366577,0.2478362318668904
llm_goals_427,test,40,0.6746727824211121,0.5026721270474518
llm_goals_427,test,41,0.6701657772064209,0.2273186194850299
llm_goals_427,test,42,0.4667811691761017,0.4444071308114509
llm_goals_427,test,43,0.7372493743896484,0.2369720640870139
llm_goals_427,test,44,0.7913563251495361,0.3961639877527012
llm_goals_427,test,45,0.5449885129928589,0.2771055535106792
llm_goals_427,test,46,0.7847160696983337,0.3138421601355035
llm_goals_427,test,47,0.47225508093833923,0.3609031182529178
llm_goals_427,test,48,0.6617470979690552,0.1899314535085168
llm_goals_427,test,49,0.7585927248001099,0.4452677872765214
llm_goals_449,test,0,0.05778774619102478,0.0338418398422384
llm_goals_449,test,1,0.10886172950267792,0.5657697432527007
llm_goals_449,test,2,0.05721224471926689,0.0110135668657682
llm_goals_449,test,3,0.08459136635065079,0.2352946829974157
llm_goals_449,test,4,0.27772536873817444,0.4842577903424374
llm_goals_449,test,5,0.059063129127025604,0.3485549555066181
llm_goals_449,test,6,0.05929182469844818,0.0519669241570106
llm_goals_449,test,7,0.06074311584234238,0.0031269987549636
llm_goals_449,test,8,0.2734565734863281,0.2974090419757364
llm_goals_449,test,9,0.05725017189979553,0.0925536782452943
llm_goals_449,test,10,0.0859001949429512,0.4386853694122202
llm_goals_449,test,11,0.05963173136115074,0.0063782741293255
llm_goals_449,test,12,0.05728689208626747,0.0714949969968408
llm_goals_449,test,13,0.0665278360247612,0.0227751515807358
llm_goals_449,test,14,0.07350324094295502,0.3195753835685478
llm_goals_449,test,15,0.0835351049900055,0.2420638006790782
llm_goals_449,test,16,0.06280023604631424,0.0953277975239941
llm_goals_449,test,17,0.0594259537756443,0.5835221517250182
llm_goals_449,test,18,0.057742197066545486,0.1603112674891041
llm_goals_449,test,19,0.12786942720413208,0.6833613521345718
llm_goals_449,test,20,0.20303627848625183,0.4549636770044163
llm_goals_449,test,21,0.058795008808374405,0.1579551412473356
llm_goals_449,test,22,0.05763568729162216,0.4400750405221786
llm_goals_449,test,23,0.06067581847310066,0.0251530775884869
llm_goals_449,test,24,0.06049317121505737,0.2695772384712642
llm_goals_449,test,25,0.28261128067970276,0.4519585097229938
llm_goals_449,test,26,0.05861532315611839,0.1123650856049343
llm_goals_449,test,27,0.05769834294915199,0.4873581137262729
llm_goals_449,test,28,0.0577009916305542,0.1751834751469549
llm_goals_449,test,29,0.07694519311189651,0.5804994670396382
llm_goals_449,test,30,0.07704895734786987,0.4626257946707522
llm_goals_449,test,31,0.05960022285580635,0.093699276239858
llm_goals_449,test,32,0.05786284804344177,0.0067638379987912
llm_goals_449,test,33,0.0614929236471653,0.1681834079617312
llm_goals_449,test,34,0.17671476304531097,0.3493466516790004
llm_goals_449,test,35,0.06140816956758499,0.0068633490722964
llm_goals_449,test,36,0.07450537383556366,0.3064552348162913
llm_goals_449,test,37,0.05967814102768898,0.6652150761082033
llm_goals_449,test,38,0.06415445357561111,0.6741024889471738
llm_goals_449,test,39,0.057716596871614456,0.0101379083480811
llm_goals_449,test,40,0.057579703629016876,0.0089588521989727
llm_goals_449,test,41,0.058210570365190506,0.1780122280374072
llm_goals_449,test,42,0.06285829097032547,0.0051090174127486
llm_goals_449,test,43,0.058147966861724854,0.2498444495344637
llm_goals_449,test,44,0.059539083391427994,0.221646438441774
llm_goals_449,test,45,0.06365463882684708,0.2094790944562388
llm_goals_449,test,46,0.0697348341345787,0.0060341568900946
llm_goals_449,test,47,0.05828874558210373,0.1241710666090617
llm_goals_449,test,48,0.05966491997241974,0.1338989075405874
llm_goals_449,test,49,0.05910857766866684,0.5982472498607049
llm_goals_93,test,0,0.5557652711868286,0.0729546648027557
llm_goals_93,test,1,0.5557674765586853,0.6981648167380458
llm_goals_93,test,2,0.5557966232299805,0.3318565449695651
llm_goals_93,test,3,0.5558145046234131,0.5505450337981316
llm_goals_93,test,4,0.5557373762130737,0.9547701486411684
llm_goals_93,test,5,0.5558027029037476,0.8378989133869518
llm_goals_93,test,6,0.5558111667633057,0.8645678173925403
llm_goals_93,test,7,0.5557503700256348,0.8034776985663136
llm_goals_93,test,8,0.5557932257652283,0.9139015687588464
llm_goals_93,test,9,0.5558247566223145,0.84076714934408
llm_goals_93,test,10,0.5558188557624817,0.6739811171843284
llm_goals_93,test,11,0.5558066368103027,0.1039005746210211
llm_goals_93,test,12,0.5558418035507202,0.4267719606730696
llm_goals_93,test,13,0.555747389793396,0.8366404177501517
llm_goals_93,test,14,0.5558047294616699,0.51043302648597
llm_goals_93,test,15,0.555851399898529,0.9305120134073452
llm_goals_93,test,16,0.5558375716209412,0.3610424966705102
llm_goals_93,test,17,0.5557391047477722,0.6383014796360676
llm_goals_93,test,18,0.5558238625526428,0.7718638733110297
llm_goals_93,test,19,0.5558077096939087,0.6413577201238602
llm_goals_93,test,20,0.5557910799980164,0.9315750038397262
llm_goals_93,test,21,0.5558059215545654,0.959779321458113
llm_goals_93,test,22,0.5557204484939575,0.8212707656396169
llm_goals_93,test,23,0.5557729601860046,0.0620020892855925
llm_goals_93,test,24,0.5557795763015747,0.6707394124648912
llm_goals_93,test,25,0.5557851195335388,0.8513622881275814
llm_goals_93,test,26,0.5558446049690247,0.9295520797235572
llm_goals_93,test,27,0.5558168888092041,0.731125599089208
llm_goals_93,test,28,0.5552781224250793,0.6387209955509132
llm_goals_93,test,29,0.5558428764343262,0.904010833763946
llm_goals_93,test,30,0.5558196306228638,0.6688304494781727
llm_goals_93,test,31,0.5557743310928345,0.7246857956085144
llm_goals_93,test,32,0.5558003783226013,0.8395389979928227
llm_goals_93,test,33,0.5558424592018127,0.4902935277900161
llm_goals_93,test,34,0.5557606816291809,0.5478889802411547
llm_goals_93,test,35,0.5557995438575745,0.8178324059837278
llm_goals_93,test,36,0.5558460354804993,0.8865698576591567
llm_goals_93,test,37,0.5557506680488586,0.9194802654649477
llm_goals_93,test,38,0.5558769702911377,0.8798558779688433
llm_goals_93,test,39,0.5558159947395325,0.2399301633758412
llm_goals_93,test,40,0.5558245182037354,0.946265162703226
llm_goals_93,test,41,0.5557635426521301,0.8435175392494392
llm_goals_93,test,42,0.5555887818336487,0.260949780916787
llm_goals_93,test,43,0.5558292865753174,0.8190814792570137
llm_goals_93,test,44,0.5558475852012634,0.951412738003951
llm_goals_93,test,45,0.5558596253395081,0.5085180485155563
llm_goals_93,test,46,0.5558071136474609,0.946404217454057
llm_goals_93,test,47,0.5556874871253967,0.5778150570910173
llm_goals_93,test,48,0.5558086037635803,0.6859809771933842
llm_goals_93,test,49,0.5558453798294067,0.6187362092588319
llm_goals_358,test,0,0.943760097026825,0.9198604804287828
llm_goals_358,test,1,0.8209458589553833,0.8623428941495346
llm_goals_358,test,2,0.9443225264549255,0.8146633538972576
llm_goals_358,test,3,0.9460855722427368,0.6635203414505186
llm_goals_358,test,4,0.9453287124633789,0.9873765704706148
llm_goals_358,test,5,0.8996373414993286,0.8085697334742067
llm_goals_358,test,6,0.9350990653038025,0.8449593731392603
llm_goals_358,test,7,0.9327251315116882,0.7802023295414534
llm_goals_358,test,8,0.9469171166419983,0.5598487865057791
llm_goals_358,test,9,0.943597674369812,0.7689259718491731
llm_goals_358,test,10,0.9134064316749573,0.4090564458803354
llm_goals_358,test,11,0.9223451018333435,0.7281676519766522
llm_goals_358,test,12,0.8516788482666016,0.781706684015145
llm_goals_358,test,13,0.9408815503120422,0.6175350663762746
llm_goals_358,test,14,0.1307852864265442,0.1259366314847969
llm_goals_358,test,15,0.9462966918945312,0.9665391433681092
llm_goals_358,test,16,0.8898636698722839,0.0977212357476949
llm_goals_358,test,17,0.5890902876853943,0.839291034278836
llm_goals_358,test,18,0.9157592058181763,0.0422931239637008
llm_goals_358,test,19,0.8203312158584595,0.3805861792548485
llm_goals_358,test,20,0.934543788433075,0.972559035375534
llm_goals_358,test,21,0.9420482516288757,0.7844216881478699
llm_goals_358,test,22,0.9272018074989319,0.8432835924874589
llm_goals_358,test,23,0.12905967235565186,0.5831400725193898
llm_goals_358,test,24,0.1349385380744934,0.4717356494812245
llm_goals_358,test,25,0.11508543789386749,0.5591997574513735
llm_goals_358,test,26,0.17333486676216125,0.2416094818565881
llm_goals_358,test,27,0.817949652671814,0.8557277309389357
llm_goals_358,test,28,0.9442237019538879,0.7784201381020285
llm_goals_358,test,29,0.48859483003616333,0.9848793746115032
llm_goals_358,test,30,0.1781836748123169,0.6211451382223804
llm_goals_358,test,31,0.9088481068611145,0.3507967043222499
llm_goals_358,test,32,0.12746946513652802,0.670993868258675
llm_goals_358,test,33,0.8252044916152954,0.0563775762088827
llm_goals_358,test,34,0.11972624063491821,0.4923522105012786
llm_goals_358,test,35,0.9438498616218567,0.8157534628947803
llm_goals_358,test,36,0.7246585488319397,0.7491294702469764
llm_goals_358,test,37,0.6308338642120361,0.5478990059483921
llm_goals_358,test,38,0.20752887427806854,0.9873113036659013
llm_goals_358,test,39,0.1582687944173813,0.6807296173615788
llm_goals_358,test,40,0.947882890701294,0.6446558925323718
llm_goals_358,test,41,0.1554749459028244,0.3360622093964198
llm_goals_358,test,42,0.9444658756256104,0.6737437200032999
llm_goals_358,test,43,0.9185907244682312,0.453765234769123
llm_goals_358,test,44,0.1662631332874298,0.921309112250278
llm_goals_358,test,45,0.1466795653104782,0.5133269325272689
llm_goals_358,test,46,0.9301704168319702,0.7619134214235583
llm_goals_358,test,47,0.9286240339279175,0.5260478340890856
llm_goals_358,test,48,0.1469983160495758,0.0910570466126295
llm_goals_358,test,49,0.81022047996521,0.4710935988277175
