template_id,split,question_idx,prediction,label
llm_goals_75,test,0,0.553112268447876,0.0759730864369293
llm_goals_75,test,1,0.5639644265174866,0.8120127334654941
llm_goals_75,test,2,0.668025016784668,0.114709127325878
llm_goals_75,test,3,0.45600640773773193,0.1496870071208148
llm_goals_75,test,4,0.41637325286865234,0.321632167436288
llm_goals_75,test,5,0.32695040106773376,0.0308084020440725
llm_goals_75,test,6,0.3623933792114258,0.1843175427563358
llm_goals_75,test,7,0.5643738508224487,0.0324508518984932
llm_goals_75,test,8,0.5728461146354675,0.5189439655904322
llm_goals_75,test,9,0.23073872923851013,0.1502023728460554
llm_goals_75,test,10,0.5279655456542969,0.7657257241793485
llm_goals_75,test,11,0.5771297216415405,0.8035850534204568
llm_goals_75,test,12,0.4090885519981384,0.7307072346740583
llm_goals_75,test,13,0.5376987457275391,0.048717541539827
llm_goals_75,test,14,0.4868634045124054,0.4316714754134473
llm_goals_75,test,15,0.4291263520717621,0.3935335107130105
llm_goals_75,test,16,0.44727811217308044,0.6144967070154126
llm_goals_75,test,17,0.5357984900474548,0.4488809036296748
llm_goals_75,test,18,0.340131014585495,0.3920271677291012
llm_goals_75,test,19,0.5312612056732178,0.7810532991351343
llm_goals_75,test,20,0.40768328309059143,0.5443958675997833
llm_goals_75,test,21,0.46000221371650696,0.1604437012338976
llm_goals_75,test,22,0.3684799373149872,0.856696781503626
llm_goals_75,test,23,0.6506277918815613,0.590876283737086
llm_goals_75,test,24,0.46679869294166565,0.4156776627984636
llm_goals_75,test,25,0.4825424253940582,0.5957841371692396
llm_goals_75,test,26,0.3701419234275818,0.3752723036244256
llm_goals_75,test,27,0.5456343293190002,0.7866846353759014
llm_goals_75,test,28,0.48256033658981323,0.1838073487826972
llm_goals_75,test,29,0.4847381114959717,0.853944837222159
llm_goals_75,test,30,0.5313201546669006,0.5992601606803653
llm_goals_75,test,31,0.4004783630371094,0.0743179753131944
llm_goals_75,test,32,0.6437198519706726,0.8015013965908157
llm_goals_75,test,33,0.5593297481536865,0.3498422912836759
llm_goals_75,test,34,0.7060718536376953,0.6692496458902099
llm_goals_75,test,35,0.6345694661140442,0.134422492314073
llm_goals_75,test,36,0.5712311863899231,0.6683886675406214
llm_goals_75,test,37,0.4890398383140564,0.7419528346018889
llm_goals_75,test,38,0.4444812834262848,0.6995551035614008
llm_goals_75,test,39,0.49685749411582947,0.4983520000991506
llm_goals_75,test,40,0.4396076798439026,0.8622382893733471
llm_goals_75,test,41,0.3176223039627075,0.1026909531781266
llm_goals_75,test,42,0.6005290746688843,0.0240063792397545
llm_goals_75,test,43,0.43748441338539124,0.1143049422953708
llm_goals_75,test,44,0.4328930377960205,0.6884653626060131
llm_goals_75,test,45,0.46370378136634827,0.1059329319681491
llm_goals_75,test,46,0.4111327826976776,0.405588044943207
llm_goals_75,test,47,0.4491312801837921,0.1393457779753145
llm_goals_75,test,48,0.38276782631874084,0.3670858714001939
llm_goals_75,test,49,0.3508621156215668,0.8052537448889737
llm_goals_78,test,0,0.7165425419807434,0.710685793210421
llm_goals_78,test,1,0.6827489137649536,0.4521352090792044
llm_goals_78,test,2,0.7612690329551697,0.6984783435600235
llm_goals_78,test,3,0.6173927187919617,0.2113029164438998
llm_goals_78,test,4,0.44977647066116333,0.59362413330766
llm_goals_78,test,5,0.4374322295188904,0.2973601904856865
llm_goals_78,test,6,0.5095874667167664,0.4911861319335979
llm_goals_78,test,7,0.7560422420501709,0.5109292120432304
llm_goals_78,test,8,0.4002310633659363,0.0214058927819057
llm_goals_78,test,9,0.5916089415550232,0.4561729519761929
llm_goals_78,test,10,0.5069184303283691,0.0679189625299845
llm_goals_78,test,11,0.6946478486061096,0.0515682065315942
llm_goals_78,test,12,0.48298972845077515,0.2832565853408079
llm_goals_78,test,13,0.489661306142807,0.3661356287525266
llm_goals_78,test,14,0.6120308041572571,0.5580731095047592
llm_goals_78,test,15,0.3189299702644348,0.3539146882613382
llm_goals_78,test,16,0.620506227016449,0.5198975918853231
llm_goals_78,test,17,0.663036048412323,0.5974518854580513
llm_goals_78,test,18,0.5562995672225952,0.2539582988569515
llm_goals_78,test,19,0.583354651927948,0.7818671446797811
llm_goals_78,test,20,0.46767479181289673,0.0817902870466741
llm_goals_78,test,21,0.693143367767334,0.5313736848686219
llm_goals_78,test,22,0.6656010150909424,0.0578457193154468
llm_goals_78,test,23,0.7560608983039856,0.0588067732422271
llm_goals_78,test,24,0.6341980695724487,0.504620490567031
llm_goals_78,test,25,0.44282063841819763,0.1421301439321696
llm_goals_78,test,26,0.5907727479934692,0.4043160008204248
llm_goals_78,test,27,0.5322402119636536,0.1341547030792127
llm_goals_78,test,28,0.5005925297737122,0.585613528389559
llm_goals_78,test,29,0.6010460257530212,0.8037635629119642
llm_goals_78,test,30,0.5281146168708801,0.2785354206868534
llm_goals_78,test,31,0.6186860203742981,0.5074585147112473
llm_goals_78,test,32,0.7274994850158691,0.1365325187594412
llm_goals_78,test,33,0.5509213805198669,0.559776178990876
llm_goals_78,test,34,0.5913699269294739,0.3923464392379233
llm_goals_78,test,35,0.8360570073127747,0.8694947336510349
llm_goals_78,test,36,0.5161992311477661,0.3406365028138333
llm_goals_78,test,37,0.6173611283302307,0.7407991807699866
llm_goals_78,test,38,0.6394096612930298,0.5783193820848918
llm_goals_78,test,39,0.5046607851982117,0.2552456607848852
llm_goals_78,test,40,0.607424795627594,0.0543816822443674
llm_goals_78,test,41,0.6231091618537903,0.5111749720190839
llm_goals_78,test,42,0.7650437355041504,0.6373460300781935
llm_goals_78,test,43,0.7050179243087769,0.5824615770869916
llm_goals_78,test,44,0.28307589888572693,0.0725045867582824
llm_goals_78,test,45,0.4407130479812622,0.2055844845637347
llm_goals_78,test,46,0.6336274743080139,0.3443727538373435
llm_goals_78,test,47,0.4406143128871918,0.5137599661573641
llm_goals_78,test,48,0.6325286626815796,0.2386868392211005
llm_goals_78,test,49,0.6398804187774658,0.1092249314217729
llm_goals_53,test,0,0.11835300177335739,0.0313037944392233
llm_goals_53,test,1,0.18777593970298767,0.8240056677766623
llm_goals_53,test,2,0.146831676363945,0.1449565288533016
llm_goals_53,test,3,0.21687161922454834,0.3918474727299789
llm_goals_53,test,4,0.21993158757686615,0.8286818229760559
llm_goals_53,test,5,0.16262242197990417,0.0496580173268359
llm_goals_53,test,6,0.1307680606842041,0.0987357123371914
llm_goals_53,test,7,0.16039012372493744,0.0188377606185046
llm_goals_53,test,8,0.29455310106277466,0.4663370658319237
llm_goals_53,test,9,0.13238568603992462,0.0353320635735392
llm_goals_53,test,10,0.20222775638103485,0.2465247528295183
llm_goals_53,test,11,0.22512394189834595,0.3482216267575191
llm_goals_53,test,12,0.17442044615745544,0.252877033485255
llm_goals_53,test,13,0.19514162838459015,0.1153444147006914
llm_goals_53,test,14,0.2736334800720215,0.3868210483124377
llm_goals_53,test,15,0.33417809009552,0.3705090883623566
llm_goals_53,test,16,0.2738836109638214,0.3407126088990169
llm_goals_53,test,17,0.18222013115882874,0.7543820151744927
llm_goals_53,test,18,0.2690262198448181,0.3322955201354975
llm_goals_53,test,19,0.15939931571483612,0.2261316448538241
llm_goals_53,test,20,0.2360212802886963,0.7343425814116562
llm_goals_53,test,21,0.21533815562725067,0.1601759070107346
llm_goals_53,test,22,0.12273318320512772,0.4190051893758809
llm_goals_53,test,23,0.22254501283168793,0.2354442333363782
llm_goals_53,test,24,0.332631915807724,0.9719922543639756
llm_goals_53,test,25,0.18939758837223053,0.3289260309913144
llm_goals_53,test,26,0.32494330406188965,0.5781819989075234
llm_goals_53,test,27,0.15871590375900269,0.790876622992061
llm_goals_53,test,28,0.2266205996274948,0.2747068584160578
llm_goals_53,test,29,0.1578790843486786,0.796844478423965
llm_goals_53,test,30,0.21503402292728424,0.3573432990729852
llm_goals_53,test,31,0.19350023567676544,0.1489779797461384
llm_goals_53,test,32,0.19126103818416595,0.4788477745757265
llm_goals_53,test,33,0.30354082584381104,0.967617631882366
llm_goals_53,test,34,0.24470074474811554,0.2252558680565209
llm_goals_53,test,35,0.28321611881256104,0.2300146581986259
llm_goals_53,test,36,0.16666525602340698,0.2311446011459964
llm_goals_53,test,37,0.1461750566959381,0.144409781340655
llm_goals_53,test,38,0.1864393800497055,0.705274211964156
llm_goals_53,test,39,0.34151920676231384,0.5921281985506138
llm_goals_53,test,40,0.13831359148025513,0.2443004460536172
llm_goals_53,test,41,0.1431717574596405,0.061497045516474
llm_goals_53,test,42,0.1399790197610855,0.1570495895565415
llm_goals_53,test,43,0.13329912722110748,0.0641649100604756
llm_goals_53,test,44,0.25915154814720154,0.8074959504603542
llm_goals_53,test,45,0.20414172112941742,0.1813221143349916
llm_goals_53,test,46,0.23425070941448212,0.2352295334667747
llm_goals_53,test,47,0.2382708340883255,0.820244135045574
llm_goals_53,test,48,0.2594515383243561,0.4512817747836695
llm_goals_53,test,49,0.1422775387763977,0.7022675298006026
llm_goals_305,test,0,0.5278694033622742,0.0438240495208372
llm_goals_305,test,1,0.7676090002059937,0.1352509650243347
llm_goals_305,test,2,0.5658125281333923,0.1723536643548477
llm_goals_305,test,3,0.7328177690505981,0.250094549889143
llm_goals_305,test,4,0.7309239506721497,0.8419536365764244
llm_goals_305,test,5,0.3813496530056,0.1913177614094214
llm_goals_305,test,6,0.43172505497932434,0.3723916849874114
llm_goals_305,test,7,0.47871842980384827,0.3337485526549648
llm_goals_305,test,8,0.6929181218147278,0.5824263052609597
llm_goals_305,test,9,0.49133387207984924,0.620184539039364
llm_goals_305,test,10,0.7134689092636108,0.494000537986959
llm_goals_305,test,11,0.578776478767395,0.0482253546984711
llm_goals_305,test,12,0.4458802044391632,0.0565816635108951
llm_goals_305,test,13,0.6103016138076782,0.1136796894182332
llm_goals_305,test,14,0.805057168006897,0.6102884358627442
llm_goals_305,test,15,0.7687990665435791,0.8302422164176205
llm_goals_305,test,16,0.8374454379081726,0.589511050327059
llm_goals_305,test,17,0.6750394105911255,0.2622011524235305
llm_goals_305,test,18,0.7261456251144409,0.3737828315077605
llm_goals_305,test,19,0.5238999128341675,0.0401465973617238
llm_goals_305,test,20,0.7513813972473145,0.8971526617856483
llm_goals_305,test,21,0.4434857666492462,0.513862561708882
llm_goals_305,test,22,0.6618984937667847,0.6617399492970566
llm_goals_305,test,23,0.5611169934272766,0.3090300849059566
llm_goals_305,test,24,0.7608532309532166,0.4310886044152408
llm_goals_305,test,25,0.7400286197662354,0.7346082271960779
llm_goals_305,test,26,0.7773345112800598,0.7784622217319951
llm_goals_305,test,27,0.7305319309234619,0.2285268892180965
llm_goals_305,test,28,0.6456246376037598,0.1970890988987535
llm_goals_305,test,29,0.49330800771713257,0.0543679274789402
llm_goals_305,test,30,0.7192700505256653,0.8325365217586828
llm_goals_305,test,31,0.5000010132789612,0.5078524819974076
llm_goals_305,test,32,0.628918468952179,0.2061603354174771
llm_goals_305,test,33,0.7855769991874695,0.6039954962603894
llm_goals_305,test,34,0.8048533201217651,0.5754865922825898
llm_goals_305,test,35,0.47489070892333984,0.1664152616423
llm_goals_305,test,36,0.7722275257110596,0.7880486291444165
llm_goals_305,test,37,0.5550589561462402,0.03605078558617
llm_goals_305,test,38,0.4847152531147003,0.0575570687729377
llm_goals_305,test,39,0.40750056505203247,0.4297583044645929
llm_goals_305,test,40,0.5606383681297302,0.1915445959084499
llm_goals_305,test,41,0.42297935485839844,0.6536953532364804
llm_goals_305,test,42,0.4999624490737915,0.122527873906398
llm_goals_305,test,43,0.5663982033729553,0.1711906255032107
llm_goals_305,test,44,0.8219337463378906,0.8371975999741996
llm_goals_305,test,45,0.7184107303619385,0.3836914756409174
llm_goals_305,test,46,0.5098323225975037,0.0543049670678201
llm_goals_305,test,47,0.6837562322616577,0.5245484205428286
llm_goals_305,test,48,0.7456586360931396,0.5969765977742552
llm_goals_305,test,49,0.6924876570701599,0.2351952691938564
llm_goals_81,test,0,0.4708559811115265,0.2060992725380174
llm_goals_81,test,1,0.4059716761112213,0.1581010259271764
llm_goals_81,test,2,0.5583218336105347,0.6899052301996583
llm_goals_81,test,3,0.5451149940490723,0.7805136989347726
llm_goals_81,test,4,0.5732606053352356,0.0972643506873081
llm_goals_81,test,5,0.2622084319591522,0.3089356066572108
llm_goals_81,test,6,0.2769024968147278,0.0383605574042886
llm_goals_81,test,7,0.6141551733016968,0.3161536190910042
llm_goals_81,test,8,0.493483304977417,0.5126683642741202
llm_goals_81,test,9,0.39568740129470825,0.1286837550045714
llm_goals_81,test,10,0.5230064392089844,0.1256029490894943
llm_goals_81,test,11,0.7442250847816467,0.408772132619307
llm_goals_81,test,12,0.4476369023323059,0.1214277758013974
llm_goals_81,test,13,0.37012481689453125,0.0409721836992895
llm_goals_81,test,14,0.34027016162872314,0.0827685330888261
llm_goals_81,test,15,0.47004735469818115,0.224678660518271
llm_goals_81,test,16,0.3384988307952881,0.0872837265533343
llm_goals_81,test,17,0.5009278655052185,0.2837028272857018
llm_goals_81,test,18,0.4545036256313324,0.1193591553103013
llm_goals_81,test,19,0.49233278632164,0.4952181327203863
llm_goals_81,test,20,0.4200819432735443,0.2564665599863756
llm_goals_81,test,21,0.23427057266235352,0.2041083994609424
llm_goals_81,test,22,0.6262224316596985,0.3119739178030907
llm_goals_81,test,23,0.5778539776802063,0.0994638293022036
llm_goals_81,test,24,0.4141985774040222,0.1575467450844306
llm_goals_81,test,25,0.22446271777153015,0.0291125673317165
llm_goals_81,test,26,0.3651653528213501,0.0869071714890548
llm_goals_81,test,27,0.5652408003807068,0.1863589895989203
llm_goals_81,test,28,0.5489151477813721,0.119977738469712
llm_goals_81,test,29,0.43030640482902527,0.4410572612602489
llm_goals_81,test,30,0.44702082872390747,0.0555125882985712
llm_goals_81,test,31,0.08252223581075668,0.3486004983652399
llm_goals_81,test,32,0.7474170327186584,0.189941914212834
llm_goals_81,test,33,0.49876466393470764,0.1890390776737304
llm_goals_81,test,34,0.4625827968120575,0.1328217427635933
llm_goals_81,test,35,0.6469902992248535,0.1343397356978365
llm_goals_81,test,36,0.3118438422679901,0.0899701336915993
llm_goals_81,test,37,0.4526829421520233,0.6478431799513802
llm_goals_81,test,38,0.5290122628211975,0.7351879424598803
llm_goals_81,test,39,0.4539344012737274,0.0885373562628188
llm_goals_81,test,40,0.7215231657028198,0.3150516869571108
llm_goals_81,test,41,0.2944706678390503,0.0498826197855212
llm_goals_81,test,42,0.4822199046611786,0.1381899328653333
llm_goals_81,test,43,0.29346537590026855,0.3536647169335493
llm_goals_81,test,44,0.6199740767478943,0.4857917971856136
llm_goals_81,test,45,0.572409451007843,0.3921515914618259
llm_goals_81,test,46,0.646447479724884,0.1179764264181158
llm_goals_81,test,47,0.41806161403656006,0.2786576050295211
llm_goals_81,test,48,0.362674355506897,0.0726556891905063
llm_goals_81,test,49,0.7053824067115784,0.3912461690418435
llm_goals_133,test,0,0.544624924659729,0.4440529323069227
llm_goals_133,test,1,0.64277184009552,0.3955848959165448
llm_goals_133,test,2,0.5205958485603333,0.5130030466159122
llm_goals_133,test,3,0.594546377658844,0.220245744335209
llm_goals_133,test,4,0.65718674659729,0.5879895213269322
llm_goals_133,test,5,0.6512773036956787,0.8588037554744828
llm_goals_133,test,6,0.6529971957206726,0.8322442189227136
llm_goals_133,test,7,0.5347446203231812,0.6588760518065184
llm_goals_133,test,8,0.5274951457977295,0.5253890103378467
llm_goals_133,test,9,0.6494905352592468,0.8170254264656348
llm_goals_133,test,10,0.6600486040115356,0.4608975636274737
llm_goals_133,test,11,0.41933852434158325,0.2002942757447116
llm_goals_133,test,12,0.6237030029296875,0.4022389740133304
llm_goals_133,test,13,0.685011625289917,0.8516519653230656
llm_goals_133,test,14,0.4873819649219513,0.0158444788418117
llm_goals_133,test,15,0.6379029750823975,0.5978365001716675
llm_goals_133,test,16,0.5678581595420837,0.0529535109452945
llm_goals_133,test,17,0.5875219702720642,0.2556263204518722
llm_goals_133,test,18,0.5121009349822998,0.0167466717766424
llm_goals_133,test,19,0.8032249808311462,0.0944244428527934
llm_goals_133,test,20,0.6185672283172607,0.6914976263434149
llm_goals_133,test,21,0.6436291337013245,0.8436048633272258
llm_goals_133,test,22,0.6043360233306885,0.2868007235124719
llm_goals_133,test,23,0.6409572958946228,0.1160262834732456
llm_goals_133,test,24,0.4611097276210785,0.0154812210183983
llm_goals_133,test,25,0.5730934739112854,0.3119789757098087
llm_goals_133,test,26,0.3991711735725403,0.7929420521534107
llm_goals_133,test,27,0.6404180526733398,0.2037716603127804
llm_goals_133,test,28,0.37295612692832947,0.3118772754734958
llm_goals_133,test,29,0.6837944388389587,0.1808521166351184
llm_goals_133,test,30,0.6073862910270691,0.5045142070808941
llm_goals_133,test,31,0.7535489797592163,0.8498369503935175
llm_goals_133,test,32,0.5077179074287415,0.2694815541177057
llm_goals_133,test,33,0.41913825273513794,0.0177588567715867
llm_goals_133,test,34,0.6554527878761292,0.445395356923409
llm_goals_133,test,35,0.5836391448974609,0.4995371449295338
llm_goals_133,test,36,0.6212167143821716,0.4823747071618506
llm_goals_133,test,37,0.7827717661857605,0.0266815206082507
llm_goals_133,test,38,0.6991158723831177,0.0874820007739646
llm_goals_133,test,39,0.5107810497283936,0.3366422826024648
llm_goals_133,test,40,0.6053860187530518,0.2329899847029727
llm_goals_133,test,41,0.7741226553916931,0.8571491285285516
llm_goals_133,test,42,0.6572816967964172,0.8193201391590431
llm_goals_133,test,43,0.7544021010398865,0.8422289427227235
llm_goals_133,test,44,0.5573188066482544,0.5294372908894646
llm_goals_133,test,45,0.4784253239631653,0.2588603231187393
llm_goals_133,test,46,0.7424049973487854,0.2337208499272645
llm_goals_133,test,47,0.6690412163734436,0.1166634693988454
llm_goals_133,test,48,0.37508365511894226,0.6138306242825586
llm_goals_133,test,49,0.6662598848342896,0.2806552214021111
llm_goals_157,test,0,0.24001674354076385,0.6355320308190959
llm_goals_157,test,1,0.2735922932624817,0.7424853921670768
llm_goals_157,test,2,0.2541704475879669,0.073314241900373
llm_goals_157,test,3,0.26327160000801086,0.4675850432855137
llm_goals_157,test,4,0.23681016266345978,0.5458627288071527
llm_goals_157,test,5,0.3099980354309082,0.0771889198722071
llm_goals_157,test,6,0.4231029748916626,0.0818153292956077
llm_goals_157,test,7,0.21097569167613983,0.0574362851972565
llm_goals_157,test,8,0.3049413859844208,0.612650525482466
llm_goals_157,test,9,0.28858980536460876,0.025148981997583
llm_goals_157,test,10,0.19201306998729706,0.075973919657683
llm_goals_157,test,11,0.38077956438064575,0.1464687737403695
llm_goals_157,test,12,0.5246886610984802,0.7268917489370714
llm_goals_157,test,13,0.3617662489414215,0.0531648084509613
llm_goals_157,test,14,0.5061325430870056,0.3299706213737123
llm_goals_157,test,15,0.3996366262435913,0.3043292296967997
llm_goals_157,test,16,0.48470425605773926,0.7463776357566768
llm_goals_157,test,17,0.2818990647792816,0.8521737469617978
llm_goals_157,test,18,0.32894402742385864,0.6376039881194869
llm_goals_157,test,19,0.4197961688041687,0.6077618725185671
llm_goals_157,test,20,0.32089775800704956,0.7242060188576479
llm_goals_157,test,21,0.3034589886665344,0.2051165554994685
llm_goals_157,test,22,0.301935613155365,0.7995715125449558
llm_goals_157,test,23,0.6017614603042603,0.1467023010082007
llm_goals_157,test,24,0.33436527848243713,0.3108830330026099
llm_goals_157,test,25,0.19027772545814514,0.1001087986592319
llm_goals_157,test,26,0.3111914098262787,0.4552169748909065
llm_goals_157,test,27,0.3522004783153534,0.8485816155817194
llm_goals_157,test,28,0.19194777309894562,0.2265082237706469
llm_goals_157,test,29,0.3700646758079529,0.6283476283012512
llm_goals_157,test,30,0.18394365906715393,0.2620283904445711
llm_goals_157,test,31,0.36437204480171204,0.167952021094149
llm_goals_157,test,32,0.3331838548183441,0.0898426790762853
llm_goals_157,test,33,0.4074932932853699,0.5043942365008254
llm_goals_157,test,34,0.2683711647987366,0.0725868476993519
llm_goals_157,test,35,0.27454283833503723,0.1237829648198263
llm_goals_157,test,36,0.24020664393901825,0.1925318890375481
llm_goals_157,test,37,0.25776273012161255,0.6842419436738383
llm_goals_157,test,38,0.34451034665107727,0.691649855522258
llm_goals_157,test,39,0.5316871404647827,0.5264401359594252
llm_goals_157,test,40,0.33209511637687683,0.1777385641102212
llm_goals_157,test,41,0.3002958297729492,0.0328183379619016
llm_goals_157,test,42,0.37278494238853455,0.1949198629220696
llm_goals_157,test,43,0.41100171208381653,0.135028229661103
llm_goals_157,test,44,0.44472163915634155,0.7258662504199943
llm_goals_157,test,45,0.29596322774887085,0.0935995604675028
llm_goals_157,test,46,0.3845851421356201,0.15418174898882
llm_goals_157,test,47,0.28652217984199524,0.3212815335896988
llm_goals_157,test,48,0.3443715274333954,0.2795304252191649
llm_goals_157,test,49,0.38761624693870544,0.7371862441831876
llm_goals_186,test,0,0.2726972699165344,0.0367510709036243
llm_goals_186,test,1,0.5577217936515808,0.9589190483469818
llm_goals_186,test,2,0.6030417084693909,0.0667798014228945
llm_goals_186,test,3,0.6441032886505127,0.0340377111776925
llm_goals_186,test,4,0.3247508704662323,0.4002710909538108
llm_goals_186,test,5,0.8452338576316833,0.8817241785963961
llm_goals_186,test,6,0.6649709343910217,0.8798493094453552
llm_goals_186,test,7,0.4733792543411255,0.0443878504309986
llm_goals_186,test,8,0.46935808658599854,0.4415592471567901
llm_goals_186,test,9,0.8776121139526367,0.8847759344734797
llm_goals_186,test,10,0.6482100486755371,0.8905032311770802
llm_goals_186,test,11,0.5338197946548462,0.704948600699243
llm_goals_186,test,12,0.45797818899154663,0.409597002882604
llm_goals_186,test,13,0.769667387008667,0.8867287662434378
llm_goals_186,test,14,0.7448122501373291,0.8128145923588551
llm_goals_186,test,15,0.4202713072299957,0.4832503599366539
llm_goals_186,test,16,0.6587606072425842,0.8267478910426029
llm_goals_186,test,17,0.6968705654144287,0.957207678971151
llm_goals_186,test,18,0.7081401348114014,0.7610878868091969
llm_goals_186,test,19,0.7195687890052795,0.7872715543385574
llm_goals_186,test,20,0.44291287660598755,0.520514563098938
llm_goals_186,test,21,0.882360577583313,0.9084513175350956
llm_goals_186,test,22,0.6252347230911255,0.9157385254478472
llm_goals_186,test,23,0.5063116550445557,0.5585242484052028
llm_goals_186,test,24,0.7626190781593323,0.8535531187105866
llm_goals_186,test,25,0.6465802192687988,0.8466653814359101
llm_goals_186,test,26,0.686174750328064,0.8550637970014335
llm_goals_186,test,27,0.724616289138794,0.9398175480710378
llm_goals_186,test,28,0.5768831372261047,0.0028363068023126
llm_goals_186,test,29,0.6122515201568604,0.8305997204848772
llm_goals_186,test,30,0.758341908454895,0.8987706923113074
llm_goals_186,test,31,0.679947018623352,0.8880043621770078
llm_goals_186,test,32,0.48838669061660767,0.4759852559635721
llm_goals_186,test,33,0.6398086547851562,0.8450176079466206
llm_goals_186,test,34,0.6584088802337646,0.9532899841201126
llm_goals_186,test,35,0.42133137583732605,0.0290458993492998
llm_goals_186,test,36,0.7478085160255432,0.9324312223548752
llm_goals_186,test,37,0.6596875786781311,0.7062064442252718
llm_goals_186,test,38,0.5907628536224365,0.8475385890175953
llm_goals_186,test,39,0.5598127245903015,0.2451742937665777
llm_goals_186,test,40,0.5115914940834045,0.7628755019143414
llm_goals_186,test,41,0.7928601503372192,0.9118681525061464
llm_goals_186,test,42,0.5388773679733276,0.0299416334392138
llm_goals_186,test,43,0.7452067136764526,0.9088531704037304
llm_goals_186,test,44,0.43931838870048523,0.399223879564458
llm_goals_186,test,45,0.6168708205223083,0.0207412737387863
llm_goals_186,test,46,0.5191962122917175,0.504435185656878
llm_goals_186,test,47,0.5975254774093628,0.0426878976387859
llm_goals_186,test,48,0.7759385704994202,0.8374391175680358
llm_goals_186,test,49,0.4759315252304077,0.9286286845568336
llm_goals_401,test,0,0.7272043824195862,0.9617876405899952
llm_goals_401,test,1,0.7871333956718445,0.9404490002093446
llm_goals_401,test,2,0.6827113628387451,0.935046673136648
llm_goals_401,test,3,0.8420503735542297,0.9543637645078082
llm_goals_401,test,4,0.7449044585227966,0.9660480618061604
llm_goals_401,test,5,0.3728652596473694,0.9211199313115676
llm_goals_401,test,6,0.7944743037223816,0.902884063341732
llm_goals_401,test,7,0.8461142182350159,0.94729781169548
llm_goals_401,test,8,0.7403542399406433,0.957207800210959
llm_goals_401,test,9,0.6538290977478027,0.9145740305533586
llm_goals_401,test,10,0.5032225251197815,0.9267561414021268
llm_goals_401,test,11,0.6291753649711609,0.9242701512661912
llm_goals_401,test,12,0.6776297688484192,0.8435903191618971
llm_goals_401,test,13,0.5268582701683044,0.938192186231483
llm_goals_401,test,14,0.36510470509529114,0.8529976602342201
llm_goals_401,test,15,0.6349526047706604,0.9478884329856586
llm_goals_401,test,16,0.28023797273635864,0.8192354997719525
llm_goals_401,test,17,0.7510631680488586,0.6739075253562599
llm_goals_401,test,18,0.44449126720428467,0.5197439125792754
llm_goals_401,test,19,0.6639479398727417,0.8583977843568281
llm_goals_401,test,20,0.6532124876976013,0.9455993763925636
llm_goals_401,test,21,0.6085302829742432,0.9705763460601288
llm_goals_401,test,22,0.6987234950065613,0.8491748031226035
llm_goals_401,test,23,0.6528576612472534,0.8515620190775874
llm_goals_401,test,24,0.4894212484359741,0.9138265832467732
llm_goals_401,test,25,0.6694169044494629,0.9028179661562172
llm_goals_401,test,26,0.4265963137149811,0.9558187125982178
llm_goals_401,test,27,0.7271250486373901,0.5612720368847214
llm_goals_401,test,28,0.8553650975227356,0.9617025244944858
llm_goals_401,test,29,0.8290663957595825,0.8060652645257104
llm_goals_401,test,30,0.7940393686294556,0.8896843204884566
llm_goals_401,test,31,0.44082117080688477,0.9526923309875832
llm_goals_401,test,32,0.8731826543807983,0.8506568677811456
llm_goals_401,test,33,0.6535808444023132,0.7802594846608962
llm_goals_401,test,34,0.7154738306999207,0.8870658880824575
llm_goals_401,test,35,0.5478351712226868,0.954492271718892
llm_goals_401,test,36,0.5610901713371277,0.562048146808884
llm_goals_401,test,37,0.8338974714279175,0.7781126533121301
llm_goals_401,test,38,0.8604344129562378,0.8591165497422784
llm_goals_401,test,39,0.5743411183357239,0.8788934372485709
llm_goals_401,test,40,0.6874042749404907,0.8091102829935539
llm_goals_401,test,41,0.7321528196334839,0.8675616408909181
llm_goals_401,test,42,0.6460950970649719,0.949993671778176
llm_goals_401,test,43,0.6280730962753296,0.9467448581700896
llm_goals_401,test,44,0.6169826984405518,0.8687242293966851
llm_goals_401,test,45,0.8003737926483154,0.9587250339741858
llm_goals_401,test,46,0.33551979064941406,0.860452357605835
llm_goals_401,test,47,0.6521012187004089,0.9253614550163576
llm_goals_401,test,48,0.40555235743522644,0.8647797053472309
llm_goals_401,test,49,0.5825291872024536,0.9454760570818088
llm_goals_420,test,0,0.40558430552482605,0.9711560599789952
llm_goals_420,test,1,0.5217304825782776,0.5978053926454786
llm_goals_420,test,2,0.19254286587238312,0.0817351976918742
llm_goals_420,test,3,0.33864036202430725,0.4730792485055278
llm_goals_420,test,4,0.3516906797885895,0.8487827178347965
llm_goals_420,test,5,0.24921491742134094,0.1386299974231985
llm_goals_420,test,6,0.24786189198493958,0.9677888754301456
llm_goals_420,test,7,0.32370150089263916,0.9193048504515688
llm_goals_420,test,8,0.29186248779296875,0.1105940972856699
llm_goals_420,test,9,0.18369193375110626,0.0770199238560772
llm_goals_420,test,10,0.2982414662837982,0.6569762799970394
llm_goals_420,test,11,0.3711185157299042,0.4533228613613927
llm_goals_420,test,12,0.3319769501686096,0.9317349816078236
llm_goals_420,test,13,0.28515616059303284,0.5042190525433249
llm_goals_420,test,14,0.3691754937171936,0.1440102415442949
llm_goals_420,test,15,0.40889379382133484,0.3273024402815145
llm_goals_420,test,16,0.3502398133277893,0.3681827771891491
llm_goals_420,test,17,0.3925550580024719,0.4632337459639504
llm_goals_420,test,18,0.18086613714694977,0.0396023029282074
llm_goals_420,test,19,0.22885727882385254,0.1093558671361818
llm_goals_420,test,20,0.26981109380722046,0.0396252534530884
llm_goals_420,test,21,0.23073162138462067,0.0465700238365722
llm_goals_420,test,22,0.3398144543170929,0.1916998836408959
llm_goals_420,test,23,0.30491170287132263,0.1681177802630551
llm_goals_420,test,24,0.32744503021240234,0.1698145969501656
llm_goals_420,test,25,0.4031774401664734,0.9829779862454848
llm_goals_420,test,26,0.2788124978542328,0.0308922007548325
llm_goals_420,test,27,0.36711451411247253,0.8125830946093627
llm_goals_420,test,28,0.288149893283844,0.9367654930611784
llm_goals_420,test,29,0.2781999707221985,0.8184437096518178
llm_goals_420,test,30,0.25505468249320984,0.9386603865018432
llm_goals_420,test,31,0.2024502009153366,0.10851579640767
llm_goals_420,test,32,0.3840380609035492,0.9666374926215946
llm_goals_420,test,33,0.2137174904346466,0.8901044715204638
llm_goals_420,test,34,0.40441906452178955,0.9215939624410928
llm_goals_420,test,35,0.49067583680152893,0.5267802921981926
llm_goals_420,test,36,0.2628359794616699,0.092175658510858
llm_goals_420,test,37,0.3348851799964905,0.984362841437198
llm_goals_420,test,38,0.3058125078678131,0.8828047505171253
llm_goals_420,test,39,0.37085527181625366,0.2972862903877061
llm_goals_420,test,40,0.21072958409786224,0.0444444838151291
llm_goals_420,test,41,0.3160633146762848,0.5478361768565629
llm_goals_420,test,42,0.2926141321659088,0.0713350995649345
llm_goals_420,test,43,0.2970658838748932,0.5106248261137879
llm_goals_420,test,44,0.2602047026157379,0.0905225225380096
llm_goals_420,test,45,0.23098549246788025,0.1531352912609338
llm_goals_420,test,46,0.31781715154647827,0.1216667665904784
llm_goals_420,test,47,0.30452829599380493,0.1240460201984882
llm_goals_420,test,48,0.23384715616703033,0.0596825371886435
llm_goals_420,test,49,0.46359044313430786,0.8335117138475971
llm_goals_263,test,0,0.5001147985458374,0.5209935935366439
llm_goals_263,test,1,0.7653377652168274,0.7968068746445729
llm_goals_263,test,2,0.7379801273345947,0.5467470232144288
llm_goals_263,test,3,0.6856784224510193,0.4733215007319302
llm_goals_263,test,4,0.6588085293769836,0.0057083956844275
llm_goals_263,test,5,0.6379050016403198,0.3493473596509424
llm_goals_263,test,6,0.4025757610797882,0.8201550249820808
llm_goals_263,test,7,0.799784779548645,0.6557906816477753
llm_goals_263,test,8,0.39196598529815674,0.7543426271500917
llm_goals_263,test,9,0.6625474095344543,0.2009829696053626
llm_goals_263,test,10,0.7639389634132385,0.8489753664207734
llm_goals_263,test,11,0.71098792552948,0.652436611134806
llm_goals_263,test,12,0.6623979806900024,0.8143466734577086
llm_goals_263,test,13,0.6755775213241577,0.4245607212842352
llm_goals_263,test,14,0.42318856716156006,0.0036452536113296
llm_goals_263,test,15,0.56925368309021,0.0558692673280436
llm_goals_263,test,16,0.406399130821228,0.0066133854077101
llm_goals_263,test,17,0.7527015805244446,0.7648655133279786
llm_goals_263,test,18,0.4329397976398468,0.006651634113929
llm_goals_263,test,19,0.7081301808357239,0.0487544569045993
llm_goals_263,test,20,0.6275766491889954,0.0331700030843074
llm_goals_263,test,21,0.4789040684700012,0.2223244924357846
llm_goals_263,test,22,0.8061860799789429,0.8850367693524676
llm_goals_263,test,23,0.541839599609375,0.6136206316393849
llm_goals_263,test,24,0.4247380197048187,0.0103991722036265
llm_goals_263,test,25,0.5303505063056946,0.7490420921699364
llm_goals_263,test,26,0.54156893491745,0.0149267434716544
llm_goals_263,test,27,0.8035178184509277,0.7989164572724189
llm_goals_263,test,28,0.6837005019187927,0.6376088146659757
llm_goals_263,test,29,0.46549588441848755,0.0419343519282845
llm_goals_263,test,30,0.6655441522598267,0.8598701912381717
llm_goals_263,test,31,0.5640407204627991,0.168156003178124
llm_goals_263,test,32,0.7359842658042908,0.6210063725022936
llm_goals_263,test,33,0.3613393306732178,0.0062242916440703
llm_goals_263,test,34,0.726648211479187,0.5691545063377933
llm_goals_263,test,35,0.6967136263847351,0.7982008481236453
llm_goals_263,test,36,0.6434521079063416,0.763706617142148
llm_goals_263,test,37,0.7366612553596497,0.0342430752915087
llm_goals_263,test,38,0.6792187690734863,0.1222660265497154
llm_goals_263,test,39,0.6617291569709778,0.8269810089104592
llm_goals_263,test,40,0.6598139405250549,0.5318831885299807
llm_goals_263,test,41,0.6482293009757996,0.3207528198494705
llm_goals_263,test,42,0.7255244255065918,0.4801431121790341
llm_goals_263,test,43,0.6505770683288574,0.1981375631842198
llm_goals_263,test,44,0.580004870891571,0.1080272425734687
llm_goals_263,test,45,0.8132931590080261,0.8354177467829539
llm_goals_263,test,46,0.5292200446128845,0.7413273974280716
llm_goals_263,test,47,0.8258805871009827,0.4918597295945824
llm_goals_263,test,48,0.49011003971099854,0.0043793883332247
llm_goals_263,test,49,0.8120301961898804,0.949535011062065
llm_goals_427,test,0,0.5372409224510193,0.2826418194423331
llm_goals_427,test,1,0.6861745715141296,0.380977464557011
llm_goals_427,test,2,0.46517249941825867,0.1487359361961987
llm_goals_427,test,3,0.5691680908203125,0.3576580711889061
llm_goals_427,test,4,0.7527753114700317,0.2270212997591984
llm_goals_427,test,5,0.6560001373291016,0.2711625929555684
llm_goals_427,test,6,0.6891666054725647,0.1402530886796432
llm_goals_427,test,7,0.5399993062019348,0.2935996266753878
llm_goals_427,test,8,0.5992351770401001,0.2137670314614922
llm_goals_427,test,9,0.5610436797142029,0.2160041132788326
llm_goals_427,test,10,0.6345363259315491,0.3669200145460971
llm_goals_427,test,11,0.588653028011322,0.1516882402250043
llm_goals_427,test,12,0.642636239528656,0.1475453757541205
llm_goals_427,test,13,0.5298323035240173,0.1350458405109811
llm_goals_427,test,14,0.504203200340271,0.2337964230636721
llm_goals_427,test,15,0.6887593269348145,0.3115728193455654
llm_goals_427,test,16,0.6269977688789368,0.131520745523965
llm_goals_427,test,17,0.6086397171020508,0.3278496493117007
llm_goals_427,test,18,0.628607451915741,0.3527567015929387
llm_goals_427,test,19,0.4470386207103729,0.2313815817463674
llm_goals_427,test,20,0.6588420867919922,0.3221674001172885
llm_goals_427,test,21,0.5379612445831299,0.2494982149574568
llm_goals_427,test,22,0.5841297507286072,0.3779860883941361
llm_goals_427,test,23,0.563977837562561,0.3509138567308049
llm_goals_427,test,24,0.635154664516449,0.1946130607095621
llm_goals_427,test,25,0.5300045013427734,0.228222711067827
llm_goals_427,test,26,0.5737840533256531,0.2768263295001016
llm_goals_427,test,27,0.5418882369995117,0.2199474495731038
llm_goals_427,test,28,0.5798082947731018,0.1579098434370231
llm_goals_427,test,29,0.55006343126297,0.069843346359134
llm_goals_427,test,30,0.6906412839889526,0.5359787931933129
llm_goals_427,test,31,0.5224179029464722,0.1293182240484197
llm_goals_427,test,32,0.6999430060386658,0.2396093502744168
llm_goals_427,test,33,0.6996815204620361,0.1913976476875258
llm_goals_427,test,34,0.6794571876525879,0.4990200469731286
llm_goals_427,test,35,0.5644886493682861,0.357807030322738
llm_goals_427,test,36,0.6114593744277954,0.2580007688260685
llm_goals_427,test,37,0.4694521129131317,0.4031150735356658
llm_goals_427,test,38,0.6440727114677429,0.2737247490124149
llm_goals_427,test,39,0.5518500804901123,0.2478362318668904
llm_goals_427,test,40,0.6028456091880798,0.5026721270474518
llm_goals_427,test,41,0.5601343512535095,0.2273186194850299
llm_goals_427,test,42,0.4709259271621704,0.4444071308114509
llm_goals_427,test,43,0.6512635946273804,0.2369720640870139
llm_goals_427,test,44,0.7063614130020142,0.3961639877527012
llm_goals_427,test,45,0.6275002956390381,0.2771055535106792
llm_goals_427,test,46,0.7010037302970886,0.3138421601355035
llm_goals_427,test,47,0.5341115593910217,0.3609031182529178
llm_goals_427,test,48,0.5599660873413086,0.1899314535085168
llm_goals_427,test,49,0.7422426342964172,0.4452677872765214
llm_goals_449,test,0,0.061174117028713226,0.0338418398422384
llm_goals_449,test,1,0.07132655382156372,0.5657697432527007
llm_goals_449,test,2,0.07711705565452576,0.0110135668657682
llm_goals_449,test,3,0.09228721261024475,0.2352946829974157
llm_goals_449,test,4,0.18894660472869873,0.4842577903424374
llm_goals_449,test,5,0.07624290883541107,0.3485549555066181
llm_goals_449,test,6,0.06745702773332596,0.0519669241570106
llm_goals_449,test,7,0.07320518046617508,0.0031269987549636
llm_goals_449,test,8,0.21319299936294556,0.2974090419757364
llm_goals_449,test,9,0.034329742193222046,0.0925536782452943
llm_goals_449,test,10,0.10718046873807907,0.4386853694122202
llm_goals_449,test,11,0.06136362627148628,0.0063782741293255
llm_goals_449,test,12,0.05650781840085983,0.0714949969968408
llm_goals_449,test,13,0.07832333445549011,0.0227751515807358
llm_goals_449,test,14,0.07566892355680466,0.3195753835685478
llm_goals_449,test,15,0.11633019894361496,0.2420638006790782
llm_goals_449,test,16,0.0614568330347538,0.0953277975239941
llm_goals_449,test,17,0.06087639927864075,0.5835221517250182
llm_goals_449,test,18,0.07201627641916275,0.1603112674891041
llm_goals_449,test,19,0.13357286155223846,0.6833613521345718
llm_goals_449,test,20,0.11395268887281418,0.4549636770044163
llm_goals_449,test,21,0.06762062758207321,0.1579551412473356
llm_goals_449,test,22,0.06508965790271759,0.4400750405221786
llm_goals_449,test,23,0.12721699476242065,0.0251530775884869
llm_goals_449,test,24,0.054335616528987885,0.2695772384712642
llm_goals_449,test,25,0.14648082852363586,0.4519585097229938
llm_goals_449,test,26,0.04363691061735153,0.1123650856049343
llm_goals_449,test,27,0.04880613088607788,0.4873581137262729
llm_goals_449,test,28,0.05313948169350624,0.1751834751469549
llm_goals_449,test,29,0.11822851002216339,0.5804994670396382
llm_goals_449,test,30,0.13036954402923584,0.4626257946707522
llm_goals_449,test,31,0.0776451975107193,0.093699276239858
llm_goals_449,test,32,0.09231361746788025,0.0067638379987912
llm_goals_449,test,33,0.08323023468255997,0.1681834079617312
llm_goals_449,test,34,0.10602403432130814,0.3493466516790004
llm_goals_449,test,35,0.08760200440883636,0.0068633490722964
llm_goals_449,test,36,0.09117020666599274,0.3064552348162913
llm_goals_449,test,37,0.07781273871660233,0.6652150761082033
llm_goals_449,test,38,0.07797552645206451,0.6741024889471738
llm_goals_449,test,39,0.06995358318090439,0.0101379083480811
llm_goals_449,test,40,0.10228598862886429,0.0089588521989727
llm_goals_449,test,41,0.06689973920583725,0.1780122280374072
llm_goals_449,test,42,0.07860222458839417,0.0051090174127486
llm_goals_449,test,43,0.04711846262216568,0.2498444495344637
llm_goals_449,test,44,0.13546790182590485,0.221646438441774
llm_goals_449,test,45,0.08170229196548462,0.2094790944562388
llm_goals_449,test,46,0.12023068964481354,0.0060341568900946
llm_goals_449,test,47,0.07958093285560608,0.1241710666090617
llm_goals_449,test,48,0.0567808598279953,0.1338989075405874
llm_goals_449,test,49,0.0811108946800232,0.5982472498607049
llm_goals_93,test,0,0.40267041325569153,0.0729546648027557
llm_goals_93,test,1,0.5459515452384949,0.6981648167380458
llm_goals_93,test,2,0.3154684901237488,0.3318565449695651
llm_goals_93,test,3,0.4388864040374756,0.5505450337981316
llm_goals_93,test,4,0.6600291132926941,0.9547701486411684
llm_goals_93,test,5,0.60379558801651,0.8378989133869518
llm_goals_93,test,6,0.7804321646690369,0.8645678173925403
llm_goals_93,test,7,0.3397061228752136,0.8034776985663136
llm_goals_93,test,8,0.6445391774177551,0.9139015687588464
llm_goals_93,test,9,0.723888099193573,0.84076714934408
llm_goals_93,test,10,0.30411863327026367,0.6739811171843284
llm_goals_93,test,11,0.5647035241127014,0.1039005746210211
llm_goals_93,test,12,0.5863098502159119,0.4267719606730696
llm_goals_93,test,13,0.6356543302536011,0.8366404177501517
llm_goals_93,test,14,0.49519622325897217,0.51043302648597
llm_goals_93,test,15,0.47534438967704773,0.9305120134073452
llm_goals_93,test,16,0.5282706618309021,0.3610424966705102
llm_goals_93,test,17,0.5802735090255737,0.6383014796360676
llm_goals_93,test,18,0.3887541890144348,0.7718638733110297
llm_goals_93,test,19,0.45604151487350464,0.6413577201238602
llm_goals_93,test,20,0.6683101058006287,0.9315750038397262
llm_goals_93,test,21,0.8531897068023682,0.959779321458113
llm_goals_93,test,22,0.35270848870277405,0.8212707656396169
llm_goals_93,test,23,0.7504820227622986,0.0620020892855925
llm_goals_93,test,24,0.7233929634094238,0.6707394124648912
llm_goals_93,test,25,0.32036224007606506,0.8513622881275814
llm_goals_93,test,26,0.4781411588191986,0.9295520797235572
llm_goals_93,test,27,0.4862464368343353,0.731125599089208
llm_goals_93,test,28,0.6299924254417419,0.6387209955509132
llm_goals_93,test,29,0.5862157344818115,0.904010833763946
llm_goals_93,test,30,0.6181029081344604,0.6688304494781727
llm_goals_93,test,31,0.7858136892318726,0.7246857956085144
llm_goals_93,test,32,0.6036299467086792,0.8395389979928227
llm_goals_93,test,33,0.7961347103118896,0.4902935277900161
llm_goals_93,test,34,0.2843736708164215,0.5478889802411547
llm_goals_93,test,35,0.38747408986091614,0.8178324059837278
llm_goals_93,test,36,0.4050310552120209,0.8865698576591567
llm_goals_93,test,37,0.3980415463447571,0.9194802654649477
llm_goals_93,test,38,0.5884554386138916,0.8798558779688433
llm_goals_93,test,39,0.7132452130317688,0.2399301633758412
llm_goals_93,test,40,0.4514429271221161,0.946265162703226
llm_goals_93,test,41,0.5906803011894226,0.8435175392494392
llm_goals_93,test,42,0.1976536065340042,0.260949780916787
llm_goals_93,test,43,0.5818545818328857,0.8190814792570137
llm_goals_93,test,44,0.7254412174224854,0.951412738003951
llm_goals_93,test,45,0.6224470138549805,0.5085180485155563
llm_goals_93,test,46,0.7535750865936279,0.946404217454057
llm_goals_93,test,47,0.5326115489006042,0.5778150570910173
llm_goals_93,test,48,0.461816668510437,0.6859809771933842
llm_goals_93,test,49,0.44538626074790955,0.6187362092588319
llm_goals_358,test,0,0.576516330242157,0.9198604804287828
llm_goals_358,test,1,0.8009064793586731,0.8623428941495346
llm_goals_358,test,2,0.6707714796066284,0.8146633538972576
llm_goals_358,test,3,0.6961897015571594,0.6635203414505186
llm_goals_358,test,4,0.7854582667350769,0.9873765704706148
llm_goals_358,test,5,0.5420191287994385,0.8085697334742067
llm_goals_358,test,6,0.43864166736602783,0.8449593731392603
llm_goals_358,test,7,0.4841148257255554,0.7802023295414534
llm_goals_358,test,8,0.7715920209884644,0.5598487865057791
llm_goals_358,test,9,0.40754738450050354,0.7689259718491731
llm_goals_358,test,10,0.4982543885707855,0.4090564458803354
llm_goals_358,test,11,0.5109652280807495,0.7281676519766522
llm_goals_358,test,12,0.48294106125831604,0.781706684015145
llm_goals_358,test,13,0.46936923265457153,0.6175350663762746
llm_goals_358,test,14,0.27330282330513,0.1259366314847969
llm_goals_358,test,15,0.8244229555130005,0.9665391433681092
llm_goals_358,test,16,0.4063071012496948,0.0977212357476949
llm_goals_358,test,17,0.7095783948898315,0.839291034278836
llm_goals_358,test,18,0.4589816629886627,0.0422931239637008
llm_goals_358,test,19,0.4348965883255005,0.3805861792548485
llm_goals_358,test,20,0.478420615196228,0.972559035375534
llm_goals_358,test,21,0.3258780539035797,0.7844216881478699
llm_goals_358,test,22,0.7819400429725647,0.8432835924874589
llm_goals_358,test,23,0.1183500662446022,0.5831400725193898
llm_goals_358,test,24,0.15052491426467896,0.4717356494812245
llm_goals_358,test,25,0.30534133315086365,0.5591997574513735
llm_goals_358,test,26,0.21464087069034576,0.2416094818565881
llm_goals_358,test,27,0.819869339466095,0.8557277309389357
llm_goals_358,test,28,0.5853722095489502,0.7784201381020285
llm_goals_358,test,29,0.28660547733306885,0.9848793746115032
llm_goals_358,test,30,0.5035577416419983,0.6211451382223804
llm_goals_358,test,31,0.6982247233390808,0.3507967043222499
llm_goals_358,test,32,0.3740454316139221,0.670993868258675
llm_goals_358,test,33,0.3080647885799408,0.0563775762088827
llm_goals_358,test,34,0.1901547908782959,0.4923522105012786
llm_goals_358,test,35,0.6237671375274658,0.8157534628947803
llm_goals_358,test,36,0.5817991495132446,0.7491294702469764
llm_goals_358,test,37,0.2315659523010254,0.5478990059483921
llm_goals_358,test,38,0.31617265939712524,0.9873113036659013
llm_goals_358,test,39,0.40431204438209534,0.6807296173615788
llm_goals_358,test,40,0.4766436517238617,0.6446558925323718
llm_goals_358,test,41,0.22919723391532898,0.3360622093964198
llm_goals_358,test,42,0.5683153867721558,0.6737437200032999
llm_goals_358,test,43,0.4613754153251648,0.453765234769123
llm_goals_358,test,44,0.5545831322669983,0.921309112250278
llm_goals_358,test,45,0.45915427803993225,0.5133269325272689
llm_goals_358,test,46,0.824207603931427,0.7619134214235583
llm_goals_358,test,47,0.6638444662094116,0.5260478340890856
llm_goals_358,test,48,0.2728506028652191,0.0910570466126295
llm_goals_358,test,49,0.6822474002838135,0.4710935988277175
