template_id,split,question_idx,prediction,label
llm_goals_236,test,0,0.8430222868919373,0.9999990463256836
llm_goals_236,test,1,0.8478867411613464,0.9998601079056368
llm_goals_236,test,2,0.8349076509475708,0.9999994039535594
llm_goals_236,test,3,0.9065715670585632,0.999998450279186
llm_goals_236,test,4,0.910978376865387,0.9999971389770508
llm_goals_236,test,5,0.9454839825630188,0.9999998211860676
llm_goals_236,test,6,0.9570965766906738,0.9999996423721385
llm_goals_236,test,7,0.8998451232910156,0.999999344348895
llm_goals_236,test,8,0.8918246626853943,0.9999988675117084
llm_goals_236,test,9,0.9481493234634399,0.9999998211860676
llm_goals_236,test,10,0.8050052523612976,0.9942808746609532
llm_goals_236,test,11,0.8754400014877319,0.9998060464816904
llm_goals_236,test,12,0.8509758114814758,0.9999784827237244
llm_goals_236,test,13,0.9229326844215393,0.9999998211860676
llm_goals_236,test,14,0.7516052722930908,0.9999877214430704
llm_goals_236,test,15,0.8338161110877991,0.9999918937684243
llm_goals_236,test,16,0.9269200563430786,0.9998958706868224
llm_goals_236,test,17,0.8501527905464172,0.9999055266407578
llm_goals_236,test,18,0.8639663457870483,0.9999948143957074
llm_goals_236,test,19,0.9124913811683655,0.0724598805251418
llm_goals_236,test,20,0.7762066125869751,0.9999921321869182
llm_goals_236,test,21,0.8809138536453247,0.9999992847442344
llm_goals_236,test,22,0.8347309827804565,0.9997341632930924
llm_goals_236,test,23,0.8464558720588684,0.9998326301518724
llm_goals_236,test,24,0.8725079298019409,0.999985396862246
llm_goals_236,test,25,0.8702197074890137,0.9778256407805812
llm_goals_236,test,26,0.7762987017631531,0.9999951124192492
llm_goals_236,test,27,0.8746629357337952,0.9285253300141496
llm_goals_236,test,28,0.9158926606178284,0.9999996423721385
llm_goals_236,test,29,0.9317673444747925,0.0552585651110835
llm_goals_236,test,30,0.8059697151184082,0.9193987256529454
llm_goals_236,test,31,0.7977486848831177,0.9999983310699748
llm_goals_236,test,32,0.8671139478683472,0.999973952771136
llm_goals_236,test,33,0.8649550080299377,0.9999884366984664
llm_goals_236,test,34,0.7904621958732605,0.9757304783109252
llm_goals_236,test,35,0.898261308670044,0.9999988675117084
llm_goals_236,test,36,0.5390647649765015,0.9998944997781798
llm_goals_236,test,37,0.9350749850273132,0.1365288892720467
llm_goals_236,test,38,0.8834224939346313,0.719762510833872
llm_goals_236,test,39,0.8213040232658386,0.9999720454212542
llm_goals_236,test,40,0.8817908763885498,0.9984533787155216
llm_goals_236,test,41,0.9073703289031982,0.9999998211860676
llm_goals_236,test,42,0.847639262676239,0.9999989271164011
llm_goals_236,test,43,0.8945975303649902,0.9999982714653176
llm_goals_236,test,44,0.7789138555526733,0.9999991655349802
llm_goals_236,test,45,0.9031053781509399,0.9999994635581704
llm_goals_236,test,46,0.8320164680480957,0.9999888539316992
llm_goals_236,test,47,0.7518549561500549,0.9999989271164011
llm_goals_236,test,48,0.847632884979248,0.9999687075599843
llm_goals_236,test,49,0.8272725939750671,0.9976178406916212
llm_goals_228,test,0,0.7450803518295288,0.9999923110003712
llm_goals_228,test,1,0.8212345838546753,0.9997239112788898
llm_goals_228,test,2,0.901739239692688,0.9999983310699748
llm_goals_228,test,3,0.8014810681343079,0.3235730208667373
llm_goals_228,test,4,0.8321613073348999,0.9999561309796544
llm_goals_228,test,5,0.7449600696563721,0.0500382433432339
llm_goals_228,test,6,0.7657087445259094,0.0325581693591018
llm_goals_228,test,7,0.8709616661071777,0.9999955296516294
llm_goals_228,test,8,0.8747166395187378,0.9999945759774836
llm_goals_228,test,9,0.4809732437133789,0.0028912413507839
llm_goals_228,test,10,0.7127666473388672,0.999972343445286
llm_goals_228,test,11,0.703471302986145,0.2356525274624049
llm_goals_228,test,12,0.8371111750602722,0.7824152121811281
llm_goals_228,test,13,0.7278538346290588,9.272773905172216e-05
llm_goals_228,test,14,0.691093921661377,0.999985396862246
llm_goals_228,test,15,0.8300343155860901,0.9997810721409376
llm_goals_228,test,16,0.8054617643356323,0.9999924302101154
llm_goals_228,test,17,0.7807289958000183,0.9964962600884054
llm_goals_228,test,18,0.6383557915687561,0.9999846816053862
llm_goals_228,test,19,0.8223061561584473,0.80150937475593
llm_goals_228,test,20,0.6826659440994263,0.9999823570243526
llm_goals_228,test,21,0.5939616560935974,0.7620198101590071
llm_goals_228,test,22,0.7277806997299194,0.999146759521554
llm_goals_228,test,23,0.6604021787643433,0.9753957978718134
llm_goals_228,test,24,0.5940473675727844,0.9999912381174048
llm_goals_228,test,25,0.8612379431724548,0.9997735619549886
llm_goals_228,test,26,0.7523418664932251,0.999994218349345
llm_goals_228,test,27,0.853759765625,0.9975230096939942
llm_goals_228,test,28,0.7812541723251343,0.0282825634665972
llm_goals_228,test,29,0.8753994703292847,0.989357948515744
llm_goals_228,test,30,0.6771966814994812,0.9999648928656788
llm_goals_228,test,31,0.5852938294410706,0.1841044242752227
llm_goals_228,test,32,0.8214963674545288,0.6401162870682584
llm_goals_228,test,33,0.7643503546714783,0.999995231628418
llm_goals_228,test,34,0.7045415639877319,0.9999182820314436
llm_goals_228,test,35,0.7384728193283081,0.999788165093598
llm_goals_228,test,36,0.7701621055603027,0.9875653984316132
llm_goals_228,test,37,0.6976105570793152,0.99879342314863
llm_goals_228,test,38,0.8288425803184509,0.9949926138514864
llm_goals_228,test,39,0.7839010953903198,0.9871301051093162
llm_goals_228,test,40,0.7409504652023315,0.9374855739864648
llm_goals_228,test,41,0.5053785443305969,0.768526607723976
llm_goals_228,test,42,0.8145321011543274,0.9999880194663172
llm_goals_228,test,43,0.5375326871871948,0.5863317957282166
llm_goals_228,test,44,0.8286685347557068,0.9999881386760048
llm_goals_228,test,45,0.7257265448570251,0.8984286803691676
llm_goals_228,test,46,0.8400965929031372,0.012311546088274
llm_goals_228,test,47,0.7146750092506409,0.0766497555033863
llm_goals_228,test,48,0.6518194079399109,0.9999973773956584
llm_goals_228,test,49,0.7289968729019165,0.939641477083989
llm_goals_397,test,0,0.2942177951335907,0.9814099073764808
llm_goals_397,test,1,0.7660227417945862,0.9999912381174048
llm_goals_397,test,2,0.3664465844631195,0.9999979138373352
llm_goals_397,test,3,0.6418820023536682,0.9999809861190028
llm_goals_397,test,4,0.31187570095062256,0.0255301341302224
llm_goals_397,test,5,0.752589225769043,0.9999864101405658
llm_goals_397,test,6,0.6659741997718811,0.9998381137785032
llm_goals_397,test,7,0.498049259185791,0.99997174739913
llm_goals_397,test,8,0.4193347990512848,0.130829360211786
llm_goals_397,test,9,0.2523956596851349,0.9999025464035022
llm_goals_397,test,10,0.17521314322948456,0.9995446801111892
llm_goals_397,test,11,0.1388006955385208,0.9833766826787343
llm_goals_397,test,12,0.4464116096496582,0.999996542930667
llm_goals_397,test,13,0.3258323669433594,0.999946773053646
llm_goals_397,test,14,0.6043489575386047,0.9999846816053862
llm_goals_397,test,15,0.2911507487297058,0.0066017277153082
llm_goals_397,test,16,0.5249044299125671,0.9999932646750854
llm_goals_397,test,17,0.8066643476486206,0.9999726414675878
llm_goals_397,test,18,0.4729898273944855,0.9999631643313146
llm_goals_397,test,19,0.4900428354740143,0.9990255832870808
llm_goals_397,test,20,0.4824889600276947,0.0470702684705779
llm_goals_397,test,21,0.761152446269989,0.999989688396868
llm_goals_397,test,22,0.28178733587265015,0.9999497532844842
llm_goals_397,test,23,0.11790604889392853,0.9997248649522246
llm_goals_397,test,24,0.6963722109794617,0.9999954104424252
llm_goals_397,test,25,0.5736244916915894,0.9892253874904372
llm_goals_397,test,26,0.5963995456695557,0.9999661445600752
llm_goals_397,test,27,0.6632895469665527,0.999984502792388
llm_goals_397,test,28,0.7334185838699341,0.9999876022334326
llm_goals_397,test,29,0.6842293739318848,0.99916046856021
llm_goals_397,test,30,0.6216341853141785,0.9993026852479712
llm_goals_397,test,31,0.7573267221450806,0.999975025654656
llm_goals_397,test,32,0.30733147263526917,0.9996371865271196
llm_goals_397,test,33,0.6335356831550598,0.9999744892118428
llm_goals_397,test,34,0.26369747519493103,0.999513149271868
llm_goals_397,test,35,0.3957348167896271,0.9999858140942104
llm_goals_397,test,36,0.5699621438980103,0.9909363981874668
llm_goals_397,test,37,0.41431763768196106,0.9967403410768448
llm_goals_397,test,38,0.45502790808677673,0.9700924159576454
llm_goals_397,test,39,0.6252934336662292,0.9999973177909444
llm_goals_397,test,40,0.16349174082279205,0.8681342672732176
llm_goals_397,test,41,0.4145619869232178,0.999994218349345
llm_goals_397,test,42,0.34922999143600464,0.9999906420709002
llm_goals_397,test,43,0.2517136037349701,0.9999807476995284
llm_goals_397,test,44,0.49225714802742004,0.6356807298065411
llm_goals_397,test,45,0.2936221957206726,0.999989688396868
llm_goals_397,test,46,0.3426448106765747,0.9996384382368032
llm_goals_397,test,47,0.661516010761261,0.9997389316595698
llm_goals_397,test,48,0.6813699007034302,0.9999530911461468
llm_goals_397,test,49,0.4633120894432068,0.9999887943268112
llm_goals_401,test,0,0.9963462948799133,0.9999994635581704
llm_goals_401,test,1,0.9970332384109497,0.9999995231628418
llm_goals_401,test,2,0.9972544312477112,0.9999994635581704
llm_goals_401,test,3,0.9969581365585327,0.9999074339856228
llm_goals_401,test,4,0.9963293671607971,0.999999940395357
llm_goals_401,test,5,0.9971030354499817,0.999999940395357
llm_goals_401,test,6,0.9970209002494812,0.999999940395357
llm_goals_401,test,7,0.9971342086791992,0.9999992847442344
llm_goals_401,test,8,0.9968684315681458,0.999999940395357
llm_goals_401,test,9,0.996738851070404,0.9999998211860676
llm_goals_401,test,10,0.9962277412414551,0.9999995827674883
llm_goals_401,test,11,0.9962934851646423,0.9999995827674883
llm_goals_401,test,12,0.9964620471000671,0.9999995827674883
llm_goals_401,test,13,0.9964810609817505,0.9999997019767636
llm_goals_401,test,14,0.9953631162643433,0.9999986886977652
llm_goals_401,test,15,0.9952812790870667,0.9999998211860676
llm_goals_401,test,16,0.9951163530349731,0.9999991655349802
llm_goals_401,test,17,0.9975993037223816,0.9999992847442344
llm_goals_401,test,18,0.9950366616249084,0.9999995231628418
llm_goals_401,test,19,0.9966199398040771,0.9713776109091996
llm_goals_401,test,20,0.9966316819190979,0.9999998807907032
llm_goals_401,test,21,0.9968159794807434,1.0
llm_goals_401,test,22,0.9969519376754761,0.9999998807907032
llm_goals_401,test,23,0.996164083480835,0.9999996423721385
llm_goals_401,test,24,0.9958341121673584,0.9999989867209838
llm_goals_401,test,25,0.9964712858200073,0.9999995231628418
llm_goals_401,test,26,0.9950645565986633,0.9999997615814208
llm_goals_401,test,27,0.99770587682724,0.9999994039535594
llm_goals_401,test,28,0.9971729516983032,0.9983195662475728
llm_goals_401,test,29,0.9968705773353577,0.9993320703468634
llm_goals_401,test,30,0.9969795942306519,0.9999997019767636
llm_goals_401,test,31,0.9957014918327332,0.9999998807907032
llm_goals_401,test,32,0.9969979524612427,0.9999989271164011
llm_goals_401,test,33,0.9961362481117249,0.9999992847442344
llm_goals_401,test,34,0.9961189031600952,0.9999994635581704
llm_goals_401,test,35,0.9957493543624878,0.9999998807907032
llm_goals_401,test,36,0.9960575103759766,0.9999997019767636
llm_goals_401,test,37,0.9968706965446472,0.8060779005212295
llm_goals_401,test,38,0.9962660670280457,0.9999966025353348
llm_goals_401,test,39,0.9959533214569092,0.9999998211860676
llm_goals_401,test,40,0.996659517288208,0.9999996423721385
llm_goals_401,test,41,0.9966914653778076,0.999999940395357
llm_goals_401,test,42,0.9966981410980225,0.9999996423721385
llm_goals_401,test,43,0.9965054988861084,0.999999940395357
llm_goals_401,test,44,0.9969779253005981,0.9999997615814208
llm_goals_401,test,45,0.9968258142471313,0.9992901682960044
llm_goals_401,test,46,0.9958477020263672,0.9999996423721385
llm_goals_401,test,47,0.99580317735672,0.9999059438703174
llm_goals_401,test,48,0.9959489107131958,0.9999988079071328
llm_goals_401,test,49,0.9973666071891785,0.999999344348895
llm_goals_326,test,0,0.9816784262657166,0.9998103380222648
llm_goals_326,test,1,0.9562320709228516,0.1015499721104398
llm_goals_326,test,2,0.9625237584114075,0.9995523691298543
llm_goals_326,test,3,0.9532651901245117,0.9820513126171384
llm_goals_326,test,4,0.9783061146736145,0.9997708797458836
llm_goals_326,test,5,0.9268292188644409,0.0041024868608885
llm_goals_326,test,6,0.9262264370918274,0.0013792452717851
llm_goals_326,test,7,0.9793380498886108,0.9970470071684427
llm_goals_326,test,8,0.9675837755203247,0.9994441866676194
llm_goals_326,test,9,0.8805633187294006,0.058725887838439
llm_goals_326,test,10,0.9581606388092041,0.9983887672460816
llm_goals_326,test,11,0.9724270701408386,0.9773504733838247
llm_goals_326,test,12,0.9745960235595703,0.5111671721675201
llm_goals_326,test,13,0.8693931102752686,0.0023213561119498
llm_goals_326,test,14,0.9774453639984131,0.9992487430849846
llm_goals_326,test,15,0.9828269481658936,0.9998496770786612
llm_goals_326,test,16,0.9767072200775146,0.9995788335870258
llm_goals_326,test,17,0.9681674838066101,0.0289194533041716
llm_goals_326,test,18,0.9631994366645813,0.9941717982892394
llm_goals_326,test,19,0.9595536589622498,0.9898712036276838
llm_goals_326,test,20,0.9734975695610046,0.998426973781836
llm_goals_326,test,21,0.8995441198348999,0.0080112452725568
llm_goals_326,test,22,0.9413581490516663,0.0170446490301255
llm_goals_326,test,23,0.9799767732620239,0.9315074100877992
llm_goals_326,test,24,0.9788609147071838,0.9996359944310432
llm_goals_326,test,25,0.9690476059913635,0.9993702173479914
llm_goals_326,test,26,0.9703907370567322,0.99954205751167
llm_goals_326,test,27,0.922756016254425,0.1439592978368308
llm_goals_326,test,28,0.9702001810073853,0.9782873980924596
llm_goals_326,test,29,0.9613676071166992,0.6406475907057047
llm_goals_326,test,30,0.9684116244316101,0.9997262358610556
llm_goals_326,test,31,0.9282740950584412,0.0025467661319066
llm_goals_326,test,32,0.9713271856307983,0.927265467234428
llm_goals_326,test,33,0.9815614819526672,0.9970411657688188
llm_goals_326,test,34,0.9682298898696899,0.9924188256928188
llm_goals_326,test,35,0.9643891453742981,0.9999433159814316
llm_goals_326,test,36,0.9794562458992004,0.7828563458619708
llm_goals_326,test,37,0.9635418653488159,0.9921779035200892
llm_goals_326,test,38,0.9766625165939331,0.9710912105847416
llm_goals_326,test,39,0.9702526926994324,0.9591411966937028
llm_goals_326,test,40,0.9496886134147644,0.9675343030198714
llm_goals_326,test,41,0.8706519603729248,0.0125675465450323
llm_goals_326,test,42,0.9806468486785889,0.9817813030107898
llm_goals_326,test,43,0.9109684228897095,0.0210821160744808
llm_goals_326,test,44,0.9819404482841492,0.9984526634039176
llm_goals_326,test,45,0.9756129384040833,0.7725256637892597
llm_goals_326,test,46,0.976373553276062,0.7800079567380194
llm_goals_326,test,47,0.9745410680770874,0.9949684144438216
llm_goals_326,test,48,0.9610031247138977,0.9985677004408368
llm_goals_326,test,49,0.9510464072227478,0.045807901396548
llm_goals_415,test,0,0.9640586376190186,0.9997588992066258
llm_goals_415,test,1,0.9052885174751282,0.9999479055404636
llm_goals_415,test,2,0.9346901774406433,0.9957069754348792
llm_goals_415,test,3,0.7240449786186218,0.9998519420664436
llm_goals_415,test,4,0.9115051031112671,0.9993222355665604
llm_goals_415,test,5,0.8491693139076233,0.0031238526915668
llm_goals_415,test,6,0.6857141852378845,0.0017953563962722
llm_goals_415,test,7,0.9554070830345154,0.9612127544780552
llm_goals_415,test,8,0.8909529447555542,0.9824366570655148
llm_goals_415,test,9,0.8863016963005066,0.0016438446411424
llm_goals_415,test,10,0.9021013379096985,0.9999850988388712
llm_goals_415,test,11,0.9542422294616699,0.9989992976042504
llm_goals_415,test,12,0.780139148235321,0.3845905785054697
llm_goals_415,test,13,0.7543106079101562,0.0001663988005876
llm_goals_415,test,14,0.9370611310005188,0.99998015165382
llm_goals_415,test,15,0.9666319489479065,0.9986550807653384
llm_goals_415,test,16,0.9007371068000793,0.9999568462370104
llm_goals_415,test,17,0.9446607232093811,0.9992712139839848
llm_goals_415,test,18,0.9486325979232788,0.9999991655349802
llm_goals_415,test,19,0.9021263122558594,0.1268431093615233
llm_goals_415,test,20,0.970140278339386,0.9989575147526004
llm_goals_415,test,21,0.7734661102294922,0.0009023478721852
llm_goals_415,test,22,0.9594117403030396,0.9999995231628418
llm_goals_415,test,23,0.9538750648498535,0.993058026033597
llm_goals_415,test,24,0.9535976648330688,0.9999659061423206
llm_goals_415,test,25,0.9039539694786072,0.9999040365210196
llm_goals_415,test,26,0.9509291052818298,0.9999980330466104
llm_goals_415,test,27,0.9041619300842285,0.9999982118606638
llm_goals_415,test,28,0.8490964770317078,0.9999243617070486
llm_goals_415,test,29,0.8219237327575684,0.0442403483495126
llm_goals_415,test,30,0.9114958047866821,0.9996343254989244
llm_goals_415,test,31,0.8777772188186646,0.0007251523135023
llm_goals_415,test,32,0.7847331166267395,0.8080266673914581
llm_goals_415,test,33,0.854121744632721,0.9999894499780476
llm_goals_415,test,34,0.9142054915428162,0.9994327426028377
llm_goals_415,test,35,0.9448233842849731,0.9989529847642252
llm_goals_415,test,36,0.9699577689170837,0.9999961853027344
llm_goals_415,test,37,0.9051578044891357,0.3947042465556775
llm_goals_415,test,38,0.9225307106971741,0.8703240197518964
llm_goals_415,test,39,0.9071926474571228,0.0273997177536015
llm_goals_415,test,40,0.941913902759552,0.9999445676815596
llm_goals_415,test,41,0.8935919404029846,0.0002682073682728
llm_goals_415,test,42,0.9740778207778931,0.9981696606070336
llm_goals_415,test,43,0.6224246621131897,0.2786076155583993
llm_goals_415,test,44,0.9730556607246399,0.9965623020600424
llm_goals_415,test,45,0.9132989645004272,0.9966514706305192
llm_goals_415,test,46,0.8373057842254639,0.9965545534402274
llm_goals_415,test,47,0.955289900302887,0.9999976754188272
llm_goals_415,test,48,0.95790696144104,0.9999974966048696
llm_goals_415,test,49,0.8929020762443542,0.9999994635581704
llm_goals_146,test,0,0.7910463213920593,0.9937213660464576
llm_goals_146,test,1,0.7670618295669556,0.9919615387432202
llm_goals_146,test,2,0.932327151298523,0.9951347707289916
llm_goals_146,test,3,0.8039190769195557,0.9912926556861676
llm_goals_146,test,4,0.7219217419624329,0.7208669260139297
llm_goals_146,test,5,0.703191876411438,0.0092084125425934
llm_goals_146,test,6,0.5864116549491882,0.1369906233450708
llm_goals_146,test,7,0.9421929121017456,0.9175609963819736
llm_goals_146,test,8,0.7830612063407898,0.0841403066718625
llm_goals_146,test,9,0.5921283960342407,0.1684153143767103
llm_goals_146,test,10,0.7859452962875366,0.908113835652124
llm_goals_146,test,11,0.8540011048316956,0.8938514016497744
llm_goals_146,test,12,0.788764476776123,0.0100032471486232
llm_goals_146,test,13,0.5358949303627014,0.0298375477746486
llm_goals_146,test,14,0.7932766079902649,0.3167174882579391
llm_goals_146,test,15,0.744251549243927,0.927414416219921
llm_goals_146,test,16,0.560673177242279,0.0159187313778433
llm_goals_146,test,17,0.8574996590614319,0.9978384970986696
llm_goals_146,test,18,0.7335546016693115,0.4101569026026918
llm_goals_146,test,19,0.8646697998046875,0.9986221193952473
llm_goals_146,test,20,0.662011981010437,0.7513560649612049
llm_goals_146,test,21,0.5127303600311279,0.0021027028134696
llm_goals_146,test,22,0.7030345797538757,0.9998958110811108
llm_goals_146,test,23,0.6920067667961121,0.7051541837582169
llm_goals_146,test,24,0.8393325209617615,0.0397261476166626
llm_goals_146,test,25,0.9061992764472961,0.8227276184379292
llm_goals_146,test,26,0.7642346024513245,0.2677446993446601
llm_goals_146,test,27,0.8732576370239258,0.99998730421031
llm_goals_146,test,28,0.8420307636260986,0.492199104386424
llm_goals_146,test,29,0.7517523765563965,0.987386405278924
llm_goals_146,test,30,0.9423622488975525,0.9782173041265074
llm_goals_146,test,31,0.6419367790222168,0.0704110414375715
llm_goals_146,test,32,0.8882023096084595,0.5230369446492051
llm_goals_146,test,33,0.7406551241874695,0.3253501541923078
llm_goals_146,test,34,0.8732749819755554,0.9673128124700084
llm_goals_146,test,35,0.7823246121406555,0.9997146725628192
llm_goals_146,test,36,0.9022085666656494,0.9323108220562318
llm_goals_146,test,37,0.8849445581436157,0.9997609853775884
llm_goals_146,test,38,0.8411788940429688,0.9964783192363952
llm_goals_146,test,39,0.7580084800720215,0.0664083062639811
llm_goals_146,test,40,0.6741892099380493,0.6598511913765792
llm_goals_146,test,41,0.5292597413063049,0.1032532555810967
llm_goals_146,test,42,0.8817193508148193,0.9252474342352108
llm_goals_146,test,43,0.6085236668586731,0.2905593299202337
llm_goals_146,test,44,0.9001889824867249,0.3807617993711434
llm_goals_146,test,45,0.8991692662239075,0.972134053996152
llm_goals_146,test,46,0.7331438064575195,0.1382936231589401
llm_goals_146,test,47,0.8331972360610962,0.9162934404085756
llm_goals_146,test,48,0.8269011378288269,0.2466934390927757
llm_goals_146,test,49,0.9061385989189148,0.9999247193370324
llm_goals_293,test,0,0.9982226490974426,0.9999973177909444
llm_goals_293,test,1,0.9979912042617798,0.9999983310699748
llm_goals_293,test,2,0.9982885718345642,0.9999885559082036
llm_goals_293,test,3,0.9980611205101013,0.9996450543386908
llm_goals_293,test,4,0.9979244470596313,0.9999996423721385
llm_goals_293,test,5,0.9979739785194397,0.9999819397920116
llm_goals_293,test,6,0.9979625940322876,0.9999997019767636
llm_goals_293,test,7,0.9980219602584839,0.999996066093452
llm_goals_293,test,8,0.9979389309883118,0.9999997615814208
llm_goals_293,test,9,0.9984315037727356,0.9999495744722752
llm_goals_293,test,10,0.9978346228599548,0.9999885559082036
llm_goals_293,test,11,0.9978042244911194,0.999996721744454
llm_goals_293,test,12,0.9980036616325378,0.9999988675117084
llm_goals_293,test,13,0.9983868598937988,0.9999983906746356
llm_goals_293,test,14,0.9981361627578735,0.9999996423721385
llm_goals_293,test,15,0.9980610013008118,0.9999997615814208
llm_goals_293,test,16,0.9981619715690613,0.9999994039535594
llm_goals_293,test,17,0.997757613658905,0.9999927282334016
llm_goals_293,test,18,0.9986100196838379,0.9999998211860676
llm_goals_293,test,19,0.9979628324508667,0.9999971985817542
llm_goals_293,test,20,0.9983030557632446,0.9999998211860676
llm_goals_293,test,21,0.9980093836784363,0.999999344348895
llm_goals_293,test,22,0.9984544515609741,0.999999225139634
llm_goals_293,test,23,0.997306227684021,0.9999982714653176
llm_goals_293,test,24,0.9977318644523621,0.9999997019767636
llm_goals_293,test,25,0.9977760910987854,0.9999955296516294
llm_goals_293,test,26,0.9980649352073669,0.9999997615814208
llm_goals_293,test,27,0.9982239603996277,0.9999994635581704
llm_goals_293,test,28,0.9979687333106995,0.9999988675117084
llm_goals_293,test,29,0.9974345564842224,0.9999997615814208
llm_goals_293,test,30,0.9981158971786499,0.9999886751170456
llm_goals_293,test,31,0.9980617165565491,0.9999983906746356
llm_goals_293,test,32,0.9979645013809204,0.9999979734420849
llm_goals_293,test,33,0.9983223080635071,0.9999989271164011
llm_goals_293,test,34,0.9977279305458069,0.9999950528144428
llm_goals_293,test,35,0.9974712133407593,0.9999990463256836
llm_goals_293,test,36,0.9982995390892029,0.9974341988179988
llm_goals_293,test,37,0.998162567615509,0.999947488307946
llm_goals_293,test,38,0.9977446794509888,0.9999994039535594
llm_goals_293,test,39,0.9978448152542114,0.9999996423721385
llm_goals_293,test,40,0.9983381032943726,0.9999990463256836
llm_goals_293,test,41,0.9983727931976318,0.9999995827674883
llm_goals_293,test,42,0.9981435537338257,0.999998450279186
llm_goals_293,test,43,0.9983634352684021,0.999937295915171
llm_goals_293,test,44,0.9985864162445068,0.9999994635581704
llm_goals_293,test,45,0.9983878135681152,0.9999981522560136
llm_goals_293,test,46,0.9977107048034668,0.999995648860848
llm_goals_293,test,47,0.998608410358429,0.9999973177909444
llm_goals_293,test,48,0.9982940554618835,0.9999997019767636
llm_goals_293,test,49,0.9983215928077698,0.9999975562095944
llm_goals_230,test,0,0.9995449185371399,0.9999988675117084
llm_goals_230,test,1,0.9994792342185974,0.9999197721505853
llm_goals_230,test,2,0.9995180368423462,0.9999986290932004
llm_goals_230,test,3,0.9995125532150269,0.9999998807907032
llm_goals_230,test,4,0.9995115995407104,0.9999992847442344
llm_goals_230,test,5,0.9994857311248779,0.97145622955703
llm_goals_230,test,6,0.9994459748268127,0.9931073785733624
llm_goals_230,test,7,0.9995213747024536,0.9999989271164011
llm_goals_230,test,8,0.9995344877243042,0.9999994039535594
llm_goals_230,test,9,0.9995195865631104,0.999468743814106
llm_goals_230,test,10,0.9994753003120422,0.9999828934677488
llm_goals_230,test,11,0.9996103644371033,0.9999996423721385
llm_goals_230,test,12,0.9996077418327332,0.999997794628074
llm_goals_230,test,13,0.9995054006576538,0.9983894825119092
llm_goals_230,test,14,0.9994885921478271,0.9999980926513672
llm_goals_230,test,15,0.9995263814926147,0.9999994039535594
llm_goals_230,test,16,0.9994733929634094,0.99999570846569
llm_goals_230,test,17,0.9994983673095703,0.9997287988607096
llm_goals_230,test,18,0.9994939565658569,0.9999986290932004
llm_goals_230,test,19,0.9995039701461792,0.792201278640246
llm_goals_230,test,20,0.9995585083961487,0.9999989271164011
llm_goals_230,test,21,0.9995052814483643,0.9993889927972028
llm_goals_230,test,22,0.999498724937439,0.9999741315840668
llm_goals_230,test,23,0.9995707869529724,0.9999991655349802
llm_goals_230,test,24,0.9995044469833374,0.9999988675117084
llm_goals_230,test,25,0.9994133710861206,0.9999902248378308
llm_goals_230,test,26,0.9994935989379883,0.9999990463256836
llm_goals_230,test,27,0.9994655251502991,0.9999489188180812
llm_goals_230,test,28,0.9995318651199341,0.9999998211860676
llm_goals_230,test,29,0.9994716048240662,0.9489630473854872
llm_goals_230,test,30,0.9994383454322815,0.9999970197676546
llm_goals_230,test,31,0.999518871307373,0.997134745101166
llm_goals_230,test,32,0.9995865225791931,0.9999995231628418
llm_goals_230,test,33,0.9994599223136902,0.999997437000376
llm_goals_230,test,34,0.9994704127311707,0.9999924898147656
llm_goals_230,test,35,0.999445378780365,0.9999969005585
llm_goals_230,test,36,0.999495267868042,0.9999945163727092
llm_goals_230,test,37,0.9995313882827759,0.9779089689145934
llm_goals_230,test,38,0.9995037317276001,0.9570343502785515
llm_goals_230,test,39,0.9996294975280762,0.9999988079071328
llm_goals_230,test,40,0.9996063113212585,0.9999989271164011
llm_goals_230,test,41,0.99949049949646,0.99562537655113
llm_goals_230,test,42,0.9995137453079224,0.999997794628074
llm_goals_230,test,43,0.9995105266571045,0.9952926635719378
llm_goals_230,test,44,0.9995430707931519,0.9999994635581704
llm_goals_230,test,45,0.999549925327301,0.9999998211860676
llm_goals_230,test,46,0.9995431900024414,0.9999992847442344
llm_goals_230,test,47,0.9995723366737366,0.9999998211860676
llm_goals_230,test,48,0.9994969367980957,0.9999990463256836
llm_goals_230,test,49,0.9995015859603882,0.9998905062670348
llm_goals_115,test,0,0.9995061159133911,0.9999962449073808
llm_goals_115,test,1,0.9994931221008301,0.9999990463256836
llm_goals_115,test,2,0.9996102452278137,0.9999997019767636
llm_goals_115,test,3,0.999471127986908,0.9999576807027534
llm_goals_115,test,4,0.9995810389518738,0.999958157538035
llm_goals_115,test,5,0.9994730353355408,0.999957859516412
llm_goals_115,test,6,0.9995206594467163,0.9999822974207612
llm_goals_115,test,7,0.9994937181472778,0.9999992847442344
llm_goals_115,test,8,0.999460756778717,0.9999833106986686
llm_goals_115,test,9,0.9994326233863831,0.9999994635581704
llm_goals_115,test,10,0.9995684027671814,0.9999232888251398
llm_goals_115,test,11,0.9994169473648071,0.1735883698655277
llm_goals_115,test,12,0.9995926022529602,0.9999995231628418
llm_goals_115,test,13,0.999476969242096,0.999999344348895
llm_goals_115,test,14,0.9996194839477539,0.999998450279186
llm_goals_115,test,15,0.9994727969169617,0.9952644706725092
llm_goals_115,test,16,0.9996507167816162,0.9999927878380692
llm_goals_115,test,17,0.999404788017273,0.999998509883854
llm_goals_115,test,18,0.9996479749679565,0.9999974966048696
llm_goals_115,test,19,0.9994394183158875,0.99996823072331
llm_goals_115,test,20,0.9995126724243164,0.9999916553499876
llm_goals_115,test,21,0.9993845224380493,0.999999344348895
llm_goals_115,test,22,0.9995735287666321,0.9999989867209838
llm_goals_115,test,23,0.9994681477546692,0.9994583130121676
llm_goals_115,test,24,0.9995555281639099,0.9999986290932004
llm_goals_115,test,25,0.999530553817749,0.9999879002575536
llm_goals_115,test,26,0.9995948672294617,0.9999988079071328
llm_goals_115,test,27,0.9994651675224304,0.9999988079071328
llm_goals_115,test,28,0.9994495511054993,0.0502507498981832
llm_goals_115,test,29,0.9994754195213318,0.999960899353502
llm_goals_115,test,30,0.999519944190979,0.9999914169306978
llm_goals_115,test,31,0.9994506239891052,0.9999992847442344
llm_goals_115,test,32,0.9994701743125916,0.9999759793274372
llm_goals_115,test,33,0.9996048808097839,0.9999992847442344
llm_goals_115,test,34,0.9994788765907288,0.9999864101405658
llm_goals_115,test,35,0.999510645866394,0.999999344348895
llm_goals_115,test,36,0.9994699358940125,0.999999344348895
llm_goals_115,test,37,0.9994282126426697,0.9999979734420849
llm_goals_115,test,38,0.9995197057723999,0.9999599456769136
llm_goals_115,test,39,0.9994499087333679,0.999999344348895
llm_goals_115,test,40,0.9995629191398621,0.9998471736847392
llm_goals_115,test,41,0.9995825886726379,0.9999997615814208
llm_goals_115,test,42,0.9996122717857361,0.9999936819076042
llm_goals_115,test,43,0.9995631575584412,0.9999994039535594
llm_goals_115,test,44,0.9994903802871704,0.9999966621397788
llm_goals_115,test,45,0.9994346499443054,0.9908538461901172
llm_goals_115,test,46,0.9994843006134033,0.5668339122107139
llm_goals_115,test,47,0.9994339346885681,0.999965012074428
llm_goals_115,test,48,0.9996761083602905,0.9999986290932004
llm_goals_115,test,49,0.9995599389076233,0.9999966025353348
llm_goals_86,test,0,0.9690098762512207,0.9999950528144428
llm_goals_86,test,1,0.9435622692108154,0.9733920102032824
llm_goals_86,test,2,0.9635230302810669,0.9999997019767636
llm_goals_86,test,3,0.929486870765686,0.9999989867209838
llm_goals_86,test,4,0.9492253065109253,0.9999939203260696
llm_goals_86,test,5,0.950150191783905,0.9799200289999128
llm_goals_86,test,6,0.9455587863922119,0.9999989271164011
llm_goals_86,test,7,0.9810903668403625,0.9999911785123744
llm_goals_86,test,8,0.9565380215644836,0.9906920788388957
llm_goals_86,test,9,0.9451910257339478,0.960079549439269
llm_goals_86,test,10,0.967102587223053,0.9999979734420849
llm_goals_86,test,11,0.9201869368553162,0.998329818271824
llm_goals_86,test,12,0.8924910426139832,0.9999976158140952
llm_goals_86,test,13,0.968529999256134,0.997893571823832
llm_goals_86,test,14,0.9208453297615051,0.9994581937511068
llm_goals_86,test,15,0.9242613315582275,0.9993743300463472
llm_goals_86,test,16,0.8814720511436462,0.9959942101456876
llm_goals_86,test,17,0.9609895944595337,0.9989233016552924
llm_goals_86,test,18,0.9663017988204956,0.999989688396868
llm_goals_86,test,19,0.9713343977928162,0.99977147578688
llm_goals_86,test,20,0.9548009634017944,0.999986886977724
llm_goals_86,test,21,0.9666339755058289,0.9995875954544448
llm_goals_86,test,22,0.9671191573143005,0.9998529553413918
llm_goals_86,test,23,0.9553678035736084,0.9999995827674883
llm_goals_86,test,24,0.9435662627220154,0.98040735685252
llm_goals_86,test,25,0.9173097014427185,0.9999257922204762
llm_goals_86,test,26,0.9495940208435059,0.9992931485112122
llm_goals_86,test,27,0.9284021258354187,0.999970734120207
llm_goals_86,test,28,0.9239625334739685,0.9998175501809444
llm_goals_86,test,29,0.9090359210968018,0.9995185136816022
llm_goals_86,test,30,0.9698707461357117,0.9807010897549824
llm_goals_86,test,31,0.9612546563148499,0.9998762011476526
llm_goals_86,test,32,0.9589423537254333,0.9988895059174044
llm_goals_86,test,33,0.9750165939331055,0.9999738931662266
llm_goals_86,test,34,0.9720157980918884,0.9977411627147142
llm_goals_86,test,35,0.9725225567817688,0.9999870657920578
llm_goals_86,test,36,0.9520710706710815,0.9994595050999702
llm_goals_86,test,37,0.9784383773803711,0.9993578791799612
llm_goals_86,test,38,0.9405047297477722,0.9999866485595712
llm_goals_86,test,39,0.9439506530761719,0.99997985362928
llm_goals_86,test,40,0.9604548811912537,0.999995470046798
llm_goals_86,test,41,0.9749728441238403,0.99996912479408
llm_goals_86,test,42,0.9709283113479614,0.9999997615814208
llm_goals_86,test,43,0.9486064910888672,0.9999995827674883
llm_goals_86,test,44,0.9636296629905701,0.997135043199176
llm_goals_86,test,45,0.933147132396698,0.9999841451646688
llm_goals_86,test,46,0.9228881001472473,0.3364333057002893
llm_goals_86,test,47,0.9632688164710999,0.9991943836221492
llm_goals_86,test,48,0.9520831108093262,0.9999958872793276
llm_goals_86,test,49,0.9693968892097473,0.999995648860848
llm_goals_281,test,0,0.9409029483795166,0.9996947049971306
llm_goals_281,test,1,0.9473555684089661,0.9999986290932004
llm_goals_281,test,2,0.8547940850257874,0.9983671307551129
llm_goals_281,test,3,0.9625587463378906,0.993962764832135
llm_goals_281,test,4,0.9595555067062378,0.9999989271164011
llm_goals_281,test,5,0.9725999236106873,0.9999997019767636
llm_goals_281,test,6,0.9755145907402039,0.9999997019767636
llm_goals_281,test,7,0.9151453971862793,0.973559082090764
llm_goals_281,test,8,0.9524955153465271,0.9999968409538144
llm_goals_281,test,9,0.987725555896759,0.9999994635581704
llm_goals_281,test,10,0.9676972031593323,0.9999997019767636
llm_goals_281,test,11,0.9885538816452026,0.9999951720236044
llm_goals_281,test,12,0.9750565886497498,0.9999264478667818
llm_goals_281,test,13,0.9473578333854675,0.9999998807907032
llm_goals_281,test,14,0.9664015769958496,0.9999976158140952
llm_goals_281,test,15,0.9513174891471863,0.9999992847442344
llm_goals_281,test,16,0.9620725512504578,0.9999986886977652
llm_goals_281,test,17,0.9624820947647095,0.9999985694885254
llm_goals_281,test,18,0.9796006083488464,0.999999344348895
llm_goals_281,test,19,0.8972314596176147,2.503036581908297e-05
llm_goals_281,test,20,0.9529803991317749,0.999996542930667
llm_goals_281,test,21,0.9804546236991882,0.9999997615814208
llm_goals_281,test,22,0.9738683700561523,0.9999986886977652
llm_goals_281,test,23,0.9676529765129089,0.999999344348895
llm_goals_281,test,24,0.9756947755813599,0.9999715089800396
llm_goals_281,test,25,0.9648792147636414,0.9999983310699748
llm_goals_281,test,26,0.9670411348342896,0.9999986290932004
llm_goals_281,test,27,0.9487625360488892,0.9999990463256836
llm_goals_281,test,28,0.9727137088775635,0.8906972993247301
llm_goals_281,test,29,0.9576278924942017,7.346078924151352e-05
llm_goals_281,test,30,0.9811382293701172,0.9999995827674883
llm_goals_281,test,31,0.964154839515686,0.9999997019767636
llm_goals_281,test,32,0.9745395183563232,0.99999910593033
llm_goals_281,test,33,0.9520201086997986,0.9999994039535594
llm_goals_281,test,34,0.9726002216339111,0.9999989867209838
llm_goals_281,test,35,0.9345085024833679,0.9934774041946208
llm_goals_281,test,36,0.9726191759109497,0.9999990463256836
llm_goals_281,test,37,0.940684974193573,3.4259730994143734e-05
llm_goals_281,test,38,0.963934063911438,0.3148602040230503
llm_goals_281,test,39,0.9848390817642212,0.9940564630440718
llm_goals_281,test,40,0.9706920385360718,0.999999344348895
llm_goals_281,test,41,0.9634602069854736,0.9999998211860676
llm_goals_281,test,42,0.858941376209259,0.999860048294782
llm_goals_281,test,43,0.9692313075065613,0.9999997019767636
llm_goals_281,test,44,0.9373846054077148,0.9999992847442344
llm_goals_281,test,45,0.9489147067070007,0.9728845352464304
llm_goals_281,test,46,0.968168318271637,0.9999996423721385
llm_goals_281,test,47,0.9243931174278259,0.999986290931994
llm_goals_281,test,48,0.9692358374595642,0.9999994039535594
llm_goals_281,test,49,0.9722780585289001,0.999999344348895
llm_goals_182,test,0,0.7071959376335144,0.9999994039535594
llm_goals_182,test,1,0.7512170672416687,0.0005625917030918
llm_goals_182,test,2,0.9110251069068909,0.9999986886977652
llm_goals_182,test,3,0.9633567333221436,7.296223953885346e-06
llm_goals_182,test,4,0.8919140696525574,0.9999976158140952
llm_goals_182,test,5,0.5842877626419067,0.003103044503552
llm_goals_182,test,6,0.6914395093917847,0.0119719938396825
llm_goals_182,test,7,0.9444110989570618,0.9999994635581704
llm_goals_182,test,8,0.8441641926765442,0.9999964833260028
llm_goals_182,test,9,0.6216973066329956,0.0113900113927541
llm_goals_182,test,10,0.8647800087928772,0.9961416126209276
llm_goals_182,test,11,0.8888288140296936,0.999994337558862
llm_goals_182,test,12,0.7280411720275879,0.0378442217787105
llm_goals_182,test,13,0.67767733335495,0.0008868430627717
llm_goals_182,test,14,0.5316920876502991,0.0008985942545946
llm_goals_182,test,15,0.6947988867759705,0.9999988675117084
llm_goals_182,test,16,0.5808837413787842,0.0033689156613595
llm_goals_182,test,17,0.8416361808776855,0.0006440135974187
llm_goals_182,test,18,0.4186379909515381,0.0016946532068186
llm_goals_182,test,19,0.9365761280059814,0.9999406933800118
llm_goals_182,test,20,0.743878960609436,0.99999910593033
llm_goals_182,test,21,0.7021141648292542,0.006514652424738
llm_goals_182,test,22,0.5930135846138,0.0008693644862334
llm_goals_182,test,23,0.7066630125045776,0.9999617934213552
llm_goals_182,test,24,0.7281787991523743,0.0038374716596311
llm_goals_182,test,25,0.8575724363327026,0.9941936732536844
llm_goals_182,test,26,0.41924798488616943,0.001039830413279
llm_goals_182,test,27,0.9554086923599243,0.0007135811001391
llm_goals_182,test,28,0.9113317131996155,7.789882184923891e-06
llm_goals_182,test,29,0.8803845643997192,0.999957442283379
llm_goals_182,test,30,0.8868682980537415,0.9981520175441102
llm_goals_182,test,31,0.5266143083572388,0.0117830754540077
llm_goals_182,test,32,0.9077223539352417,0.9999659657485728
llm_goals_182,test,33,0.49704697728157043,0.003031636673792
llm_goals_182,test,34,0.8667479157447815,0.9774106140350124
llm_goals_182,test,35,0.8673832416534424,0.9999996423721385
llm_goals_182,test,36,0.7967185378074646,0.9979310631755344
llm_goals_182,test,37,0.9417948126792908,0.9998408556030968
llm_goals_182,test,38,0.8345458507537842,0.999994218349345
llm_goals_182,test,39,0.8051294088363647,0.1486108277942357
llm_goals_182,test,40,0.8578200936317444,0.9999673962596765
llm_goals_182,test,41,0.5357424020767212,0.0004129864967066
llm_goals_182,test,42,0.8611788749694824,0.9999995231628418
llm_goals_182,test,43,0.48262813687324524,0.0046706145770988
llm_goals_182,test,44,0.7416566014289856,0.9999978542328164
llm_goals_182,test,45,0.7758277654647827,1.853969397415038e-05
llm_goals_182,test,46,0.562261700630188,0.9999624490730772
llm_goals_182,test,47,0.8366743326187134,2.5724993639382006e-05
llm_goals_182,test,48,0.54776930809021,0.0005832931102604
llm_goals_182,test,49,0.6239055395126343,0.0013167466449808
llm_goals_438,test,0,0.9420803189277649,0.0127917126432779
llm_goals_438,test,1,0.9427915811538696,0.9999138712868518
llm_goals_438,test,2,0.9619620442390442,0.001266465311242
llm_goals_438,test,3,0.9420238137245178,0.9997649192827218
llm_goals_438,test,4,0.9663552641868591,0.995094418582848
llm_goals_438,test,5,0.9607886075973511,0.9676808127608026
llm_goals_438,test,6,0.9522190690040588,0.9681100847526048
llm_goals_438,test,7,0.9489927291870117,0.1433627923531326
llm_goals_438,test,8,0.9642730951309204,0.9999272823362588
llm_goals_438,test,9,0.9505143165588379,0.3346369990700403
llm_goals_438,test,10,0.9144012331962585,0.0032511657787224
llm_goals_438,test,11,0.9635599255561829,0.9991979598760046
llm_goals_438,test,12,0.960199236869812,0.9979441166238928
llm_goals_438,test,13,0.9481651782989502,0.1369715342565285
llm_goals_438,test,14,0.9750081300735474,0.9999860525130736
llm_goals_438,test,15,0.9641981720924377,0.9995588660290228
llm_goals_438,test,16,0.9688103199005127,0.9999721646308564
llm_goals_438,test,17,0.9551664590835571,0.9999966025353348
llm_goals_438,test,18,0.9698996543884277,0.9999983906746356
llm_goals_438,test,19,0.9579171538352966,0.9999996423721385
llm_goals_438,test,20,0.9733491539955139,0.9997635483790388
llm_goals_438,test,21,0.9571953415870667,0.9481995120869072
llm_goals_438,test,22,0.9496753811836243,0.9999990463256836
llm_goals_438,test,23,0.9648785591125488,0.9995222687768196
llm_goals_438,test,24,0.9732495546340942,0.999998450279186
llm_goals_438,test,25,0.9248873591423035,0.0119505141994216
llm_goals_438,test,26,0.963257372379303,0.9546977279083356
llm_goals_438,test,27,0.948930561542511,0.9999922513961864
llm_goals_438,test,28,0.9355378746986389,0.9999902844428042
llm_goals_438,test,29,0.9653830528259277,0.9999998211860676
llm_goals_438,test,30,0.9294440746307373,0.0012244399528321
llm_goals_438,test,31,0.9522872567176819,0.9598621719385948
llm_goals_438,test,32,0.9675154089927673,0.9979359506472824
llm_goals_438,test,33,0.9656546115875244,0.9999122619631158
llm_goals_438,test,34,0.9321597218513489,0.0033786082834978
llm_goals_438,test,35,0.9468384385108948,0.7030742694516781
llm_goals_438,test,36,0.9346126914024353,0.0009362307836721
llm_goals_438,test,37,0.9468345046043396,0.9999994635581704
llm_goals_438,test,38,0.9606755971908569,0.9999997019767636
llm_goals_438,test,39,0.9447080492973328,0.0899507873745254
llm_goals_438,test,40,0.9652808308601379,0.8368337146398075
llm_goals_438,test,41,0.9544281363487244,0.9687532202387088
llm_goals_438,test,42,0.9645136594772339,0.001642352875252
llm_goals_438,test,43,0.9683409333229065,0.994022190528956
llm_goals_438,test,44,0.9749013781547546,0.999978065491181
llm_goals_438,test,45,0.9551152586936951,0.9999783635133692
llm_goals_438,test,46,0.9713811874389648,0.9999961853027344
llm_goals_438,test,47,0.9185903072357178,0.9998934864975054
llm_goals_438,test,48,0.9718297719955444,0.9999979734420849
llm_goals_438,test,49,0.9652495980262756,0.9999861717225268
llm_goals_206,test,0,0.9276711344718933,0.9991406798194884
llm_goals_206,test,1,0.31283771991729736,0.7026181814697341
llm_goals_206,test,2,0.34188175201416016,0.9495592135330754
llm_goals_206,test,3,0.05604037269949913,0.4597382924578109
llm_goals_206,test,4,0.0465015210211277,0.456834747394902
llm_goals_206,test,5,0.01946866326034069,0.0069897250631233
llm_goals_206,test,6,0.06848089396953583,0.1198500283021958
llm_goals_206,test,7,0.29466381669044495,0.9853912589369151
llm_goals_206,test,8,0.4538010060787201,0.9858354928324288
llm_goals_206,test,9,0.3058602809906006,0.7676771375411302
llm_goals_206,test,10,0.08701767772436142,6.0325856974144665e-05
llm_goals_206,test,11,0.7203238010406494,0.9932408335154868
llm_goals_206,test,12,0.04008927568793297,0.87151349073003
llm_goals_206,test,13,0.03458315134048462,4.2185384445727474e-05
llm_goals_206,test,14,0.6880357265472412,0.8643435780923203
llm_goals_206,test,15,0.17797231674194336,0.3192446879498265
llm_goals_206,test,16,0.04106733202934265,0.8269734919284588
llm_goals_206,test,17,0.7960613369941711,0.9885780813553218
llm_goals_206,test,18,0.12603668868541718,0.004661507854032
llm_goals_206,test,19,0.019180869683623314,2.09967085772139e-05
llm_goals_206,test,20,0.47979119420051575,0.98699533957105
llm_goals_206,test,21,0.7700930833816528,0.9343360672856648
llm_goals_206,test,22,0.22468039393424988,0.0175621316229822
llm_goals_206,test,23,0.09285406768321991,0.8257364630087937
llm_goals_206,test,24,0.029568418860435486,0.9999812841420614
llm_goals_206,test,25,0.22516489028930664,0.5058550948948065
llm_goals_206,test,26,0.43718063831329346,0.993140757045314
llm_goals_206,test,27,0.0650705024600029,2.2270664938206257e-05
llm_goals_206,test,28,0.33317169547080994,0.950193582732028
llm_goals_206,test,29,0.15095457434654236,0.9696497918757307
llm_goals_206,test,30,0.221140518784523,0.0026329471014817
llm_goals_206,test,31,0.7090089321136475,0.892615734505986
llm_goals_206,test,32,0.22682909667491913,0.6161747460126132
llm_goals_206,test,33,0.045906566083431244,0.0330446953951764
llm_goals_206,test,34,0.1472313106060028,0.9667759539880174
llm_goals_206,test,35,0.2863011956214905,0.9067099130837172
llm_goals_206,test,36,0.9637443423271179,0.9999364614518882
llm_goals_206,test,37,0.013237282633781433,3.930535533630151e-05
llm_goals_206,test,38,0.17886574566364288,0.8118365412646018
llm_goals_206,test,39,0.03801997750997543,0.6533351590647749
llm_goals_206,test,40,0.05122949182987213,1.0367098236044126e-05
llm_goals_206,test,41,0.03918192535638809,0.9999849200248888
llm_goals_206,test,42,0.7236259579658508,0.9495081916810176
llm_goals_206,test,43,0.597007691860199,0.967551113836368
llm_goals_206,test,44,0.1434638351202011,0.0156578839862262
llm_goals_206,test,45,0.018820490688085556,0.0016468393065651
llm_goals_206,test,46,0.08096805959939957,0.0561833196086265
llm_goals_206,test,47,0.0470975786447525,8.951690490961762e-05
llm_goals_206,test,48,0.06949333101511002,0.3268218248822932
llm_goals_206,test,49,0.06363601237535477,0.0047424109160671
