template_id,split,question_idx,prediction,label
llm_goals_236,test,0,0.9999997218449826,0.9999990463256836
llm_goals_236,test,1,0.7686249077969814,0.9998601079056368
llm_goals_236,test,2,0.9999991854031697,0.9999994039535594
llm_goals_236,test,3,0.9999317924164494,0.999998450279186
llm_goals_236,test,4,0.9999824364973996,0.9999971389770508
llm_goals_236,test,5,0.999999602635698,0.9999998211860676
llm_goals_236,test,6,0.9999996423721301,0.9999996423721385
llm_goals_236,test,7,0.9999997019767636,0.999999344348895
llm_goals_236,test,8,0.9999846418693196,0.9999988675117084
llm_goals_236,test,9,0.9999997218449922,0.9999998211860676
llm_goals_236,test,10,0.9801119367392678,0.9942808746609532
llm_goals_236,test,11,0.6765159056607467,0.9998060464816904
llm_goals_236,test,12,0.9999958674112719,0.9999784827237244
llm_goals_236,test,13,0.9999996423721301,0.9999998211860676
llm_goals_236,test,14,0.36166097102323613,0.9999877214430704
llm_goals_236,test,15,0.6780446291062799,0.9999918937684243
llm_goals_236,test,16,0.999943574271095,0.9998958706868224
llm_goals_236,test,17,0.6897193389521501,0.9999055266407578
llm_goals_236,test,18,0.6902291720111279,0.9999948143957074
llm_goals_236,test,19,0.9617385858940602,0.0724598805251418
llm_goals_236,test,20,0.5180971682161543,0.9999921321869182
llm_goals_236,test,21,0.999999602635698,0.9999992847442344
llm_goals_236,test,22,0.6792342877565357,0.9997341632930924
llm_goals_236,test,23,0.9999996026356955,0.9998326301518724
llm_goals_236,test,24,0.4708625672275096,0.999985396862246
llm_goals_236,test,25,0.980394760683796,0.9778256407805812
llm_goals_236,test,26,0.9999658266703305,0.9999951124192492
llm_goals_236,test,27,0.6897611019397677,0.9285253300141496
llm_goals_236,test,28,0.6897586382809892,0.9999996423721385
llm_goals_236,test,29,0.9999973177910165,0.0552585651110835
llm_goals_236,test,30,0.6056859841441908,0.9193987256529454
llm_goals_236,test,31,0.7230391978404672,0.9999983310699748
llm_goals_236,test,32,0.6676852245996706,0.999973952771136
llm_goals_236,test,33,0.9999738335612415,0.9999884366984664
llm_goals_236,test,34,0.9804513054976316,0.9757304783109252
llm_goals_236,test,35,0.999999642372123,0.9999988675117084
llm_goals_236,test,36,0.9997750719480232,0.9998944997781798
llm_goals_236,test,37,0.9999882578849691,0.1365288892720467
llm_goals_236,test,38,0.6782692988756046,0.719762510833872
llm_goals_236,test,39,0.9999957879385389,0.9999720454212542
llm_goals_236,test,40,0.6902076348663596,0.9984533787155216
llm_goals_236,test,41,0.9999996622403385,0.9999998211860676
llm_goals_236,test,42,0.999999642372123,0.9999989271164011
llm_goals_236,test,43,0.999999602635698,0.9999982714653176
llm_goals_236,test,44,0.3770907341924316,0.9999991655349802
llm_goals_236,test,45,0.6731801877650812,0.9999994635581704
llm_goals_236,test,46,0.9999990661939039,0.9999888539316992
llm_goals_236,test,47,0.7533063757688573,0.9999989271164011
llm_goals_236,test,48,0.9999738335612415,0.9999687075599843
llm_goals_236,test,49,0.3725271841291767,0.9976178406916212
llm_goals_228,test,0,0.6742420491393332,0.9999923110003712
llm_goals_228,test,1,0.5838137527875146,0.9997239112788898
llm_goals_228,test,2,0.8679854498346574,0.9999983310699748
llm_goals_228,test,3,0.48492159289916276,0.3235730208667373
llm_goals_228,test,4,0.6612242325277776,0.9999561309796544
llm_goals_228,test,5,0.10834928375531512,0.0500382433432339
llm_goals_228,test,6,0.0007678957764689333,0.0325581693591018
llm_goals_228,test,7,0.3777935996022718,0.9999955296516294
llm_goals_228,test,8,0.9999482433003148,0.9999945759774836
llm_goals_228,test,9,0.6384313468929218,0.0028912413507839
llm_goals_228,test,10,0.6741657890910812,0.999972343445286
llm_goals_228,test,11,0.26470759006855893,0.2356525274624049
llm_goals_228,test,12,0.9660150602474272,0.7824152121811281
llm_goals_228,test,13,0.6670732260059885,9.272773905172216e-05
llm_goals_228,test,14,0.9921451212678335,0.999985396862246
llm_goals_228,test,15,0.9605088432458988,0.9997810721409376
llm_goals_228,test,16,0.999955952168703,0.9999924302101154
llm_goals_228,test,17,0.43772467113582475,0.9964962600884054
llm_goals_228,test,18,0.9999947746593013,0.9999846816053862
llm_goals_228,test,19,0.992125729890097,0.80150937475593
llm_goals_228,test,20,0.5810433764885421,0.9999823570243526
llm_goals_228,test,21,0.9978833993252515,0.7620198101590071
llm_goals_228,test,22,0.9148393511175364,0.999146759521554
llm_goals_228,test,23,0.038269071454662434,0.9753957978718134
llm_goals_228,test,24,0.3401763422503421,0.9999912381174048
llm_goals_228,test,25,0.021365194027802636,0.9997735619549886
llm_goals_228,test,26,0.3739949193347958,0.999994218349345
llm_goals_228,test,27,0.999975800513966,0.9975230096939942
llm_goals_228,test,28,0.6970161335420729,0.0282825634665972
llm_goals_228,test,29,0.95344138107669,0.989357948515744
llm_goals_228,test,30,0.0011606035933157,0.9999648928656788
llm_goals_228,test,31,0.7880997739310344,0.1841044242752227
llm_goals_228,test,32,0.7066598855863128,0.6401162870682584
llm_goals_228,test,33,0.9999873240790063,0.999995231628418
llm_goals_228,test,34,0.3371477586324055,0.9999182820314436
llm_goals_228,test,35,0.9764386804651718,0.999788165093598
llm_goals_228,test,36,0.3352179028388691,0.9875653984316132
llm_goals_228,test,37,0.4660566254644638,0.99879342314863
llm_goals_228,test,38,0.5891618132990932,0.9949926138514864
llm_goals_228,test,39,0.9497244957821017,0.9871301051093162
llm_goals_228,test,40,0.7973057480357909,0.9374855739864648
llm_goals_228,test,41,0.915079597581172,0.768526607723976
llm_goals_228,test,42,0.7479652381532276,0.9999880194663172
llm_goals_228,test,43,0.6672987016528377,0.5863317957282166
llm_goals_228,test,44,0.9998622337946542,0.9999881386760048
llm_goals_228,test,45,0.44381181493345007,0.8984286803691676
llm_goals_228,test,46,0.6971975887267852,0.012311546088274
llm_goals_228,test,47,0.33287014023368805,0.0766497555033863
llm_goals_228,test,48,0.3410968136400348,0.9999973773956584
llm_goals_228,test,49,0.9977276126160461,0.939641477083989
llm_goals_397,test,0,0.9998683929419,0.9814099073764808
llm_goals_397,test,1,0.3506108611693988,0.9999912381174048
llm_goals_397,test,2,0.3651538482406531,0.9999979138373352
llm_goals_397,test,3,0.9998750686664364,0.9999809861190028
llm_goals_397,test,4,0.18879112577241894,0.0255301341302224
llm_goals_397,test,5,0.39076724229647297,0.9999864101405658
llm_goals_397,test,6,0.6670086340534933,0.9998381137785032
llm_goals_397,test,7,0.6855433461979588,0.99997174739913
llm_goals_397,test,8,0.035537070358650766,0.130829360211786
llm_goals_397,test,9,0.0757604183688803,0.9999025464035022
llm_goals_397,test,10,0.0289307174082892,0.9995446801111892
llm_goals_397,test,11,0.0146495813265422,0.9833766826787343
llm_goals_397,test,12,0.3470905964944668,0.999996542930667
llm_goals_397,test,13,0.05283105050733053,0.999946773053646
llm_goals_397,test,14,0.37702109299358516,0.9999846816053862
llm_goals_397,test,15,0.23215380325583715,0.0066017277153082
llm_goals_397,test,16,0.02926010683234953,0.9999932646750854
llm_goals_397,test,17,0.9010398555605974,0.9999726414675878
llm_goals_397,test,18,0.9999789396923453,0.9999631643313146
llm_goals_397,test,19,0.30685019356977067,0.9990255832870808
llm_goals_397,test,20,0.3915867806992919,0.0470702684705779
llm_goals_397,test,21,0.9999616543446139,0.999989688396868
llm_goals_397,test,22,0.0362013551868492,0.9999497532844842
llm_goals_397,test,23,0.19906585149972592,0.9997248649522246
llm_goals_397,test,24,0.9999838074047765,0.9999954104424252
llm_goals_397,test,25,0.36539948054685584,0.9892253874904372
llm_goals_397,test,26,0.023704287066036066,0.9999661445600752
llm_goals_397,test,27,0.3904280694592637,0.999984502792388
llm_goals_397,test,28,0.344160389283312,0.9999876022334326
llm_goals_397,test,29,0.6766505137422391,0.99916046856021
llm_goals_397,test,30,0.4253578774034239,0.9993026852479712
llm_goals_397,test,31,0.9999616543446139,0.999975025654656
llm_goals_397,test,32,0.07908885292039601,0.9996371865271196
llm_goals_397,test,33,0.1929797082084582,0.9999744892118428
llm_goals_397,test,34,0.7277814630973404,0.999513149271868
llm_goals_397,test,35,0.9999848405520009,0.9999858140942104
llm_goals_397,test,36,0.6452715559718049,0.9909363981874668
llm_goals_397,test,37,0.35822251957809925,0.9967403410768448
llm_goals_397,test,38,0.9150522944011487,0.9700924159576454
llm_goals_397,test,39,0.6850903389449025,0.9999973177909444
llm_goals_397,test,40,0.026213341978323635,0.8681342672732176
llm_goals_397,test,41,0.7291357800233288,0.999994218349345
llm_goals_397,test,42,0.6862486650916683,0.9999906420709002
llm_goals_397,test,43,0.6756988782640861,0.9999807476995284
llm_goals_397,test,44,0.6517556072404312,0.6356807298065411
llm_goals_397,test,45,0.060364369999053126,0.999989688396868
llm_goals_397,test,46,0.0181840855386556,0.9996384382368032
llm_goals_397,test,47,0.999815165999319,0.9997389316595698
llm_goals_397,test,48,0.9999459783242717,0.9999530911461468
llm_goals_397,test,49,0.38103823551803045,0.9999887943268112
llm_goals_401,test,0,0.947811048103008,0.9999994635581704
llm_goals_401,test,1,0.9999980926513921,0.9999995231628418
llm_goals_401,test,2,0.9999986092249671,0.9999994635581704
llm_goals_401,test,3,0.9999479651461111,0.9999074339856228
llm_goals_401,test,4,0.9999997814496329,0.999999940395357
llm_goals_401,test,5,0.9999998013178519,0.999999940395357
llm_goals_401,test,6,0.9999996622403492,0.999999940395357
llm_goals_401,test,7,0.9999994039535416,0.9999992847442344
llm_goals_401,test,8,0.9999998013178448,0.999999940395357
llm_goals_401,test,9,0.999999523162824,0.9999998211860676
llm_goals_401,test,10,0.9984521468587858,0.9999995827674883
llm_goals_401,test,11,0.9999983112016798,0.9999995827674883
llm_goals_401,test,12,0.9999996225039146,0.9999995827674883
llm_goals_401,test,13,0.9998372594547859,0.9999997019767636
llm_goals_401,test,14,0.7141542473942492,0.9999986886977652
llm_goals_401,test,15,0.9999998211860639,0.9999998211860676
llm_goals_401,test,16,0.9999678333613055,0.9999991655349802
llm_goals_401,test,17,0.9999996026357062,0.9999992847442344
llm_goals_401,test,18,0.7197539247523834,0.9999995231628418
llm_goals_401,test,19,0.9999986290931376,0.9713776109091996
llm_goals_401,test,20,0.9999998013178448,0.9999998807907032
llm_goals_401,test,21,0.9999997814496364,1.0
llm_goals_401,test,22,0.999998112519552,0.9999998807907032
llm_goals_401,test,23,0.99535546712548,0.9999996423721385
llm_goals_401,test,24,0.9932591320217053,0.9999989867209838
llm_goals_401,test,25,0.9999996423721301,0.9999995231628418
llm_goals_401,test,26,0.987659454663571,0.9999997615814208
llm_goals_401,test,27,0.9999988873799737,0.9999994039535594
llm_goals_401,test,28,0.999940673510968,0.9983195662475728
llm_goals_401,test,29,0.999995529651477,0.9993320703468634
llm_goals_401,test,30,0.9999994834264131,0.9999997019767636
llm_goals_401,test,31,0.9999997814496364,0.9999998807907032
llm_goals_401,test,32,0.9999992251396185,0.9999989271164011
llm_goals_401,test,33,0.9999398787816084,0.9999992847442344
llm_goals_401,test,34,0.9999996026356932,0.9999994635581704
llm_goals_401,test,35,0.9999867677686999,0.9999998807907032
llm_goals_401,test,36,0.9999960263570778,0.9999997019767636
llm_goals_401,test,37,0.9993565281340127,0.8060779005212295
llm_goals_401,test,38,0.9999955892561222,0.9999966025353348
llm_goals_401,test,39,0.9999995430310573,0.9999998211860676
llm_goals_401,test,40,0.9999990065892715,0.9999996423721385
llm_goals_401,test,41,0.9999998013178519,0.999999940395357
llm_goals_401,test,42,0.9999976158141273,0.9999996423721385
llm_goals_401,test,43,0.9999988873799465,0.999999940395357
llm_goals_401,test,44,0.9999998211860627,0.9999997615814208
llm_goals_401,test,45,0.999921421210459,0.9992901682960044
llm_goals_401,test,46,0.9999882976210778,0.9999996423721385
llm_goals_401,test,47,0.9999002019568896,0.9999059438703174
llm_goals_401,test,48,0.9978990952067347,0.9999988079071328
llm_goals_401,test,49,0.9990113179177377,0.999999344348895
llm_goals_326,test,0,0.9999767144517677,0.9998103380222648
llm_goals_326,test,1,0.9998865922272552,0.1015499721104398
llm_goals_326,test,2,0.9999767144517677,0.9995523691298543
llm_goals_326,test,3,0.9968402384999454,0.9820513126171384
llm_goals_326,test,4,0.9997094472236422,0.9997708797458836
llm_goals_326,test,5,0.9998389482522837,0.0041024868608885
llm_goals_326,test,6,0.6694928213824175,0.0013792452717851
llm_goals_326,test,7,0.999959071475895,0.9970470071684427
llm_goals_326,test,8,0.9998116095863786,0.9994441866676194
llm_goals_326,test,9,0.009033320151789434,0.058725887838439
llm_goals_326,test,10,0.9990363319660114,0.9983887672460816
llm_goals_326,test,11,0.9989693363422579,0.9773504733838247
llm_goals_326,test,12,0.9613484349305897,0.5111671721675201
llm_goals_326,test,13,0.013155171314484,0.0023213561119498
llm_goals_326,test,14,0.9888654545819912,0.9992487430849846
llm_goals_326,test,15,0.9991879661581123,0.9998496770786612
llm_goals_326,test,16,0.9888654545819912,0.9995788335870258
llm_goals_326,test,17,0.999860723810649,0.0289194533041716
llm_goals_326,test,18,0.9998607635502265,0.9941717982892394
llm_goals_326,test,19,0.9985565344611391,0.9898712036276838
llm_goals_326,test,20,0.9989812175062269,0.998426973781836
llm_goals_326,test,21,0.011047174015832,0.0080112452725568
llm_goals_326,test,22,0.9995036324031937,0.0170446490301255
llm_goals_326,test,23,0.9926078118451641,0.9315074100877992
llm_goals_326,test,24,0.9993092020465619,0.9996359944310432
llm_goals_326,test,25,0.990088800385753,0.9993702173479914
llm_goals_326,test,26,0.9997616410285768,0.99954205751167
llm_goals_326,test,27,0.6698324962410505,0.1439592978368308
llm_goals_326,test,28,0.9934093751594978,0.9782873980924596
llm_goals_326,test,29,0.9981222748634844,0.6406475907057047
llm_goals_326,test,30,0.9910436270233628,0.9997262358610556
llm_goals_326,test,31,0.011047174015832,0.0025467661319066
llm_goals_326,test,32,0.9972848097154564,0.927265467234428
llm_goals_326,test,33,0.9961523213626581,0.9970411657688188
llm_goals_326,test,34,0.9989134867947111,0.9924188256928188
llm_goals_326,test,35,0.999947130681356,0.9999433159814316
llm_goals_326,test,36,0.9943108359631423,0.7828563458619708
llm_goals_326,test,37,0.9885214968885943,0.9921779035200892
llm_goals_326,test,38,0.9997576276458299,0.9710912105847416
llm_goals_326,test,39,0.8653292063151562,0.9591411966937028
llm_goals_326,test,40,0.999431014074089,0.9675343030198714
llm_goals_326,test,41,0.3414059629926478,0.0125675465450323
llm_goals_326,test,42,0.999972939490046,0.9817813030107898
llm_goals_326,test,43,0.34187869234273566,0.0210821160744808
llm_goals_326,test,44,0.9988872607752791,0.9984526634039176
llm_goals_326,test,45,0.9577237765521976,0.7725256637892597
llm_goals_326,test,46,0.9824576581784878,0.7800079567380194
llm_goals_326,test,47,0.9958587885040572,0.9949684144438216
llm_goals_326,test,48,0.9998013774635769,0.9985677004408368
llm_goals_326,test,49,0.9991917411661051,0.045807901396548
llm_goals_415,test,0,0.9386557142117202,0.9997588992066258
llm_goals_415,test,1,0.9999977548917341,0.9999479055404636
llm_goals_415,test,2,0.9888802572354245,0.9957069754348792
llm_goals_415,test,3,0.6010398241348303,0.9998519420664436
llm_goals_415,test,4,0.9973747332088946,0.9993222355665604
llm_goals_415,test,5,0.9999853372570587,0.0031238526915668
llm_goals_415,test,6,0.3362513712381192,0.0017953563962722
llm_goals_415,test,7,0.9999827742578224,0.9612127544780552
llm_goals_415,test,8,0.995787362156968,0.9824366570655148
llm_goals_415,test,9,0.6671937084760682,0.0016438446411424
llm_goals_415,test,10,0.9999687075622162,0.9999850988388712
llm_goals_415,test,11,0.9999689062443604,0.9989992976042504
llm_goals_415,test,12,0.07499295707216863,0.3845905785054697
llm_goals_415,test,13,0.999747991558718,0.0001663988005876
llm_goals_415,test,14,0.9995367129739621,0.99998015165382
llm_goals_415,test,15,0.9994248946525205,0.9986550807653384
llm_goals_415,test,16,0.9994038740797424,0.9999568462370104
llm_goals_415,test,17,0.999970078467403,0.9992712139839848
llm_goals_415,test,18,0.9999939004578886,0.9999991655349802
llm_goals_415,test,19,0.9422632260751472,0.1268431093615233
llm_goals_415,test,20,0.9994252324121534,0.9989575147526004
llm_goals_415,test,21,0.33412202957222265,0.0009023478721852
llm_goals_415,test,22,0.9999942382173962,0.9999995231628418
llm_goals_415,test,23,0.9992729027757069,0.993058026033597
llm_goals_415,test,24,0.999993383884445,0.9999659061423206
llm_goals_415,test,25,0.9999136129999369,0.9999040365210196
llm_goals_415,test,26,0.9999688466397151,0.9999980330466104
llm_goals_415,test,27,0.9557352447215525,0.9999982118606638
llm_goals_415,test,28,0.9885290269431152,0.9999243617070486
llm_goals_415,test,29,0.9935804011821023,0.0442403483495126
llm_goals_415,test,30,0.999929050605569,0.9996343254989244
llm_goals_415,test,31,0.3336897130919792,0.0007251523135023
llm_goals_415,test,32,0.9792223774348724,0.8080266673914581
llm_goals_415,test,33,0.8082266498689021,0.9999894499780476
llm_goals_415,test,34,0.9999062617653657,0.9994327426028377
llm_goals_415,test,35,0.9688332667464143,0.9989529847642252
llm_goals_415,test,36,0.9999872247382657,0.9999961853027344
llm_goals_415,test,37,0.9727905386775871,0.3947042465556775
llm_goals_415,test,38,0.9999611179043834,0.8703240197518964
llm_goals_415,test,39,0.39225972000096726,0.0273997177536015
llm_goals_415,test,40,0.9999782244369145,0.9999445676815596
llm_goals_415,test,41,0.9995875755918604,0.0002682073682728
llm_goals_415,test,42,0.9999941587448394,0.9981696606070336
llm_goals_415,test,43,0.0013206611182341333,0.2786076155583993
llm_goals_415,test,44,0.9992719491013905,0.9965623020600424
llm_goals_415,test,45,0.99999580780679,0.9966514706305192
llm_goals_415,test,46,0.9992615778933018,0.9965545534402274
llm_goals_415,test,47,0.9999559720361274,0.9999976754188272
llm_goals_415,test,48,0.9999921123188164,0.9999974966048696
llm_goals_415,test,49,0.9995627999487949,0.9999994635581704
llm_goals_146,test,0,0.8786606412865443,0.9937213660464576
llm_goals_146,test,1,0.644763523481691,0.9919615387432202
llm_goals_146,test,2,0.5024092920771994,0.9951347707289916
llm_goals_146,test,3,0.7855305148569753,0.9912926556861676
llm_goals_146,test,4,0.9993715484750693,0.7208669260139297
llm_goals_146,test,5,0.9959559441660751,0.0092084125425934
llm_goals_146,test,6,0.7204914568545759,0.1369906233450708
llm_goals_146,test,7,0.9990867177474726,0.9175609963819736
llm_goals_146,test,8,0.8278109490160003,0.0841403066718625
llm_goals_146,test,9,0.9506567301973705,0.1684153143767103
llm_goals_146,test,10,0.6771526289430417,0.908113835652124
llm_goals_146,test,11,0.5314825037006711,0.8938514016497744
llm_goals_146,test,12,0.6793075224160745,0.0100032471486232
llm_goals_146,test,13,0.4791872140832305,0.0298375477746486
llm_goals_146,test,14,0.3783123312487489,0.3167174882579391
llm_goals_146,test,15,0.9986140728105061,0.927414416219921
llm_goals_146,test,16,0.45669482370902137,0.0159187313778433
llm_goals_146,test,17,0.6866769115091911,0.9978384970986696
llm_goals_146,test,18,0.9507795356339712,0.4101569026026918
llm_goals_146,test,19,0.9999390443179675,0.9986221193952473
llm_goals_146,test,20,0.7463388537518072,0.7513560649612049
llm_goals_146,test,21,0.3233040648388484,0.0021027028134696
llm_goals_146,test,22,0.999732077127694,0.9998958110811108
llm_goals_146,test,23,0.7476529496128609,0.7051541837582169
llm_goals_146,test,24,0.9994392792451947,0.0397261476166626
llm_goals_146,test,25,0.5866651565967308,0.8227276184379292
llm_goals_146,test,26,0.6971820074078344,0.2677446993446601
llm_goals_146,test,27,0.6874046643751074,0.99998730421031
llm_goals_146,test,28,0.8271252373178624,0.492199104386424
llm_goals_146,test,29,0.9996772805879558,0.987386405278924
llm_goals_146,test,30,0.9988952875362719,0.9782173041265074
llm_goals_146,test,31,0.999693632118011,0.0704110414375715
llm_goals_146,test,32,0.9985143542511407,0.5230369446492051
llm_goals_146,test,33,0.9987433552511195,0.3253501541923078
llm_goals_146,test,34,0.5599719217179958,0.9673128124700084
llm_goals_146,test,35,0.9999013145748205,0.9997146725628192
llm_goals_146,test,36,0.7764382267107242,0.9323108220562318
llm_goals_146,test,37,0.9926134345514487,0.9997609853775884
llm_goals_146,test,38,0.6847351663428669,0.9964783192363952
llm_goals_146,test,39,0.9998261729871549,0.0664083062639811
llm_goals_146,test,40,0.7463543509595474,0.6598511913765792
llm_goals_146,test,41,0.9997180302937269,0.1032532555810967
llm_goals_146,test,42,0.7219191008916149,0.9252474342352108
llm_goals_146,test,43,0.4810430413967453,0.2905593299202337
llm_goals_146,test,44,0.995937307781838,0.3807617993711434
llm_goals_146,test,45,0.9998644987732904,0.972134053996152
llm_goals_146,test,46,0.9805246589288804,0.1382936231589401
llm_goals_146,test,47,0.997286061429555,0.9162934404085756
llm_goals_146,test,48,0.9506567301973705,0.2466934390927757
llm_goals_146,test,49,0.6871860643604607,0.9999247193370324
llm_goals_293,test,0,0.9952010711721709,0.9999973177909444
llm_goals_293,test,1,0.9999795556064014,0.9999983310699748
llm_goals_293,test,2,0.9999980330466638,0.9999885559082036
llm_goals_293,test,3,0.9999724229184835,0.9996450543386908
llm_goals_293,test,4,0.9999996225039217,0.9999996423721385
llm_goals_293,test,5,0.9999994436899774,0.9999819397920116
llm_goals_293,test,6,0.9999390244480962,0.9999997019767636
llm_goals_293,test,7,0.9999996026356955,0.999996066093452
llm_goals_293,test,8,0.9988700548817402,0.9999997615814208
llm_goals_293,test,9,0.9984550874032373,0.9999495744722752
llm_goals_293,test,10,0.9999657869339339,0.9999885559082036
llm_goals_293,test,11,0.9952006936760595,0.999996721744454
llm_goals_293,test,12,0.9999923706055082,0.9999988675117084
llm_goals_293,test,13,0.9999551177026023,0.9999983906746356
llm_goals_293,test,14,0.99999364217123,0.9999996423721385
llm_goals_293,test,15,0.9999996026357051,0.9999997615814208
llm_goals_293,test,16,0.9999959667522914,0.9999994039535594
llm_goals_293,test,17,0.9917843342855029,0.9999927282334016
llm_goals_293,test,18,0.9984490077295064,0.9999998211860676
llm_goals_293,test,19,0.9999030232434792,0.9999971985817542
llm_goals_293,test,20,0.9999996225039146,0.9999998211860676
llm_goals_293,test,21,0.9999987085660565,0.999999344348895
llm_goals_293,test,22,0.9984551470078803,0.999999225139634
llm_goals_293,test,23,0.9951944153202051,0.9999982714653176
llm_goals_293,test,24,0.9999719460813302,0.9999997019767636
llm_goals_293,test,25,0.9951929450724676,0.9999955296516294
llm_goals_293,test,26,0.9984540542560315,0.9999997615814208
llm_goals_293,test,27,0.9999677340190273,0.9999994635581704
llm_goals_293,test,28,0.9999949137369711,0.9999988675117084
llm_goals_293,test,29,0.9999761978775458,0.9999997615814208
llm_goals_293,test,30,0.9999916553497415,0.9999886751170456
llm_goals_293,test,31,0.9999556144102154,0.9999983906746356
llm_goals_293,test,32,0.9999997615814173,0.9999979734420849
llm_goals_293,test,33,0.9999934434889957,0.9999989271164011
llm_goals_293,test,34,0.9999974568684719,0.9999950528144428
llm_goals_293,test,35,0.9999993046124759,0.9999990463256836
llm_goals_293,test,36,0.9827023941212686,0.9974341988179988
llm_goals_293,test,37,0.9999391237899543,0.999947488307946
llm_goals_293,test,38,0.9999953309695275,0.9999994039535594
llm_goals_293,test,39,0.9999925494196596,0.9999996423721385
llm_goals_293,test,40,0.9936570725132924,0.9999990463256836
llm_goals_293,test,41,0.9999994039535451,0.9999995827674883
llm_goals_293,test,42,0.9999981721241511,0.999998450279186
llm_goals_293,test,43,0.9999983112016633,0.999937295915171
llm_goals_293,test,44,0.9999996225039217,0.9999994635581704
llm_goals_293,test,45,0.999994715054803,0.9999981522560136
llm_goals_293,test,46,0.9983902176828227,0.999995648860848
llm_goals_293,test,47,0.9999993840853202,0.9999973177909444
llm_goals_293,test,48,0.9999967018763615,0.9999997019767636
llm_goals_293,test,49,0.9999984701474963,0.9999975562095944
llm_goals_230,test,0,0.9999931852021277,0.9999988675117084
llm_goals_230,test,1,0.9999994834264131,0.9999197721505853
llm_goals_230,test,2,0.9999950726826751,0.9999986290932004
llm_goals_230,test,3,0.9999998013178496,0.9999998807907032
llm_goals_230,test,4,0.9999992648760401,0.9999992847442344
llm_goals_230,test,5,0.9953254261730894,0.97145622955703
llm_goals_230,test,6,0.9962996640854516,0.9931073785733624
llm_goals_230,test,7,0.9999954899152576,0.9999989271164011
llm_goals_230,test,8,0.9999992052713852,0.9999994039535594
llm_goals_230,test,9,0.9963000813271953,0.999468743814106
llm_goals_230,test,10,0.999985774357905,0.9999828934677488
llm_goals_230,test,11,0.9999995032946227,0.9999996423721385
llm_goals_230,test,12,0.9999985297520849,0.999997794628074
llm_goals_230,test,13,0.9965252279468061,0.9983894825119092
llm_goals_230,test,14,0.9999985297520707,0.9999980926513672
llm_goals_230,test,15,0.9999991655349696,0.9999994039535594
llm_goals_230,test,16,0.9999975363413256,0.99999570846569
llm_goals_230,test,17,0.9999993840853451,0.9997287988607096
llm_goals_230,test,18,0.9999981721241937,0.9999986290932004
llm_goals_230,test,19,0.9999990860621158,0.792201278640246
llm_goals_230,test,20,0.9999991655349696,0.9999989271164011
llm_goals_230,test,21,0.9988169074056863,0.9993889927972028
llm_goals_230,test,22,0.9999996622403469,0.9999741315840668
llm_goals_230,test,23,0.9999989867210276,0.9999991655349802
llm_goals_230,test,24,0.9999980727831245,0.9999988675117084
llm_goals_230,test,25,0.9999945163726961,0.9999902248378308
llm_goals_230,test,26,0.9999968409537635,0.9999990463256836
llm_goals_230,test,27,0.9999994635581952,0.9999489188180812
llm_goals_230,test,28,0.9999997615814101,0.9999998211860676
llm_goals_230,test,29,0.9999990463256658,0.9489630473854872
llm_goals_230,test,30,0.9999958872794661,0.9999970197676546
llm_goals_230,test,31,0.9962996640854515,0.997134745101166
llm_goals_230,test,32,0.9999989867210276,0.9999995231628418
llm_goals_230,test,33,0.9999981323877899,0.999997437000376
llm_goals_230,test,34,0.9999829332033775,0.9999924898147656
llm_goals_230,test,35,0.9999926884967324,0.9999969005585
llm_goals_230,test,36,0.999996185302635,0.9999945163727092
llm_goals_230,test,37,0.9999989072481797,0.9779089689145934
llm_goals_230,test,38,0.9999988476435165,0.9570343502785515
llm_goals_230,test,39,0.9999984105427906,0.9999988079071328
llm_goals_230,test,40,0.9999990264574597,0.9999989271164011
llm_goals_230,test,41,0.9964545169523736,0.99562537655113
llm_goals_230,test,42,0.9999927083649598,0.999997794628074
llm_goals_230,test,43,0.9983099897736722,0.9952926635719378
llm_goals_230,test,44,0.9999988476434881,0.9999994635581704
llm_goals_230,test,45,0.9999997218449851,0.9999998211860676
llm_goals_230,test,46,0.9999990264574597,0.9999992847442344
llm_goals_230,test,47,0.9999998211860567,0.9999998211860676
llm_goals_230,test,48,0.9999980727831316,0.9999990463256836
llm_goals_230,test,49,0.999999543031049,0.9998905062670348
llm_goals_115,test,0,0.999993165333961,0.9999962449073808
llm_goals_115,test,1,0.9999765157691943,0.9999990463256836
llm_goals_115,test,2,0.9999983906745774,0.9999997019767636
llm_goals_115,test,3,0.9994264642297601,0.9999576807027534
llm_goals_115,test,4,0.9999980131784684,0.999958157538035
llm_goals_115,test,5,0.9999994635581881,0.999957859516412
llm_goals_115,test,6,0.9999985694885171,0.9999822974207612
llm_goals_115,test,7,0.9999925891557865,0.9999992847442344
llm_goals_115,test,8,0.9999796946846896,0.9999833106986686
llm_goals_115,test,9,0.9999873836836305,0.9999994635581704
llm_goals_115,test,10,0.9999962846437777,0.9999232888251398
llm_goals_115,test,11,0.9999956885972684,0.1735883698655277
llm_goals_115,test,12,0.9999986092249328,0.9999995231628418
llm_goals_115,test,13,0.9999943176904925,0.999999344348895
llm_goals_115,test,14,0.9999986688295982,0.999998450279186
llm_goals_115,test,15,0.9999836087230599,0.9952644706725092
llm_goals_115,test,16,0.9999994635581881,0.9999927878380692
llm_goals_115,test,17,0.9999974767367146,0.999998509883854
llm_goals_115,test,18,0.9999983310699617,0.9999974966048696
llm_goals_115,test,19,0.9999944766361525,0.99996823072331
llm_goals_115,test,20,0.9999810854598737,0.9999916553499876
llm_goals_115,test,21,0.9999955296517159,0.999999344348895
llm_goals_115,test,22,0.9999512632688412,0.9999989867209838
llm_goals_115,test,23,0.9999826947851616,0.9994583130121676
llm_goals_115,test,24,0.9999842842423473,0.9999986290932004
llm_goals_115,test,25,0.999997953573804,0.9999879002575536
llm_goals_115,test,26,0.9999990463256753,0.9999988079071328
llm_goals_115,test,27,0.9999171098060721,0.9999988079071328
llm_goals_115,test,28,0.999997675418833,0.0502507498981832
llm_goals_115,test,29,0.9999957680701482,0.999960899353502
llm_goals_115,test,30,0.9999966820079921,0.9999914169306978
llm_goals_115,test,31,0.9999951521554173,0.9999992847442344
llm_goals_115,test,32,0.99998253583939,0.9999759793274372
llm_goals_115,test,33,0.9999981125195317,0.9999992847442344
llm_goals_115,test,34,0.9999940395355003,0.9999864101405658
llm_goals_115,test,35,0.9999975760777708,0.999999344348895
llm_goals_115,test,36,0.9999806086220279,0.999999344348895
llm_goals_115,test,37,0.9999941786129448,0.9999979734420849
llm_goals_115,test,38,0.999996900558429,0.9999599456769136
llm_goals_115,test,39,0.9992787440745393,0.999999344348895
llm_goals_115,test,40,0.9999991854031839,0.9998471736847392
llm_goals_115,test,41,0.9999989867210181,0.9999997615814208
llm_goals_115,test,42,0.9999792178472836,0.9999936819076042
llm_goals_115,test,43,0.9999980727831245,0.9999994039535594
llm_goals_115,test,44,0.999932289123929,0.9999966621397788
llm_goals_115,test,45,0.9994382262135205,0.9908538461901172
llm_goals_115,test,46,0.9999969204265216,0.5668339122107139
llm_goals_115,test,47,0.9999969402949512,0.999965012074428
llm_goals_115,test,48,0.9999953309695501,0.9999986290932004
llm_goals_115,test,49,0.9999701579423155,0.9999966025353348
llm_goals_86,test,0,0.9598534503533239,0.9999950528144428
llm_goals_86,test,1,0.9964192708478317,0.9733920102032824
llm_goals_86,test,2,0.9974764585041465,0.9999997019767636
llm_goals_86,test,3,0.979825635919263,0.9999989867209838
llm_goals_86,test,4,0.9168963052238402,0.9999939203260696
llm_goals_86,test,5,0.9993538856560441,0.9799200289999128
llm_goals_86,test,6,0.9999646147081157,0.9999989271164011
llm_goals_86,test,7,0.9996439417371564,0.9999911785123744
llm_goals_86,test,8,0.9963492750958588,0.9906920788388957
llm_goals_86,test,9,0.9886861044160992,0.960079549439269
llm_goals_86,test,10,0.979451338233535,0.9999979734420849
llm_goals_86,test,11,0.9886867799354255,0.998329818271824
llm_goals_86,test,12,0.7168377324921514,0.9999976158140952
llm_goals_86,test,13,0.9985286593276835,0.997893571823832
llm_goals_86,test,14,0.9363167878147619,0.9994581937511068
llm_goals_86,test,15,0.9794531462412074,0.9993743300463472
llm_goals_86,test,16,0.6758967423292531,0.9959942101456876
llm_goals_86,test,17,0.9717075221253948,0.9989233016552924
llm_goals_86,test,18,0.9951079884527071,0.999989688396868
llm_goals_86,test,19,0.9752819530533515,0.99977147578688
llm_goals_86,test,20,0.9796627757666374,0.999986886977724
llm_goals_86,test,21,0.9963757793246902,0.9995875954544448
llm_goals_86,test,22,0.9999583562226845,0.9998529553413918
llm_goals_86,test,23,0.9997425079297216,0.9999995827674883
llm_goals_86,test,24,0.9999650120724421,0.98040735685252
llm_goals_86,test,25,0.9858915609478717,0.9999257922204762
llm_goals_86,test,26,0.9985453486284244,0.9992931485112122
llm_goals_86,test,27,0.9801557463105341,0.999970734120207
llm_goals_86,test,28,0.8962323230787813,0.9998175501809444
llm_goals_86,test,29,0.9999747077618207,0.9995185136816022
llm_goals_86,test,30,0.998321811294916,0.9807010897549824
llm_goals_86,test,31,0.9962574045033167,0.9998762011476526
llm_goals_86,test,32,0.9932330846601419,0.9988895059174044
llm_goals_86,test,33,0.9974199930122388,0.9999738931662266
llm_goals_86,test,34,0.9997227589293199,0.9977411627147142
llm_goals_86,test,35,0.9796084163336972,0.9999870657920578
llm_goals_86,test,36,0.9675540130444656,0.9994595050999702
llm_goals_86,test,37,0.9752805225419315,0.9993578791799612
llm_goals_86,test,38,0.9983841776200646,0.9999866485595712
llm_goals_86,test,39,0.9997496406326624,0.99997985362928
llm_goals_86,test,40,0.9999721050261057,0.999995470046798
llm_goals_86,test,41,0.9999842246376206,0.99996912479408
llm_goals_86,test,42,0.9999642570825499,0.9999997615814208
llm_goals_86,test,43,0.9998312989856196,0.9999995827674883
llm_goals_86,test,44,0.9997212092025305,0.997135043199176
llm_goals_86,test,45,0.9951225519419428,0.9999841451646688
llm_goals_86,test,46,0.97969794250823,0.3364333057002893
llm_goals_86,test,47,0.9997403423080006,0.9991943836221492
llm_goals_86,test,48,0.9990249276103578,0.9999958872793276
llm_goals_86,test,49,0.8741134975255974,0.999995648860848
llm_goals_281,test,0,0.7308565986395094,0.9996947049971306
llm_goals_281,test,1,0.9705734857600393,0.9999986290932004
llm_goals_281,test,2,0.22870409030877384,0.9983671307551129
llm_goals_281,test,3,0.9705734460235979,0.993962764832135
llm_goals_281,test,4,0.6671446846870527,0.9999989271164011
llm_goals_281,test,5,0.9999998013178483,0.9999997019767636
llm_goals_281,test,6,0.9999983708063039,0.9999997019767636
llm_goals_281,test,7,0.18658941166945123,0.973559082090764
llm_goals_281,test,8,0.9989644090543184,0.9999968409538144
llm_goals_281,test,9,0.9999992648760531,0.9999994635581704
llm_goals_281,test,10,0.9999994436899762,0.9999997019767636
llm_goals_281,test,11,0.9999994834263989,0.9999951720236044
llm_goals_281,test,12,0.9999940395354248,0.9999264478667818
llm_goals_281,test,13,0.9999998211860627,0.9999998807907032
llm_goals_281,test,14,0.9999991059303408,0.9999976158140952
llm_goals_281,test,15,0.999994496504462,0.9999992847442344
llm_goals_281,test,16,0.9713158020056166,0.9999986886977652
llm_goals_281,test,17,0.9889935052589781,0.9999985694885254
llm_goals_281,test,18,0.9999990661938991,0.999999344348895
llm_goals_281,test,19,0.3038374935305585,2.503036581908297e-05
llm_goals_281,test,20,0.9999992847442675,0.999996542930667
llm_goals_281,test,21,0.9999993244807044,0.9999997615814208
llm_goals_281,test,22,0.6770311571762422,0.9999986886977652
llm_goals_281,test,23,0.9999988476435355,0.999999344348895
llm_goals_281,test,24,0.999999225139628,0.9999715089800396
llm_goals_281,test,25,0.667147327159746,0.9999983310699748
llm_goals_281,test,26,0.9999991456667683,0.9999986290932004
llm_goals_281,test,27,0.6569019873045963,0.9999990463256836
llm_goals_281,test,28,0.9999996821085553,0.8906972993247301
llm_goals_281,test,29,0.6671463734853503,7.346078924151352e-05
llm_goals_281,test,30,0.9999992847442639,0.9999995827674883
llm_goals_281,test,31,0.9999996821085576,0.9999997019767636
llm_goals_281,test,32,0.9999992450078174,0.99999910593033
llm_goals_281,test,33,0.999999622503917,0.9999994039535594
llm_goals_281,test,34,0.9999995032946227,0.9999989867209838
llm_goals_281,test,35,0.7081742048298288,0.9934774041946208
llm_goals_281,test,36,0.9999996622403481,0.9999990463256836
llm_goals_281,test,37,0.3574179170212403,3.4259730994143734e-05
llm_goals_281,test,38,0.9999994238217406,0.3148602040230503
llm_goals_281,test,39,0.9999432563773737,0.9940564630440718
llm_goals_281,test,40,0.9999995032946227,0.999999344348895
llm_goals_281,test,41,0.999999443689988,0.9999998211860676
llm_goals_281,test,42,0.46646892681538293,0.999860048294782
llm_goals_281,test,43,0.9999988476435663,0.9999997019767636
llm_goals_281,test,44,0.9999976356823984,0.9999992847442344
llm_goals_281,test,45,0.9989692767669375,0.9728845352464304
llm_goals_281,test,46,0.9999794562656876,0.9999996423721385
llm_goals_281,test,47,0.6758817554337856,0.999986290931994
llm_goals_281,test,48,0.9999987681706995,0.9999994039535594
llm_goals_281,test,49,0.9991969863504258,0.999999344348895
llm_goals_182,test,0,0.33340250515439734,0.9999994039535594
llm_goals_182,test,1,0.33292265294835366,0.0005625917030918
llm_goals_182,test,2,0.9999502301211471,0.9999986886977652
llm_goals_182,test,3,0.9626215891960356,7.296223953885346e-06
llm_goals_182,test,4,0.9998947779343274,0.9999976158140952
llm_goals_182,test,5,0.02622084504475793,0.003103044503552
llm_goals_182,test,6,0.666586362304768,0.0119719938396825
llm_goals_182,test,7,0.9997155864982344,0.9999994635581704
llm_goals_182,test,8,0.9969473084407436,0.9999964833260028
llm_goals_182,test,9,0.583956923706204,0.0113900113927541
llm_goals_182,test,10,0.9998246431394661,0.9961416126209276
llm_goals_182,test,11,0.6665666666610398,0.999994337558862
llm_goals_182,test,12,0.9954998891613452,0.0378442217787105
llm_goals_182,test,13,0.6665666666610399,0.0008868430627717
llm_goals_182,test,14,0.007107748763263667,0.0008985942545946
llm_goals_182,test,15,0.5727320863125052,0.9999988675117084
llm_goals_182,test,16,0.007107748763263667,0.0033689156613595
llm_goals_182,test,17,0.9995251099200814,0.0006440135974187
llm_goals_182,test,18,0.007107748763263667,0.0016946532068186
llm_goals_182,test,19,0.9993623892486229,0.9999406933800118
llm_goals_182,test,20,0.9999833703037241,0.99999910593033
llm_goals_182,test,21,0.6664373506942664,0.006514652424738
llm_goals_182,test,22,0.33340360724592205,0.0008693644862334
llm_goals_182,test,23,0.33339842386716617,0.9999617934213552
llm_goals_182,test,24,0.999805589522223,0.0038374716596311
llm_goals_182,test,25,0.3333414652018203,0.9941936732536844
llm_goals_182,test,26,0.007107748763263667,0.001039830413279
llm_goals_182,test,27,0.9995561838040702,0.0007135811001391
llm_goals_182,test,28,0.9994596640196859,7.789882184923891e-06
llm_goals_182,test,29,0.9980899492759733,0.999957442283379
llm_goals_182,test,30,0.9998489618309262,0.9981520175441102
llm_goals_182,test,31,0.03546814079679483,0.0117830754540077
llm_goals_182,test,32,0.9990676244259696,0.9999659657485728
llm_goals_182,test,33,0.33626792501223196,0.003031636673792
llm_goals_182,test,34,0.99808808160541,0.9774106140350124
llm_goals_182,test,35,0.6664546130131567,0.9999996423721385
llm_goals_182,test,36,0.9991259177572801,0.9979310631755344
llm_goals_182,test,37,0.9850119746744695,0.9998408556030968
llm_goals_182,test,38,0.9997155864982344,0.999994218349345
llm_goals_182,test,39,0.9995560646056797,0.1486108277942357
llm_goals_182,test,40,0.4521918418250302,0.9999673962596765
llm_goals_182,test,41,0.5229152305738993,0.0004129864967066
llm_goals_182,test,42,0.7693150624305188,0.9999995231628418
llm_goals_182,test,43,0.017066127273186266,0.0046706145770988
llm_goals_182,test,44,0.9999983112017249,0.9999978542328164
llm_goals_182,test,45,0.6239051854985765,1.853969397415038e-05
llm_goals_182,test,46,0.3323007447490269,0.9999624490730772
llm_goals_182,test,47,0.8782588832483684,2.5724993639382006e-05
llm_goals_182,test,48,0.007694283965482533,0.0005832931102604
llm_goals_182,test,49,0.664465396035021,0.0013167466449808
llm_goals_438,test,0,0.6952502320430313,0.0127917126432779
llm_goals_438,test,1,0.9999946157138154,0.9999138712868518
llm_goals_438,test,2,0.9999990264574231,0.001266465311242
llm_goals_438,test,3,0.9999997814496329,0.9997649192827218
llm_goals_438,test,4,0.9977286458400169,0.995094418582848
llm_goals_438,test,5,0.9999868075055417,0.9676808127608026
llm_goals_438,test,6,0.6959135125193602,0.9681100847526048
llm_goals_438,test,7,0.9999724030502058,0.1433627923531326
llm_goals_438,test,8,0.9977286657082359,0.9999272823362588
llm_goals_438,test,9,0.9999887744582141,0.3346369990700403
llm_goals_438,test,10,0.9999946157137871,0.0032511657787224
llm_goals_438,test,11,0.9999972184498146,0.9991979598760046
llm_goals_438,test,12,0.9999724825230928,0.9979441166238928
llm_goals_438,test,13,0.9999997615814173,0.1369715342565285
llm_goals_438,test,14,0.9999948541321811,0.9999860525130736
llm_goals_438,test,15,0.9940182168256954,0.9995588660290228
llm_goals_438,test,16,0.9999948143957537,0.9999721646308564
llm_goals_438,test,17,0.9999958872793501,0.9999966025353348
llm_goals_438,test,18,0.9999942580858282,0.9999983906746356
llm_goals_438,test,19,0.9999998211860639,0.9999996423721385
llm_goals_438,test,20,0.9999727606780698,0.9997635483790388
llm_goals_438,test,21,0.9999981721241996,0.9481995120869072
llm_goals_438,test,22,0.9999975760777827,0.9999990463256836
llm_goals_438,test,23,0.9999996026356968,0.9995222687768196
llm_goals_438,test,24,0.9999905625976332,0.999998450279186
llm_goals_438,test,25,0.6956386092174297,0.0119505141994216
llm_goals_438,test,26,0.9999940991402297,0.9546977279083356
llm_goals_438,test,27,0.9999958872793501,0.9999922513961864
llm_goals_438,test,28,0.9999907414115278,0.9999902844428042
llm_goals_438,test,29,0.9999998609224914,0.9999998211860676
llm_goals_438,test,30,0.6669849683384298,0.0012244399528321
llm_goals_438,test,31,0.9979443748987938,0.9598621719385948
llm_goals_438,test,32,0.9999973575273563,0.9979359506472824
llm_goals_438,test,33,0.9999343355470661,0.9999122619631158
llm_goals_438,test,34,0.9999976754188165,0.0033786082834978
llm_goals_438,test,35,0.69563785422522,0.7030742694516781
llm_goals_438,test,36,0.9999867479009436,0.0009362307836721
llm_goals_438,test,37,0.99999958276748,0.9999994635581704
llm_goals_438,test,38,0.9999887943261471,0.9999997019767636
llm_goals_438,test,39,0.999985496202808,0.0899507873745254
llm_goals_438,test,40,0.9999991456667221,0.8368337146398075
llm_goals_438,test,41,0.9999985098838398,0.9687532202387088
llm_goals_438,test,42,0.999995628992751,0.001642352875252
llm_goals_438,test,43,0.9999727010734292,0.994022190528956
llm_goals_438,test,44,0.9999996225039217,0.999978065491181
llm_goals_438,test,45,0.9999982515970309,0.9999783635133692
llm_goals_438,test,46,0.9999948541323662,0.9999961853027344
llm_goals_438,test,47,0.9999946157138154,0.9998934864975054
llm_goals_438,test,48,0.9999952713647495,0.9999979734420849
llm_goals_438,test,49,0.9999825557071995,0.9999861717225268
llm_goals_206,test,0,0.9998208085674326,0.9991406798194884
llm_goals_206,test,1,0.2830845809524302,0.7026181814697341
llm_goals_206,test,2,0.9833296539711395,0.9495592135330754
llm_goals_206,test,3,0.2830845809524302,0.4597382924578109
llm_goals_206,test,4,0.26653698514911384,0.456834747394902
llm_goals_206,test,5,0.010088252237826932,0.0069897250631233
llm_goals_206,test,6,0.2830845809524302,0.1198500283021958
llm_goals_206,test,7,0.9833296539711395,0.9853912589369151
llm_goals_206,test,8,0.9833296539711395,0.9858354928324288
llm_goals_206,test,9,0.2830845809524302,0.7676771375411302
llm_goals_206,test,10,0.0034423198516946893,6.0325856974144665e-05
llm_goals_206,test,11,0.2830845809524302,0.9932408335154868
llm_goals_206,test,12,0.19557333168776384,0.87151349073003
llm_goals_206,test,13,0.2830845809524302,4.2185384445727474e-05
llm_goals_206,test,14,0.9998208085674326,0.8643435780923203
llm_goals_206,test,15,0.19557333168776384,0.3192446879498265
llm_goals_206,test,16,0.2830845809524302,0.8269734919284588
llm_goals_206,test,17,0.9998208085674326,0.9885780813553218
llm_goals_206,test,18,0.16214149621067356,0.004661507854032
llm_goals_206,test,19,8.311679582897623e-05,2.09967085772139e-05
llm_goals_206,test,20,0.9998208085674326,0.98699533957105
llm_goals_206,test,21,0.9998208085674326,0.9343360672856648
llm_goals_206,test,22,0.21648705012588706,0.0175621316229822
llm_goals_206,test,23,0.2207499946355761,0.8257364630087937
llm_goals_206,test,24,0.22074999463557607,0.9999812841420614
llm_goals_206,test,25,0.16223857750819826,0.5058550948948065
llm_goals_206,test,26,0.9833296539711395,0.993140757045314
llm_goals_206,test,27,0.2830845809524302,2.2270664938206257e-05
llm_goals_206,test,28,0.2830845809524302,0.950193582732028
llm_goals_206,test,29,0.19557333168776384,0.9696497918757307
llm_goals_206,test,30,0.21914073506170306,0.0026329471014817
llm_goals_206,test,31,0.9997960329076583,0.892615734505986
llm_goals_206,test,32,0.003341271200940733,0.6161747460126132
llm_goals_206,test,33,0.14176534664758975,0.0330446953951764
llm_goals_206,test,34,0.2207499946355761,0.9667759539880174
llm_goals_206,test,35,0.9833296539711395,0.9067099130837172
llm_goals_206,test,36,0.9998278419114532,0.9999364614518882
llm_goals_206,test,37,8.311679582897623e-05,3.930535533630151e-05
llm_goals_206,test,38,0.16223857750819826,0.8118365412646018
llm_goals_206,test,39,0.16223857750819826,0.6533351590647749
llm_goals_206,test,40,0.011590225510620758,1.0367098236044126e-05
llm_goals_206,test,41,0.22074999463557607,0.9999849200248888
llm_goals_206,test,42,0.9998208085674326,0.9495081916810176
llm_goals_206,test,43,0.6699877428758546,0.967551113836368
llm_goals_206,test,44,0.008696378728677833,0.0156578839862262
llm_goals_206,test,45,0.010088252237826932,0.0016468393065651
llm_goals_206,test,46,0.14176534664758975,0.0561833196086265
llm_goals_206,test,47,7.487497853272581e-05,8.951690490961762e-05
llm_goals_206,test,48,0.16214149621067356,0.3268218248822932
llm_goals_206,test,49,0.010088252237826932,0.0047424109160671
