template_id,split,question_idx,prediction,label
llm_goals_281,test,0,0.8889148831367493,0.6469683041227705
llm_goals_281,test,1,0.8518861532211304,0.9610940798838222
llm_goals_281,test,2,0.7177559733390808,0.5031480224374153
llm_goals_281,test,3,0.8908953070640564,0.7670721923110818
llm_goals_281,test,4,0.9193636775016785,0.8573901670428902
llm_goals_281,test,5,0.8908566236495972,0.9842762939932492
llm_goals_281,test,6,0.945942223072052,0.9899150137032138
llm_goals_281,test,7,0.8095781803131104,0.6044210331647685
llm_goals_281,test,8,0.9029890298843384,0.8588374254971683
llm_goals_281,test,9,0.9654461145401001,0.9818143245858572
llm_goals_281,test,10,0.9308596849441528,0.9566256410829356
llm_goals_281,test,11,0.976141095161438,0.9833216073807935
llm_goals_281,test,12,0.9517697095870972,0.9046776287625902
llm_goals_281,test,13,0.8874462842941284,0.98515391356728
llm_goals_281,test,14,0.9234786629676819,0.9050783518109468
llm_goals_281,test,15,0.8816018104553223,0.8143656887719262
llm_goals_281,test,16,0.8885936141014099,0.9030392149866112
llm_goals_281,test,17,0.9021007418632507,0.9782413247928606
llm_goals_281,test,18,0.9630402326583862,0.9394087233364008
llm_goals_281,test,19,0.8176869750022888,0.013753239211091
llm_goals_281,test,20,0.9035214185714722,0.9155304405450826
llm_goals_281,test,21,0.9468423128128052,0.9849370716395414
llm_goals_281,test,22,0.9307876229286194,0.9678947939625672
llm_goals_281,test,23,0.9340737462043762,0.9923826456520252
llm_goals_281,test,24,0.9491270780563354,0.8888994485286436
llm_goals_281,test,25,0.9217440485954285,0.8670526725158154
llm_goals_281,test,26,0.8817068934440613,0.901592433398574
llm_goals_281,test,27,0.8913512229919434,0.9485982074237508
llm_goals_281,test,28,0.9245212078094482,0.7677171303083985
llm_goals_281,test,29,0.9057103991508484,0.0052734878464236
llm_goals_281,test,30,0.9631592035293579,0.9557743081128488
llm_goals_281,test,31,0.9181713461875916,0.991380810774403
llm_goals_281,test,32,0.9291523098945618,0.9805269843458853
llm_goals_281,test,33,0.9129876494407654,0.972194253937266
llm_goals_281,test,34,0.9216935038566589,0.8780187362883507
llm_goals_281,test,35,0.8154608011245728,0.6544240022055418
llm_goals_281,test,36,0.9420309066772461,0.8261399254388034
llm_goals_281,test,37,0.8887885808944702,0.0237476361601453
llm_goals_281,test,38,0.8958428502082825,0.2014428856002633
llm_goals_281,test,39,0.9657720327377319,0.9075832337909528
llm_goals_281,test,40,0.9542025327682495,0.9579415929114572
llm_goals_281,test,41,0.9220308661460876,0.9618767488068802
llm_goals_281,test,42,0.7447888851165771,0.6901242783749021
llm_goals_281,test,43,0.9110129475593567,0.991684436764246
llm_goals_281,test,44,0.8359889984130859,0.8707976927725176
llm_goals_281,test,45,0.8408788442611694,0.855741209208138
llm_goals_281,test,46,0.9185822606086731,0.9874326582385272
llm_goals_281,test,47,0.8477834463119507,0.8582029976458081
llm_goals_281,test,48,0.9276877641677856,0.9463140960540516
llm_goals_281,test,49,0.8946847319602966,0.9857229593669278
llm_goals_182,test,0,0.5017874240875244,0.9879327417690024
llm_goals_182,test,1,0.20892377197742462,0.143590152166184
llm_goals_182,test,2,0.7021963000297546,0.975250065576832
llm_goals_182,test,3,0.5351539254188538,0.0053962595772566
llm_goals_182,test,4,0.5484315156936646,0.9395814535027472
llm_goals_182,test,5,0.5716864466667175,0.7582924861360835
llm_goals_182,test,6,0.4114004373550415,0.5490946196305377
llm_goals_182,test,7,0.5831255316734314,0.9749513260519144
llm_goals_182,test,8,0.5903239846229553,0.9392208432034008
llm_goals_182,test,9,0.5062577128410339,0.644100014754752
llm_goals_182,test,10,0.40419843792915344,0.3220700188782243
llm_goals_182,test,11,0.5492525100708008,0.981547533906946
llm_goals_182,test,12,0.43145203590393066,0.7137040988313296
llm_goals_182,test,13,0.4961479604244232,0.7387330490542552
llm_goals_182,test,14,0.3648056983947754,0.0829307635671844
llm_goals_182,test,15,0.47626110911369324,0.9703041905123228
llm_goals_182,test,16,0.24859619140625,0.0818078392213706
llm_goals_182,test,17,0.15687140822410583,0.1321702105874928
llm_goals_182,test,18,0.23828759789466858,0.0627371233524436
llm_goals_182,test,19,0.4548826217651367,0.9687731848071606
llm_goals_182,test,20,0.6441452503204346,0.9416436555176496
llm_goals_182,test,21,0.375463604927063,0.6725942479831736
llm_goals_182,test,22,0.3504306972026825,0.0934677665250981
llm_goals_182,test,23,0.6073840856552124,0.9812074898398124
llm_goals_182,test,24,0.16903448104858398,0.0949295295653811
llm_goals_182,test,25,0.4266555607318878,0.44246206774058
llm_goals_182,test,26,0.27159395813941956,0.0774707640077587
llm_goals_182,test,27,0.3264509439468384,0.0713961149448467
llm_goals_182,test,28,0.288644015789032,0.0522770850978139
llm_goals_182,test,29,0.28113287687301636,0.95314246463025
llm_goals_182,test,30,0.45977914333343506,0.2504357008006138
llm_goals_182,test,31,0.6208744049072266,0.7161425258649188
llm_goals_182,test,32,0.6745828986167908,0.9646060453168028
llm_goals_182,test,33,0.1813153177499771,0.0545762932705501
llm_goals_182,test,34,0.5266923904418945,0.3122018653274729
llm_goals_182,test,35,0.47028854489326477,0.9860451225539192
llm_goals_182,test,36,0.4637875556945801,0.281834229591876
llm_goals_182,test,37,0.3852204382419586,0.9542467602371212
llm_goals_182,test,38,0.3550049662590027,0.9552662371080364
llm_goals_182,test,39,0.3688053786754608,0.6622170237731956
llm_goals_182,test,40,0.5105682015419006,0.946633816726873
llm_goals_182,test,41,0.6393083333969116,0.7134743981048756
llm_goals_182,test,42,0.5554371476173401,0.9907715917291932
llm_goals_182,test,43,0.6587479114532471,0.7427734633378366
llm_goals_182,test,44,0.6071882247924805,0.932817223580216
llm_goals_182,test,45,0.4282071888446808,0.0223475038345677
llm_goals_182,test,46,0.4535190761089325,0.9353007113558736
llm_goals_182,test,47,0.4544455111026764,0.0297953783874237
llm_goals_182,test,48,0.35016027092933655,0.0852689569608422
llm_goals_182,test,49,0.47974181175231934,0.1518944682187261
llm_goals_86,test,0,0.9032303094863892,0.8172564483303766
llm_goals_86,test,1,0.8564351201057434,0.2845365633956491
llm_goals_86,test,2,0.9455989599227905,0.9608234754131476
llm_goals_86,test,3,0.8814393281936646,0.4905999470752266
llm_goals_86,test,4,0.9197788238525391,0.5223214595126631
llm_goals_86,test,5,0.9410367012023926,0.9513769165530092
llm_goals_86,test,6,0.927739143371582,0.951058446008376
llm_goals_86,test,7,0.9501922726631165,0.7200026044712197
llm_goals_86,test,8,0.9093949198722839,0.8539416814851118
llm_goals_86,test,9,0.9044317603111267,0.8409760614216922
llm_goals_86,test,10,0.9413809776306152,0.8985089093274398
llm_goals_86,test,11,0.8830036520957947,0.960478126988056
llm_goals_86,test,12,0.9071226119995117,0.8489881816956245
llm_goals_86,test,13,0.9199985861778259,0.4634670890641906
llm_goals_86,test,14,0.8345760703086853,0.4018075540357314
llm_goals_86,test,15,0.9055018424987793,0.8179491136946933
llm_goals_86,test,16,0.8611699342727661,0.5277433486870419
llm_goals_86,test,17,0.8947356343269348,0.7712345746838988
llm_goals_86,test,18,0.9366056323051453,0.89457523897237
llm_goals_86,test,19,0.9298115968704224,0.6892489135704485
llm_goals_86,test,20,0.8790209889411926,0.6353825334363962
llm_goals_86,test,21,0.8932755589485168,0.6530961974169966
llm_goals_86,test,22,0.9501480460166931,0.8880043621770078
llm_goals_86,test,23,0.9310019612312317,0.8809145094341392
llm_goals_86,test,24,0.8973578810691833,0.8164918407571773
llm_goals_86,test,25,0.9169669151306152,0.5604555739923536
llm_goals_86,test,26,0.9006255865097046,0.649475864669537
llm_goals_86,test,27,0.9007765650749207,0.951709689395591
llm_goals_86,test,28,0.8752534985542297,0.8387165613894679
llm_goals_86,test,29,0.9161503314971924,0.674672778677278
llm_goals_86,test,30,0.9248354434967041,0.4648188591057368
llm_goals_86,test,31,0.8926543593406677,0.663420132120388
llm_goals_86,test,32,0.9277546405792236,0.6545684404871791
llm_goals_86,test,33,0.9419189691543579,0.8998044750116885
llm_goals_86,test,34,0.9253087043762207,0.4550697205389422
llm_goals_86,test,35,0.9476525187492371,0.7006743460528161
llm_goals_86,test,36,0.8375635147094727,0.8041987455834365
llm_goals_86,test,37,0.9410467147827148,0.5563101271844357
llm_goals_86,test,38,0.9250527024269104,0.8432080747815303
llm_goals_86,test,39,0.9181710481643677,0.8362265811977031
llm_goals_86,test,40,0.9426422119140625,0.9223666199627952
llm_goals_86,test,41,0.9458842873573303,0.8662457492250272
llm_goals_86,test,42,0.9218788743019104,0.8608515909392838
llm_goals_86,test,43,0.8632718920707703,0.7603164939120921
llm_goals_86,test,44,0.9039748311042786,0.7861883669269881
llm_goals_86,test,45,0.8962558507919312,0.8184625523204602
llm_goals_86,test,46,0.9469501376152039,0.94641274246688
llm_goals_86,test,47,0.8864135146141052,0.5854129271694068
llm_goals_86,test,48,0.9063936471939087,0.920227709127447
llm_goals_86,test,49,0.9310750365257263,0.938532531863768
llm_goals_438,test,0,0.8498615622520447,0.2314562881184323
llm_goals_438,test,1,0.8215128183364868,0.7762565054203492
llm_goals_438,test,2,0.8692623972892761,0.1997924391817541
llm_goals_438,test,3,0.835347056388855,0.7992019611592787
llm_goals_438,test,4,0.8597643375396729,0.8049000497082406
llm_goals_438,test,5,0.8374167680740356,0.7174028107354763
llm_goals_438,test,6,0.806946337223053,0.5982684312746689
llm_goals_438,test,7,0.8454678654670715,0.3291190451199206
llm_goals_438,test,8,0.813043475151062,0.7413057021464516
llm_goals_438,test,9,0.8513330817222595,0.5655428700196011
llm_goals_438,test,10,0.8310874700546265,0.3747542837277384
llm_goals_438,test,11,0.8786489963531494,0.8141028872063276
llm_goals_438,test,12,0.8861264586448669,0.7668041079795853
llm_goals_438,test,13,0.7781515717506409,0.3989739137138193
llm_goals_438,test,14,0.821762204170227,0.630963737596379
llm_goals_438,test,15,0.8442927002906799,0.8688319384714368
llm_goals_438,test,16,0.8192346692085266,0.6505714018494433
llm_goals_438,test,17,0.864793598651886,0.8432750100181929
llm_goals_438,test,18,0.887911319732666,0.5454092034362268
llm_goals_438,test,19,0.8201249837875366,0.7865512327498919
llm_goals_438,test,20,0.895089328289032,0.9279490690754252
llm_goals_438,test,21,0.8105072379112244,0.6012074860500735
llm_goals_438,test,22,0.888645350933075,0.8641822359830585
llm_goals_438,test,23,0.8676559329032898,0.6703505477568762
llm_goals_438,test,24,0.8534742593765259,0.659909363300309
llm_goals_438,test,25,0.8193970322608948,0.345825906210505
llm_goals_438,test,26,0.8213207721710205,0.6342860330821757
llm_goals_438,test,27,0.8525097966194153,0.7595913377648781
llm_goals_438,test,28,0.8318547606468201,0.8379201892332447
llm_goals_438,test,29,0.8353669047355652,0.7401884809786652
llm_goals_438,test,30,0.8497443199157715,0.3998648759112044
llm_goals_438,test,31,0.7867035865783691,0.7480624326316926
llm_goals_438,test,32,0.8778778910636902,0.7187081505530665
llm_goals_438,test,33,0.8447169661521912,0.4107650384506188
llm_goals_438,test,34,0.8621484041213989,0.5939725433582219
llm_goals_438,test,35,0.8360046148300171,0.3302466757601784
llm_goals_438,test,36,0.8423523306846619,0.5187707473385741
llm_goals_438,test,37,0.8606130480766296,0.7520728212475446
llm_goals_438,test,38,0.8361791968345642,0.7995393320092329
llm_goals_438,test,39,0.8386031985282898,0.6185694255280791
llm_goals_438,test,40,0.8923285007476807,0.6220749660889419
llm_goals_438,test,41,0.8508090376853943,0.6546343601883343
llm_goals_438,test,42,0.8707774877548218,0.2289113591576547
llm_goals_438,test,43,0.8697837591171265,0.7896951428928559
llm_goals_438,test,44,0.890226423740387,0.8209977715013562
llm_goals_438,test,45,0.8925567269325256,0.8177235133069483
llm_goals_438,test,46,0.8694995045661926,0.6895343723542676
llm_goals_438,test,47,0.7925323843955994,0.8889681695339486
llm_goals_438,test,48,0.8590726256370544,0.5208110922241418
llm_goals_438,test,49,0.9106641411781311,0.8657228979999158
llm_goals_206,test,0,0.1198403537273407,0.7014024312312447
llm_goals_206,test,1,0.03680582717061043,0.09993519180039
llm_goals_206,test,2,0.005847780965268612,0.3805957976908932
llm_goals_206,test,3,0.008476481772959232,0.2506742005767606
llm_goals_206,test,4,0.019694030284881592,0.168981522681462
llm_goals_206,test,5,0.013766144402325153,0.1396780009361202
llm_goals_206,test,6,0.019264696165919304,0.1710164544129008
llm_goals_206,test,7,0.014808347448706627,0.6071092597712212
llm_goals_206,test,8,0.021762896329164505,0.1978085524873241
llm_goals_206,test,9,0.02800709754228592,0.1702414804858378
llm_goals_206,test,10,0.020918231457471848,0.0308920608149318
llm_goals_206,test,11,0.03320879861712456,0.2459778318217196
llm_goals_206,test,12,0.014313215389847755,0.1179569636055102
llm_goals_206,test,13,0.0058431061916053295,0.0251233861816931
llm_goals_206,test,14,0.05036246031522751,0.2426781205526189
llm_goals_206,test,15,0.034702152013778687,0.1278309727108198
llm_goals_206,test,16,0.012305602431297302,0.1146704353190439
llm_goals_206,test,17,0.09663562476634979,0.3515281137996084
llm_goals_206,test,18,0.01674821227788925,0.2196923742635269
llm_goals_206,test,19,0.006082870531827211,0.1304369288612395
llm_goals_206,test,20,0.07064440101385117,0.4106940669038348
llm_goals_206,test,21,0.09173409640789032,0.3368795798730228
llm_goals_206,test,22,0.026053952053189278,0.2288948502948852
llm_goals_206,test,23,0.017537543550133705,0.074953807513886
llm_goals_206,test,24,0.0053865984082221985,0.2441322028739075
llm_goals_206,test,25,0.021052824333310127,0.3918868995222204
llm_goals_206,test,26,0.018697386607527733,0.1723782187624221
llm_goals_206,test,27,0.008084563538432121,0.0391839098867229
llm_goals_206,test,28,0.012611166574060917,0.4274888337765593
llm_goals_206,test,29,0.019009631127119064,0.2322920572824445
llm_goals_206,test,30,0.040390241891145706,0.1908743178919042
llm_goals_206,test,31,0.08193355798721313,0.3263357686111295
llm_goals_206,test,32,0.030975429341197014,0.1979025671613465
llm_goals_206,test,33,0.019256705418229103,0.3702297199267818
llm_goals_206,test,34,0.020436987280845642,0.2357480876238731
llm_goals_206,test,35,0.04471208527684212,0.1482521268698281
llm_goals_206,test,36,0.20771542191505432,0.5821584340939012
llm_goals_206,test,37,0.009573576971888542,0.2650568670113227
llm_goals_206,test,38,0.04138487949967384,0.1360417554406812
llm_goals_206,test,39,0.011126571334898472,0.2119836423042495
llm_goals_206,test,40,0.01159635093063116,0.0289993447458979
llm_goals_206,test,41,0.0031537171453237534,0.3521756872657808
llm_goals_206,test,42,0.05262141674757004,0.4575015387432249
llm_goals_206,test,43,0.09025675803422928,0.4938333202476994
llm_goals_206,test,44,0.038997091352939606,0.2787682116116408
llm_goals_206,test,45,0.011988133192062378,0.1994537138307505
llm_goals_206,test,46,0.024738173931837082,0.5462074258839572
llm_goals_206,test,47,0.014125547371804714,0.1243126576899177
llm_goals_206,test,48,0.004100976977497339,0.2961273515821391
llm_goals_206,test,49,0.03446773439645767,0.2344178444588115
llm_goals_230,test,0,0.9679796099662781,0.93636017960289
llm_goals_230,test,1,0.9764151573181152,0.9080467805464864
llm_goals_230,test,2,0.9655181169509888,0.9492415176604246
llm_goals_230,test,3,0.9678626656532288,0.9883888360800804
llm_goals_230,test,4,0.9593984484672546,0.9308136137248874
llm_goals_230,test,5,0.9383862018585205,0.7461662385047432
llm_goals_230,test,6,0.9388939142227173,0.717943379234597
llm_goals_230,test,7,0.963740885257721,0.9498506183856972
llm_goals_230,test,8,0.9661031365394592,0.903114856201669
llm_goals_230,test,9,0.9487888813018799,0.82763987275553
llm_goals_230,test,10,0.9478214979171753,0.9176431321128996
llm_goals_230,test,11,0.9736444354057312,0.9351009100066487
llm_goals_230,test,12,0.9790313839912415,0.9397739779762262
llm_goals_230,test,13,0.937338650226593,0.7663744634625839
llm_goals_230,test,14,0.9702174663543701,0.9587652680933508
llm_goals_230,test,15,0.9667588472366333,0.9205685886203512
llm_goals_230,test,16,0.9659364223480225,0.9548512114624126
llm_goals_230,test,17,0.9801526665687561,0.9177103078422468
llm_goals_230,test,18,0.9673787951469421,0.9694847470185912
llm_goals_230,test,19,0.9720714092254639,0.241616336852366
llm_goals_230,test,20,0.965788722038269,0.9513741131303785
llm_goals_230,test,21,0.9404182434082031,0.7885476346382756
llm_goals_230,test,22,0.9795035123825073,0.956588746723208
llm_goals_230,test,23,0.9679479598999023,0.8796235970540837
llm_goals_230,test,24,0.9718519449234009,0.9499204774153412
llm_goals_230,test,25,0.957750678062439,0.7955946265549944
llm_goals_230,test,26,0.9706636071205139,0.96990013164722
llm_goals_230,test,27,0.9811278581619263,0.94318586558855
llm_goals_230,test,28,0.9732431173324585,0.9816553586758804
llm_goals_230,test,29,0.9743596315383911,0.2755945052454056
llm_goals_230,test,30,0.9403655529022217,0.8694230354404326
llm_goals_230,test,31,0.9388474822044373,0.7707924321548216
llm_goals_230,test,32,0.967818558216095,0.90274804670901
llm_goals_230,test,33,0.9624332785606384,0.964968324648223
llm_goals_230,test,34,0.9415628910064697,0.8786148395823622
llm_goals_230,test,35,0.9634715914726257,0.9659460769293176
llm_goals_230,test,36,0.9567984342575073,0.9265550381327228
llm_goals_230,test,37,0.9685457944869995,0.2797244301796934
llm_goals_230,test,38,0.9720286130905151,0.3705167516113265
llm_goals_230,test,39,0.978619396686554,0.9571521884098596
llm_goals_230,test,40,0.9745450615882874,0.9050631524069218
llm_goals_230,test,41,0.936508297920227,0.7826731844385996
llm_goals_230,test,42,0.9653812646865845,0.9429872638799112
llm_goals_230,test,43,0.9332882761955261,0.7863413109728788
llm_goals_230,test,44,0.9652283787727356,0.925476613788026
llm_goals_230,test,45,0.9716586470603943,0.9835724833108438
llm_goals_230,test,46,0.9698902368545532,0.9324350988748504
llm_goals_230,test,47,0.9725331664085388,0.9918220640686296
llm_goals_230,test,48,0.9686213731765747,0.9632008068532188
llm_goals_230,test,49,0.9762741327285767,0.9196290944353572
llm_goals_115,test,0,0.8223930597305298,0.6878400979912542
llm_goals_115,test,1,0.8198015689849854,0.9839099052127078
llm_goals_115,test,2,0.7578124403953552,0.6264771219588837
llm_goals_115,test,3,0.6869078278541565,0.5101504336920828
llm_goals_115,test,4,0.803755521774292,0.4749856076325871
llm_goals_115,test,5,0.8358199000358582,0.8824023049105343
llm_goals_115,test,6,0.8818778395652771,0.8532372770314965
llm_goals_115,test,7,0.7783662676811218,0.6852873624151903
llm_goals_115,test,8,0.8173294067382812,0.7160879332925761
llm_goals_115,test,9,0.8511407375335693,0.8154454841155164
llm_goals_115,test,10,0.813286542892456,0.4039477654904175
llm_goals_115,test,11,0.8476386666297913,0.0754019011174093
llm_goals_115,test,12,0.8152050971984863,0.9216642359611203
llm_goals_115,test,13,0.8697205781936646,0.9023613914305204
llm_goals_115,test,14,0.8000216484069824,0.7385692464553043
llm_goals_115,test,15,0.8045760989189148,0.5273332595390108
llm_goals_115,test,16,0.7981131076812744,0.5319983452316508
llm_goals_115,test,17,0.8275229930877686,0.9818465718466136
llm_goals_115,test,18,0.7552098035812378,0.7098198015331896
llm_goals_115,test,19,0.8271802067756653,0.8100224145820547
llm_goals_115,test,20,0.7770693302154541,0.5463347366319413
llm_goals_115,test,21,0.7982038259506226,0.9708728191294718
llm_goals_115,test,22,0.7649428248405457,0.9564870014981148
llm_goals_115,test,23,0.8664665818214417,0.3427940177342707
llm_goals_115,test,24,0.8768868446350098,0.7216819551218165
llm_goals_115,test,25,0.7960236072540283,0.7118023516747564
llm_goals_115,test,26,0.7493781447410583,0.7495779511143832
llm_goals_115,test,27,0.8469597697257996,0.9876421097288872
llm_goals_115,test,28,0.7921293377876282,0.2396916848864135
llm_goals_115,test,29,0.8578439354896545,0.8734944493526076
llm_goals_115,test,30,0.8151785731315613,0.4720948765146447
llm_goals_115,test,31,0.8607794046401978,0.9613435269013808
llm_goals_115,test,32,0.8023433685302734,0.2802482482345895
llm_goals_115,test,33,0.859454870223999,0.800091024303248
llm_goals_115,test,34,0.8359523415565491,0.4731748202329244
llm_goals_115,test,35,0.8043075203895569,0.6451464331436837
llm_goals_115,test,36,0.799604058265686,0.5222874947485241
llm_goals_115,test,37,0.741481363773346,0.9548006059708632
llm_goals_115,test,38,0.8269977569580078,0.8483833674723993
llm_goals_115,test,39,0.8085469007492065,0.9222807287488012
llm_goals_115,test,40,0.7745557427406311,0.2526171639652511
llm_goals_115,test,41,0.8338761925697327,0.8309168152992626
llm_goals_115,test,42,0.7325893640518188,0.58549510672814
llm_goals_115,test,43,0.8019538521766663,0.9288889171841944
llm_goals_115,test,44,0.7846417427062988,0.8696076826330161
llm_goals_115,test,45,0.7496345043182373,0.3370215724740773
llm_goals_115,test,46,0.8533902764320374,0.0614426001598869
llm_goals_115,test,47,0.7660873532295227,0.5311914101507266
llm_goals_115,test,48,0.7279912233352661,0.6260304408434099
llm_goals_115,test,49,0.8005009293556213,0.9732730991067396
llm_goals_401,test,0,0.7818912863731384,0.995337188206034
llm_goals_401,test,1,0.9111664891242981,0.9877679942135538
llm_goals_401,test,2,0.8295395970344543,0.9922930596197688
llm_goals_401,test,3,0.8964292407035828,0.944885851250094
llm_goals_401,test,4,0.7996104955673218,0.9994499087491504
llm_goals_401,test,5,0.5577222108840942,0.9963147043479246
llm_goals_401,test,6,0.8508748412132263,0.9937722087866844
llm_goals_401,test,7,0.9066892266273499,0.9823669190452718
llm_goals_401,test,8,0.8362085223197937,0.999488890156012
llm_goals_401,test,9,0.7769852876663208,0.9958814380530916
llm_goals_401,test,10,0.7003553509712219,0.9936545489877364
llm_goals_401,test,11,0.6964733600616455,0.9890559909640764
llm_goals_401,test,12,0.8919137120246887,0.974935352749168
llm_goals_401,test,13,0.6941242218017578,0.9811005000644571
llm_goals_401,test,14,0.5325393676757812,0.9053577183425524
llm_goals_401,test,15,0.7274510860443115,0.999511659155082
llm_goals_401,test,16,0.4458974599838257,0.9069459477271428
llm_goals_401,test,17,0.8804003000259399,0.9880303739527652
llm_goals_401,test,18,0.7309160232543945,0.8849021227503215
llm_goals_401,test,19,0.8119015097618103,0.8461972444129129
llm_goals_401,test,20,0.8400749564170837,0.9985405802478567
llm_goals_401,test,21,0.7522395253181458,0.9979217051338364
llm_goals_401,test,22,0.8792271614074707,0.9942128658052918
llm_goals_401,test,23,0.7436124682426453,0.9829282756728752
llm_goals_401,test,24,0.6245872974395752,0.9755103584971708
llm_goals_401,test,25,0.8631844520568848,0.9854062793275654
llm_goals_401,test,26,0.6279245615005493,0.9862527255879427
llm_goals_401,test,27,0.8947197794914246,0.9777193074990224
llm_goals_401,test,28,0.8658385872840881,0.939797105410212
llm_goals_401,test,29,0.8681746125221252,0.924205125757662
llm_goals_401,test,30,0.8806824088096619,0.9887794851583942
llm_goals_401,test,31,0.6970537900924683,0.9974054097423743
llm_goals_401,test,32,0.8974407911300659,0.987442612237454
llm_goals_401,test,33,0.7736192941665649,0.9077114440216438
llm_goals_401,test,34,0.8834820985794067,0.9898619654171366
llm_goals_401,test,35,0.6577134728431702,0.9887833003411676
llm_goals_401,test,36,0.7760429382324219,0.9883841875213925
llm_goals_401,test,37,0.9244824647903442,0.7709992025246631
llm_goals_401,test,38,0.8275463581085205,0.9687479729312614
llm_goals_401,test,39,0.8157053589820862,0.9758216144428412
llm_goals_401,test,40,0.8044607639312744,0.9954323769170262
llm_goals_401,test,41,0.8462241291999817,0.9970808029010324
llm_goals_401,test,42,0.8206955194473267,0.9970337152076278
llm_goals_401,test,43,0.8031671047210693,0.9963160753068976
llm_goals_401,test,44,0.8200364708900452,0.999649882329424
llm_goals_401,test,45,0.835025429725647,0.9708126775062408
llm_goals_401,test,46,0.3388349711894989,0.9777507190974738
llm_goals_401,test,47,0.7675891518592834,0.879110748685256
llm_goals_401,test,48,0.6733109951019287,0.9452995669117286
llm_goals_401,test,49,0.8588008880615234,0.9904525877618487
llm_goals_326,test,0,0.8771628141403198,0.7080152699628419
llm_goals_326,test,1,0.699723482131958,0.1915128822536962
llm_goals_326,test,2,0.8236236572265625,0.7444471737433097
llm_goals_326,test,3,0.7080370783805847,0.4392720971297062
llm_goals_326,test,4,0.8623209595680237,0.891602221308631
llm_goals_326,test,5,0.6040703654289246,0.0516888735952717
llm_goals_326,test,6,0.6701252460479736,0.0431335229249402
llm_goals_326,test,7,0.8470597267150879,0.7478485077290631
llm_goals_326,test,8,0.8179649114608765,0.8572844895817008
llm_goals_326,test,9,0.4713311195373535,0.0757120509371778
llm_goals_326,test,10,0.757296621799469,0.6528841155476689
llm_goals_326,test,11,0.7612580060958862,0.1274303379860786
llm_goals_326,test,12,0.7588234543800354,0.2755885259868873
llm_goals_326,test,13,0.5449234843254089,0.0177855343138265
llm_goals_326,test,14,0.8080050945281982,0.6106618044247591
llm_goals_326,test,15,0.8660605549812317,0.7686960692919514
llm_goals_326,test,16,0.7875694632530212,0.78510933722289
llm_goals_326,test,17,0.7624265551567078,0.1510306836677899
llm_goals_326,test,18,0.7194504737854004,0.714622257156643
llm_goals_326,test,19,0.7467033863067627,0.7195213393137665
llm_goals_326,test,20,0.8344604969024658,0.7418285539231787
llm_goals_326,test,21,0.6446915864944458,0.0439102190505529
llm_goals_326,test,22,0.6352636814117432,0.1806737802484422
llm_goals_326,test,23,0.8763877153396606,0.2841042501811218
llm_goals_326,test,24,0.8189185261726379,0.7647552401874187
llm_goals_326,test,25,0.7966976761817932,0.6430861301277235
llm_goals_326,test,26,0.7436896562576294,0.6501767709406938
llm_goals_326,test,27,0.6024482250213623,0.1962727505123375
llm_goals_326,test,28,0.7633649110794067,0.454183079329165
llm_goals_326,test,29,0.7820103764533997,0.4340101008130515
llm_goals_326,test,30,0.7544552087783813,0.7979558074116693
llm_goals_326,test,31,0.6974093317985535,0.0525898871260873
llm_goals_326,test,32,0.7573753595352173,0.1343880762883031
llm_goals_326,test,33,0.8223215341567993,0.7783031325129874
llm_goals_326,test,34,0.7886305451393127,0.6498840537412343
llm_goals_326,test,35,0.7913297414779663,0.8262182481972753
llm_goals_326,test,36,0.8296854496002197,0.5329381900017961
llm_goals_326,test,37,0.739708423614502,0.7623008977444935
llm_goals_326,test,38,0.8106870055198669,0.4787708612720514
llm_goals_326,test,39,0.7371318936347961,0.3940936731364524
llm_goals_326,test,40,0.7212454080581665,0.1554473532877406
llm_goals_326,test,41,0.6033315062522888,0.0835985162339854
llm_goals_326,test,42,0.8896937370300293,0.6165240441256894
llm_goals_326,test,43,0.5978391766548157,0.0224093895342384
llm_goals_326,test,44,0.8655800223350525,0.9028450273437764
llm_goals_326,test,45,0.7626444101333618,0.4414288558565103
llm_goals_326,test,46,0.8131837248802185,0.3316083963903907
llm_goals_326,test,47,0.7735071182250977,0.5242446701777307
llm_goals_326,test,48,0.7172394394874573,0.6202593994623453
llm_goals_326,test,49,0.6545374393463135,0.1810686795284525
llm_goals_415,test,0,0.9399019479751587,0.945942102532565
llm_goals_415,test,1,0.8899474143981934,0.8927012108946369
llm_goals_415,test,2,0.8955846428871155,0.9655095349643328
llm_goals_415,test,3,0.6618744134902954,0.5773151635184978
llm_goals_415,test,4,0.9010559320449829,0.9305359112645492
llm_goals_415,test,5,0.8690401911735535,0.2698413337656404
llm_goals_415,test,6,0.6989319920539856,0.1924170964690193
llm_goals_415,test,7,0.9389185309410095,0.964020489204398
llm_goals_415,test,8,0.8856791257858276,0.9764932994994092
llm_goals_415,test,9,0.8340560793876648,0.3491418165522914
llm_goals_415,test,10,0.8720287680625916,0.8933741476361363
llm_goals_415,test,11,0.918526828289032,0.920598685397126
llm_goals_415,test,12,0.8443080186843872,0.6909930118311174
llm_goals_415,test,13,0.735689103603363,0.1145420932706047
llm_goals_415,test,14,0.9399200677871704,0.9423774493447
llm_goals_415,test,15,0.9487941861152649,0.9532362831473804
llm_goals_415,test,16,0.8878384828567505,0.8356775665343158
llm_goals_415,test,17,0.9303262829780579,0.9361823813143908
llm_goals_415,test,18,0.9258646965026855,0.9889487026200174
llm_goals_415,test,19,0.9202941060066223,0.6368843297568499
llm_goals_415,test,20,0.9639663696289062,0.9870089292806328
llm_goals_415,test,21,0.7813271284103394,0.192645257351692
llm_goals_415,test,22,0.9431853294372559,0.9748569718628384
llm_goals_415,test,23,0.93593430519104,0.8083263035006917
llm_goals_415,test,24,0.9353917241096497,0.931151511592295
llm_goals_415,test,25,0.8988828063011169,0.8863766227722848
llm_goals_415,test,26,0.9169002771377563,0.9640595915946516
llm_goals_415,test,27,0.869378387928009,0.9081509748500972
llm_goals_415,test,28,0.7595290541648865,0.702391734675441
llm_goals_415,test,29,0.8481994867324829,0.5027195285232966
llm_goals_415,test,30,0.9112808108329773,0.9145678910630844
llm_goals_415,test,31,0.8661061525344849,0.2428483145137429
llm_goals_415,test,32,0.7795446515083313,0.8852615952458959
llm_goals_415,test,33,0.8032856583595276,0.9135139614061104
llm_goals_415,test,34,0.865372896194458,0.9013304127749656
llm_goals_415,test,35,0.9145124554634094,0.9492589831139694
llm_goals_415,test,36,0.9548062682151794,0.9187255502309962
llm_goals_415,test,37,0.91661137342453,0.5449707044122137
llm_goals_415,test,38,0.9139053821563721,0.5046841390470121
llm_goals_415,test,39,0.9159681797027588,0.6393683546149316
llm_goals_415,test,40,0.9347932934761047,0.958167434065704
llm_goals_415,test,41,0.8861564993858337,0.1258629101169009
llm_goals_415,test,42,0.9659507274627686,0.9370320411522076
llm_goals_415,test,43,0.6617469787597656,0.4502328517486405
llm_goals_415,test,44,0.9659721255302429,0.9666150226098564
llm_goals_415,test,45,0.8446998000144958,0.4875853574104955
llm_goals_415,test,46,0.8308586478233337,0.9433028700684638
llm_goals_415,test,47,0.9317395687103271,0.6681809390819066
llm_goals_415,test,48,0.9514779448509216,0.9737072586261016
llm_goals_415,test,49,0.9042190313339233,0.9706154467965924
llm_goals_293,test,0,0.4234122633934021,0.8297409974007197
llm_goals_293,test,1,0.38950785994529724,0.9166843306899098
llm_goals_293,test,2,0.7657260298728943,0.4850136353820626
llm_goals_293,test,3,0.7278903126716614,0.391440800258883
llm_goals_293,test,4,0.8236653208732605,0.9814967512328644
llm_goals_293,test,5,0.8067827224731445,0.9737153059180352
llm_goals_293,test,6,0.2993176281452179,0.985194742988862
llm_goals_293,test,7,0.8164147734642029,0.8402554946174373
llm_goals_293,test,8,0.6771511435508728,0.9863079191276464
llm_goals_293,test,9,0.7656758427619934,0.9549403173664608
llm_goals_293,test,10,0.5392434597015381,0.8132036948651367
llm_goals_293,test,11,0.8328769207000732,0.2287407340843056
llm_goals_293,test,12,0.4402540326118469,0.9235533469292896
llm_goals_293,test,13,0.6072683930397034,0.9750785225411348
llm_goals_293,test,14,0.7367321848869324,0.9922545554063744
llm_goals_293,test,15,0.6870934367179871,0.9838300936204548
llm_goals_293,test,16,0.7544667720794678,0.9858987926415744
llm_goals_293,test,17,0.690079927444458,0.9556264290993132
llm_goals_293,test,18,0.8506165742874146,0.9458334455175702
llm_goals_293,test,19,0.6221857070922852,0.5736353307249014
llm_goals_293,test,20,0.843095600605011,0.9812628634528624
llm_goals_293,test,21,0.599463939666748,0.975720704169484
llm_goals_293,test,22,0.67375648021698,0.9409119473005016
llm_goals_293,test,23,0.44712796807289124,0.6306895569552333
llm_goals_293,test,24,0.7305751442909241,0.9831594820284292
llm_goals_293,test,25,0.26266077160835266,0.8981066919341846
llm_goals_293,test,26,0.5679215788841248,0.9716764680193836
llm_goals_293,test,27,0.6853910684585571,0.8155843585345258
llm_goals_293,test,28,0.6788322925567627,0.9733620875264264
llm_goals_293,test,29,0.15937897562980652,0.9051490452062086
llm_goals_293,test,30,0.8927215337753296,0.7348576092486574
llm_goals_293,test,31,0.6874374151229858,0.9808893198314528
llm_goals_293,test,32,0.7942181825637817,0.593206056427014
llm_goals_293,test,33,0.7179098129272461,0.9657488455537
llm_goals_293,test,34,0.6674308180809021,0.8851169960520849
llm_goals_293,test,35,0.5702299475669861,0.8555944515949592
llm_goals_293,test,36,0.7531617879867554,0.5855041803376898
llm_goals_293,test,37,0.7885055541992188,0.6409415533832579
llm_goals_293,test,38,0.3210437595844269,0.8743998352900223
llm_goals_293,test,39,0.5546095371246338,0.8182019034258088
llm_goals_293,test,40,0.7728641629219055,0.146751538769175
llm_goals_293,test,41,0.8194484114646912,0.9709715243488852
llm_goals_293,test,42,0.8253310918807983,0.8829771283195118
llm_goals_293,test,43,0.6212208867073059,0.9398047936750782
llm_goals_293,test,44,0.8604313135147095,0.9795592430097252
llm_goals_293,test,45,0.843055009841919,0.7025255505490043
llm_goals_293,test,46,0.7756295204162598,0.6531552142747035
llm_goals_293,test,47,0.7696412205696106,0.8527514903629965
llm_goals_293,test,48,0.8597744107246399,0.9844985009129076
llm_goals_293,test,49,0.3809435963630676,0.9160653951101896
llm_goals_146,test,0,0.6789118647575378,0.3466493369029843
llm_goals_146,test,1,0.8374314904212952,0.971693336326244
llm_goals_146,test,2,0.8931024074554443,0.6455769508045827
llm_goals_146,test,3,0.7772990465164185,0.5822222843930172
llm_goals_146,test,4,0.8605282306671143,0.6842324614600077
llm_goals_146,test,5,0.7465598583221436,0.3860201177799698
llm_goals_146,test,6,0.6853370666503906,0.2341590231269344
llm_goals_146,test,7,0.8647381067276001,0.3643683725782807
llm_goals_146,test,8,0.8716593980789185,0.7055765903024079
llm_goals_146,test,9,0.8165027499198914,0.5084888304786936
llm_goals_146,test,10,0.828952968120575,0.421220281945486
llm_goals_146,test,11,0.8814710974693298,0.489412697220974
llm_goals_146,test,12,0.8269957304000854,0.601020744296073
llm_goals_146,test,13,0.742565393447876,0.2557097387485441
llm_goals_146,test,14,0.8621615767478943,0.4640750948087747
llm_goals_146,test,15,0.8433282375335693,0.7362086189794756
llm_goals_146,test,16,0.7127549052238464,0.4987515019820814
llm_goals_146,test,17,0.8628320097923279,0.9587866055408388
llm_goals_146,test,18,0.8255185484886169,0.6036783393019786
llm_goals_146,test,19,0.8561639785766602,0.3513374958540852
llm_goals_146,test,20,0.8953598737716675,0.8281412663626416
llm_goals_146,test,21,0.7354644536972046,0.34065502012531
llm_goals_146,test,22,0.8486377596855164,0.9907655718703378
llm_goals_146,test,23,0.7602353096008301,0.2061079946990629
llm_goals_146,test,24,0.8645589351654053,0.3446073251785768
llm_goals_146,test,25,0.8601768612861633,0.6576184069508046
llm_goals_146,test,26,0.9204468131065369,0.4110159240539976
llm_goals_146,test,27,0.9040494561195374,0.984345377326772
llm_goals_146,test,28,0.8306772112846375,0.4903698662236391
llm_goals_146,test,29,0.7447881698608398,0.362068682571018
llm_goals_146,test,30,0.8723167777061462,0.4551468142410695
llm_goals_146,test,31,0.8476047515869141,0.3773142189484755
llm_goals_146,test,32,0.8759554028511047,0.4187681470726767
llm_goals_146,test,33,0.7708842754364014,0.3985742178328911
llm_goals_146,test,34,0.8212235569953918,0.5044141394199024
llm_goals_146,test,35,0.7339170575141907,0.4783486718786691
llm_goals_146,test,36,0.8901093006134033,0.5445552456181276
llm_goals_146,test,37,0.8165358304977417,0.2691607872517095
llm_goals_146,test,38,0.8106260299682617,0.5517828611974395
llm_goals_146,test,39,0.8312414884567261,0.4823935111879393
llm_goals_146,test,40,0.7909998297691345,0.496646227759238
llm_goals_146,test,41,0.7601637244224548,0.410176485322602
llm_goals_146,test,42,0.7559673190116882,0.2603856324469091
llm_goals_146,test,43,0.7556710839271545,0.555094401775619
llm_goals_146,test,44,0.9381494522094727,0.5218179738711017
llm_goals_146,test,45,0.8765978217124939,0.4447090989140364
llm_goals_146,test,46,0.7560135126113892,0.5567532038584041
llm_goals_146,test,47,0.8463258743286133,0.4034445534741869
llm_goals_146,test,48,0.9282925128936768,0.5335126942675598
llm_goals_146,test,49,0.929172694683075,0.9775228494630558
llm_goals_236,test,0,0.8565194010734558,0.9502374518284544
llm_goals_236,test,1,0.842698872089386,0.632901790189933
llm_goals_236,test,2,0.8707708120346069,0.9547987563797488
llm_goals_236,test,3,0.9075254201889038,0.8955373773460537
llm_goals_236,test,4,0.9184917211532593,0.604483505999029
llm_goals_236,test,5,0.9372838735580444,0.9645155057204132
llm_goals_236,test,6,0.9286142587661743,0.8400796085543216
llm_goals_236,test,7,0.8956581354141235,0.9809643632475058
llm_goals_236,test,8,0.8957735896110535,0.9551138894715042
llm_goals_236,test,9,0.9381572604179382,0.9323794813876984
llm_goals_236,test,10,0.8665385842323303,0.7794327710983796
llm_goals_236,test,11,0.9080019593238831,0.9780675777326168
llm_goals_236,test,12,0.8653497695922852,0.8875326502933057
llm_goals_236,test,13,0.9140412211418152,0.929950178153689
llm_goals_236,test,14,0.771436333656311,0.926757039035834
llm_goals_236,test,15,0.8639029264450073,0.7910198588237014
llm_goals_236,test,16,0.9103807806968689,0.8930606244937771
llm_goals_236,test,17,0.8591552376747131,0.8600503746114734
llm_goals_236,test,18,0.8636931777000427,0.9478088016198049
llm_goals_236,test,19,0.8974511623382568,0.0557340307108
llm_goals_236,test,20,0.8269744515419006,0.7876626308843195
llm_goals_236,test,21,0.8783994913101196,0.8769842974627694
llm_goals_236,test,22,0.8582750558853149,0.3468135553298878
llm_goals_236,test,23,0.8701915144920349,0.9514921900954876
llm_goals_236,test,24,0.8791400194168091,0.9310227652155564
llm_goals_236,test,25,0.8681754469871521,0.6795750924280035
llm_goals_236,test,26,0.7991618514060974,0.7808533270173144
llm_goals_236,test,27,0.8755980134010315,0.3548731041307022
llm_goals_236,test,28,0.916454553604126,0.9628356114282044
llm_goals_236,test,29,0.9128830432891846,0.0678193344934023
llm_goals_236,test,30,0.824634313583374,0.8669796641519075
llm_goals_236,test,31,0.8136042356491089,0.7901777055635718
llm_goals_236,test,32,0.874690055847168,0.9761505717506124
llm_goals_236,test,33,0.8426823019981384,0.925212684096036
llm_goals_236,test,34,0.8480264544487,0.8283609769269727
llm_goals_236,test,35,0.9150921106338501,0.9191289552997411
llm_goals_236,test,36,0.6662749648094177,0.8559995305859318
llm_goals_236,test,37,0.9150691032409668,0.0699999500981296
llm_goals_236,test,38,0.8733724355697632,0.1796257560659765
llm_goals_236,test,39,0.8585026264190674,0.9175097319342992
llm_goals_236,test,40,0.8937768340110779,0.9403113739191608
llm_goals_236,test,41,0.9244950413703918,0.8003138232198173
llm_goals_236,test,42,0.8687674403190613,0.9656298771148252
llm_goals_236,test,43,0.8889601230621338,0.7679948721037306
llm_goals_236,test,44,0.8285688161849976,0.8397446267621825
llm_goals_236,test,45,0.9190342426300049,0.9383022794052605
llm_goals_236,test,46,0.8769875764846802,0.9861661791368868
llm_goals_236,test,47,0.788309633731842,0.8216616511109771
llm_goals_236,test,48,0.8519096970558167,0.858269332712761
llm_goals_236,test,49,0.8615620732307434,0.4904990431885668
llm_goals_228,test,0,0.5315018892288208,0.8649997140208154
llm_goals_228,test,1,0.433281272649765,0.873321951241939
llm_goals_228,test,2,0.6340847611427307,0.8957700764194163
llm_goals_228,test,3,0.35555192828178406,0.1236903018304219
llm_goals_228,test,4,0.4233631491661072,0.7453361748877467
llm_goals_228,test,5,0.220428928732872,0.1192843072628729
llm_goals_228,test,6,0.3449137806892395,0.1326971908799849
llm_goals_228,test,7,0.5730767250061035,0.871489699671336
llm_goals_228,test,8,0.6436306238174438,0.8243934448117555
llm_goals_228,test,9,0.3102738857269287,0.0467115483491378
llm_goals_228,test,10,0.41536369919776917,0.7659628877266128
llm_goals_228,test,11,0.3736259639263153,0.7679518438112958
llm_goals_228,test,12,0.2821238040924072,0.3261574116268692
llm_goals_228,test,13,0.5292288661003113,0.0485964531599145
llm_goals_228,test,14,0.6616604328155518,0.9059895277949974
llm_goals_228,test,15,0.3939535915851593,0.7273862898347316
llm_goals_228,test,16,0.6142241358757019,0.9158116003766568
llm_goals_228,test,17,0.3698164224624634,0.2983101183881344
llm_goals_228,test,18,0.6107176542282104,0.9541860240679364
llm_goals_228,test,19,0.6683350801467896,0.8476448713173524
llm_goals_228,test,20,0.3495992124080658,0.7696132021860423
llm_goals_228,test,21,0.1905076652765274,0.2240634772421697
llm_goals_228,test,22,0.4827711284160614,0.5944716948164779
llm_goals_228,test,23,0.33608758449554443,0.8035987641686457
llm_goals_228,test,24,0.5851327180862427,0.8954167374248716
llm_goals_228,test,25,0.6056202054023743,0.7151555300243474
llm_goals_228,test,26,0.4472878575325012,0.933582548003466
llm_goals_228,test,27,0.4593954086303711,0.7197431412077714
llm_goals_228,test,28,0.3438156247138977,0.2110212364220297
llm_goals_228,test,29,0.6133787035942078,0.7475211519371483
llm_goals_228,test,30,0.33545586466789246,0.8439204089849919
llm_goals_228,test,31,0.23603291809558868,0.233187545611893
llm_goals_228,test,32,0.4075312316417694,0.6522564861744167
llm_goals_228,test,33,0.5736719965934753,0.9036141650151318
llm_goals_228,test,34,0.5676628947257996,0.8063092871634532
llm_goals_228,test,35,0.3881853222846985,0.7386503177076705
llm_goals_228,test,36,0.39585164189338684,0.4339953557233364
llm_goals_228,test,37,0.37539902329444885,0.7638352005094292
llm_goals_228,test,38,0.22433994710445404,0.6360600602425245
llm_goals_228,test,39,0.21794842183589935,0.5582201546461881
llm_goals_228,test,40,0.40228673815727234,0.7919258516091835
llm_goals_228,test,41,0.3035920262336731,0.2382556436276936
llm_goals_228,test,42,0.5541064739227295,0.8180639729723762
llm_goals_228,test,43,0.29143139719963074,0.1744592330547429
llm_goals_228,test,44,0.39021041989326477,0.8059483789474448
llm_goals_228,test,45,0.24274396896362305,0.185739159862888
llm_goals_228,test,46,0.2986437976360321,0.6385866390140609
llm_goals_228,test,47,0.28139403462409973,0.181375317343939
llm_goals_228,test,48,0.5789339542388916,0.9407807579284042
llm_goals_228,test,49,0.43615013360977173,0.3930787424904466
llm_goals_397,test,0,0.36496880650520325,0.699619236076976
llm_goals_397,test,1,0.48399755358695984,0.7858562431683396
llm_goals_397,test,2,0.3674752414226532,0.8656621011318779
llm_goals_397,test,3,0.34931597113609314,0.5216834406120067
llm_goals_397,test,4,0.19998261332511902,0.2170933847941593
llm_goals_397,test,5,0.5948219895362854,0.6203395193658312
llm_goals_397,test,6,0.5193787217140198,0.7062451920643317
llm_goals_397,test,7,0.5092585682868958,0.7818592804776917
llm_goals_397,test,8,0.3987136483192444,0.3722692260862962
llm_goals_397,test,9,0.33590736985206604,0.7525421946277221
llm_goals_397,test,10,0.26125359535217285,0.55382257151224
llm_goals_397,test,11,0.28726571798324585,0.654531962180046
llm_goals_397,test,12,0.48559510707855225,0.659030860866389
llm_goals_397,test,13,0.3210028409957886,0.5080189660267803
llm_goals_397,test,14,0.6408970355987549,0.93949913908347
llm_goals_397,test,15,0.18973824381828308,0.1397603306643144
llm_goals_397,test,16,0.5460852384567261,0.94748652041643
llm_goals_397,test,17,0.57691490650177,0.8337419557614654
llm_goals_397,test,18,0.5762889385223389,0.929486218624307
llm_goals_397,test,19,0.3932444453239441,0.6917306210780744
llm_goals_397,test,20,0.291634738445282,0.3168072461941615
llm_goals_397,test,21,0.5520929098129272,0.7050710313870211
llm_goals_397,test,22,0.3079155683517456,0.9048640732247422
llm_goals_397,test,23,0.23072926700115204,0.6896020664732718
llm_goals_397,test,24,0.7206854224205017,0.9588320250969324
llm_goals_397,test,25,0.586387038230896,0.7992448229336531
llm_goals_397,test,26,0.5719478130340576,0.925966146729264
llm_goals_397,test,27,0.45453327894210815,0.8290411876343606
llm_goals_397,test,28,0.6352465748786926,0.614512804730325
llm_goals_397,test,29,0.5728045701980591,0.8119643351773586
llm_goals_397,test,30,0.500706136226654,0.6416279718371558
llm_goals_397,test,31,0.5546295642852783,0.6679576084359291
llm_goals_397,test,32,0.3699946701526642,0.7497660034408138
llm_goals_397,test,33,0.5184489488601685,0.9406859295331328
llm_goals_397,test,34,0.3634227514266968,0.6256499877247926
llm_goals_397,test,35,0.4495079219341278,0.6832026805931318
llm_goals_397,test,36,0.43243563175201416,0.688132397136647
llm_goals_397,test,37,0.34923651814460754,0.6918761795541711
llm_goals_397,test,38,0.2930731177330017,0.5633524658577074
llm_goals_397,test,39,0.43670040369033813,0.6565008680077707
llm_goals_397,test,40,0.29447445273399353,0.6958896526078188
llm_goals_397,test,41,0.4457164704799652,0.757736441343734
llm_goals_397,test,42,0.4643276333808899,0.8490422913608605
llm_goals_397,test,43,0.34456735849380493,0.7079678798274159
llm_goals_397,test,44,0.2386421114206314,0.1092321450870402
llm_goals_397,test,45,0.3100762367248535,0.5166526556585691
llm_goals_397,test,46,0.3324531614780426,0.5419103560441939
llm_goals_397,test,47,0.40165430307388306,0.4142756611326425
llm_goals_397,test,48,0.7078614234924316,0.956440745261376
llm_goals_397,test,49,0.3684798777103424,0.7473642876631974
