template_id,split,question_idx,prediction,label
llm_goals_281,test,0,0.9970260262961408,0.6469683041227705
llm_goals_281,test,1,0.4989651427310203,0.9610940798838222
llm_goals_281,test,2,0.3841260647041153,0.5031480224374153
llm_goals_281,test,3,0.4989651427310203,0.7670721923110818
llm_goals_281,test,4,0.9146760093576,0.8573901670428902
llm_goals_281,test,5,0.985176980606256,0.9842762939932492
llm_goals_281,test,6,0.984311044064046,0.9899150137032138
llm_goals_281,test,7,0.3841260647041153,0.6044210331647685
llm_goals_281,test,8,0.8781738941238137,0.8588374254971683
llm_goals_281,test,9,0.981601954013518,0.9818143245858572
llm_goals_281,test,10,0.9636623279509186,0.9566256410829356
llm_goals_281,test,11,0.997689127815008,0.9833216073807935
llm_goals_281,test,12,0.9968638419668248,0.9046776287625902
llm_goals_281,test,13,0.981601954013518,0.98515391356728
llm_goals_281,test,14,0.9245036279898609,0.9050783518109468
llm_goals_281,test,15,0.983113109688526,0.8143656887719262
llm_goals_281,test,16,0.4989651427310203,0.9030392149866112
llm_goals_281,test,17,0.9707170730507526,0.9782413247928606
llm_goals_281,test,18,0.9245036279898609,0.9394087233364008
llm_goals_281,test,19,0.4325834462271058,0.013753239211091
llm_goals_281,test,20,0.9146760093576,0.9155304405450826
llm_goals_281,test,21,0.9528015274865468,0.9849370716395414
llm_goals_281,test,22,0.5921471868069869,0.9678947939625672
llm_goals_281,test,23,0.9891991016523244,0.9923826456520252
llm_goals_281,test,24,0.9245036279898609,0.8888994485286436
llm_goals_281,test,25,0.1820344364297351,0.8670526725158154
llm_goals_281,test,26,0.9245036279898609,0.901592433398574
llm_goals_281,test,27,0.9731377964615748,0.9485982074237508
llm_goals_281,test,28,0.9698880908677572,0.7677171303083985
llm_goals_281,test,29,0.1820344364297351,0.0052734878464236
llm_goals_281,test,30,0.973776637644692,0.9557743081128488
llm_goals_281,test,31,0.9865778689160328,0.991380810774403
llm_goals_281,test,32,0.9233813308534932,0.9805269843458853
llm_goals_281,test,33,0.9744142302360912,0.972194253937266
llm_goals_281,test,34,0.9065926111491718,0.8780187362883507
llm_goals_281,test,35,0.2133142706938853,0.6544240022055418
llm_goals_281,test,36,0.9678764339678796,0.8261399254388034
llm_goals_281,test,37,0.4325834462271058,0.0237476361601453
llm_goals_281,test,38,0.8022742856285934,0.2014428856002633
llm_goals_281,test,39,0.9493129268372356,0.9075832337909528
llm_goals_281,test,40,0.9928262828992978,0.9579415929114572
llm_goals_281,test,41,0.8757343903860316,0.9618767488068802
llm_goals_281,test,42,0.3653919457712302,0.6901242783749021
llm_goals_281,test,43,0.9606602782385008,0.991684436764246
llm_goals_281,test,44,0.8504964709709857,0.8707976927725176
llm_goals_281,test,45,0.9972182513168218,0.855741209208138
llm_goals_281,test,46,0.908209504816212,0.9874326582385272
llm_goals_281,test,47,0.9170192516175582,0.8582029976458081
llm_goals_281,test,48,0.9245036279898609,0.9463140960540516
llm_goals_281,test,49,0.7096304325420786,0.9857229593669278
llm_goals_182,test,0,0.0027327359502999,0.9879327417690024
llm_goals_182,test,1,0.0020406184533411,0.143590152166184
llm_goals_182,test,2,0.5334178119991814,0.975250065576832
llm_goals_182,test,3,0.3162737549953742,0.0053962595772566
llm_goals_182,test,4,0.3902053810730581,0.9395814535027472
llm_goals_182,test,5,0.6949818092863563,0.7582924861360835
llm_goals_182,test,6,0.3902053810730581,0.5490946196305377
llm_goals_182,test,7,0.3902053810730581,0.9749513260519144
llm_goals_182,test,8,0.2968212303636612,0.9392208432034008
llm_goals_182,test,9,0.6949818092863563,0.644100014754752
llm_goals_182,test,10,0.9123091115282672,0.3220700188782243
llm_goals_182,test,11,0.2780110922416123,0.981547533906946
llm_goals_182,test,12,0.3821952854111959,0.7137040988313296
llm_goals_182,test,13,0.2780110922416123,0.7387330490542552
llm_goals_182,test,14,0.0358397409146454,0.0829307635671844
llm_goals_182,test,15,0.0009609919028945,0.9703041905123228
llm_goals_182,test,16,0.0386042332375852,0.0818078392213706
llm_goals_182,test,17,0.3162737549953742,0.1321702105874928
llm_goals_182,test,18,0.0386042332375852,0.0627371233524436
llm_goals_182,test,19,0.3371987152705933,0.9687731848071606
llm_goals_182,test,20,0.9683806310206846,0.9416436555176496
llm_goals_182,test,21,0.2780110922416123,0.6725942479831736
llm_goals_182,test,22,0.0022511653181104,0.0934677665250981
llm_goals_182,test,23,0.9802823666102664,0.9812074898398124
llm_goals_182,test,24,0.635922662555064,0.0949295295653811
llm_goals_182,test,25,0.8334470935712753,0.44246206774058
llm_goals_182,test,26,0.0358397409146454,0.0774707640077587
llm_goals_182,test,27,0.2731167353321109,0.0713961149448467
llm_goals_182,test,28,0.3553645955901617,0.0522770850978139
llm_goals_182,test,29,0.3553645955901617,0.95314246463025
llm_goals_182,test,30,0.3902053810730581,0.2504357008006138
llm_goals_182,test,31,0.6949818092863563,0.7161425258649188
llm_goals_182,test,32,0.3902053810730581,0.9646060453168028
llm_goals_182,test,33,0.0470136853652671,0.0545762932705501
llm_goals_182,test,34,0.9123091115282672,0.3122018653274729
llm_goals_182,test,35,0.2780110922416123,0.9860451225539192
llm_goals_182,test,36,0.8484061358349757,0.281834229591876
llm_goals_182,test,37,0.7269271039144315,0.9542467602371212
llm_goals_182,test,38,0.3902053810730581,0.9552662371080364
llm_goals_182,test,39,0.2780110922416123,0.6622170237731956
llm_goals_182,test,40,0.7183589957922274,0.946633816726873
llm_goals_182,test,41,0.6949818092863563,0.7134743981048756
llm_goals_182,test,42,0.5334178119991814,0.9907715917291932
llm_goals_182,test,43,0.6949818092863563,0.7427734633378366
llm_goals_182,test,44,0.9891995189847572,0.932817223580216
llm_goals_182,test,45,0.8334470935712753,0.0223475038345677
llm_goals_182,test,46,0.0017515489765024,0.9353007113558736
llm_goals_182,test,47,0.5021832119923376,0.0297953783874237
llm_goals_182,test,48,0.0386042332375852,0.0852689569608422
llm_goals_182,test,49,0.2541077156400017,0.1518944682187261
llm_goals_86,test,0,0.958408952194646,0.8172564483303766
llm_goals_86,test,1,0.9320649500417488,0.2845365633956491
llm_goals_86,test,2,0.9375270577017076,0.9608234754131476
llm_goals_86,test,3,0.9710646270395116,0.4905999470752266
llm_goals_86,test,4,0.9461237774820452,0.5223214595126631
llm_goals_86,test,5,0.971915603615242,0.9513769165530092
llm_goals_86,test,6,0.8698316155807396,0.951058446008376
llm_goals_86,test,7,0.9269732214522934,0.7200026044712197
llm_goals_86,test,8,0.9766343241180686,0.8539416814851118
llm_goals_86,test,9,0.921683374303969,0.8409760614216922
llm_goals_86,test,10,0.9238550045543634,0.8985089093274398
llm_goals_86,test,11,0.958408952194646,0.960478126988056
llm_goals_86,test,12,0.9429563297784112,0.8489881816956245
llm_goals_86,test,13,0.9664345972710908,0.4634670890641906
llm_goals_86,test,14,0.8538165048392445,0.4018075540357314
llm_goals_86,test,15,0.9238550045543634,0.8179491136946933
llm_goals_86,test,16,0.9710646270395116,0.5277433486870419
llm_goals_86,test,17,0.7163635423868642,0.7712345746838988
llm_goals_86,test,18,0.9551436318961036,0.89457523897237
llm_goals_86,test,19,0.9783905146646132,0.6892489135704485
llm_goals_86,test,20,0.9203491821274896,0.6353825334363962
llm_goals_86,test,21,0.7163635423868642,0.6530961974169966
llm_goals_86,test,22,0.9577130685130092,0.8880043621770078
llm_goals_86,test,23,0.8698435402435035,0.8809145094341392
llm_goals_86,test,24,0.9577130685130092,0.8164918407571773
llm_goals_86,test,25,0.8425366291502469,0.5604555739923536
llm_goals_86,test,26,0.9664345972710908,0.649475864669537
llm_goals_86,test,27,0.94259315657715,0.951709689395591
llm_goals_86,test,28,0.9754882454971192,0.8387165613894679
llm_goals_86,test,29,0.9579166165320196,0.674672778677278
llm_goals_86,test,30,0.9299709782499002,0.4648188591057368
llm_goals_86,test,31,0.9709638361252986,0.663420132120388
llm_goals_86,test,32,0.9710646270395116,0.6545684404871791
llm_goals_86,test,33,0.9664345972710908,0.8998044750116885
llm_goals_86,test,34,0.9710646270395116,0.4550697205389422
llm_goals_86,test,35,0.9389652015526442,0.7006743460528161
llm_goals_86,test,36,0.7492426036071803,0.8041987455834365
llm_goals_86,test,37,0.9240260142295442,0.5563101271844357
llm_goals_86,test,38,0.9429563297784112,0.8432080747815303
llm_goals_86,test,39,0.9429563297784112,0.8362265811977031
llm_goals_86,test,40,0.9577130685130092,0.9223666199627952
llm_goals_86,test,41,0.9464864739868964,0.8662457492250272
llm_goals_86,test,42,0.9569632408273198,0.8608515909392838
llm_goals_86,test,43,0.9710646270395116,0.7603164939120921
llm_goals_86,test,44,0.9627720726158976,0.7861883669269881
llm_goals_86,test,45,0.8694182030848889,0.8184625523204602
llm_goals_86,test,46,0.9461237774820452,0.94641274246688
llm_goals_86,test,47,0.9461821922445616,0.5854129271694068
llm_goals_86,test,48,0.9569632408273198,0.920227709127447
llm_goals_86,test,49,0.7866390947250439,0.938532531863768
llm_goals_438,test,0,0.8864944595374995,0.2314562881184323
llm_goals_438,test,1,0.8766903932318532,0.7762565054203492
llm_goals_438,test,2,0.9510802638605133,0.1997924391817541
llm_goals_438,test,3,0.8952428115695847,0.7992019611592787
llm_goals_438,test,4,0.839428841227459,0.8049000497082406
llm_goals_438,test,5,0.8330081114059361,0.7174028107354763
llm_goals_438,test,6,0.8523262808340685,0.5982684312746689
llm_goals_438,test,7,0.883658055134949,0.3291190451199206
llm_goals_438,test,8,0.7660457559356629,0.7413057021464516
llm_goals_438,test,9,0.8362207371277431,0.5655428700196011
llm_goals_438,test,10,0.8818737229819914,0.3747542837277384
llm_goals_438,test,11,0.8914144049199615,0.8141028872063276
llm_goals_438,test,12,0.8597690508570631,0.7668041079795853
llm_goals_438,test,13,0.7977421880795665,0.3989739137138193
llm_goals_438,test,14,0.6514972423031037,0.630963737596379
llm_goals_438,test,15,0.9148150091373104,0.8688319384714368
llm_goals_438,test,16,0.9332661645087862,0.6505714018494433
llm_goals_438,test,17,0.8914144049199615,0.8432750100181929
llm_goals_438,test,18,0.8739718143767411,0.5454092034362268
llm_goals_438,test,19,0.7977421880795665,0.7865512327498919
llm_goals_438,test,20,0.9286821478400074,0.9279490690754252
llm_goals_438,test,21,0.8994498864099665,0.6012074860500735
llm_goals_438,test,22,0.8739718143767411,0.8641822359830585
llm_goals_438,test,23,0.8887644990089597,0.6703505477568762
llm_goals_438,test,24,0.8362207371277431,0.659909363300309
llm_goals_438,test,25,0.9304245734189692,0.345825906210505
llm_goals_438,test,26,0.8860706716947165,0.6342860330821757
llm_goals_438,test,27,0.8914144049199615,0.7595913377648781
llm_goals_438,test,28,0.8362207371277431,0.8379201892332447
llm_goals_438,test,29,0.8072003148079662,0.7401884809786652
llm_goals_438,test,30,0.8858090041242515,0.3998648759112044
llm_goals_438,test,31,0.6103147362958236,0.7480624326316926
llm_goals_438,test,32,0.8730161222236057,0.7187081505530665
llm_goals_438,test,33,0.7454622428986845,0.4107650384506188
llm_goals_438,test,34,0.8921785376216205,0.5939725433582219
llm_goals_438,test,35,0.9133038506639544,0.3302466757601784
llm_goals_438,test,36,0.8860706716947165,0.5187707473385741
llm_goals_438,test,37,0.8953516471890982,0.7520728212475446
llm_goals_438,test,38,0.935398575589064,0.7995393320092329
llm_goals_438,test,39,0.8994498864099665,0.6185694255280791
llm_goals_438,test,40,0.8739718143767411,0.6220749660889419
llm_goals_438,test,41,0.8739718143767411,0.6546343601883343
llm_goals_438,test,42,0.9510802638605133,0.2289113591576547
llm_goals_438,test,43,0.8597690508570631,0.7896951428928559
llm_goals_438,test,44,0.8586494354324493,0.8209977715013562
llm_goals_438,test,45,0.931167301747896,0.8177235133069483
llm_goals_438,test,46,0.8539053786230804,0.6895343723542676
llm_goals_438,test,47,0.8860706716947165,0.8889681695339486
llm_goals_438,test,48,0.8739718143767411,0.5208110922241418
llm_goals_438,test,49,0.9137594126736444,0.8657228979999158
llm_goals_206,test,0,0.7200524310901181,0.7014024312312447
llm_goals_206,test,1,0.1627963243260043,0.09993519180039
llm_goals_206,test,2,0.267220518788931,0.3805957976908932
llm_goals_206,test,3,0.1627963243260043,0.2506742005767606
llm_goals_206,test,4,0.1627963243260043,0.168981522681462
llm_goals_206,test,5,0.1112020343454531,0.1396780009361202
llm_goals_206,test,6,0.1627963243260043,0.1710164544129008
llm_goals_206,test,7,0.1704166703544272,0.6071092597712212
llm_goals_206,test,8,0.267220518788931,0.1978085524873241
llm_goals_206,test,9,0.155365376260303,0.1702414804858378
llm_goals_206,test,10,0.0245185889341411,0.0308920608149318
llm_goals_206,test,11,0.1627963243260043,0.2459778318217196
llm_goals_206,test,12,0.1683139777696879,0.1179569636055102
llm_goals_206,test,13,0.155365376260303,0.0251233861816931
llm_goals_206,test,14,0.7200524310901181,0.2426781205526189
llm_goals_206,test,15,0.1683139777696879,0.1278309727108198
llm_goals_206,test,16,0.155365376260303,0.1146704353190439
llm_goals_206,test,17,0.7200524310901181,0.3515281137996084
llm_goals_206,test,18,0.0702400022628308,0.2196923742635269
llm_goals_206,test,19,0.0245185889341411,0.1304369288612395
llm_goals_206,test,20,0.7200524310901181,0.4106940669038348
llm_goals_206,test,21,0.7937372375218039,0.3368795798730228
llm_goals_206,test,22,0.0702400022628308,0.2288948502948852
llm_goals_206,test,23,0.0702400022628308,0.074953807513886
llm_goals_206,test,24,0.0702400022628308,0.2441322028739075
llm_goals_206,test,25,0.1683139777696879,0.3918868995222204
llm_goals_206,test,26,0.267220518788931,0.1723782187624221
llm_goals_206,test,27,0.1627963243260043,0.0391839098867229
llm_goals_206,test,28,0.1848058210281502,0.4274888337765593
llm_goals_206,test,29,0.1683139777696879,0.2322920572824445
llm_goals_206,test,30,0.0702400022628308,0.1908743178919042
llm_goals_206,test,31,0.7200524310901181,0.3263357686111295
llm_goals_206,test,32,0.1973985808638077,0.1979025671613465
llm_goals_206,test,33,0.1391129823992727,0.3702297199267818
llm_goals_206,test,34,0.0702400022628308,0.2357480876238731
llm_goals_206,test,35,0.1704166703544272,0.1482521268698281
llm_goals_206,test,36,0.7200524310901181,0.5821584340939012
llm_goals_206,test,37,0.0245185889341411,0.2650568670113227
llm_goals_206,test,38,0.1683139777696879,0.1360417554406812
llm_goals_206,test,39,0.1683139777696879,0.2119836423042495
llm_goals_206,test,40,0.0702400022628308,0.0289993447458979
llm_goals_206,test,41,0.0702400022628308,0.3521756872657808
llm_goals_206,test,42,0.7200524310901181,0.4575015387432249
llm_goals_206,test,43,0.1159435734678778,0.4938333202476994
llm_goals_206,test,44,0.1159435734678778,0.2787682116116408
llm_goals_206,test,45,0.1112020343454531,0.1994537138307505
llm_goals_206,test,46,0.1391129823992727,0.5462074258839572
llm_goals_206,test,47,0.0245185889341411,0.1243126576899177
llm_goals_206,test,48,0.0702400022628308,0.2961273515821391
llm_goals_206,test,49,0.1159435734678778,0.2344178444588115
llm_goals_230,test,0,0.9269158234018404,0.93636017960289
llm_goals_230,test,1,0.9864748715631212,0.9080467805464864
llm_goals_230,test,2,0.9495010395023846,0.9492415176604246
llm_goals_230,test,3,0.985576987510671,0.9883888360800804
llm_goals_230,test,4,0.9197642829548544,0.9308136137248874
llm_goals_230,test,5,0.7371138350740521,0.7461662385047432
llm_goals_230,test,6,0.7973304950231841,0.717943379234597
llm_goals_230,test,7,0.948689581852771,0.9498506183856972
llm_goals_230,test,8,0.8981372120024614,0.903114856201669
llm_goals_230,test,9,0.7005483570662072,0.82763987275553
llm_goals_230,test,10,0.8948673582196687,0.9176431321128996
llm_goals_230,test,11,0.9847698805401248,0.9351009100066487
llm_goals_230,test,12,0.9742456669228152,0.9397739779762262
llm_goals_230,test,13,0.7997059198968332,0.7663744634625839
llm_goals_230,test,14,0.952121019246055,0.9587652680933508
llm_goals_230,test,15,0.9197642829548544,0.9205685886203512
llm_goals_230,test,16,0.956067086036404,0.9548512114624126
llm_goals_230,test,17,0.9847112890609476,0.9177103078422468
llm_goals_230,test,18,0.967961667382444,0.9694847470185912
llm_goals_230,test,19,0.9847698805401248,0.241616336852366
llm_goals_230,test,20,0.949362576585044,0.9513741131303785
llm_goals_230,test,21,0.8101609328250858,0.7885476346382756
llm_goals_230,test,22,0.9932943584427144,0.956588746723208
llm_goals_230,test,23,0.9847698805401248,0.8796235970540837
llm_goals_230,test,24,0.9686375262313234,0.9499204774153412
llm_goals_230,test,25,0.8731350842031056,0.7955946265549944
llm_goals_230,test,26,0.9668522480193504,0.96990013164722
llm_goals_230,test,27,0.9880446192845044,0.94318586558855
llm_goals_230,test,28,0.9929400087624756,0.9816553586758804
llm_goals_230,test,29,0.9858483669472592,0.2755945052454056
llm_goals_230,test,30,0.9311032894486482,0.8694230354404326
llm_goals_230,test,31,0.8101609328250858,0.7707924321548216
llm_goals_230,test,32,0.9847698805401248,0.90274804670901
llm_goals_230,test,33,0.9686375262313234,0.964968324648223
llm_goals_230,test,34,0.8320140186768564,0.8786148395823622
llm_goals_230,test,35,0.9495010395023846,0.9659460769293176
llm_goals_230,test,36,0.9311032894486482,0.9265550381327228
llm_goals_230,test,37,0.9855443240149322,0.2797244301796934
llm_goals_230,test,38,0.9928933979615314,0.3705167516113265
llm_goals_230,test,39,0.9742456669228152,0.9571521884098596
llm_goals_230,test,40,0.9855443240149322,0.9050631524069218
llm_goals_230,test,41,0.828039823245376,0.7826731844385996
llm_goals_230,test,42,0.9269158234018404,0.9429872638799112
llm_goals_230,test,43,0.7371138350740521,0.7863413109728788
llm_goals_230,test,44,0.927405240082982,0.925476613788026
llm_goals_230,test,45,0.9838295585229055,0.9835724833108438
llm_goals_230,test,46,0.9950431583649312,0.9324350988748504
llm_goals_230,test,47,0.990447222918531,0.9918220640686296
llm_goals_230,test,48,0.9686375262313234,0.9632008068532188
llm_goals_230,test,49,0.9897753594273372,0.9196290944353572
llm_goals_115,test,0,0.9387419839695134,0.6878400979912542
llm_goals_115,test,1,0.7905194184432489,0.9839099052127078
llm_goals_115,test,2,0.8639077608984874,0.6264771219588837
llm_goals_115,test,3,0.2087522540108096,0.5101504336920828
llm_goals_115,test,4,0.8749662056974328,0.4749856076325871
llm_goals_115,test,5,0.8203048102267291,0.8824023049105343
llm_goals_115,test,6,0.9486838000106496,0.8532372770314965
llm_goals_115,test,7,0.9202272908974684,0.6852873624151903
llm_goals_115,test,8,0.5102423329360459,0.7160879332925761
llm_goals_115,test,9,0.5967423324749713,0.8154454841155164
llm_goals_115,test,10,0.5010152909078055,0.4039477654904175
llm_goals_115,test,11,0.8869047781031854,0.0754019011174093
llm_goals_115,test,12,0.941484989212326,0.9216642359611203
llm_goals_115,test,13,0.960664569155443,0.9023613914305204
llm_goals_115,test,14,0.5347466488600625,0.7385692464553043
llm_goals_115,test,15,0.974787949821673,0.5273332595390108
llm_goals_115,test,16,0.8666027179154094,0.5319983452316508
llm_goals_115,test,17,0.9549220216797512,0.9818465718466136
llm_goals_115,test,18,0.8629193281077671,0.7098198015331896
llm_goals_115,test,19,0.8977051963557465,0.8100224145820547
llm_goals_115,test,20,0.8517701611119113,0.5463347366319413
llm_goals_115,test,21,0.9549220216797512,0.9708728191294718
llm_goals_115,test,22,0.5967423324749713,0.9564870014981148
llm_goals_115,test,23,0.9607183342872572,0.3427940177342707
llm_goals_115,test,24,0.5102423329360459,0.7216819551218165
llm_goals_115,test,25,0.7159083492565425,0.7118023516747564
llm_goals_115,test,26,0.8959134255397705,0.7495779511143832
llm_goals_115,test,27,0.9366131441272146,0.9876421097288872
llm_goals_115,test,28,0.8869047781031854,0.2396916848864135
llm_goals_115,test,29,0.9627493624979642,0.8734944493526076
llm_goals_115,test,30,0.7159083492565425,0.4720948765146447
llm_goals_115,test,31,0.8977051963557465,0.9613435269013808
llm_goals_115,test,32,0.5102423329360459,0.2802482482345895
llm_goals_115,test,33,0.920657935299956,0.800091024303248
llm_goals_115,test,34,0.7159083492565425,0.4731748202329244
llm_goals_115,test,35,0.5349103756200574,0.6451464331436837
llm_goals_115,test,36,0.9092090138059468,0.5222874947485241
llm_goals_115,test,37,0.8323177749076501,0.9548006059708632
llm_goals_115,test,38,0.958584068943147,0.8483833674723993
llm_goals_115,test,39,0.5657364263991985,0.9222807287488012
llm_goals_115,test,40,0.8372945754472534,0.2526171639652511
llm_goals_115,test,41,0.8959134255397705,0.8309168152992626
llm_goals_115,test,42,0.6611075304193003,0.58549510672814
llm_goals_115,test,43,0.9705766443536628,0.9288889171841944
llm_goals_115,test,44,0.903358940217893,0.8696076826330161
llm_goals_115,test,45,0.974787949821673,0.3370215724740773
llm_goals_115,test,46,0.8977051963557465,0.0614426001598869
llm_goals_115,test,47,0.974787949821673,0.5311914101507266
llm_goals_115,test,48,0.8536878228350183,0.6260304408434099
llm_goals_115,test,49,0.8598853364135812,0.9732730991067396
llm_goals_401,test,0,0.9259043959699992,0.995337188206034
llm_goals_401,test,1,0.981544912164452,0.9877679942135538
llm_goals_401,test,2,0.9683968432567636,0.9922930596197688
llm_goals_401,test,3,0.9274727089209512,0.944885851250094
llm_goals_401,test,4,0.9994153976347327,0.9994499087491504
llm_goals_401,test,5,0.9906280634891927,0.9963147043479246
llm_goals_401,test,6,0.9945283534068372,0.9937722087866844
llm_goals_401,test,7,0.9901809091844996,0.9823669190452718
llm_goals_401,test,8,0.995964705739798,0.999488890156012
llm_goals_401,test,9,0.9906022551730352,0.9958814380530916
llm_goals_401,test,10,0.776569297168925,0.9936545489877364
llm_goals_401,test,11,0.8993414639070432,0.9890559909640764
llm_goals_401,test,12,0.8684036797414467,0.974935352749168
llm_goals_401,test,13,0.9906022551730352,0.9811005000644571
llm_goals_401,test,14,0.1053387560901574,0.9053577183425524
llm_goals_401,test,15,0.9980207085813914,0.999511659155082
llm_goals_401,test,16,0.8066045003238261,0.9069459477271428
llm_goals_401,test,17,0.9879777431854192,0.9880303739527652
llm_goals_401,test,18,0.1053387560901574,0.8849021227503215
llm_goals_401,test,19,0.929468871130168,0.8461972444129129
llm_goals_401,test,20,0.9994153976347327,0.9985405802478567
llm_goals_401,test,21,0.9906022551730352,0.9979217051338364
llm_goals_401,test,22,0.8515889588088706,0.9942128658052918
llm_goals_401,test,23,0.7539855759357148,0.9829282756728752
llm_goals_401,test,24,0.7045639659214615,0.9755103584971708
llm_goals_401,test,25,0.980034232000541,0.9854062793275654
llm_goals_401,test,26,0.1053387560901574,0.9862527255879427
llm_goals_401,test,27,0.958305178404086,0.9777193074990224
llm_goals_401,test,28,0.9255917110327424,0.939797105410212
llm_goals_401,test,29,0.8571571752522407,0.924205125757662
llm_goals_401,test,30,0.9844383596338584,0.9887794851583942
llm_goals_401,test,31,0.9930432436860464,0.9974054097423743
llm_goals_401,test,32,0.9901809091844996,0.987442612237454
llm_goals_401,test,33,0.9646614792771104,0.9077114440216438
llm_goals_401,test,34,0.9844383596338584,0.9898619654171366
llm_goals_401,test,35,0.2012263010137867,0.9887833003411676
llm_goals_401,test,36,0.936094108581751,0.9883841875213925
llm_goals_401,test,37,0.8130070608241741,0.7709992025246631
llm_goals_401,test,38,0.9387620714487432,0.9687479729312614
llm_goals_401,test,39,0.8684036797414467,0.9758216144428412
llm_goals_401,test,40,0.8542292068136882,0.9954323769170262
llm_goals_401,test,41,0.9930432436860464,0.9970808029010324
llm_goals_401,test,42,0.8515889588088706,0.9970337152076278
llm_goals_401,test,43,0.9727691414762276,0.9963160753068976
llm_goals_401,test,44,0.9968653917027338,0.999649882329424
llm_goals_401,test,45,0.8337112076133602,0.9708126775062408
llm_goals_401,test,46,0.0461020720683829,0.9777507190974738
llm_goals_401,test,47,0.8337112076133602,0.879110748685256
llm_goals_401,test,48,0.7045639659214615,0.9452995669117286
llm_goals_401,test,49,0.613242383618655,0.9904525877618487
llm_goals_326,test,0,0.8734524214362605,0.7080152699628419
llm_goals_326,test,1,0.6599143783614211,0.1915128822536962
llm_goals_326,test,2,0.8734524214362605,0.7444471737433097
llm_goals_326,test,3,0.9266490944599196,0.4392720971297062
llm_goals_326,test,4,0.8289676371080394,0.891602221308631
llm_goals_326,test,5,0.8893012396493561,0.0516888735952717
llm_goals_326,test,6,0.0313140135099345,0.0431335229249402
llm_goals_326,test,7,0.8734524214362605,0.7478485077290631
llm_goals_326,test,8,0.8790713585564378,0.8572844895817008
llm_goals_326,test,9,0.0385321690990254,0.0757120509371778
llm_goals_326,test,10,0.6729325100264135,0.6528841155476689
llm_goals_326,test,11,0.6976429867105073,0.1274303379860786
llm_goals_326,test,12,0.6035263703261207,0.2755885259868873
llm_goals_326,test,13,0.0385321690990254,0.0177855343138265
llm_goals_326,test,14,0.7729423092847261,0.6106618044247591
llm_goals_326,test,15,0.8446525388566067,0.7686960692919514
llm_goals_326,test,16,0.8389642803694849,0.78510933722289
llm_goals_326,test,17,0.6786079956240113,0.1510306836677899
llm_goals_326,test,18,0.8212143142022156,0.714622257156643
llm_goals_326,test,19,0.8318031434518383,0.7195213393137665
llm_goals_326,test,20,0.8289676371080394,0.7418285539231787
llm_goals_326,test,21,0.0979855652654354,0.0439102190505529
llm_goals_326,test,22,0.8538919038209881,0.1806737802484422
llm_goals_326,test,23,0.8741457434257403,0.2841042501811218
llm_goals_326,test,24,0.8851751098879559,0.7647552401874187
llm_goals_326,test,25,0.7094108379520637,0.6430861301277235
llm_goals_326,test,26,0.8212143142022156,0.6501767709406938
llm_goals_326,test,27,0.7116057309594647,0.1962727505123375
llm_goals_326,test,28,0.580494484594129,0.454183079329165
llm_goals_326,test,29,0.8738792504096556,0.4340101008130515
llm_goals_326,test,30,0.7410726615214775,0.7979558074116693
llm_goals_326,test,31,0.1289030317665812,0.0525898871260873
llm_goals_326,test,32,0.8122618850945892,0.1343880762883031
llm_goals_326,test,33,0.8389642803694849,0.7783031325129874
llm_goals_326,test,34,0.6729325100264135,0.6498840537412343
llm_goals_326,test,35,0.8641401539298657,0.8262182481972753
llm_goals_326,test,36,0.7255657939307874,0.5329381900017961
llm_goals_326,test,37,0.8507757748614532,0.7623008977444935
llm_goals_326,test,38,0.7845067988740984,0.4787708612720514
llm_goals_326,test,39,0.5260659888861462,0.3940936731364524
llm_goals_326,test,40,0.8958908306951944,0.1554473532877406
llm_goals_326,test,41,0.8538919038209881,0.0835985162339854
llm_goals_326,test,42,0.8477382692843403,0.6165240441256894
llm_goals_326,test,43,0.1289030317665812,0.0224093895342384
llm_goals_326,test,44,0.9018624456097992,0.9028450273437764
llm_goals_326,test,45,0.5251996475694883,0.4414288558565103
llm_goals_326,test,46,0.8554971158383647,0.3316083963903907
llm_goals_326,test,47,0.6756079105143764,0.5242446701777307
llm_goals_326,test,48,0.7985600837701167,0.6202593994623453
llm_goals_326,test,49,0.8610819044037172,0.1810686795284525
llm_goals_415,test,0,0.939900695710119,0.945942102532565
llm_goals_415,test,1,0.9897807833141988,0.8927012108946369
llm_goals_415,test,2,0.9349483859704972,0.9655095349643328
llm_goals_415,test,3,0.751324537467491,0.5773151635184978
llm_goals_415,test,4,0.9702419046079236,0.9305359112645492
llm_goals_415,test,5,0.974508583321976,0.2698413337656404
llm_goals_415,test,6,0.3421724240673773,0.1924170964690193
llm_goals_415,test,7,0.9685590274524544,0.964020489204398
llm_goals_415,test,8,0.9730309851931448,0.9764932994994092
llm_goals_415,test,9,0.9033248439022064,0.3491418165522914
llm_goals_415,test,10,0.9226641640470884,0.8933741476361363
llm_goals_415,test,11,0.9764983050163552,0.920598685397126
llm_goals_415,test,12,0.6813268133720755,0.6909930118311174
llm_goals_415,test,13,0.9447310554704864,0.1145420932706047
llm_goals_415,test,14,0.9832645663319978,0.9423774493447
llm_goals_415,test,15,0.9572694883036872,0.9532362831473804
llm_goals_415,test,16,0.9897807833141988,0.8356775665343158
llm_goals_415,test,17,0.9538553950703909,0.9361823813143908
llm_goals_415,test,18,0.9234642421660192,0.9889487026200174
llm_goals_415,test,19,0.9845848083940886,0.6368843297568499
llm_goals_415,test,20,0.9923967721209456,0.9870089292806328
llm_goals_415,test,21,0.2328507646173785,0.192645257351692
llm_goals_415,test,22,0.899708866705551,0.9748569718628384
llm_goals_415,test,23,0.9897807833141988,0.8083263035006917
llm_goals_415,test,24,0.9873025417387266,0.931151511592295
llm_goals_415,test,25,0.8665758326611603,0.8863766227722848
llm_goals_415,test,26,0.9592590333708132,0.9640595915946516
llm_goals_415,test,27,0.7940652966545639,0.9081509748500972
llm_goals_415,test,28,0.9592590333708132,0.702391734675441
llm_goals_415,test,29,0.9832645663319978,0.5027195285232966
llm_goals_415,test,30,0.9822497364314084,0.9145678910630844
llm_goals_415,test,31,0.2328507646173785,0.2428483145137429
llm_goals_415,test,32,0.9685590274524544,0.8852615952458959
llm_goals_415,test,33,0.8339263814807621,0.9135139614061104
llm_goals_415,test,34,0.8023915829263717,0.9013304127749656
llm_goals_415,test,35,0.9246083535348538,0.9492589831139694
llm_goals_415,test,36,0.9226641640470884,0.9187255502309962
llm_goals_415,test,37,0.983033121032194,0.5449707044122137
llm_goals_415,test,38,0.9800647504492532,0.5046841390470121
llm_goals_415,test,39,0.992403447477752,0.6393683546149316
llm_goals_415,test,40,0.9926360843268496,0.958167434065704
llm_goals_415,test,41,0.987277686204777,0.1258629101169009
llm_goals_415,test,42,0.9897807833141988,0.9370320411522076
llm_goals_415,test,43,0.2328507646173785,0.4502328517486405
llm_goals_415,test,44,0.9860755802271516,0.9666150226098564
llm_goals_415,test,45,0.9278320057732556,0.4875853574104955
llm_goals_415,test,46,0.9876614213998623,0.9433028700684638
llm_goals_415,test,47,0.7200094495259239,0.6681809390819066
llm_goals_415,test,48,0.9917455915267251,0.9737072586261016
llm_goals_415,test,49,0.990031599563664,0.9706154467965924
llm_goals_293,test,0,0.1504404181809552,0.8297409974007197
llm_goals_293,test,1,0.9437870970761996,0.9166843306899098
llm_goals_293,test,2,0.8850249067579998,0.4850136353820626
llm_goals_293,test,3,0.2680830166340783,0.391440800258883
llm_goals_293,test,4,0.992815435064132,0.9814967512328644
llm_goals_293,test,5,0.9317349191301264,0.9737153059180352
llm_goals_293,test,6,0.149806467359327,0.985194742988862
llm_goals_293,test,7,0.9373683342666012,0.8402554946174373
llm_goals_293,test,8,0.0599507794622085,0.9863079191276464
llm_goals_293,test,9,0.115089467040268,0.9549403173664608
llm_goals_293,test,10,0.8508535678459953,0.8132036948651367
llm_goals_293,test,11,0.7194933347693279,0.2287407340843056
llm_goals_293,test,12,0.8295294003755161,0.9235533469292896
llm_goals_293,test,13,0.0392033464237246,0.9750785225411348
llm_goals_293,test,14,0.5004012694357524,0.9922545554063744
llm_goals_293,test,15,0.993710875378371,0.9838300936204548
llm_goals_293,test,16,0.5004012694357524,0.9858987926415744
llm_goals_293,test,17,0.3236027233705802,0.9556264290993132
llm_goals_293,test,18,0.115089467040268,0.9458334455175702
llm_goals_293,test,19,0.0676119886353112,0.5736353307249014
llm_goals_293,test,20,0.9922689198616884,0.9812628634528624
llm_goals_293,test,21,0.9317349191301264,0.975720704169484
llm_goals_293,test,22,0.115089467040268,0.9409119473005016
llm_goals_293,test,23,0.5004012694357524,0.6306895569552333
llm_goals_293,test,24,0.6645599066737594,0.9831594820284292
llm_goals_293,test,25,0.1504404181809552,0.8981066919341846
llm_goals_293,test,26,0.115089467040268,0.9716764680193836
llm_goals_293,test,27,0.0392033464237246,0.8155843585345258
llm_goals_293,test,28,0.9350634837915286,0.9733620875264264
llm_goals_293,test,29,0.2771866627591522,0.9051490452062086
llm_goals_293,test,30,0.7989720069206064,0.7348576092486574
llm_goals_293,test,31,0.437073509144407,0.9808893198314528
llm_goals_293,test,32,0.970408081539754,0.593206056427014
llm_goals_293,test,33,0.2680830166340783,0.9657488455537
llm_goals_293,test,34,0.9195300916783828,0.8851169960520849
llm_goals_293,test,35,0.8595089899616909,0.8555944515949592
llm_goals_293,test,36,0.0903978414873686,0.5855041803376898
llm_goals_293,test,37,0.0392033464237246,0.6409415533832579
llm_goals_293,test,38,0.2771866627591522,0.8743998352900223
llm_goals_293,test,39,0.791041971759438,0.8182019034258088
llm_goals_293,test,40,0.115089467040268,0.146751538769175
llm_goals_293,test,41,0.9317349191301264,0.9709715243488852
llm_goals_293,test,42,0.8595089899616909,0.8829771283195118
llm_goals_293,test,43,0.9494981178069288,0.9398047936750782
llm_goals_293,test,44,0.9925633667723772,0.9795592430097252
llm_goals_293,test,45,0.2680830166340783,0.7025255505490043
llm_goals_293,test,46,0.1132729740564379,0.6531552142747035
llm_goals_293,test,47,0.6692441011493292,0.8527514903629965
llm_goals_293,test,48,0.9852290749606362,0.9844985009129076
llm_goals_293,test,49,0.3686313509319464,0.9160653951101896
llm_goals_146,test,0,0.9763174060678168,0.3466493369029843
llm_goals_146,test,1,0.9842358821242856,0.971693336326244
llm_goals_146,test,2,0.3519955439774925,0.6455769508045827
llm_goals_146,test,3,0.9561274646512404,0.5822222843930172
llm_goals_146,test,4,0.9555908441431644,0.6842324614600077
llm_goals_146,test,5,0.915699540891688,0.3860201177799698
llm_goals_146,test,6,0.9783771037565326,0.2341590231269344
llm_goals_146,test,7,0.9553247109779304,0.3643683725782807
llm_goals_146,test,8,0.8183297481940671,0.7055765903024079
llm_goals_146,test,9,0.9590535173331762,0.5084888304786936
llm_goals_146,test,10,0.9144726422536568,0.421220281945486
llm_goals_146,test,11,0.935391425153092,0.489412697220974
llm_goals_146,test,12,0.9598017326817012,0.601020744296073
llm_goals_146,test,13,0.4444542037760621,0.2557097387485441
llm_goals_146,test,14,0.5349865498718128,0.4640750948087747
llm_goals_146,test,15,0.9310175211152522,0.7362086189794756
llm_goals_146,test,16,0.3170930398227682,0.4987515019820814
llm_goals_146,test,17,0.9484531876724208,0.9587866055408388
llm_goals_146,test,18,0.9788026812665184,0.6036783393019786
llm_goals_146,test,19,0.906868519431059,0.3513374958540852
llm_goals_146,test,20,0.9590535173331762,0.8281412663626416
llm_goals_146,test,21,0.9661250110159704,0.34065502012531
llm_goals_146,test,22,0.7485789621615103,0.9907655718703378
llm_goals_146,test,23,0.9408119325862344,0.2061079946990629
llm_goals_146,test,24,0.7223736115105882,0.3446073251785768
llm_goals_146,test,25,0.9651112567798452,0.6576184069508046
llm_goals_146,test,26,0.9590535173331762,0.4110159240539976
llm_goals_146,test,27,0.6975677675486389,0.984345377326772
llm_goals_146,test,28,0.935391425153092,0.4903698662236391
llm_goals_146,test,29,0.9123953588915944,0.362068682571018
llm_goals_146,test,30,0.9263845059981431,0.4551468142410695
llm_goals_146,test,31,0.9527738101387117,0.3773142189484755
llm_goals_146,test,32,0.9553247109779304,0.4187681470726767
llm_goals_146,test,33,0.9553247109779304,0.3985742178328911
llm_goals_146,test,34,0.319736876709092,0.5044141394199024
llm_goals_146,test,35,0.9144726422536568,0.4783486718786691
llm_goals_146,test,36,0.9661250110159704,0.5445552456181276
llm_goals_146,test,37,0.9844332932461524,0.2691607872517095
llm_goals_146,test,38,0.4600029401012364,0.5517828611974395
llm_goals_146,test,39,0.9866691232104834,0.4823935111879393
llm_goals_146,test,40,0.7485789621615103,0.496646227759238
llm_goals_146,test,41,0.8752394338246963,0.410176485322602
llm_goals_146,test,42,0.5031236411913739,0.2603856324469091
llm_goals_146,test,43,0.5349865498718128,0.555094401775619
llm_goals_146,test,44,0.9188595447901434,0.5218179738711017
llm_goals_146,test,45,0.9012134710633124,0.4447090989140364
llm_goals_146,test,46,0.6480128161839503,0.5567532038584041
llm_goals_146,test,47,0.92646664465824,0.4034445534741869
llm_goals_146,test,48,0.978924632880992,0.5335126942675598
llm_goals_146,test,49,0.9102600249556796,0.9775228494630558
llm_goals_236,test,0,0.9771565793476484,0.9502374518284544
llm_goals_236,test,1,0.9122170204725258,0.632901790189933
llm_goals_236,test,2,0.9709234825405104,0.9547987563797488
llm_goals_236,test,3,0.8558315076104217,0.8955373773460537
llm_goals_236,test,4,0.876533394622475,0.604483505999029
llm_goals_236,test,5,0.8804357662846043,0.9645155057204132
llm_goals_236,test,6,0.9505364903208792,0.8400796085543216
llm_goals_236,test,7,0.9709234825405104,0.9809643632475058
llm_goals_236,test,8,0.8555289479379273,0.9551138894715042
llm_goals_236,test,9,0.954343140539349,0.9323794813876984
llm_goals_236,test,10,0.8718563384799843,0.7794327710983796
llm_goals_236,test,11,0.9967281223449158,0.9780675777326168
llm_goals_236,test,12,0.9610387685795516,0.8875326502933057
llm_goals_236,test,13,0.8804357662846043,0.929950178153689
llm_goals_236,test,14,0.5507287958750791,0.926757039035834
llm_goals_236,test,15,0.7833787247710904,0.7910198588237014
llm_goals_236,test,16,0.9878400568483175,0.8930606244937771
llm_goals_236,test,17,0.7870584731045799,0.8600503746114734
llm_goals_236,test,18,0.6726785820405716,0.9478088016198049
llm_goals_236,test,19,0.9299775953000998,0.0557340307108
llm_goals_236,test,20,0.8664205679135619,0.7876626308843195
llm_goals_236,test,21,0.938283977416047,0.8769842974627694
llm_goals_236,test,22,0.9895033238207606,0.3468135553298878
llm_goals_236,test,23,0.996088206539671,0.9514921900954876
llm_goals_236,test,24,0.7870584731045799,0.9310227652155564
llm_goals_236,test,25,0.8718563384799843,0.6795750924280035
llm_goals_236,test,26,0.9317804595407108,0.7808533270173144
llm_goals_236,test,27,0.7870584731045799,0.3548731041307022
llm_goals_236,test,28,0.956507147996424,0.9628356114282044
llm_goals_236,test,29,0.905532658008844,0.0678193344934023
llm_goals_236,test,30,0.7327141237581755,0.8669796641519075
llm_goals_236,test,31,0.9361907863503748,0.7901777055635718
llm_goals_236,test,32,0.832387144988267,0.9761505717506124
llm_goals_236,test,33,0.8735581653957326,0.925212684096036
llm_goals_236,test,34,0.6245204936265042,0.8283609769269727
llm_goals_236,test,35,0.9771565793476484,0.9191289552997411
llm_goals_236,test,36,0.638121605736869,0.8559995305859318
llm_goals_236,test,37,0.9299775953000998,0.0699999500981296
llm_goals_236,test,38,0.9827693106271594,0.1796257560659765
llm_goals_236,test,39,0.9625397316605918,0.9175097319342992
llm_goals_236,test,40,0.9348946246359592,0.9403113739191608
llm_goals_236,test,41,0.9505364903208792,0.8003138232198173
llm_goals_236,test,42,0.9771565793476484,0.9656298771148252
llm_goals_236,test,43,0.8804357662846043,0.7679948721037306
llm_goals_236,test,44,0.638627710020184,0.8397446267621825
llm_goals_236,test,45,0.4998879250758387,0.9383022794052605
llm_goals_236,test,46,0.9610387685795516,0.9861661791368868
llm_goals_236,test,47,0.675367826932047,0.8216616511109771
llm_goals_236,test,48,0.8604973502652514,0.858269332712761
llm_goals_236,test,49,0.638627710020184,0.4904990431885668
llm_goals_228,test,0,0.3747814242820722,0.8649997140208154
llm_goals_228,test,1,0.0934690144627459,0.873321951241939
llm_goals_228,test,2,0.5847378864836573,0.8957700764194163
llm_goals_228,test,3,0.3165976744400553,0.1236903018304219
llm_goals_228,test,4,0.0934690144627459,0.7453361748877467
llm_goals_228,test,5,0.0299399344906917,0.1192843072628729
llm_goals_228,test,6,0.0598252436867937,0.1326971908799849
llm_goals_228,test,7,0.1350146769878104,0.871489699671336
llm_goals_228,test,8,0.752486105527824,0.8243934448117555
llm_goals_228,test,9,0.4299004945124177,0.0467115483491378
llm_goals_228,test,10,0.3421126717423901,0.7659628877266128
llm_goals_228,test,11,0.0883855443381257,0.7679518438112958
llm_goals_228,test,12,0.2061622769485617,0.3261574116268692
llm_goals_228,test,13,0.6522848090598534,0.0485964531599145
llm_goals_228,test,14,0.5435484532546564,0.9059895277949974
llm_goals_228,test,15,0.7498403258231305,0.7273862898347316
llm_goals_228,test,16,0.6826691654247213,0.9158116003766568
llm_goals_228,test,17,0.1007170721800058,0.2983101183881344
llm_goals_228,test,18,0.6826691654247213,0.9541860240679364
llm_goals_228,test,19,0.3081389780029551,0.8476448713173524
llm_goals_228,test,20,0.1991794477154621,0.7696132021860423
llm_goals_228,test,21,0.3618012204769024,0.2240634772421697
llm_goals_228,test,22,0.8198114595110845,0.5944716948164779
llm_goals_228,test,23,0.1991794477154621,0.8035987641686457
llm_goals_228,test,24,0.152607897496274,0.8954167374248716
llm_goals_228,test,25,0.1292775015649383,0.7151555300243474
llm_goals_228,test,26,0.0961126237076901,0.933582548003466
llm_goals_228,test,27,0.3217228196147736,0.7197431412077714
llm_goals_228,test,28,0.7323605438070202,0.2110212364220297
llm_goals_228,test,29,0.6568619069885948,0.7475211519371483
llm_goals_228,test,30,0.044041308396171,0.8439204089849919
llm_goals_228,test,31,0.472490001444005,0.233187545611893
llm_goals_228,test,32,0.1350146769878104,0.6522564861744167
llm_goals_228,test,33,0.6826691654247213,0.9036141650151318
llm_goals_228,test,34,0.0777626842834659,0.8063092871634532
llm_goals_228,test,35,0.1811176194917266,0.7386503177076705
llm_goals_228,test,36,0.1600511160936317,0.4339953557233364
llm_goals_228,test,37,0.3747814242820722,0.7638352005094292
llm_goals_228,test,38,0.1820311814294611,0.6360600602425245
llm_goals_228,test,39,0.1811176194917266,0.5582201546461881
llm_goals_228,test,40,0.7048819629664896,0.7919258516091835
llm_goals_228,test,41,0.3804892371783966,0.2382556436276936
llm_goals_228,test,42,0.1934496543396562,0.8180639729723762
llm_goals_228,test,43,0.2698361226657809,0.1744592330547429
llm_goals_228,test,44,0.7651398747691588,0.8059483789474448
llm_goals_228,test,45,0.0540611742515662,0.185739159862888
llm_goals_228,test,46,0.0757593597496882,0.6385866390140609
llm_goals_228,test,47,0.0961126237076901,0.181375317343939
llm_goals_228,test,48,0.4390637849514953,0.9407807579284042
llm_goals_228,test,49,0.793690981100586,0.3930787424904466
llm_goals_397,test,0,0.8343363942554917,0.699619236076976
llm_goals_397,test,1,0.1941628356237562,0.7858562431683396
llm_goals_397,test,2,0.2806560912779074,0.8656621011318779
llm_goals_397,test,3,0.5080543043542495,0.5216834406120067
llm_goals_397,test,4,0.2348541327077603,0.2170933847941593
llm_goals_397,test,5,0.7530263582967811,0.6203395193658312
llm_goals_397,test,6,0.4639227352039509,0.7062451920643317
llm_goals_397,test,7,0.8343363942554917,0.7818592804776917
llm_goals_397,test,8,0.3668039899465631,0.3722692260862962
llm_goals_397,test,9,0.23449632654904,0.7525421946277221
llm_goals_397,test,10,0.2217900902520113,0.55382257151224
llm_goals_397,test,11,0.23449632654904,0.654531962180046
llm_goals_397,test,12,0.5568820764250456,0.659030860866389
llm_goals_397,test,13,0.3087837285264002,0.5080189660267803
llm_goals_397,test,14,0.9257637871073188,0.93949913908347
llm_goals_397,test,15,0.1941628356237562,0.1397603306643144
llm_goals_397,test,16,0.3192451446333424,0.94748652041643
llm_goals_397,test,17,0.5677311313069571,0.8337419557614654
llm_goals_397,test,18,0.865988787720079,0.929486218624307
llm_goals_397,test,19,0.3087837285264002,0.6917306210780744
llm_goals_397,test,20,0.3461487713022156,0.3168072461941615
llm_goals_397,test,21,0.6932083264787874,0.7050710313870211
llm_goals_397,test,22,0.2806560912779074,0.9048640732247422
llm_goals_397,test,23,0.3783517198144873,0.6896020664732718
llm_goals_397,test,24,0.9257637871073188,0.9588320250969324
llm_goals_397,test,25,0.554384297033507,0.7992448229336531
llm_goals_397,test,26,0.2244911451917454,0.925966146729264
llm_goals_397,test,27,0.2729398721683181,0.8290411876343606
llm_goals_397,test,28,0.2746272932498697,0.614512804730325
llm_goals_397,test,29,0.4639227352039509,0.8119643351773586
llm_goals_397,test,30,0.379052909173711,0.6416279718371558
llm_goals_397,test,31,0.7530263582967811,0.6679576084359291
llm_goals_397,test,32,0.4059674786384716,0.7497660034408138
llm_goals_397,test,33,0.2875226146878445,0.9406859295331328
llm_goals_397,test,34,0.5647581305289123,0.6256499877247926
llm_goals_397,test,35,0.8343363942554917,0.6832026805931318
llm_goals_397,test,36,0.1941628356237562,0.688132397136647
llm_goals_397,test,37,0.1933625956148257,0.6918761795541711
llm_goals_397,test,38,0.4639227352039509,0.5633524658577074
llm_goals_397,test,39,0.5568820764250456,0.6565008680077707
llm_goals_397,test,40,0.2122973284487392,0.6958896526078188
llm_goals_397,test,41,0.6564384773887115,0.757736441343734
llm_goals_397,test,42,0.2806560912779074,0.8490422913608605
llm_goals_397,test,43,0.2827473335763558,0.7079678798274159
llm_goals_397,test,44,0.3092767853102187,0.1092321450870402
llm_goals_397,test,45,0.2055688248163388,0.5166526556585691
llm_goals_397,test,46,0.1628915621011673,0.5419103560441939
llm_goals_397,test,47,0.495625786753587,0.4142756611326425
llm_goals_397,test,48,0.9257637871073188,0.956440745261376
llm_goals_397,test,49,0.260951118548649,0.7473642876631974
