template_id,split,question_idx,prediction,label
llm_goals_281,test,0,0.7183988052871021,0.6469683041227705
llm_goals_281,test,1,0.6684118121266431,0.9610940798838222
llm_goals_281,test,2,0.28227790010682,0.5031480224374153
llm_goals_281,test,3,0.656737209955579,0.7670721923110818
llm_goals_281,test,4,0.6585449784878924,0.8573901670428902
llm_goals_281,test,5,0.9848657049867047,0.9842762939932492
llm_goals_281,test,6,0.9821067846867075,0.9899150137032138
llm_goals_281,test,7,0.332970458465935,0.6044210331647685
llm_goals_281,test,8,0.7897193277941875,0.8588374254971683
llm_goals_281,test,9,0.9772094487159357,0.9818143245858572
llm_goals_281,test,10,0.9402280663626258,0.9566256410829356
llm_goals_281,test,11,0.944666803646017,0.9833216073807935
llm_goals_281,test,12,0.9909370939273421,0.9046776287625902
llm_goals_281,test,13,0.974347154789883,0.98515391356728
llm_goals_281,test,14,0.9438170008447012,0.9050783518109468
llm_goals_281,test,15,0.9329310252107317,0.8143656887719262
llm_goals_281,test,16,0.7881927692268821,0.9030392149866112
llm_goals_281,test,17,0.8445716079275951,0.9782413247928606
llm_goals_281,test,18,0.9281736226749926,0.9394087233364008
llm_goals_281,test,19,0.4419626484505434,0.013753239211091
llm_goals_281,test,20,0.9599960444403943,0.9155304405450826
llm_goals_281,test,21,0.9728768079035057,0.9849370716395414
llm_goals_281,test,22,0.6721798499369087,0.9678947939625672
llm_goals_281,test,23,0.9332074741915521,0.9923826456520252
llm_goals_281,test,24,0.9326195720387233,0.8888994485286436
llm_goals_281,test,25,0.6806111756540058,0.8670526725158154
llm_goals_281,test,26,0.9375427579548695,0.901592433398574
llm_goals_281,test,27,0.6420491159436872,0.9485982074237508
llm_goals_281,test,28,0.9858233531267654,0.7677171303083985
llm_goals_281,test,29,0.7212920597386102,0.0052734878464236
llm_goals_281,test,30,0.9367355117022885,0.9557743081128488
llm_goals_281,test,31,0.9896339378950642,0.991380810774403
llm_goals_281,test,32,0.9107410127810667,0.9805269843458853
llm_goals_281,test,33,0.9689436564321792,0.972194253937266
llm_goals_281,test,34,0.9330734222539877,0.8780187362883507
llm_goals_281,test,35,0.4621174330869662,0.6544240022055418
llm_goals_281,test,36,0.9857004284593294,0.8261399254388034
llm_goals_281,test,37,0.6168168000991003,0.0237476361601453
llm_goals_281,test,38,0.9110819492965273,0.2014428856002633
llm_goals_281,test,39,0.9417638772728743,0.9075832337909528
llm_goals_281,test,40,0.9715412052379083,0.9579415929114572
llm_goals_281,test,41,0.948845148683958,0.9618767488068802
llm_goals_281,test,42,0.410197527463513,0.6901242783749021
llm_goals_281,test,43,0.9766184687480579,0.991684436764246
llm_goals_281,test,44,0.8521138832939492,0.8707976927725176
llm_goals_281,test,45,0.8280796430943996,0.855741209208138
llm_goals_281,test,46,0.910401482945157,0.9874326582385272
llm_goals_281,test,47,0.7281748074177002,0.8582029976458081
llm_goals_281,test,48,0.9339478221477053,0.9463140960540516
llm_goals_281,test,49,0.7715660138885135,0.9857229593669278
llm_goals_182,test,0,0.2839292557235104,0.9879327417690024
llm_goals_182,test,1,0.09232244007867235,0.143590152166184
llm_goals_182,test,2,0.8052934378916196,0.975250065576832
llm_goals_182,test,3,0.3690787116702389,0.0053962595772566
llm_goals_182,test,4,0.7009134468388925,0.9395814535027472
llm_goals_182,test,5,0.7005834767859556,0.7582924861360835
llm_goals_182,test,6,0.40856436436589155,0.5490946196305377
llm_goals_182,test,7,0.47934366950111146,0.9749513260519144
llm_goals_182,test,8,0.5058910062890771,0.9392208432034008
llm_goals_182,test,9,0.6768707049499897,0.644100014754752
llm_goals_182,test,10,0.7245700987608533,0.3220700188782243
llm_goals_182,test,11,0.42078739221906253,0.981547533906946
llm_goals_182,test,12,0.3581747014924055,0.7137040988313296
llm_goals_182,test,13,0.4207873922190624,0.7387330490542552
llm_goals_182,test,14,0.03638685011711787,0.0829307635671844
llm_goals_182,test,15,0.26038509871228144,0.9703041905123228
llm_goals_182,test,16,0.03638685011711787,0.0818078392213706
llm_goals_182,test,17,0.28913386085636583,0.1321702105874928
llm_goals_182,test,18,0.03638685011711787,0.0627371233524436
llm_goals_182,test,19,0.31927111663420316,0.9687731848071606
llm_goals_182,test,20,0.9008062879156231,0.9416436555176496
llm_goals_182,test,21,0.19877515523010922,0.6725942479831736
llm_goals_182,test,22,0.254006683459486,0.0934677665250981
llm_goals_182,test,23,0.3282229125122708,0.9812074898398124
llm_goals_182,test,24,0.5082350233814691,0.0949295295653811
llm_goals_182,test,25,0.27871220246995737,0.44246206774058
llm_goals_182,test,26,0.03638685011711787,0.0774707640077587
llm_goals_182,test,27,0.29769662739182806,0.0713961149448467
llm_goals_182,test,28,0.3312147527676313,0.0522770850978139
llm_goals_182,test,29,0.3729339725889181,0.95314246463025
llm_goals_182,test,30,0.6327609584697458,0.2504357008006138
llm_goals_182,test,31,0.6527486422370018,0.7161425258649188
llm_goals_182,test,32,0.45832704838265315,0.9646060453168028
llm_goals_182,test,33,0.15768626911765687,0.0545762932705501
llm_goals_182,test,34,0.5933757095589755,0.3122018653274729
llm_goals_182,test,35,0.1984837091834775,0.9860451225539192
llm_goals_182,test,36,0.6441361374632281,0.281834229591876
llm_goals_182,test,37,0.42483236317902096,0.9542467602371212
llm_goals_182,test,38,0.47934366950111146,0.9552662371080364
llm_goals_182,test,39,0.3312247146073977,0.6622170237731956
llm_goals_182,test,40,0.43475513866404497,0.946633816726873
llm_goals_182,test,41,0.6833990637727255,0.7134743981048756
llm_goals_182,test,42,0.6259243291053346,0.9907715917291932
llm_goals_182,test,43,0.48020973958025565,0.7427734633378366
llm_goals_182,test,44,0.9824756188564278,0.932817223580216
llm_goals_182,test,45,0.4611051390124224,0.0223475038345677
llm_goals_182,test,46,0.12841979867080888,0.9353007113558736
llm_goals_182,test,47,0.6855512092338117,0.0297953783874237
llm_goals_182,test,48,0.03854612564584423,0.0852689569608422
llm_goals_182,test,49,0.16226149565755885,0.1518944682187261
llm_goals_86,test,0,0.8832093874936925,0.8172564483303766
llm_goals_86,test,1,0.8558621179260429,0.2845365633956491
llm_goals_86,test,2,0.9197845441965312,0.9608234754131476
llm_goals_86,test,3,0.9589934342718903,0.4905999470752266
llm_goals_86,test,4,0.8885239953195406,0.5223214595126631
llm_goals_86,test,5,0.944687447617742,0.9513769165530092
llm_goals_86,test,6,0.9248960000124123,0.951058446008376
llm_goals_86,test,7,0.9628182644215922,0.7200026044712197
llm_goals_86,test,8,0.9270199548845799,0.8539416814851118
llm_goals_86,test,9,0.9406896443926218,0.8409760614216922
llm_goals_86,test,10,0.9405993621512904,0.8985089093274398
llm_goals_86,test,11,0.9518430635900001,0.960478126988056
llm_goals_86,test,12,0.9199072116266853,0.8489881816956245
llm_goals_86,test,13,0.955577909196232,0.4634670890641906
llm_goals_86,test,14,0.8026890329687387,0.4018075540357314
llm_goals_86,test,15,0.9411600431106217,0.8179491136946933
llm_goals_86,test,16,0.9336031281783502,0.5277433486870419
llm_goals_86,test,17,0.8722120905321251,0.7712345746838988
llm_goals_86,test,18,0.9566086938642019,0.89457523897237
llm_goals_86,test,19,0.95843724469051,0.6892489135704485
llm_goals_86,test,20,0.9239092059774556,0.6353825334363962
llm_goals_86,test,21,0.7507568373684134,0.6530961974169966
llm_goals_86,test,22,0.9393748268365006,0.8880043621770078
llm_goals_86,test,23,0.9001330528047206,0.8809145094341392
llm_goals_86,test,24,0.9281693083070229,0.8164918407571773
llm_goals_86,test,25,0.8680078194072355,0.5604555739923536
llm_goals_86,test,26,0.9530191802489553,0.649475864669537
llm_goals_86,test,27,0.9591336650017824,0.951709689395591
llm_goals_86,test,28,0.8300308593332509,0.8387165613894679
llm_goals_86,test,29,0.9378875103824393,0.674672778677278
llm_goals_86,test,30,0.8977163638987659,0.4648188591057368
llm_goals_86,test,31,0.7662232333653233,0.663420132120388
llm_goals_86,test,32,0.8864755806377665,0.6545684404871791
llm_goals_86,test,33,0.9542560772285021,0.8998044750116885
llm_goals_86,test,34,0.9407372883407922,0.4550697205389422
llm_goals_86,test,35,0.9346981643358081,0.7006743460528161
llm_goals_86,test,36,0.692316155859284,0.8041987455834365
llm_goals_86,test,37,0.9424153168872683,0.5563101271844357
llm_goals_86,test,38,0.9449424542354657,0.8432080747815303
llm_goals_86,test,39,0.9469664705593063,0.8362265811977031
llm_goals_86,test,40,0.9579233927306925,0.9223666199627952
llm_goals_86,test,41,0.9110544726968967,0.8662457492250272
llm_goals_86,test,42,0.9408881666573791,0.8608515909392838
llm_goals_86,test,43,0.952173789228327,0.7603164939120921
llm_goals_86,test,44,0.9217262488896641,0.7861883669269881
llm_goals_86,test,45,0.8970815926575916,0.8184625523204602
llm_goals_86,test,46,0.9376450372716066,0.94641274246688
llm_goals_86,test,47,0.9480417580480229,0.5854129271694068
llm_goals_86,test,48,0.9662953611985099,0.920227709127447
llm_goals_86,test,49,0.8980523734288326,0.938532531863768
llm_goals_438,test,0,0.6882266228076297,0.2314562881184323
llm_goals_438,test,1,0.8993865468385446,0.7762565054203492
llm_goals_438,test,2,0.9200275714349231,0.1997924391817541
llm_goals_438,test,3,0.8420089085876558,0.7992019611592787
llm_goals_438,test,4,0.8200932903449525,0.8049000497082406
llm_goals_438,test,5,0.8503447978397244,0.7174028107354763
llm_goals_438,test,6,0.7031963320450222,0.5982684312746689
llm_goals_438,test,7,0.8689086487160687,0.3291190451199206
llm_goals_438,test,8,0.8028478640689847,0.7413057021464516
llm_goals_438,test,9,0.8615050094441904,0.5655428700196011
llm_goals_438,test,10,0.8422978932330335,0.3747542837277384
llm_goals_438,test,11,0.8899089519748061,0.8141028872063276
llm_goals_438,test,12,0.9083869873941023,0.7668041079795853
llm_goals_438,test,13,0.7473791417277452,0.3989739137138193
llm_goals_438,test,14,0.7600205726700845,0.630963737596379
llm_goals_438,test,15,0.8519180991067946,0.8688319384714368
llm_goals_438,test,16,0.7632010769187573,0.6505714018494433
llm_goals_438,test,17,0.8911270902030507,0.8432750100181929
llm_goals_438,test,18,0.8768285714876424,0.5454092034362268
llm_goals_438,test,19,0.8424279688648698,0.7865512327498919
llm_goals_438,test,20,0.8663890755492707,0.9279490690754252
llm_goals_438,test,21,0.8987691019151974,0.6012074860500735
llm_goals_438,test,22,0.9195300136877164,0.8641822359830585
llm_goals_438,test,23,0.8645433185218079,0.6703505477568762
llm_goals_438,test,24,0.8773266860835981,0.659909363300309
llm_goals_438,test,25,0.7090600543088429,0.345825906210505
llm_goals_438,test,26,0.9159739207311266,0.6342860330821757
llm_goals_438,test,27,0.8911270902030507,0.7595913377648781
llm_goals_438,test,28,0.8774932416821227,0.8379201892332447
llm_goals_438,test,29,0.8491097718103816,0.7401884809786652
llm_goals_438,test,30,0.7367792833353666,0.3998648759112044
llm_goals_438,test,31,0.7199965346890814,0.7480624326316926
llm_goals_438,test,32,0.8700452651799382,0.7187081505530665
llm_goals_438,test,33,0.6603612158847957,0.4107650384506188
llm_goals_438,test,34,0.8794278343920686,0.5939725433582219
llm_goals_438,test,35,0.7286419090653546,0.3302466757601784
llm_goals_438,test,36,0.9120947521403636,0.5187707473385741
llm_goals_438,test,37,0.8583225194168853,0.7520728212475446
llm_goals_438,test,38,0.9212916880105007,0.7995393320092329
llm_goals_438,test,39,0.9021769575672871,0.6185694255280791
llm_goals_438,test,40,0.8703541747827925,0.6220749660889419
llm_goals_438,test,41,0.9088747664345181,0.6546343601883343
llm_goals_438,test,42,0.9072747440207457,0.2289113591576547
llm_goals_438,test,43,0.9102605391036783,0.7896951428928559
llm_goals_438,test,44,0.8510667278314256,0.8209977715013562
llm_goals_438,test,45,0.903502146016232,0.8177235133069483
llm_goals_438,test,46,0.8230770799723471,0.6895343723542676
llm_goals_438,test,47,0.8993865468385446,0.8889681695339486
llm_goals_438,test,48,0.8715232001654437,0.5208110922241418
llm_goals_438,test,49,0.8918593122264319,0.8657228979999158
llm_goals_206,test,0,0.731684492701902,0.7014024312312447
llm_goals_206,test,1,0.16765584053815252,0.09993519180039
llm_goals_206,test,2,0.23760147953047084,0.3805957976908932
llm_goals_206,test,3,0.16765584053815252,0.2506742005767606
llm_goals_206,test,4,0.13971500600873452,0.168981522681462
llm_goals_206,test,5,0.12360518288810078,0.1396780009361202
llm_goals_206,test,6,0.16765584053815252,0.1710164544129008
llm_goals_206,test,7,0.23760147953047084,0.6071092597712212
llm_goals_206,test,8,0.23760147953047084,0.1978085524873241
llm_goals_206,test,9,0.16765584053815252,0.1702414804858378
llm_goals_206,test,10,0.05686411343221304,0.0308920608149318
llm_goals_206,test,11,0.16765584053815252,0.2459778318217196
llm_goals_206,test,12,0.1873648981248287,0.1179569636055102
llm_goals_206,test,13,0.16765584053815252,0.0251233861816931
llm_goals_206,test,14,0.731684492701902,0.2426781205526189
llm_goals_206,test,15,0.1873648981248287,0.1278309727108198
llm_goals_206,test,16,0.16765584053815252,0.1146704353190439
llm_goals_206,test,17,0.731684492701902,0.3515281137996084
llm_goals_206,test,18,0.062304600665372224,0.2196923742635269
llm_goals_206,test,19,0.035477831530358404,0.1304369288612395
llm_goals_206,test,20,0.731684492701902,0.4106940669038348
llm_goals_206,test,21,0.731684492701902,0.3368795798730228
llm_goals_206,test,22,0.08558767462247603,0.2288948502948852
llm_goals_206,test,23,0.057604212180439006,0.074953807513886
llm_goals_206,test,24,0.057604212180439006,0.2441322028739075
llm_goals_206,test,25,0.21439892198598623,0.3918868995222204
llm_goals_206,test,26,0.23760147953047084,0.1723782187624221
llm_goals_206,test,27,0.16765584053815252,0.0391839098867229
llm_goals_206,test,28,0.16765584053815252,0.4274888337765593
llm_goals_206,test,29,0.1873648981248287,0.2322920572824445
llm_goals_206,test,30,0.06076259273485287,0.1908743178919042
llm_goals_206,test,31,0.7204944115997817,0.3263357686111295
llm_goals_206,test,32,0.16181438477643517,0.1979025671613465
llm_goals_206,test,33,0.13375396778915982,0.3702297199267818
llm_goals_206,test,34,0.057604212180439006,0.2357480876238731
llm_goals_206,test,35,0.23760147953047084,0.1482521268698281
llm_goals_206,test,36,0.7719130970722277,0.5821584340939012
llm_goals_206,test,37,0.035477831530358404,0.2650568670113227
llm_goals_206,test,38,0.21439892198598623,0.1360417554406812
llm_goals_206,test,39,0.21439892198598623,0.2119836423042495
llm_goals_206,test,40,0.0451357388486294,0.0289993447458979
llm_goals_206,test,41,0.057604212180439006,0.3521756872657808
llm_goals_206,test,42,0.731684492701902,0.4575015387432249
llm_goals_206,test,43,0.5432444140265998,0.4938333202476994
llm_goals_206,test,44,0.11506344002953427,0.2787682116116408
llm_goals_206,test,45,0.12360518288810078,0.1994537138307505
llm_goals_206,test,46,0.13375396778915982,0.5462074258839572
llm_goals_206,test,47,0.03334126579068907,0.1243126576899177
llm_goals_206,test,48,0.062304600665372224,0.2961273515821391
llm_goals_206,test,49,0.12360518288810078,0.2344178444588115
llm_goals_230,test,0,0.9300296106254257,0.93636017960289
llm_goals_230,test,1,0.9876877466112801,0.9080467805464864
llm_goals_230,test,2,0.9290338753800711,0.9492415176604246
llm_goals_230,test,3,0.9868806802581295,0.9883888360800804
llm_goals_230,test,4,0.9300779308839003,0.9308136137248874
llm_goals_230,test,5,0.694313864245096,0.7461662385047432
llm_goals_230,test,6,0.7693465949714925,0.717943379234597
llm_goals_230,test,7,0.9395465265452492,0.9498506183856972
llm_goals_230,test,8,0.9257650182040057,0.903114856201669
llm_goals_230,test,9,0.7578286936059909,0.82763987275553
llm_goals_230,test,10,0.8859948887817245,0.9176431321128996
llm_goals_230,test,11,0.9898563621701228,0.9351009100066487
llm_goals_230,test,12,0.9700037641302206,0.9397739779762262
llm_goals_230,test,13,0.7543436892965735,0.7663744634625839
llm_goals_230,test,14,0.9580107525875216,0.9587652680933508
llm_goals_230,test,15,0.9409024325102221,0.9205685886203512
llm_goals_230,test,16,0.9478041529398995,0.9548512114624126
llm_goals_230,test,17,0.9842686648532207,0.9177103078422468
llm_goals_230,test,18,0.9634068211166662,0.9694847470185912
llm_goals_230,test,19,0.9841769932591559,0.241616336852366
llm_goals_230,test,20,0.9409024325102221,0.9513741131303785
llm_goals_230,test,21,0.7815350876407741,0.7885476346382756
llm_goals_230,test,22,0.992370625586672,0.956588746723208
llm_goals_230,test,23,0.9780459602092358,0.8796235970540837
llm_goals_230,test,24,0.9629067376199408,0.9499204774153412
llm_goals_230,test,25,0.8827360458196606,0.7955946265549944
llm_goals_230,test,26,0.9645425286311374,0.96990013164722
llm_goals_230,test,27,0.9895476300895694,0.94318586558855
llm_goals_230,test,28,0.9887515110476572,0.9816553586758804
llm_goals_230,test,29,0.9833041231435198,0.2755945052454056
llm_goals_230,test,30,0.8915713418042518,0.8694230354404326
llm_goals_230,test,31,0.7693465949714925,0.7707924321548216
llm_goals_230,test,32,0.9780459602092358,0.90274804670901
llm_goals_230,test,33,0.959441960667986,0.964968324648223
llm_goals_230,test,34,0.8796847628994026,0.8786148395823622
llm_goals_230,test,35,0.932138323933469,0.9659460769293176
llm_goals_230,test,36,0.8952429277159739,0.9265550381327228
llm_goals_230,test,37,0.9823537669369117,0.2797244301796934
llm_goals_230,test,38,0.9863107400404255,0.3705167516113265
llm_goals_230,test,39,0.964336375532952,0.9571521884098596
llm_goals_230,test,40,0.9884524543066627,0.9050631524069218
llm_goals_230,test,41,0.7795830377122229,0.7826731844385996
llm_goals_230,test,42,0.9246101579768299,0.9429872638799112
llm_goals_230,test,43,0.721386928205347,0.7863413109728788
llm_goals_230,test,44,0.9129941479627224,0.925476613788026
llm_goals_230,test,45,0.987448851598684,0.9835724833108438
llm_goals_230,test,46,0.9884524543066627,0.9324350988748504
llm_goals_230,test,47,0.989072263401304,0.9918220640686296
llm_goals_230,test,48,0.9629211621786201,0.9632008068532188
llm_goals_230,test,49,0.9871668815790179,0.9196290944353572
llm_goals_115,test,0,0.9320190946614234,0.6878400979912542
llm_goals_115,test,1,0.8895554361609385,0.9839099052127078
llm_goals_115,test,2,0.7643487126842037,0.6264771219588837
llm_goals_115,test,3,0.4153584725580423,0.5101504336920828
llm_goals_115,test,4,0.9163104305747231,0.4749856076325871
llm_goals_115,test,5,0.8481254196930115,0.8824023049105343
llm_goals_115,test,6,0.9255466471323951,0.8532372770314965
llm_goals_115,test,7,0.7703766449556629,0.6852873624151903
llm_goals_115,test,8,0.6510554270623391,0.7160879332925761
llm_goals_115,test,9,0.8137798526751304,0.8154454841155164
llm_goals_115,test,10,0.7728964465445837,0.4039477654904175
llm_goals_115,test,11,0.8883410719042216,0.0754019011174093
llm_goals_115,test,12,0.8741151328647218,0.9216642359611203
llm_goals_115,test,13,0.9148466583217184,0.9023613914305204
llm_goals_115,test,14,0.8146602925449503,0.7385692464553043
llm_goals_115,test,15,0.814538117233622,0.5273332595390108
llm_goals_115,test,16,0.8481254196930115,0.5319983452316508
llm_goals_115,test,17,0.9209087694142456,0.9818465718466136
llm_goals_115,test,18,0.8634858721796185,0.7098198015331896
llm_goals_115,test,19,0.9080255806195131,0.8100224145820547
llm_goals_115,test,20,0.7400678762590956,0.5463347366319413
llm_goals_115,test,21,0.95110400530361,0.9708728191294718
llm_goals_115,test,22,0.6942411838594652,0.9564870014981148
llm_goals_115,test,23,0.807071146635769,0.3427940177342707
llm_goals_115,test,24,0.7715083129659132,0.7216819551218165
llm_goals_115,test,25,0.8133625189930932,0.7118023516747564
llm_goals_115,test,26,0.9184862786828353,0.7495779511143832
llm_goals_115,test,27,0.8045090685151912,0.9876421097288872
llm_goals_115,test,28,0.8950965015213471,0.2396916848864135
llm_goals_115,test,29,0.9558986431934827,0.8734944493526076
llm_goals_115,test,30,0.718296602843707,0.4720948765146447
llm_goals_115,test,31,0.86096382214801,0.9613435269013808
llm_goals_115,test,32,0.8022641900819888,0.2802482482345895
llm_goals_115,test,33,0.8937029261932641,0.800091024303248
llm_goals_115,test,34,0.5660983737768358,0.4731748202329244
llm_goals_115,test,35,0.7140515608889834,0.6451464331436837
llm_goals_115,test,36,0.8037768008061511,0.5222874947485241
llm_goals_115,test,37,0.8975705117055286,0.9548006059708632
llm_goals_115,test,38,0.9571127290438644,0.8483833674723993
llm_goals_115,test,39,0.7984389111446052,0.9222807287488012
llm_goals_115,test,40,0.8085095304403195,0.2526171639652511
llm_goals_115,test,41,0.8477969565161335,0.8309168152992626
llm_goals_115,test,42,0.7145011385988966,0.58549510672814
llm_goals_115,test,43,0.9541181934424569,0.9288889171841944
llm_goals_115,test,44,0.6773330929160154,0.8696076826330161
llm_goals_115,test,45,0.480536409815347,0.3370215724740773
llm_goals_115,test,46,0.8704678812187424,0.0614426001598869
llm_goals_115,test,47,0.8072007242057705,0.5311914101507266
llm_goals_115,test,48,0.8272866210014288,0.6260304408434099
llm_goals_115,test,49,0.8121740663571542,0.9732730991067396
llm_goals_401,test,0,0.7183440841787894,0.995337188206034
llm_goals_401,test,1,0.9344291280357035,0.9877679942135538
llm_goals_401,test,2,0.9718465208424837,0.9922930596197688
llm_goals_401,test,3,0.895826933879211,0.944885851250094
llm_goals_401,test,4,0.7459252028232976,0.9994499087491504
llm_goals_401,test,5,0.9855806429337758,0.9963147043479246
llm_goals_401,test,6,0.9858261346891881,0.9937722087866844
llm_goals_401,test,7,0.9907015363490917,0.9823669190452718
llm_goals_401,test,8,0.9974151650257549,0.999488890156012
llm_goals_401,test,9,0.9354955169687607,0.9958814380530916
llm_goals_401,test,10,0.9143801693833686,0.9936545489877364
llm_goals_401,test,11,0.9361563930876189,0.9890559909640764
llm_goals_401,test,12,0.9168286138358054,0.974935352749168
llm_goals_401,test,13,0.6813699706149291,0.9811005000644571
llm_goals_401,test,14,0.12176453373476744,0.9053577183425524
llm_goals_401,test,15,0.99779419098111,0.999511659155082
llm_goals_401,test,16,0.7587965915549999,0.9069459477271428
llm_goals_401,test,17,0.9828812080211621,0.9880303739527652
llm_goals_401,test,18,0.2916450106718881,0.8849021227503215
llm_goals_401,test,19,0.9086606306245081,0.8461972444129129
llm_goals_401,test,20,0.9974151650257549,0.9985405802478567
llm_goals_401,test,21,0.9854715467784364,0.9979217051338364
llm_goals_401,test,22,0.9196275860195793,0.9942128658052918
llm_goals_401,test,23,0.6246464301479784,0.9829282756728752
llm_goals_401,test,24,0.32575876034096013,0.9755103584971708
llm_goals_401,test,25,0.9846497177256487,0.9854062793275654
llm_goals_401,test,26,0.15587828340383947,0.9862527255879427
llm_goals_401,test,27,0.9715599216154228,0.9777193074990224
llm_goals_401,test,28,0.903407198401978,0.939797105410212
llm_goals_401,test,29,0.9071399406997013,0.924205125757662
llm_goals_401,test,30,0.9827053743818777,0.9887794851583942
llm_goals_401,test,31,0.9854715467784363,0.9974054097423743
llm_goals_401,test,32,0.9773176515699172,0.987442612237454
llm_goals_401,test,33,0.6021644015086923,0.9077114440216438
llm_goals_401,test,34,0.9857641853037729,0.9898619654171366
llm_goals_401,test,35,0.5206516178913145,0.9887833003411676
llm_goals_401,test,36,0.6479662396500238,0.9883841875213925
llm_goals_401,test,37,0.873670738342263,0.7709992025246631
llm_goals_401,test,38,0.9152324619490363,0.9687479729312614
llm_goals_401,test,39,0.8983059758223998,0.9758216144428412
llm_goals_401,test,40,0.9172799978466789,0.9954323769170262
llm_goals_401,test,41,0.9880180160629782,0.9970808029010324
llm_goals_401,test,42,0.9262155876384676,0.9970337152076278
llm_goals_401,test,43,0.9473079257523507,0.9963160753068976
llm_goals_401,test,44,0.9985979000679294,0.999649882329424
llm_goals_401,test,45,0.8721533646327811,0.9708126775062408
llm_goals_401,test,46,0.13815977718866446,0.9777507190974738
llm_goals_401,test,47,0.8509512160784548,0.879110748685256
llm_goals_401,test,48,0.5236664018060249,0.9452995669117286
llm_goals_401,test,49,0.8608463989537644,0.9904525877618487
llm_goals_326,test,0,0.8617769482168222,0.7080152699628419
llm_goals_326,test,1,0.69381870639887,0.1915128822536962
llm_goals_326,test,2,0.8617769482168222,0.7444471737433097
llm_goals_326,test,3,0.9185553203433171,0.4392720971297062
llm_goals_326,test,4,0.8699671470914255,0.891602221308631
llm_goals_326,test,5,0.786715760371095,0.0516888735952717
llm_goals_326,test,6,0.5991719561789383,0.0431335229249402
llm_goals_326,test,7,0.8694406552322675,0.7478485077290631
llm_goals_326,test,8,0.8903987441775572,0.8572844895817008
llm_goals_326,test,9,0.06819483011443693,0.0757120509371778
llm_goals_326,test,10,0.7096569940214961,0.6528841155476689
llm_goals_326,test,11,0.822468046040879,0.1274303379860786
llm_goals_326,test,12,0.5835325743076385,0.2755885259868873
llm_goals_326,test,13,0.04631236654076454,0.0177855343138265
llm_goals_326,test,14,0.8004706492011125,0.6106618044247591
llm_goals_326,test,15,0.7937322237960615,0.7686960692919514
llm_goals_326,test,16,0.8004706492011125,0.78510933722289
llm_goals_326,test,17,0.7071594614297342,0.1510306836677899
llm_goals_326,test,18,0.8018592430487868,0.714622257156643
llm_goals_326,test,19,0.8802445938532713,0.7195213393137665
llm_goals_326,test,20,0.8546275347770269,0.7418285539231787
llm_goals_326,test,21,0.12268255042914113,0.0439102190505529
llm_goals_326,test,22,0.7428217454709877,0.1806737802484422
llm_goals_326,test,23,0.8344909546541036,0.2841042501811218
llm_goals_326,test,24,0.8545185118298235,0.7647552401874187
llm_goals_326,test,25,0.7057881114277954,0.6430861301277235
llm_goals_326,test,26,0.8330743307383087,0.6501767709406938
llm_goals_326,test,27,0.47751436287665766,0.1962727505123375
llm_goals_326,test,28,0.5937673475593312,0.454183079329165
llm_goals_326,test,29,0.8166348306461937,0.4340101008130515
llm_goals_326,test,30,0.7092374537607843,0.7979558074116693
llm_goals_326,test,31,0.12268255042914113,0.0525898871260873
llm_goals_326,test,32,0.8083887688037473,0.1343880762883031
llm_goals_326,test,33,0.8165613175069423,0.7783031325129874
llm_goals_326,test,34,0.6914386987807385,0.6498840537412343
llm_goals_326,test,35,0.8675203154864509,0.8262182481972753
llm_goals_326,test,36,0.7130222513427605,0.5329381900017961
llm_goals_326,test,37,0.8399363576558342,0.7623008977444935
llm_goals_326,test,38,0.8553839140321454,0.4787708612720514
llm_goals_326,test,39,0.5710851041804402,0.3940936731364524
llm_goals_326,test,40,0.8511373168514814,0.1554473532877406
llm_goals_326,test,41,0.30334393199601073,0.0835985162339854
llm_goals_326,test,42,0.8608692711816275,0.6165240441256894
llm_goals_326,test,43,0.34524385173327515,0.0224093895342384
llm_goals_326,test,44,0.8708770286343488,0.9028450273437764
llm_goals_326,test,45,0.484599518235451,0.4414288558565103
llm_goals_326,test,46,0.8696197459602769,0.3316083963903907
llm_goals_326,test,47,0.6915723048833847,0.5242446701777307
llm_goals_326,test,48,0.7967599442998062,0.6202593994623453
llm_goals_326,test,49,0.6869556134510563,0.1810686795284525
llm_goals_415,test,0,0.9440492789742896,0.945942102532565
llm_goals_415,test,1,0.9899882871267655,0.8927012108946369
llm_goals_415,test,2,0.9655881330054575,0.9655095349643328
llm_goals_415,test,3,0.57369942572221,0.5773151635184978
llm_goals_415,test,4,0.961343666716575,0.9305359112645492
llm_goals_415,test,5,0.9793299631112422,0.2698413337656404
llm_goals_415,test,6,0.5676515305779893,0.1924170964690193
llm_goals_415,test,7,0.9793590908242993,0.964020489204398
llm_goals_415,test,8,0.9629377530677149,0.9764932994994092
llm_goals_415,test,9,0.7465957142971907,0.3491418165522914
llm_goals_415,test,10,0.8635807814521735,0.8933741476361363
llm_goals_415,test,11,0.9707984323733756,0.920598685397126
llm_goals_415,test,12,0.680416422792979,0.6909930118311174
llm_goals_415,test,13,0.9725606040596176,0.1145420932706047
llm_goals_415,test,14,0.9728834635774181,0.9423774493447
llm_goals_415,test,15,0.9748912450594829,0.9532362831473804
llm_goals_415,test,16,0.8994384211521832,0.8356775665343158
llm_goals_415,test,17,0.9726807075355378,0.9361823813143908
llm_goals_415,test,18,0.9529769829132829,0.9889487026200174
llm_goals_415,test,19,0.9213437830546911,0.6368843297568499
llm_goals_415,test,20,0.9866003396652356,0.9870089292806328
llm_goals_415,test,21,0.5129529284775748,0.192645257351692
llm_goals_415,test,22,0.9565356171835967,0.9748569718628384
llm_goals_415,test,23,0.9586670997834341,0.8083263035006917
llm_goals_415,test,24,0.9741185305403747,0.931151511592295
llm_goals_415,test,25,0.8849755313637698,0.8863766227722848
llm_goals_415,test,26,0.9695588347399116,0.9640595915946516
llm_goals_415,test,27,0.910860538123318,0.9081509748500972
llm_goals_415,test,28,0.9295833910607162,0.702391734675441
llm_goals_415,test,29,0.9381806461823309,0.5027195285232966
llm_goals_415,test,30,0.9249344647204444,0.9145678910630844
llm_goals_415,test,31,0.4886726735972069,0.2428483145137429
llm_goals_415,test,32,0.9429493766214233,0.8852615952458959
llm_goals_415,test,33,0.7247303105000439,0.9135139614061104
llm_goals_415,test,34,0.8192261253639578,0.9013304127749656
llm_goals_415,test,35,0.9377659772022809,0.9492589831139694
llm_goals_415,test,36,0.9676743343599449,0.9187255502309962
llm_goals_415,test,37,0.916798513133847,0.5449707044122137
llm_goals_415,test,38,0.9699997107929436,0.5046841390470121
llm_goals_415,test,39,0.8026892915466824,0.6393683546149316
llm_goals_415,test,40,0.9869667293138012,0.958167434065704
llm_goals_415,test,41,0.9505224032623669,0.1258629101169009
llm_goals_415,test,42,0.9796658559517609,0.9370320411522076
llm_goals_415,test,43,0.22659135784956427,0.4502328517486405
llm_goals_415,test,44,0.9557747050087385,0.9666150226098564
llm_goals_415,test,45,0.9286896792832402,0.4875853574104955
llm_goals_415,test,46,0.9443281893183727,0.9433028700684638
llm_goals_415,test,47,0.8992651117031188,0.6681809390819066
llm_goals_415,test,48,0.9839808145213561,0.9737072586261016
llm_goals_415,test,49,0.9573755459759018,0.9706154467965924
llm_goals_293,test,0,0.6289313701237628,0.8297409974007197
llm_goals_293,test,1,0.4552826428797485,0.9166843306899098
llm_goals_293,test,2,0.6412015235300162,0.4850136353820626
llm_goals_293,test,3,0.40059026849044543,0.391440800258883
llm_goals_293,test,4,0.9940062364001858,0.9814967512328644
llm_goals_293,test,5,0.9484286902180816,0.9737153059180352
llm_goals_293,test,6,0.23567965439468966,0.985194742988862
llm_goals_293,test,7,0.9369569031700472,0.8402554946174373
llm_goals_293,test,8,0.3445293983989212,0.9863079191276464
llm_goals_293,test,9,0.6848217218109646,0.9549403173664608
llm_goals_293,test,10,0.5874502467000368,0.8132036948651367
llm_goals_293,test,11,0.6024340290722948,0.2287407340843056
llm_goals_293,test,12,0.7090993686857997,0.9235533469292896
llm_goals_293,test,13,0.20138533415754914,0.9750785225411348
llm_goals_293,test,14,0.6580527038536181,0.9922545554063744
llm_goals_293,test,15,0.9928477206708122,0.9838300936204548
llm_goals_293,test,16,0.664550118142436,0.9858987926415744
llm_goals_293,test,17,0.28844352620705177,0.9556264290993132
llm_goals_293,test,18,0.586406784112941,0.9458334455175702
llm_goals_293,test,19,0.33599289219492284,0.5736353307249014
llm_goals_293,test,20,0.9901417491957248,0.9812628634528624
llm_goals_293,test,21,0.7499547959563339,0.975720704169484
llm_goals_293,test,22,0.6449540027328199,0.9409119473005016
llm_goals_293,test,23,0.2782298298531805,0.6306895569552333
llm_goals_293,test,24,0.3007566144689342,0.9831594820284292
llm_goals_293,test,25,0.6458044899588743,0.8981066919341846
llm_goals_293,test,26,0.5915446467777347,0.9716764680193836
llm_goals_293,test,27,0.6095898167683232,0.8155843585345258
llm_goals_293,test,28,0.6930090711907777,0.9733620875264264
llm_goals_293,test,29,0.23653636082448518,0.9051490452062086
llm_goals_293,test,30,0.840706466865664,0.7348576092486574
llm_goals_293,test,31,0.49158042553103004,0.9808893198314528
llm_goals_293,test,32,0.9756856165622402,0.593206056427014
llm_goals_293,test,33,0.459741613310545,0.9657488455537
llm_goals_293,test,34,0.880331160526822,0.8851169960520849
llm_goals_293,test,35,0.8807234573799656,0.8555944515949592
llm_goals_293,test,36,0.3994324113247829,0.5855041803376898
llm_goals_293,test,37,0.5794305231375005,0.6409415533832579
llm_goals_293,test,38,0.42202958975135757,0.8743998352900223
llm_goals_293,test,39,0.8317834746523953,0.8182019034258088
llm_goals_293,test,40,0.33983408286874456,0.146751538769175
llm_goals_293,test,41,0.9582892061636653,0.9709715243488852
llm_goals_293,test,42,0.6622579155465312,0.8829771283195118
llm_goals_293,test,43,0.8103189957001423,0.9398047936750782
llm_goals_293,test,44,0.9922268390184946,0.9795592430097252
llm_goals_293,test,45,0.666035288797583,0.7025255505490043
llm_goals_293,test,46,0.10673362498274463,0.6531552142747035
llm_goals_293,test,47,0.8014182018800264,0.8527514903629965
llm_goals_293,test,48,0.8721614435137207,0.9844985009129076
llm_goals_293,test,49,0.5867329098014759,0.9160653951101896
llm_goals_146,test,0,0.9409020354108306,0.3466493369029843
llm_goals_146,test,1,0.7992834865201334,0.971693336326244
llm_goals_146,test,2,0.7229850947850237,0.6455769508045827
llm_goals_146,test,3,0.7233750508511899,0.5822222843930172
llm_goals_146,test,4,0.9559995532105422,0.6842324614600077
llm_goals_146,test,5,0.9266201256554639,0.3860201177799698
llm_goals_146,test,6,0.7993939255131567,0.2341590231269344
llm_goals_146,test,7,0.9559108421554643,0.3643683725782807
llm_goals_146,test,8,0.8825529607900272,0.7055765903024079
llm_goals_146,test,9,0.9722602771602288,0.5084888304786936
llm_goals_146,test,10,0.7648782585333773,0.421220281945486
llm_goals_146,test,11,0.8002471298442448,0.489412697220974
llm_goals_146,test,12,0.7455213319921734,0.601020744296073
llm_goals_146,test,13,0.6138068954987667,0.2557097387485441
llm_goals_146,test,14,0.7068134111278596,0.4640750948087747
llm_goals_146,test,15,0.936890425324323,0.7362086189794756
llm_goals_146,test,16,0.5647215354230861,0.4987515019820814
llm_goals_146,test,17,0.8616511645946515,0.9587866055408388
llm_goals_146,test,18,0.9021020921030068,0.6036783393019786
llm_goals_146,test,19,0.9092585853299443,0.3513374958540852
llm_goals_146,test,20,0.8564056185779916,0.8281412663626416
llm_goals_146,test,21,0.588530004277383,0.34065502012531
llm_goals_146,test,22,0.8733114597292359,0.9907655718703378
llm_goals_146,test,23,0.6946091621106486,0.2061079946990629
llm_goals_146,test,24,0.8186061021664103,0.3446073251785768
llm_goals_146,test,25,0.7745402901556216,0.6576184069508046
llm_goals_146,test,26,0.8672582133172058,0.4110159240539976
llm_goals_146,test,27,0.8545543375264594,0.984345377326772
llm_goals_146,test,28,0.9149135764219359,0.4903698662236391
llm_goals_146,test,29,0.9015954717494953,0.362068682571018
llm_goals_146,test,30,0.859608729175775,0.4551468142410695
llm_goals_146,test,31,0.9373292324456868,0.3773142189484755
llm_goals_146,test,32,0.8692554641690373,0.4187681470726767
llm_goals_146,test,33,0.9145067535106032,0.3985742178328911
llm_goals_146,test,34,0.5775306904517995,0.5044141394199024
llm_goals_146,test,35,0.9142913437825975,0.4783486718786691
llm_goals_146,test,36,0.8868105418297207,0.5445552456181276
llm_goals_146,test,37,0.908061344110942,0.2691607872517095
llm_goals_146,test,38,0.7439780376947005,0.5517828611974395
llm_goals_146,test,39,0.9518865155370699,0.4823935111879393
llm_goals_146,test,40,0.7905169736155364,0.496646227759238
llm_goals_146,test,41,0.827341219050051,0.410176485322602
llm_goals_146,test,42,0.80926779921539,0.2603856324469091
llm_goals_146,test,43,0.7566769479125587,0.555094401775619
llm_goals_146,test,44,0.9149397035458371,0.5218179738711017
llm_goals_146,test,45,0.9474135054589797,0.4447090989140364
llm_goals_146,test,46,0.8186076070717639,0.5567532038584041
llm_goals_146,test,47,0.8906742922007863,0.4034445534741869
llm_goals_146,test,48,0.9722602771602289,0.5335126942675598
llm_goals_146,test,49,0.7455171777850195,0.9775228494630558
llm_goals_236,test,0,0.957252682018484,0.9502374518284544
llm_goals_236,test,1,0.7646954788972211,0.632901790189933
llm_goals_236,test,2,0.9609889199317906,0.9547987563797488
llm_goals_236,test,3,0.9106854417416628,0.8955373773460537
llm_goals_236,test,4,0.8742793020572406,0.604483505999029
llm_goals_236,test,5,0.9167882608630116,0.9645155057204132
llm_goals_236,test,6,0.9236770271734072,0.8400796085543216
llm_goals_236,test,7,0.9762972389842597,0.9809643632475058
llm_goals_236,test,8,0.9177606891323048,0.9551138894715042
llm_goals_236,test,9,0.9487543315700074,0.9323794813876984
llm_goals_236,test,10,0.8891244944933471,0.7794327710983796
llm_goals_236,test,11,0.9217849590025645,0.9780675777326168
llm_goals_236,test,12,0.9604551800029156,0.8875326502933057
llm_goals_236,test,13,0.9042134075171241,0.929950178153689
llm_goals_236,test,14,0.7020253753064135,0.926757039035834
llm_goals_236,test,15,0.7263063986525209,0.7910198588237014
llm_goals_236,test,16,0.943945845620518,0.8930606244937771
llm_goals_236,test,17,0.8944194719989295,0.8600503746114734
llm_goals_236,test,18,0.8636092131735921,0.9478088016198049
llm_goals_236,test,19,0.8147500101208833,0.0557340307108
llm_goals_236,test,20,0.7666145562295451,0.7876626308843195
llm_goals_236,test,21,0.9496511807045014,0.8769842974627694
llm_goals_236,test,22,0.8539895828468794,0.3468135553298878
llm_goals_236,test,23,0.9944334029321089,0.9514921900954876
llm_goals_236,test,24,0.7775120602263347,0.9310227652155564
llm_goals_236,test,25,0.9040129808204801,0.6795750924280035
llm_goals_236,test,26,0.8787510809785303,0.7808533270173144
llm_goals_236,test,27,0.9134515133815558,0.3548731041307022
llm_goals_236,test,28,0.9012965571004207,0.9628356114282044
llm_goals_236,test,29,0.9482695463646543,0.0678193344934023
llm_goals_236,test,30,0.7907583591411492,0.8669796641519075
llm_goals_236,test,31,0.7505058068033207,0.7901777055635718
llm_goals_236,test,32,0.7797007934066071,0.9761505717506124
llm_goals_236,test,33,0.8711721505512666,0.925212684096036
llm_goals_236,test,34,0.8711222651353091,0.8283609769269727
llm_goals_236,test,35,0.9554309042804259,0.9191289552997411
llm_goals_236,test,36,0.7420761791064204,0.8559995305859318
llm_goals_236,test,37,0.9558294018355594,0.0699999500981296
llm_goals_236,test,38,0.8296119547969493,0.1796257560659765
llm_goals_236,test,39,0.9488968453121792,0.9175097319342992
llm_goals_236,test,40,0.8218989566217161,0.9403113739191608
llm_goals_236,test,41,0.9230854113405101,0.8003138232198173
llm_goals_236,test,42,0.9554309042804259,0.9656298771148252
llm_goals_236,test,43,0.9167882608630116,0.7679948721037306
llm_goals_236,test,44,0.6646815276779834,0.8397446267621825
llm_goals_236,test,45,0.8276139834326162,0.9383022794052605
llm_goals_236,test,46,0.9838693143071024,0.9861661791368868
llm_goals_236,test,47,0.7544915861714797,0.8216616511109771
llm_goals_236,test,48,0.8711721505512666,0.858269332712761
llm_goals_236,test,49,0.6683120676698615,0.4904990431885668
llm_goals_228,test,0,0.2881166997937754,0.8649997140208154
llm_goals_228,test,1,0.22836604216781609,0.873321951241939
llm_goals_228,test,2,0.6073694371988889,0.8957700764194163
llm_goals_228,test,3,0.3276776742958998,0.1236903018304219
llm_goals_228,test,4,0.4522488692434014,0.7453361748877467
llm_goals_228,test,5,0.04794211747635053,0.1192843072628729
llm_goals_228,test,6,0.0644121135334544,0.1326971908799849
llm_goals_228,test,7,0.24968375417910846,0.871489699671336
llm_goals_228,test,8,0.6698266052406548,0.8243934448117555
llm_goals_228,test,9,0.4506768189233115,0.0467115483491378
llm_goals_228,test,10,0.4122030612112963,0.7659628877266128
llm_goals_228,test,11,0.1419628609438828,0.7679518438112958
llm_goals_228,test,12,0.2904858612737971,0.3261574116268692
llm_goals_228,test,13,0.4265130856411079,0.0485964531599145
llm_goals_228,test,14,0.6019003786011455,0.9059895277949974
llm_goals_228,test,15,0.6043372148174034,0.7273862898347316
llm_goals_228,test,16,0.7854228425059077,0.9158116003766568
llm_goals_228,test,17,0.09860051968308665,0.2983101183881344
llm_goals_228,test,18,0.8231293957746736,0.9541860240679364
llm_goals_228,test,19,0.3821392778495896,0.8476448713173524
llm_goals_228,test,20,0.22958270760490537,0.7696132021860423
llm_goals_228,test,21,0.3493678289007558,0.2240634772421697
llm_goals_228,test,22,0.6285605821732507,0.5944716948164779
llm_goals_228,test,23,0.14672429651662058,0.8035987641686457
llm_goals_228,test,24,0.37369251863894193,0.8954167374248716
llm_goals_228,test,25,0.08715303530388986,0.7151555300243474
llm_goals_228,test,26,0.1089793338157178,0.933582548003466
llm_goals_228,test,27,0.5310048731063802,0.7197431412077714
llm_goals_228,test,28,0.3642040262858841,0.2110212364220297
llm_goals_228,test,29,0.495118803676704,0.7475211519371483
llm_goals_228,test,30,0.07781443759203747,0.8439204089849919
llm_goals_228,test,31,0.44672783453810494,0.233187545611893
llm_goals_228,test,32,0.39951392691805915,0.6522564861744167
llm_goals_228,test,33,0.8264383686185718,0.9036141650151318
llm_goals_228,test,34,0.15674007562216483,0.8063092871634532
llm_goals_228,test,35,0.5929273449869273,0.7386503177076705
llm_goals_228,test,36,0.14692420813733262,0.4339953557233364
llm_goals_228,test,37,0.21380086080185592,0.7638352005094292
llm_goals_228,test,38,0.3538146178114554,0.6360600602425245
llm_goals_228,test,39,0.2197241020058133,0.5582201546461881
llm_goals_228,test,40,0.4350345841927865,0.7919258516091835
llm_goals_228,test,41,0.6375981151677469,0.2382556436276936
llm_goals_228,test,42,0.17066114343514568,0.8180639729723762
llm_goals_228,test,43,0.37482911542624425,0.1744592330547429
llm_goals_228,test,44,0.7992858476314485,0.8059483789474448
llm_goals_228,test,45,0.20982567565541135,0.185739159862888
llm_goals_228,test,46,0.16080196800738675,0.6385866390140609
llm_goals_228,test,47,0.28599048219781537,0.181375317343939
llm_goals_228,test,48,0.462806757726389,0.9407807579284042
llm_goals_228,test,49,0.7888487985109305,0.3930787424904466
llm_goals_397,test,0,0.8109472735243862,0.699619236076976
llm_goals_397,test,1,0.3070034629376784,0.7858562431683396
llm_goals_397,test,2,0.4893404038293528,0.8656621011318779
llm_goals_397,test,3,0.6311838777099562,0.5216834406120067
llm_goals_397,test,4,0.2895240912181481,0.2170933847941593
llm_goals_397,test,5,0.4746044572150188,0.6203395193658312
llm_goals_397,test,6,0.46932415378976505,0.7062451920643317
llm_goals_397,test,7,0.6254101868579395,0.7818592804776917
llm_goals_397,test,8,0.3035713745202373,0.3722692260862962
llm_goals_397,test,9,0.27103319154574335,0.7525421946277221
llm_goals_397,test,10,0.22499448088549445,0.55382257151224
llm_goals_397,test,11,0.22284987596780645,0.654531962180046
llm_goals_397,test,12,0.3253396782667278,0.659030860866389
llm_goals_397,test,13,0.30901658183092723,0.5080189660267803
llm_goals_397,test,14,0.5468387347171204,0.93949913908347
llm_goals_397,test,15,0.26821179695992264,0.1397603306643144
llm_goals_397,test,16,0.2398479015684314,0.94748652041643
llm_goals_397,test,17,0.5449122827043442,0.8337419557614654
llm_goals_397,test,18,0.898383656275534,0.929486218624307
llm_goals_397,test,19,0.31799430050580196,0.6917306210780744
llm_goals_397,test,20,0.2773012193173791,0.3168072461941615
llm_goals_397,test,21,0.705463798006857,0.7050710313870211
llm_goals_397,test,22,0.37213781282915415,0.9048640732247422
llm_goals_397,test,23,0.27784110476782115,0.6896020664732718
llm_goals_397,test,24,0.9069763435960246,0.9588320250969324
llm_goals_397,test,25,0.40999067183549015,0.7992448229336531
llm_goals_397,test,26,0.21951444874106954,0.925966146729264
llm_goals_397,test,27,0.3777292128613871,0.8290411876343606
llm_goals_397,test,28,0.43808418080449685,0.614512804730325
llm_goals_397,test,29,0.41425318472764133,0.8119643351773586
llm_goals_397,test,30,0.5260145495979299,0.6416279718371558
llm_goals_397,test,31,0.705463798006857,0.6679576084359291
llm_goals_397,test,32,0.3402898147761279,0.7497660034408138
llm_goals_397,test,33,0.27475910268741793,0.9406859295331328
llm_goals_397,test,34,0.5197865254603903,0.6256499877247926
llm_goals_397,test,35,0.796577552540298,0.6832026805931318
llm_goals_397,test,36,0.3286136518362901,0.688132397136647
llm_goals_397,test,37,0.386228760786782,0.6918761795541711
llm_goals_397,test,38,0.5881171369503135,0.5633524658577074
llm_goals_397,test,39,0.44875395362939025,0.6565008680077707
llm_goals_397,test,40,0.2266456815354545,0.6958896526078188
llm_goals_397,test,41,0.5200964259905952,0.757736441343734
llm_goals_397,test,42,0.6214478796988919,0.8490422913608605
llm_goals_397,test,43,0.5686434670393797,0.7079678798274159
llm_goals_397,test,44,0.2847149371582436,0.1092321450870402
llm_goals_397,test,45,0.244193928824755,0.5166526556585691
llm_goals_397,test,46,0.22724264201452016,0.5419103560441939
llm_goals_397,test,47,0.4642352470177185,0.4142756611326425
llm_goals_397,test,48,0.9346364554905396,0.956440745261376
llm_goals_397,test,49,0.40600008736019483,0.7473642876631974
