template_id,split,question_idx,prediction,label
llm_goals_401,test,0,0.8621564492569891,0.9578712581802692
llm_goals_401,test,1,0.9396688929087016,0.9541386350517288
llm_goals_401,test,2,0.8927125910716845,0.957633375223842
llm_goals_401,test,3,0.8935810915253648,0.939051149330178
llm_goals_401,test,4,0.9785047180262462,0.977310418686652
llm_goals_401,test,5,0.9684225907502312,0.9809432028055692
llm_goals_401,test,6,0.9593001618754148,0.9755462400413056
llm_goals_401,test,7,0.9610711945306416,0.9511837371803112
llm_goals_401,test,8,0.9646819848279976,0.9766189453717342
llm_goals_401,test,9,0.9563025243481176,0.980273604068012
llm_goals_401,test,10,0.7972925237712262,0.9337562348751304
llm_goals_401,test,11,0.8348160442956895,0.9722627409957164
llm_goals_401,test,12,0.9361882822695218,0.9492781746196448
llm_goals_401,test,13,0.9563025243481176,0.9655717616666432
llm_goals_401,test,14,0.6519702161070812,0.9574255360215488
llm_goals_401,test,15,0.9618073111216788,0.981384516529317
llm_goals_401,test,16,0.9008152511572876,0.9222335225671268
llm_goals_401,test,17,0.924678147301334,0.9532549975325786
llm_goals_401,test,18,0.6519702161070812,0.9602041260526504
llm_goals_401,test,19,0.8940041085672934,0.6478757906857974
llm_goals_401,test,20,0.9785047180262462,0.9756843455136628
llm_goals_401,test,21,0.9563025243481176,0.990806877474314
llm_goals_401,test,22,0.9025533231935684,0.9781303405092489
llm_goals_401,test,23,0.9167314782221871,0.9417706148309914
llm_goals_401,test,24,0.8764885691626193,0.956336258298358
llm_goals_401,test,25,0.9126283518171868,0.9313769351906296
llm_goals_401,test,26,0.6519702161070812,0.979961037598938
llm_goals_401,test,27,0.9473529462317738,0.9393076331919324
llm_goals_401,test,28,0.9271593700107464,0.8926497724419561
llm_goals_401,test,29,0.9192545981083362,0.9039012216839892
llm_goals_401,test,30,0.8990291957170568,0.95604890766277
llm_goals_401,test,31,0.9640161402029788,0.9892266395215328
llm_goals_401,test,32,0.9610711945306416,0.9539506448444212
llm_goals_401,test,33,0.951461313719808,0.96642041239244
llm_goals_401,test,34,0.8990291957170568,0.9213858238598618
llm_goals_401,test,35,0.5377569895628603,0.9778974644047042
llm_goals_401,test,36,0.8893301485306999,0.9268431675812392
llm_goals_401,test,37,0.764766157385323,0.8127340038962743
llm_goals_401,test,38,0.95256912639511,0.9302681059523448
llm_goals_401,test,39,0.9361882822695218,0.965603948473316
llm_goals_401,test,40,0.8702496855441776,0.9546924251425224
llm_goals_401,test,41,0.9640161402029788,0.9801422355796748
llm_goals_401,test,42,0.9025533231935684,0.942841528380264
llm_goals_401,test,43,0.964454590222358,0.9624654057275208
llm_goals_401,test,44,0.9706017374955488,0.966766237132969
llm_goals_401,test,45,0.9094043398712124,0.9166605495286292
llm_goals_401,test,46,0.5226286747496097,0.9735371475107132
llm_goals_401,test,47,0.9094043398712124,0.9170851722665576
llm_goals_401,test,48,0.8764885691626193,0.9537843477184356
llm_goals_401,test,49,0.8076949705727857,0.9543297303993916
llm_goals_438,test,0,0.9228849406667308,0.7747630551513792
llm_goals_438,test,1,0.957239865113378,0.935855152767207
llm_goals_438,test,2,0.9829735755229012,0.7147502234092127
llm_goals_438,test,3,0.9611083870867012,0.8761475031821572
llm_goals_438,test,4,0.9457392105700492,0.8370855426053896
llm_goals_438,test,5,0.9674397692431914,0.9507074946857058
llm_goals_438,test,6,0.9263247840310084,0.9429353481406656
llm_goals_438,test,7,0.9745925673037522,0.7990903812700391
llm_goals_438,test,8,0.9537837507938614,0.8538515570307835
llm_goals_438,test,9,0.938230576517048,0.9386380912818448
llm_goals_438,test,10,0.9747113581993404,0.6740432265841833
llm_goals_438,test,11,0.9599318518290386,0.923251629590114
llm_goals_438,test,12,0.9374802096663288,0.9507690082509844
llm_goals_438,test,13,0.962442459037621,0.9289195559017176
llm_goals_438,test,14,0.922309754535174,0.9291442660965707
llm_goals_438,test,15,0.9613562835186076,0.7999125079987789
llm_goals_438,test,16,0.956838072367058,0.88095736898606
llm_goals_438,test,17,0.9599318518290386,0.9601117963812904
llm_goals_438,test,18,0.9570851918198864,0.9583113794533262
llm_goals_438,test,19,0.962442459037621,0.95757842004367
llm_goals_438,test,20,0.9829382904547124,0.8569924879179522
llm_goals_438,test,21,0.9602787495652164,0.925142229626847
llm_goals_438,test,22,0.9570851918198864,0.969126404096814
llm_goals_438,test,23,0.963409662886193,0.9270654345691632
llm_goals_438,test,24,0.938230576517048,0.9447244425458496
llm_goals_438,test,25,0.9717245108863192,0.8921203628029744
llm_goals_438,test,26,0.9546589838982754,0.8840255122167773
llm_goals_438,test,27,0.9599318518290386,0.9518933904854712
llm_goals_438,test,28,0.938230576517048,0.9239946006518056
llm_goals_438,test,29,0.9690358625745852,0.967243014785835
llm_goals_438,test,30,0.9715998168369392,0.6513131257391838
llm_goals_438,test,31,0.9521245378043348,0.9452766773393708
llm_goals_438,test,32,0.9646786462674866,0.8986559507515572
llm_goals_438,test,33,0.9330728668038696,0.9178363106363248
llm_goals_438,test,34,0.9623557326622764,0.58431126605279
llm_goals_438,test,35,0.9691584097325988,0.8549510299218779
llm_goals_438,test,36,0.9546589838982754,0.7795077576161488
llm_goals_438,test,37,0.9643271558322924,0.942087231678633
llm_goals_438,test,38,0.9375533430801476,0.9569196102046018
llm_goals_438,test,39,0.9602787495652164,0.6285572658291573
llm_goals_438,test,40,0.9570851918198864,0.9166060095399312
llm_goals_438,test,41,0.9570851918198864,0.9583812359561668
llm_goals_438,test,42,0.9829735755229012,0.7214403769582053
llm_goals_438,test,43,0.9374802096663288,0.953284323393524
llm_goals_438,test,44,0.9461974512412652,0.8806080817389553
llm_goals_438,test,45,0.9767272475316984,0.92897343737164
llm_goals_438,test,46,0.9630804050637024,0.9584044214292112
llm_goals_438,test,47,0.9546589838982754,0.8794537151734262
llm_goals_438,test,48,0.9570851918198864,0.9488244649817108
llm_goals_438,test,49,0.981886505200026,0.96985775201818
llm_goals_293,test,0,0.5520872324902683,0.8120835453586716
llm_goals_293,test,1,0.9551060190386996,0.6480853571933485
llm_goals_293,test,2,0.9155593493883656,0.6079345474456403
llm_goals_293,test,3,0.8346015238663196,0.6129396608114358
llm_goals_293,test,4,0.9862380027499302,0.98210900952278
llm_goals_293,test,5,0.963296114229794,0.8658899081858322
llm_goals_293,test,6,0.8605262011707768,0.8882423636795116
llm_goals_293,test,7,0.9835519188036976,0.8509290855710241
llm_goals_293,test,8,0.8745157149595516,0.9446962492710368
llm_goals_293,test,9,0.7872463442637466,0.7513049230182386
llm_goals_293,test,10,0.9463899740142236,0.9177243110970909
llm_goals_293,test,11,0.7744545453286062,0.745101023404262
llm_goals_293,test,12,0.927806082808624,0.938155530986403
llm_goals_293,test,13,0.9310349252602091,0.93102258486241
llm_goals_293,test,14,0.8931952746945748,0.9835613372030028
llm_goals_293,test,15,0.9967182875476308,0.9863449336337948
llm_goals_293,test,16,0.8931952746945748,0.9708489179126024
llm_goals_293,test,17,0.8103870189066451,0.6973515732177806
llm_goals_293,test,18,0.7872463442637466,0.9506214255847638
llm_goals_293,test,19,0.845737393966378,0.8167264479797811
llm_goals_293,test,20,0.9945159554765336,0.9863548279811404
llm_goals_293,test,21,0.963296114229794,0.7980274506980439
llm_goals_293,test,22,0.7872463442637466,0.6721615182655347
llm_goals_293,test,23,0.8931952746945748,0.3710465401115394
llm_goals_293,test,24,0.8004988465189381,0.9595301158883638
llm_goals_293,test,25,0.5520872324902683,0.958163618329278
llm_goals_293,test,26,0.7872463442637466,0.966849388190172
llm_goals_293,test,27,0.9310349252602091,0.8260024784538619
llm_goals_293,test,28,0.7777210337775292,0.8388491865678236
llm_goals_293,test,29,0.8855732117669096,0.8884818588260758
llm_goals_293,test,30,0.8792771053151601,0.918233934226988
llm_goals_293,test,31,0.8583698234157052,0.7707225792789547
llm_goals_293,test,32,0.9512056110594572,0.8156692405787661
llm_goals_293,test,33,0.8346015238663196,0.9365220048324068
llm_goals_293,test,34,0.9772180313597708,0.9379748130481748
llm_goals_293,test,35,0.9501362441015164,0.8901683114693926
llm_goals_293,test,36,0.5918600632816917,0.8493608200921786
llm_goals_293,test,37,0.9310349252602091,0.861633780857663
llm_goals_293,test,38,0.8855732117669096,0.9021887179891658
llm_goals_293,test,39,0.901512506752562,0.9140409207490428
llm_goals_293,test,40,0.7872463442637466,0.7293724502897094
llm_goals_293,test,41,0.963296114229794,0.9162639417701852
llm_goals_293,test,42,0.9501362441015164,0.8334788131423683
llm_goals_293,test,43,0.9686008686206928,0.7597774188725597
llm_goals_293,test,44,0.9849097724272856,0.9659911986912778
llm_goals_293,test,45,0.8346015238663196,0.931854425798201
llm_goals_293,test,46,0.8297238362013515,0.5591331662726297
llm_goals_293,test,47,0.9860253934337446,0.8162434715058863
llm_goals_293,test,48,0.9775935419116958,0.9539650091821064
llm_goals_293,test,49,0.8231172593653571,0.7483454415724036
llm_goals_397,test,0,0.846385661259674,0.6336495795213035
llm_goals_397,test,1,0.1771384100210152,0.5588590528022535
llm_goals_397,test,2,0.2247116337750546,0.8967757860893129
llm_goals_397,test,3,0.4838799635550948,0.5944806240686361
llm_goals_397,test,4,0.2271342619526839,0.2252168546192252
llm_goals_397,test,5,0.5556145950725657,0.4122469495302511
llm_goals_397,test,6,0.20882042572414,0.6366335121435647
llm_goals_397,test,7,0.846385661259674,0.8096849879343126
llm_goals_397,test,8,0.1772854421549303,0.2219286497366863
llm_goals_397,test,9,0.2028890760102352,0.3790244200754085
llm_goals_397,test,10,0.232658093001139,0.5739754985147579
llm_goals_397,test,11,0.2028890760102352,0.5146662099973958
llm_goals_397,test,12,0.5892266098498904,0.7724721370318186
llm_goals_397,test,13,0.1904176236524492,0.5128249643851043
llm_goals_397,test,14,0.6296530388505203,0.7029672903171433
llm_goals_397,test,15,0.1771384100210152,0.1755089749954058
llm_goals_397,test,16,0.1480633931002797,0.5780643909290929
llm_goals_397,test,17,0.5279321496358373,0.5651405953323156
llm_goals_397,test,18,0.6969248064910901,0.6692897169839361
llm_goals_397,test,19,0.1904176236524492,0.3010834864311782
llm_goals_397,test,20,0.2846100423054455,0.1723383376025849
llm_goals_397,test,21,0.5883363075506464,0.5552848455448852
llm_goals_397,test,22,0.2247116337750546,0.6247249816867365
llm_goals_397,test,23,0.351109937142033,0.6892307964539947
llm_goals_397,test,24,0.6296530388505203,0.707193144679384
llm_goals_397,test,25,0.4305044419772669,0.4728503384141321
llm_goals_397,test,26,0.180059422072177,0.6125340479102231
llm_goals_397,test,27,0.2086171710567019,0.638109206443543
llm_goals_397,test,28,0.1297914009966909,0.6131846340287811
llm_goals_397,test,29,0.20882042572414,0.3070471542438216
llm_goals_397,test,30,0.2325727957624433,0.5305219388850028
llm_goals_397,test,31,0.5556145950725657,0.5467284805801325
llm_goals_397,test,32,0.1889852487124991,0.6052179554793606
llm_goals_397,test,33,0.223376125311709,0.6898052709387655
llm_goals_397,test,34,0.4666865072021154,0.4488721547034506
llm_goals_397,test,35,0.846385661259674,0.8279244269125856
llm_goals_397,test,36,0.1771384100210152,0.4830040624014946
llm_goals_397,test,37,0.2829638433530499,0.25459376065388
llm_goals_397,test,38,0.20882042572414,0.2830348916920439
llm_goals_397,test,39,0.5892266098498904,0.6957841399255769
llm_goals_397,test,40,0.2553894719474983,0.4595589778278475
llm_goals_397,test,41,0.5687370858617109,0.5333267612417034
llm_goals_397,test,42,0.2247116337750546,0.8577821893356561
llm_goals_397,test,43,0.1980791363208195,0.5300958828918404
llm_goals_397,test,44,0.2267324773507996,0.2841438783476365
llm_goals_397,test,45,0.1389151817863565,0.5478091730108666
llm_goals_397,test,46,0.1772672043892784,0.623584984691235
llm_goals_397,test,47,0.4672643946820849,0.5396113255522375
llm_goals_397,test,48,0.6296530388505203,0.6615871853452404
llm_goals_397,test,49,0.2868139892717326,0.4776676395182983
llm_goals_281,test,0,0.9826933750044728,0.7595571331426817
llm_goals_281,test,1,0.8418323999225854,0.9579733612090848
llm_goals_281,test,2,0.6208341107130894,0.7309832638356816
llm_goals_281,test,3,0.8418323999225854,0.909583748095366
llm_goals_281,test,4,0.9210228335647032,0.8390255026542476
llm_goals_281,test,5,0.9794715051313276,0.9811318521618418
llm_goals_281,test,6,0.9414144178682567,0.9764572976382064
llm_goals_281,test,7,0.6208341107130894,0.7324330753461098
llm_goals_281,test,8,0.8247642473930411,0.7994783462392235
llm_goals_281,test,9,0.9871521593201986,0.9815838934803736
llm_goals_281,test,10,0.9567866308918476,0.9535347237148352
llm_goals_281,test,11,0.9867102506795224,0.9162909963751452
llm_goals_281,test,12,0.9784601326053726,0.3257945040794002
llm_goals_281,test,13,0.9871521593201986,0.9844478965422564
llm_goals_281,test,14,0.959630011279428,0.927134453638218
llm_goals_281,test,15,0.9733661414658582,0.8836817175051781
llm_goals_281,test,16,0.8418323999225854,0.9699068666000064
llm_goals_281,test,17,0.972790121967261,0.9397605064907452
llm_goals_281,test,18,0.959630011279428,0.9597848062242011
llm_goals_281,test,19,0.7662299243873592,0.0774181964240286
llm_goals_281,test,20,0.9210228335647032,0.7915892634199072
llm_goals_281,test,21,0.9465035185192392,0.971953988882578
llm_goals_281,test,22,0.8563207459727508,0.9602834573260726
llm_goals_281,test,23,0.9879373311589764,0.9122855662185028
llm_goals_281,test,24,0.959630011279428,0.8979990468804274
llm_goals_281,test,25,0.4547651084958153,0.9241587506219392
llm_goals_281,test,26,0.959630011279428,0.9170165050266824
llm_goals_281,test,27,0.9587580569006664,0.9611372957336312
llm_goals_281,test,28,0.9826052191374768,0.87756150859893
llm_goals_281,test,29,0.4547651084958153,0.0434288112667957
llm_goals_281,test,30,0.9840196372496004,0.9342891559515047
llm_goals_281,test,31,0.982003867633515,0.9743182057709278
llm_goals_281,test,32,0.9666602620461042,0.9397307039659378
llm_goals_281,test,33,0.9743390091495004,0.9512294521408392
llm_goals_281,test,34,0.9584595561233648,0.9536682955508284
llm_goals_281,test,35,0.6800838740156591,0.7535714456504239
llm_goals_281,test,36,0.9655359970088584,0.923856973167726
llm_goals_281,test,37,0.7662299243873592,0.0418935402602879
llm_goals_281,test,38,0.9428095805095535,0.7059133681203931
llm_goals_281,test,39,0.4101863626047013,0.3500755803223985
llm_goals_281,test,40,0.9779602290162828,0.9247009790185582
llm_goals_281,test,41,0.9686967716733508,0.9782058597610022
llm_goals_281,test,42,0.5235867627594746,0.7983229743904063
llm_goals_281,test,43,0.9758918875699396,0.975883662329788
llm_goals_281,test,44,0.8055722070378598,0.8750632399664175
llm_goals_281,test,45,0.9863260390222932,0.8910572538163689
llm_goals_281,test,46,0.9595629592158346,0.8908228852175125
llm_goals_281,test,47,0.9457294338707144,0.9549922359097318
llm_goals_281,test,48,0.959630011279428,0.953490375608375
llm_goals_281,test,49,0.8523237661088207,0.9719706773426644
llm_goals_415,test,0,0.7526462654467962,0.8577433203043316
llm_goals_415,test,1,0.9223743649273424,0.9021974832977488
llm_goals_415,test,2,0.7435958861559764,0.8694165318475334
llm_goals_415,test,3,0.7342630552125795,0.82278495857052
llm_goals_415,test,4,0.8696211592282651,0.9068547493871278
llm_goals_415,test,5,0.839377458028101,0.4606612308159887
llm_goals_415,test,6,0.3814610888959738,0.5138990722916633
llm_goals_415,test,7,0.7884937497164132,0.7904330429356278
llm_goals_415,test,8,0.93050998208312,0.8709577911266214
llm_goals_415,test,9,0.8012841296163278,0.4953228711784456
llm_goals_415,test,10,0.8637350837936838,0.8458883220009739
llm_goals_415,test,11,0.9645829218233792,0.7530689318495781
llm_goals_415,test,12,0.6459093635632825,0.5374074070351774
llm_goals_415,test,13,0.7540461364223685,0.4543814478672013
llm_goals_415,test,14,0.892917991288558,0.895305392435367
llm_goals_415,test,15,0.9198530923912224,0.8949400812124582
llm_goals_415,test,16,0.9223743649273424,0.8238564098543186
llm_goals_415,test,17,0.8325511854972161,0.7003946285795368
llm_goals_415,test,18,0.8024136430944728,0.8541047516491939
llm_goals_415,test,19,0.9504423143614508,0.4356960950136901
llm_goals_415,test,20,0.8949363205680722,0.8781125499856784
llm_goals_415,test,21,0.3706168963534405,0.4662951793594435
llm_goals_415,test,22,0.9364384444314832,0.9133476600123868
llm_goals_415,test,23,0.9223743649273424,0.729661283414631
llm_goals_415,test,24,0.9361786845174788,0.8203684726547874
llm_goals_415,test,25,0.8049360563376297,0.8415742531339464
llm_goals_415,test,26,0.8409848209996877,0.9206447035427088
llm_goals_415,test,27,0.7156568238428158,0.8224443724423657
llm_goals_415,test,28,0.8409848209996877,0.7688508117653093
llm_goals_415,test,29,0.892917991288558,0.4040278781550455
llm_goals_415,test,30,0.8904894575091873,0.8326128082774296
llm_goals_415,test,31,0.3706168963534405,0.5181213462550505
llm_goals_415,test,32,0.7884937497164132,0.7754768797937415
llm_goals_415,test,33,0.7997953347459735,0.8137893713459559
llm_goals_415,test,34,0.8625245724691484,0.8569270432280164
llm_goals_415,test,35,0.7968231412969317,0.7699967690808278
llm_goals_415,test,36,0.8637350837936838,0.921674433414038
llm_goals_415,test,37,0.916532454672472,0.4660833031516516
llm_goals_415,test,38,0.9715234630472755,0.65152560988785
llm_goals_415,test,39,0.9295516018501946,0.5898309243469293
llm_goals_415,test,40,0.935621500070384,0.8091818666854369
llm_goals_415,test,41,0.9719797965539292,0.6069718569678856
llm_goals_415,test,42,0.9223743649273424,0.801905695679187
llm_goals_415,test,43,0.3706168963534405,0.7026985943405766
llm_goals_415,test,44,0.921824036492479,0.8893336073229712
llm_goals_415,test,45,0.923294244495978,0.7374717538757577
llm_goals_415,test,46,0.8308949496365038,0.7627238714563013
llm_goals_415,test,47,0.8211512591205861,0.8843601962700689
llm_goals_415,test,48,0.8956100947338044,0.8760370596393947
llm_goals_415,test,49,0.8736215866730496,0.934119643218544
llm_goals_228,test,0,0.49044890597476,0.5536142490727757
llm_goals_228,test,1,0.344004505296502,0.5970781946787781
llm_goals_228,test,2,0.3373344886074051,0.6661069439112477
llm_goals_228,test,3,0.4187138614460871,0.1621815926046939
llm_goals_228,test,4,0.344004505296502,0.576352949396635
llm_goals_228,test,5,0.1501895888501575,0.3840882428737504
llm_goals_228,test,6,0.2784053245647708,0.3586276503372383
llm_goals_228,test,7,0.3704467944591096,0.6176623614140859
llm_goals_228,test,8,0.5807389516401625,0.6954425570040618
llm_goals_228,test,9,0.5196584481283578,0.259004657946923
llm_goals_228,test,10,0.4102676391732627,0.7034691604166908
llm_goals_228,test,11,0.3092804721987794,0.3667773177860855
llm_goals_228,test,12,0.4166426046642189,0.5441412387152404
llm_goals_228,test,13,0.6877928283857522,0.1798966425933617
llm_goals_228,test,14,0.4938075360714112,0.6634600123551451
llm_goals_228,test,15,0.3117372920327519,0.5674040322938352
llm_goals_228,test,16,0.7844099994409388,0.7765365728525085
llm_goals_228,test,17,0.5753164125439394,0.4222217460157005
llm_goals_228,test,18,0.7844099994409388,0.7662538791291613
llm_goals_228,test,19,0.6602435677008731,0.551409534979895
llm_goals_228,test,20,0.3993064588274401,0.6118689802554753
llm_goals_228,test,21,0.3704420030542347,0.4397631653224428
llm_goals_228,test,22,0.4468326501772702,0.4973987580331913
llm_goals_228,test,23,0.3993064588274401,0.4235300043058759
llm_goals_228,test,24,0.3407831257589742,0.7459660834986607
llm_goals_228,test,25,0.3995537751833177,0.7296489537631492
llm_goals_228,test,26,0.2226587360666084,0.7553963575905828
llm_goals_228,test,27,0.6337619884220504,0.4245395914745872
llm_goals_228,test,28,0.4010283679530605,0.2113718455283501
llm_goals_228,test,29,0.6949142290816068,0.5839822372879856
llm_goals_228,test,30,0.2954784169441364,0.7882484823843936
llm_goals_228,test,31,0.6704802575007421,0.3978078935012989
llm_goals_228,test,32,0.3704467944591096,0.2741357451446661
llm_goals_228,test,33,0.7844099994409388,0.7841096477395639
llm_goals_228,test,34,0.2826500407833402,0.7149417896157899
llm_goals_228,test,35,0.3746105274206673,0.4510400759046654
llm_goals_228,test,36,0.6056030575551578,0.5606685093118066
llm_goals_228,test,37,0.49044890597476,0.4949700429354497
llm_goals_228,test,38,0.3547893517574756,0.5294251416140277
llm_goals_228,test,39,0.3746105274206673,0.5303236770643239
llm_goals_228,test,40,0.3972983582068252,0.3614923254884982
llm_goals_228,test,41,0.5158004022720915,0.5566797369456766
llm_goals_228,test,42,0.640237437176308,0.5874469302943671
llm_goals_228,test,43,0.6467220727607794,0.4047568932904402
llm_goals_228,test,44,0.6145736648625292,0.5769778999083791
llm_goals_228,test,45,0.2073488136646849,0.381513250802113
llm_goals_228,test,46,0.327218173349645,0.3717434970639749
llm_goals_228,test,47,0.2226587360666084,0.1147154724199779
llm_goals_228,test,48,0.5039715902120603,0.8181026530840192
llm_goals_228,test,49,0.3581974839386199,0.3646229970196732
llm_goals_236,test,0,0.8104860216044824,0.7150391262778099
llm_goals_236,test,1,0.8689908337330304,0.6861406620593683
llm_goals_236,test,2,0.842810744450733,0.871267676778662
llm_goals_236,test,3,0.8273473412334961,0.8072499568065914
llm_goals_236,test,4,0.6635713614855454,0.7988170942289826
llm_goals_236,test,5,0.923393783243376,0.9525319342810132
llm_goals_236,test,6,0.9128326213920838,0.922369719315248
llm_goals_236,test,7,0.842810744450733,0.8816325671024458
llm_goals_236,test,8,0.641597759782671,0.850525379453481
llm_goals_236,test,9,0.9470037820961522,0.8905094280701752
llm_goals_236,test,10,0.8298587738526848,0.8517140753999074
llm_goals_236,test,11,0.9581971752000552,0.568158988781905
llm_goals_236,test,12,0.910711171345716,0.81539887388713
llm_goals_236,test,13,0.923393783243376,0.9457094685792072
llm_goals_236,test,14,0.6593130788569774,0.926884118217816
llm_goals_236,test,15,0.8119178392390979,0.6732607492268958
llm_goals_236,test,16,0.9229488968250248,0.9227134615696562
llm_goals_236,test,17,0.6788733471583155,0.6862386995801422
llm_goals_236,test,18,0.7536962111629868,0.9588107473600692
llm_goals_236,test,19,0.8756378949511144,0.4410424607670248
llm_goals_236,test,20,0.8167659046319317,0.7869937367368668
llm_goals_236,test,21,0.9443888068066691,0.9367554776085516
llm_goals_236,test,22,0.9596449723541052,0.6817940522161587
llm_goals_236,test,23,0.9603539119390678,0.6579733541579077
llm_goals_236,test,24,0.6788733471583155,0.9454425582899262
llm_goals_236,test,25,0.8298587738526848,0.8055338192842599
llm_goals_236,test,26,0.9310371866436996,0.9246634247941125
llm_goals_236,test,27,0.6788733471583155,0.5396068227032322
llm_goals_236,test,28,0.8722341661744007,0.7982258737433797
llm_goals_236,test,29,0.8493010457232597,0.3578559150563034
llm_goals_236,test,30,0.7861118943458288,0.831663663699925
llm_goals_236,test,31,0.8897562046327177,0.8995406617766696
llm_goals_236,test,32,0.6978544671044632,0.7293236306375612
llm_goals_236,test,33,0.9358850101917684,0.9457866545397724
llm_goals_236,test,34,0.6841052697563492,0.8364083160174887
llm_goals_236,test,35,0.8104860216044824,0.7920782594120231
llm_goals_236,test,36,0.8172719511606175,0.8063141051706565
llm_goals_236,test,37,0.8756378949511144,0.5136290105021059
llm_goals_236,test,38,0.9443339714239396,0.4514706416931497
llm_goals_236,test,39,0.8917396081855754,0.8644759660227708
llm_goals_236,test,40,0.9124374998928544,0.6572924853656194
llm_goals_236,test,41,0.9128326213920838,0.9311078819261616
llm_goals_236,test,42,0.8104860216044824,0.7421276499016344
llm_goals_236,test,43,0.923393783243376,0.9182581940665174
llm_goals_236,test,44,0.7113193812548855,0.78992920712052
llm_goals_236,test,45,0.7306037587780979,0.7690354024834042
llm_goals_236,test,46,0.910711171345716,0.7972039589337531
llm_goals_236,test,47,0.6658229885569279,0.7979662948734613
llm_goals_236,test,48,0.9357975115271064,0.9401202207915952
llm_goals_236,test,49,0.7113193812548855,0.6403300609260552
llm_goals_86,test,0,0.8117799762269883,0.848978225488033
llm_goals_86,test,1,0.7166633065225165,0.5667421908216284
llm_goals_86,test,2,0.593003455273001,0.9666773097569414
llm_goals_86,test,3,0.8267109949526712,0.6816530119880609
llm_goals_86,test,4,0.8130738158330604,0.7288792743080739
llm_goals_86,test,5,0.812162641266683,0.5964895998637825
llm_goals_86,test,6,0.69538562555287,0.8656080931523872
llm_goals_86,test,7,0.8034716284046374,0.847071824665876
llm_goals_86,test,8,0.9233784688914852,0.8023766851830423
llm_goals_86,test,9,0.7331870164699926,0.8243871920658875
llm_goals_86,test,10,0.7406893973189458,0.8687054464050711
llm_goals_86,test,11,0.8117799762269883,0.8881319769937132
llm_goals_86,test,12,0.8672004392738583,0.6590020093828339
llm_goals_86,test,13,0.8210889674612494,0.6733084849210463
llm_goals_86,test,14,0.84947777385691,0.3116476330682434
llm_goals_86,test,15,0.7406893973189458,0.663392413111642
llm_goals_86,test,16,0.8267109949526712,0.6387773046544254
llm_goals_86,test,17,0.6586169659854312,0.8044010404371023
llm_goals_86,test,18,0.9076346130886148,0.8337891051244346
llm_goals_86,test,19,0.6609499485874204,0.6396502923711483
llm_goals_86,test,20,0.6401537171422877,0.788879332405017
llm_goals_86,test,21,0.6586169659854312,0.6394004244150501
llm_goals_86,test,22,0.7399311153436959,0.8626416934726503
llm_goals_86,test,23,0.9158872946837132,0.7951470614108611
llm_goals_86,test,24,0.7399311153436959,0.7989944850297322
llm_goals_86,test,25,0.8245069360913481,0.5568233615310513
llm_goals_86,test,26,0.8210889674612494,0.7084484057432834
llm_goals_86,test,27,0.7967206194426278,0.752342212713773
llm_goals_86,test,28,0.8486288201250333,0.7815325258308541
llm_goals_86,test,29,0.8397696659887409,0.5424953082979187
llm_goals_86,test,30,0.8290197295762043,0.623081015497586
llm_goals_86,test,31,0.8535170612982632,0.7577194596401322
llm_goals_86,test,32,0.8267109949526712,0.6601579792051269
llm_goals_86,test,33,0.8210889674612494,0.8711616389038969
llm_goals_86,test,34,0.8267109949526712,0.5900015643097076
llm_goals_86,test,35,0.8148290496855773,0.7298998281488869
llm_goals_86,test,36,0.667753695548907,0.8123467602894
llm_goals_86,test,37,0.6420943739633802,0.6271477319447354
llm_goals_86,test,38,0.8672004392738583,0.7261717872389034
llm_goals_86,test,39,0.8672004392738583,0.6599378021515605
llm_goals_86,test,40,0.7399311153436959,0.8521513981089313
llm_goals_86,test,41,0.9232060933174276,0.7704036243710474
llm_goals_86,test,42,0.8853471259861807,0.8350187493970602
llm_goals_86,test,43,0.8267109949526712,0.7481739555208251
llm_goals_86,test,44,0.7284996457641373,0.6095754460637204
llm_goals_86,test,45,0.7762045939067586,0.7827780278235656
llm_goals_86,test,46,0.8130738158330604,0.7216219938712807
llm_goals_86,test,47,0.6437292610355282,0.5942280347054933
llm_goals_86,test,48,0.8853471259861807,0.9115690572813184
llm_goals_86,test,49,0.5732274706475254,0.8632650981365884
llm_goals_230,test,0,0.7781749893573545,0.8179351702935589
llm_goals_230,test,1,0.940213380976501,0.8188182051872285
llm_goals_230,test,2,0.7751696104476632,0.826086635244689
llm_goals_230,test,3,0.9024982441157016,0.8927791723378472
llm_goals_230,test,4,0.5890379017402987,0.5912138130658398
llm_goals_230,test,5,0.6018936829636693,0.5729324801938974
llm_goals_230,test,6,0.5736303730034582,0.5531325232730563
llm_goals_230,test,7,0.7730808800919923,0.8021489969603882
llm_goals_230,test,8,0.61854601376823,0.581443858272074
llm_goals_230,test,9,0.5240514717570771,0.6239073379929715
llm_goals_230,test,10,0.802810841810876,0.8046756982507505
llm_goals_230,test,11,0.7406035113367138,0.8313074111593964
llm_goals_230,test,12,0.8618483016363299,0.7681956263676517
llm_goals_230,test,13,0.6957837252060355,0.5988426118672923
llm_goals_230,test,14,0.8075718317733449,0.8599042234104695
llm_goals_230,test,15,0.5890379017402987,0.5936393834712063
llm_goals_230,test,16,0.8740652217216409,0.8206661307032131
llm_goals_230,test,17,0.7791727784741138,0.8436086722555165
llm_goals_230,test,18,0.8622556348226575,0.8499366057691331
llm_goals_230,test,19,0.7406035113367138,0.6149675790828681
llm_goals_230,test,20,0.61695815043493,0.6266825308309105
llm_goals_230,test,21,0.6109114748537695,0.6164414775805864
llm_goals_230,test,22,0.9444381603180936,0.8343809289495249
llm_goals_230,test,23,0.7406035113367138,0.7837306823394206
llm_goals_230,test,24,0.8504167844593624,0.846372128553261
llm_goals_230,test,25,0.7947518188021918,0.7390370859690565
llm_goals_230,test,26,0.8475911664622813,0.8494265096725975
llm_goals_230,test,27,0.941479861546531,0.8455606124040723
llm_goals_230,test,28,0.9151349694832764,0.8579674965057287
llm_goals_230,test,29,0.7939240883839211,0.5880964942153284
llm_goals_230,test,30,0.8809854355743711,0.8358535771844652
llm_goals_230,test,31,0.6109114748537695,0.5805894006126754
llm_goals_230,test,32,0.7406035113367138,0.7934818259785719
llm_goals_230,test,33,0.8504167844593624,0.834267023337033
llm_goals_230,test,34,0.8004530071349493,0.7892429815800577
llm_goals_230,test,35,0.7751696104476632,0.8120109426774391
llm_goals_230,test,36,0.8809854355743711,0.8267144565859352
llm_goals_230,test,37,0.7774729771787451,0.675275568637887
llm_goals_230,test,38,0.7983629695402451,0.601560060077995
llm_goals_230,test,39,0.8618483016363299,0.7968490736396183
llm_goals_230,test,40,0.7774729771787451,0.8216474607704832
llm_goals_230,test,41,0.6193116870868751,0.5197035792893621
llm_goals_230,test,42,0.7781749893573545,0.8097020966346316
llm_goals_230,test,43,0.6018936829636693,0.5719336953223904
llm_goals_230,test,44,0.6324781042046584,0.5981191789043642
llm_goals_230,test,45,0.8997153622784676,0.8736265335179677
llm_goals_230,test,46,0.9395328157924586,0.8487543997753076
llm_goals_230,test,47,0.9004454644100445,0.862138026666209
llm_goals_230,test,48,0.8504167844593624,0.8476904574354545
llm_goals_230,test,49,0.9499060501245726,0.8333969706647004
llm_goals_206,test,0,0.8733907954293858,0.8559101199930208
llm_goals_206,test,1,0.8125003013717056,0.7879802535556255
llm_goals_206,test,2,0.7846333423129158,0.8428787031752032
llm_goals_206,test,3,0.8125003013717056,0.8372089275473125
llm_goals_206,test,4,0.8125003013717056,0.7926284648989284
llm_goals_206,test,5,0.5602441554470122,0.5153650037350492
llm_goals_206,test,6,0.8125003013717056,0.3343649675741765
llm_goals_206,test,7,0.7799906734923735,0.9070251599505484
llm_goals_206,test,8,0.7420663216413208,0.8455585838330733
llm_goals_206,test,9,0.7425164602328462,0.6389058176485727
llm_goals_206,test,10,0.1258734133747919,0.1182260710030537
llm_goals_206,test,11,0.8125003013717056,0.7467083406482149
llm_goals_206,test,12,0.5281519308864281,0.4574706710312913
llm_goals_206,test,13,0.7425164602328462,0.0931690538173042
llm_goals_206,test,14,0.8868232991464519,0.6621974660772205
llm_goals_206,test,15,0.5281519308864281,0.494185809495628
llm_goals_206,test,16,0.7425164602328462,0.7829698941017894
llm_goals_206,test,17,0.8868232991464519,0.8449901332087761
llm_goals_206,test,18,0.559707417412993,0.5445193157731812
llm_goals_206,test,19,0.1258734133747919,0.1852277467567061
llm_goals_206,test,20,0.8868232991464519,0.779411075601452
llm_goals_206,test,21,0.8733907954293858,0.8588447585962018
llm_goals_206,test,22,0.559707417412993,0.6656724760322136
llm_goals_206,test,23,0.559707417412993,0.7493727269767156
llm_goals_206,test,24,0.559707417412993,0.9005759943815377
llm_goals_206,test,25,0.5281519308864281,0.6107508047041867
llm_goals_206,test,26,0.7846333423129158,0.6818652741926582
llm_goals_206,test,27,0.8125003013717056,0.101753575918842
llm_goals_206,test,28,0.81456595293438,0.7895615709538879
llm_goals_206,test,29,0.5281519308864281,0.6033396275372427
llm_goals_206,test,30,0.559707417412993,0.5204197590718647
llm_goals_206,test,31,0.8868232991464519,0.552048008783336
llm_goals_206,test,32,0.5645189775308849,0.6983551428844045
llm_goals_206,test,33,0.4214012646959394,0.7920455896978277
llm_goals_206,test,34,0.559707417412993,0.8229582905649624
llm_goals_206,test,35,0.7799906734923735,0.7513486755861682
llm_goals_206,test,36,0.8868232991464519,0.8837921634473136
llm_goals_206,test,37,0.1258734133747919,0.2323070941836299
llm_goals_206,test,38,0.5281519308864281,0.6051056320867053
llm_goals_206,test,39,0.5281519308864281,0.3355604905336211
llm_goals_206,test,40,0.559707417412993,0.0412251261013626
llm_goals_206,test,41,0.559707417412993,0.8154974559604902
llm_goals_206,test,42,0.8868232991464519,0.650940780779249
llm_goals_206,test,43,0.5621939271706696,0.8136300459617928
llm_goals_206,test,44,0.5621939271706696,0.7293123065101965
llm_goals_206,test,45,0.5602441554470122,0.7633236130088205
llm_goals_206,test,46,0.4214012646959394,0.7359003505062409
llm_goals_206,test,47,0.1258734133747919,0.1849221570121216
llm_goals_206,test,48,0.559707417412993,0.5419546416667619
llm_goals_206,test,49,0.5621939271706696,0.7725634685769378
llm_goals_146,test,0,0.455872084831542,0.4053194295707414
llm_goals_146,test,1,0.4451910932005133,0.77863227297152
llm_goals_146,test,2,0.3057458689637426,0.4040155724720515
llm_goals_146,test,3,0.7792989608540917,0.5871794799134783
llm_goals_146,test,4,0.7342193785540545,0.560196939517166
llm_goals_146,test,5,0.7484583896113799,0.3825763215338447
llm_goals_146,test,6,0.4468521994481242,0.4487256154760436
llm_goals_146,test,7,0.7804973697987257,0.4411791015965565
llm_goals_146,test,8,0.752850299464926,0.6130303084936348
llm_goals_146,test,9,0.8104980626452305,0.416171525001919
llm_goals_146,test,10,0.8097790419932451,0.8501744885482464
llm_goals_146,test,11,0.8359113338907147,0.4275203540944987
llm_goals_146,test,12,0.4972679418083453,0.1433368859863564
llm_goals_146,test,13,0.2797373686301431,0.3454179305560879
llm_goals_146,test,14,0.236109351830373,0.3984231289972209
llm_goals_146,test,15,0.7459213772481919,0.7779127849429681
llm_goals_146,test,16,0.3727968318476876,0.3923375995541924
llm_goals_146,test,17,0.7440918791268477,0.7787704480980863
llm_goals_146,test,18,0.239101399160052,0.4485241820972409
llm_goals_146,test,19,0.6575172469873125,0.3670787385820063
llm_goals_146,test,20,0.8104980626452305,0.5808513911957404
llm_goals_146,test,21,0.2020666488897821,0.348566839291943
llm_goals_146,test,22,0.4617183218783479,0.8201150868695501
llm_goals_146,test,23,0.5843021761028128,0.4340105664556233
llm_goals_146,test,24,0.7947990253058123,0.3779777975457017
llm_goals_146,test,25,0.8109234543980224,0.7064739339171033
llm_goals_146,test,26,0.8104980626452305,0.5068406448496142
llm_goals_146,test,27,0.3104720854777406,0.8190644408840766
llm_goals_146,test,28,0.8359113338907147,0.5180591218776336
llm_goals_146,test,29,0.5599983356003466,0.333344294156815
llm_goals_146,test,30,0.7740380766090722,0.7747827278582075
llm_goals_146,test,31,0.6984255346622695,0.407372660950012
llm_goals_146,test,32,0.7804973697987257,0.410007141610074
llm_goals_146,test,33,0.7804973697987257,0.4871137583409777
llm_goals_146,test,34,0.3779823710075387,0.7239974058432814
llm_goals_146,test,35,0.8097790419932451,0.4717056489178101
llm_goals_146,test,36,0.2020666488897821,0.7518023835632467
llm_goals_146,test,37,0.3444619312627089,0.4299980307715084
llm_goals_146,test,38,0.4084638646823842,0.6853837869126352
llm_goals_146,test,39,0.7846072346275884,0.3821269496784033
llm_goals_146,test,40,0.4617183218783479,0.358534933784248
llm_goals_146,test,41,0.8293533373109587,0.3237407407589303
llm_goals_146,test,42,0.4365847938344161,0.465061399698899
llm_goals_146,test,43,0.236109351830373,0.4315601062358607
llm_goals_146,test,44,0.7444934335215283,0.5556539728312679
llm_goals_146,test,45,0.7976248810903046,0.5920740253668018
llm_goals_146,test,46,0.3189707575706161,0.4871274917312868
llm_goals_146,test,47,0.7161569109962244,0.581843033103631
llm_goals_146,test,48,0.8598319963417393,0.3075919216632473
llm_goals_146,test,49,0.7632867608158656,0.7619587682314233
llm_goals_115,test,0,0.8326787907735654,0.4434632286904964
llm_goals_115,test,1,0.6291469218238824,0.6904317046527385
llm_goals_115,test,2,0.4321288042853845,0.6516189539788452
llm_goals_115,test,3,0.1601127931765256,0.4339340266994299
llm_goals_115,test,4,0.782653812868382,0.3268096695267067
llm_goals_115,test,5,0.8367139757745837,0.5710080860563095
llm_goals_115,test,6,0.769662012096681,0.5525431491717658
llm_goals_115,test,7,0.8534478505972338,0.4917475457547118
llm_goals_115,test,8,0.4272102717944173,0.5222463726327303
llm_goals_115,test,9,0.5340263178771691,0.7214239291876969
llm_goals_115,test,10,0.4481034240510289,0.1423416680133991
llm_goals_115,test,11,0.6935340749047081,0.0211501267474188
llm_goals_115,test,12,0.874827739924434,0.7876465277644278
llm_goals_115,test,13,0.6701465234869979,0.7712913027170795
llm_goals_115,test,14,0.7108696177762456,0.8433526700949283
llm_goals_115,test,15,0.8302502084923074,0.4617767241926875
llm_goals_115,test,16,0.8565839522327893,0.751642873633365
llm_goals_115,test,17,0.8023592171541302,0.5750395750632571
llm_goals_115,test,18,0.5036789146708851,0.731190860777953
llm_goals_115,test,19,0.5390329528950011,0.4010090785721757
llm_goals_115,test,20,0.7334473042104603,0.4639274084070101
llm_goals_115,test,21,0.8023592171541302,0.8439438875722727
llm_goals_115,test,22,0.5340263178771691,0.4930168575659444
llm_goals_115,test,23,0.7017429469816077,0.1606455731785788
llm_goals_115,test,24,0.4272102717944173,0.8225050510480415
llm_goals_115,test,25,0.5829809727583759,0.4682683350736841
llm_goals_115,test,26,0.8030986229751763,0.8534070288351345
llm_goals_115,test,27,0.6160724150684787,0.4742317939252941
llm_goals_115,test,28,0.6935340749047081,0.0572278600376891
llm_goals_115,test,29,0.7960652070410271,0.4907303253909411
llm_goals_115,test,30,0.5829809727583759,0.1910564822051387
llm_goals_115,test,31,0.5390329528950011,0.8779074551007869
llm_goals_115,test,32,0.4272102717944173,0.504762116367231
llm_goals_115,test,33,0.8598298438452439,0.7961870421429237
llm_goals_115,test,34,0.5829809727583759,0.3117413427150098
llm_goals_115,test,35,0.3517441606806075,0.6062498233633526
llm_goals_115,test,36,0.6508847379006042,0.5465164093133901
llm_goals_115,test,37,0.5645457619369642,0.5868361038548235
llm_goals_115,test,38,0.7784333150910085,0.3403703059415776
llm_goals_115,test,39,0.3860934071019099,0.8219108606932716
llm_goals_115,test,40,0.4618629650546399,0.1035981058362717
llm_goals_115,test,41,0.8030986229751763,0.8823458588361053
llm_goals_115,test,42,0.2490546433569961,0.2803475549795062
llm_goals_115,test,43,0.8332580928095579,0.8513012565900029
llm_goals_115,test,44,0.6922088316866296,0.7679774087348048
llm_goals_115,test,45,0.8302502084923074,0.2698108084459203
llm_goals_115,test,46,0.5390329528950011,0.0357215216493676
llm_goals_115,test,47,0.8302502084923074,0.3874775112245213
llm_goals_115,test,48,0.4861967920214458,0.7725214275580398
llm_goals_115,test,49,0.7721546902500949,0.6720584616591353
llm_goals_182,test,0,0.0174462139103108,0.8652063058172594
llm_goals_182,test,1,0.0125080189246313,0.0826947230522371
llm_goals_182,test,2,0.712489189694975,0.798587619624287
llm_goals_182,test,3,0.6036872988946758,0.1153333326317589
llm_goals_182,test,4,0.5503918416881156,0.8911074983818299
llm_goals_182,test,5,0.3896187891633972,0.322673941983326
llm_goals_182,test,6,0.5503918416881156,0.3873713790307967
llm_goals_182,test,7,0.5503918416881156,0.7954637431338407
llm_goals_182,test,8,0.544562094303831,0.8713126766724444
llm_goals_182,test,9,0.3896187891633972,0.3727719680238343
llm_goals_182,test,10,0.6358237788529365,0.344259508803394
llm_goals_182,test,11,0.6007463979399906,0.826409100211895
llm_goals_182,test,12,0.6335546367312215,0.6995229144237722
llm_goals_182,test,13,0.6007463979399906,0.2983322739239659
llm_goals_182,test,14,0.204535476224307,0.1121533235192302
llm_goals_182,test,15,0.0119591903871237,0.8530130402661898
llm_goals_182,test,16,0.2039040189440866,0.140642355581753
llm_goals_182,test,17,0.6036872988946758,0.0750534295482943
llm_goals_182,test,18,0.2039040189440866,0.1021012312203676
llm_goals_182,test,19,0.6567927681590605,0.7930271065360427
llm_goals_182,test,20,0.835346754894704,0.8727144576853948
llm_goals_182,test,21,0.6007463979399906,0.3883832635395169
llm_goals_182,test,22,0.017583163641783,0.0858987886275301
llm_goals_182,test,23,0.8900305632326149,0.8587499834361978
llm_goals_182,test,24,0.6986691924377937,0.1529090764042953
llm_goals_182,test,25,0.6424850757094086,0.3907861244891192
llm_goals_182,test,26,0.204535476224307,0.0898813936513848
llm_goals_182,test,27,0.5303391660701607,0.0749023763424232
llm_goals_182,test,28,0.5985308363146518,0.1190598459155961
llm_goals_182,test,29,0.5985308363146518,0.7016449528375276
llm_goals_182,test,30,0.5503918416881156,0.3981986561532387
llm_goals_182,test,31,0.3896187891633972,0.4033468376136607
llm_goals_182,test,32,0.5503918416881156,0.8175631740353995
llm_goals_182,test,33,0.0846477594222344,0.1448325966795168
llm_goals_182,test,34,0.6358237788529365,0.3547330626609051
llm_goals_182,test,35,0.6007463979399906,0.823988084949945
llm_goals_182,test,36,0.7187333185208328,0.3920453941320438
llm_goals_182,test,37,0.7721992197598634,0.7340161718404824
llm_goals_182,test,38,0.5503918416881156,0.7157883671582869
llm_goals_182,test,39,0.6007463979399906,0.6255065804950025
llm_goals_182,test,40,0.7494649904756623,0.8448494619425109
llm_goals_182,test,41,0.3896187891633972,0.3755363161560536
llm_goals_182,test,42,0.712489189694975,0.8729538896060706
llm_goals_182,test,43,0.3896187891633972,0.3845820969466132
llm_goals_182,test,44,0.8404793709875512,0.8535886434286102
llm_goals_182,test,45,0.6424850757094086,0.1216040102965252
llm_goals_182,test,46,0.0112847664213683,0.8235719161892086
llm_goals_182,test,47,0.7443918654650613,0.1493568636498045
llm_goals_182,test,48,0.2039040189440866,0.0977336313667494
llm_goals_182,test,49,0.6370210049712426,0.1145731477706443
llm_goals_326,test,0,0.7708390884894821,0.7210679537447465
llm_goals_326,test,1,0.6509625862352971,0.3698897290610674
llm_goals_326,test,2,0.7708390884894821,0.6763902331902123
llm_goals_326,test,3,0.6800144796542983,0.6144636338527515
llm_goals_326,test,4,0.689505582789645,0.7226604268728406
llm_goals_326,test,5,0.7635513626305813,0.225010837259175
llm_goals_326,test,6,0.2679166310981392,0.2511202974122448
llm_goals_326,test,7,0.7708390884894821,0.682107850479918
llm_goals_326,test,8,0.7651171633727942,0.7688388280774076
llm_goals_326,test,9,0.2000987474890948,0.2201601423152862
llm_goals_326,test,10,0.6629695354528886,0.5927527654534117
llm_goals_326,test,11,0.6326321218690903,0.6402653335664261
llm_goals_326,test,12,0.5692064806650856,0.5219165167657188
llm_goals_326,test,13,0.2000987474890948,0.0912598670014219
llm_goals_326,test,14,0.6712261956619043,0.6237388815231029
llm_goals_326,test,15,0.713584358553857,0.58422994911281
llm_goals_326,test,16,0.688374146213984,0.598158895081766
llm_goals_326,test,17,0.6170649501290834,0.3075615988754216
llm_goals_326,test,18,0.7277207901553985,0.5624766832927214
llm_goals_326,test,19,0.9123372457076508,0.5837591950755885
llm_goals_326,test,20,0.689505582789645,0.7023531773229094
llm_goals_326,test,21,0.2970770242046742,0.2231004774292705
llm_goals_326,test,22,0.798198100586769,0.2981176158853984
llm_goals_326,test,23,0.8264415863832498,0.5947662537107573
llm_goals_326,test,24,0.6621280813738737,0.6704973022514685
llm_goals_326,test,25,0.7327619255965571,0.7548767212573778
llm_goals_326,test,26,0.7277207901553985,0.6431580812900115
llm_goals_326,test,27,0.7177667096689443,0.3327881124528037
llm_goals_326,test,28,0.6904833329417781,0.7641554441675951
llm_goals_326,test,29,0.6826336234138841,0.4778539914575542
llm_goals_326,test,30,0.7826815233416098,0.7097294996625062
llm_goals_326,test,31,0.3004329666863943,0.2337370659971392
llm_goals_326,test,32,0.6812178445007917,0.6377238074883209
llm_goals_326,test,33,0.688374146213984,0.6003066307923431
llm_goals_326,test,34,0.6629695354528886,0.6119511714458287
llm_goals_326,test,35,0.7937042746668983,0.5639635886155634
llm_goals_326,test,36,0.7152905411815637,0.6540181072858835
llm_goals_326,test,37,0.7736427206195895,0.6297099560404894
llm_goals_326,test,38,0.6947864183594372,0.5278265562464118
llm_goals_326,test,39,0.4638355567982467,0.5029836143581652
llm_goals_326,test,40,0.7836519498660364,0.5904794288270965
llm_goals_326,test,41,0.798198100586769,0.2163880989100536
llm_goals_326,test,42,0.7890632759891245,0.6295586948815126
llm_goals_326,test,43,0.3004329666863943,0.2120109108404722
llm_goals_326,test,44,0.7618448727101292,0.6940786874714112
llm_goals_326,test,45,0.6518606582656121,0.6451782543850242
llm_goals_326,test,46,0.7141560813413347,0.5818721654926584
llm_goals_326,test,47,0.777321758031231,0.6937841106268494
llm_goals_326,test,48,0.6660519772819409,0.6520763720762168
llm_goals_326,test,49,0.6660589644818956,0.3675984402598143
