template_id,split,question_idx,prediction,label
llm_goals_401,test,0,0.8524126410484314,0.9578712581802692
llm_goals_401,test,1,0.9022751450538635,0.9541386350517288
llm_goals_401,test,2,0.8945668339729309,0.957633375223842
llm_goals_401,test,3,0.9132717251777649,0.939051149330178
llm_goals_401,test,4,0.8925984501838684,0.977310418686652
llm_goals_401,test,5,0.8314502239227295,0.9809432028055692
llm_goals_401,test,6,0.9037631154060364,0.9755462400413056
llm_goals_401,test,7,0.9235144853591919,0.9511837371803112
llm_goals_401,test,8,0.9080584049224854,0.9766189453717342
llm_goals_401,test,9,0.8920381665229797,0.980273604068012
llm_goals_401,test,10,0.8395403027534485,0.9337562348751304
llm_goals_401,test,11,0.8654385805130005,0.9722627409957164
llm_goals_401,test,12,0.9108644723892212,0.9492781746196448
llm_goals_401,test,13,0.8459717631340027,0.9655717616666432
llm_goals_401,test,14,0.8169746398925781,0.9574255360215488
llm_goals_401,test,15,0.8617803454399109,0.981384516529317
llm_goals_401,test,16,0.7855676412582397,0.9222335225671268
llm_goals_401,test,17,0.9162961840629578,0.9532549975325786
llm_goals_401,test,18,0.8693026304244995,0.9602041260526504
llm_goals_401,test,19,0.8673660159111023,0.6478757906857974
llm_goals_401,test,20,0.8978684544563293,0.9756843455136628
llm_goals_401,test,21,0.8879700899124146,0.990806877474314
llm_goals_401,test,22,0.9056273698806763,0.9781303405092489
llm_goals_401,test,23,0.8537178635597229,0.9417706148309914
llm_goals_401,test,24,0.8574464917182922,0.956336258298358
llm_goals_401,test,25,0.8746485114097595,0.9313769351906296
llm_goals_401,test,26,0.8523766994476318,0.979961037598938
llm_goals_401,test,27,0.9125308990478516,0.9393076331919324
llm_goals_401,test,28,0.9203398823738098,0.8926497724419561
llm_goals_401,test,29,0.9059864282608032,0.9039012216839892
llm_goals_401,test,30,0.895670473575592,0.95604890766277
llm_goals_401,test,31,0.8256146907806396,0.9892266395215328
llm_goals_401,test,32,0.9231892228126526,0.9539506448444212
llm_goals_401,test,33,0.8737059235572815,0.96642041239244
llm_goals_401,test,34,0.880867063999176,0.9213858238598618
llm_goals_401,test,35,0.8516449332237244,0.9778974644047042
llm_goals_401,test,36,0.8241158723831177,0.9268431675812392
llm_goals_401,test,37,0.9136673808097839,0.8127340038962743
llm_goals_401,test,38,0.8982343673706055,0.9302681059523448
llm_goals_401,test,39,0.8916950821876526,0.965603948473316
llm_goals_401,test,40,0.894041121006012,0.9546924251425224
llm_goals_401,test,41,0.9123266339302063,0.9801422355796748
llm_goals_401,test,42,0.8937304019927979,0.942841528380264
llm_goals_401,test,43,0.8851839900016785,0.9624654057275208
llm_goals_401,test,44,0.8705542087554932,0.966766237132969
llm_goals_401,test,45,0.9001138210296631,0.9166605495286292
llm_goals_401,test,46,0.7421334981918335,0.9735371475107132
llm_goals_401,test,47,0.8368861079216003,0.9170851722665576
llm_goals_401,test,48,0.8709980249404907,0.9537843477184356
llm_goals_401,test,49,0.880611777305603,0.9543297303993916
llm_goals_438,test,0,0.9471712708473206,0.7747630551513792
llm_goals_438,test,1,0.9431087374687195,0.935855152767207
llm_goals_438,test,2,0.9533823132514954,0.7147502234092127
llm_goals_438,test,3,0.9467986822128296,0.8761475031821572
llm_goals_438,test,4,0.9381945729255676,0.8370855426053896
llm_goals_438,test,5,0.94935542345047,0.9507074946857058
llm_goals_438,test,6,0.9464107155799866,0.9429353481406656
llm_goals_438,test,7,0.9452167749404907,0.7990903812700391
llm_goals_438,test,8,0.9357267618179321,0.8538515570307835
llm_goals_438,test,9,0.9458791613578796,0.9386380912818448
llm_goals_438,test,10,0.9374678730964661,0.6740432265841833
llm_goals_438,test,11,0.9525468945503235,0.923251629590114
llm_goals_438,test,12,0.9505873322486877,0.9507690082509844
llm_goals_438,test,13,0.9413697719573975,0.9289195559017176
llm_goals_438,test,14,0.9483479261398315,0.9291442660965707
llm_goals_438,test,15,0.9364951848983765,0.7999125079987789
llm_goals_438,test,16,0.9486410617828369,0.88095736898606
llm_goals_438,test,17,0.9472707509994507,0.9601117963812904
llm_goals_438,test,18,0.9521845579147339,0.9583113794533262
llm_goals_438,test,19,0.9403349757194519,0.95757842004367
llm_goals_438,test,20,0.9476560950279236,0.8569924879179522
llm_goals_438,test,21,0.9463949203491211,0.925142229626847
llm_goals_438,test,22,0.9512144327163696,0.969126404096814
llm_goals_438,test,23,0.9538188576698303,0.9270654345691632
llm_goals_438,test,24,0.9505111575126648,0.9447244425458496
llm_goals_438,test,25,0.9434115290641785,0.8921203628029744
llm_goals_438,test,26,0.9427762627601624,0.8840255122167773
llm_goals_438,test,27,0.9494116306304932,0.9518933904854712
llm_goals_438,test,28,0.94244384765625,0.9239946006518056
llm_goals_438,test,29,0.9487530589103699,0.967243014785835
llm_goals_438,test,30,0.9419201612472534,0.6513131257391838
llm_goals_438,test,31,0.9363071322441101,0.9452766773393708
llm_goals_438,test,32,0.9539816975593567,0.8986559507515572
llm_goals_438,test,33,0.9463258385658264,0.9178363106363248
llm_goals_438,test,34,0.9472054839134216,0.58431126605279
llm_goals_438,test,35,0.9465794563293457,0.8549510299218779
llm_goals_438,test,36,0.9425029754638672,0.7795077576161488
llm_goals_438,test,37,0.9406687021255493,0.942087231678633
llm_goals_438,test,38,0.9441304802894592,0.9569196102046018
llm_goals_438,test,39,0.943552553653717,0.6285572658291573
llm_goals_438,test,40,0.9552686810493469,0.9166060095399312
llm_goals_438,test,41,0.9500699043273926,0.9583812359561668
llm_goals_438,test,42,0.951078474521637,0.7214403769582053
llm_goals_438,test,43,0.953087568283081,0.953284323393524
llm_goals_438,test,44,0.947502076625824,0.8806080817389553
llm_goals_438,test,45,0.9510453343391418,0.92897343737164
llm_goals_438,test,46,0.9542425870895386,0.9584044214292112
llm_goals_438,test,47,0.9343183040618896,0.8794537151734262
llm_goals_438,test,48,0.9503834247589111,0.9488244649817108
llm_goals_438,test,49,0.9586799740791321,0.96985775201818
llm_goals_293,test,0,0.8370587229728699,0.8120835453586716
llm_goals_293,test,1,0.7962749004364014,0.6480853571933485
llm_goals_293,test,2,0.9258023500442505,0.6079345474456403
llm_goals_293,test,3,0.8838224411010742,0.6129396608114358
llm_goals_293,test,4,0.9481929540634155,0.98210900952278
llm_goals_293,test,5,0.9078243374824524,0.8658899081858322
llm_goals_293,test,6,0.7957795262336731,0.8882423636795116
llm_goals_293,test,7,0.9240426421165466,0.8509290855710241
llm_goals_293,test,8,0.9238739013671875,0.9446962492710368
llm_goals_293,test,9,0.8762806057929993,0.7513049230182386
llm_goals_293,test,10,0.9027110934257507,0.9177243110970909
llm_goals_293,test,11,0.8964426517486572,0.745101023404262
llm_goals_293,test,12,0.8482556343078613,0.938155530986403
llm_goals_293,test,13,0.9024718999862671,0.93102258486241
llm_goals_293,test,14,0.9071269035339355,0.9835613372030028
llm_goals_293,test,15,0.9416016936302185,0.9863449336337948
llm_goals_293,test,16,0.9082396626472473,0.9708489179126024
llm_goals_293,test,17,0.8678594827651978,0.6973515732177806
llm_goals_293,test,18,0.886665940284729,0.9506214255847638
llm_goals_293,test,19,0.8420450687408447,0.8167264479797811
llm_goals_293,test,20,0.954983651638031,0.9863548279811404
llm_goals_293,test,21,0.8426553010940552,0.7980274506980439
llm_goals_293,test,22,0.8746855854988098,0.6721615182655347
llm_goals_293,test,23,0.7983736991882324,0.3710465401115394
llm_goals_293,test,24,0.8856616616249084,0.9595301158883638
llm_goals_293,test,25,0.7902638912200928,0.958163618329278
llm_goals_293,test,26,0.8522329926490784,0.966849388190172
llm_goals_293,test,27,0.9143502116203308,0.8260024784538619
llm_goals_293,test,28,0.8853931427001953,0.8388491865678236
llm_goals_293,test,29,0.6799036860466003,0.8884818588260758
llm_goals_293,test,30,0.9363337755203247,0.918233934226988
llm_goals_293,test,31,0.8348227739334106,0.7707225792789547
llm_goals_293,test,32,0.8846687078475952,0.8156692405787661
llm_goals_293,test,33,0.8844488263130188,0.9365220048324068
llm_goals_293,test,34,0.9036564826965332,0.9379748130481748
llm_goals_293,test,35,0.9049606919288635,0.8901683114693926
llm_goals_293,test,36,0.8525580167770386,0.8493608200921786
llm_goals_293,test,37,0.9037182927131653,0.861633780857663
llm_goals_293,test,38,0.7451280355453491,0.9021887179891658
llm_goals_293,test,39,0.8791772127151489,0.9140409207490428
llm_goals_293,test,40,0.8606754541397095,0.7293724502897094
llm_goals_293,test,41,0.9306851029396057,0.9162639417701852
llm_goals_293,test,42,0.920566201210022,0.8334788131423683
llm_goals_293,test,43,0.876956582069397,0.7597774188725597
llm_goals_293,test,44,0.9457112550735474,0.9659911986912778
llm_goals_293,test,45,0.9284553527832031,0.931854425798201
llm_goals_293,test,46,0.867128312587738,0.5591331662726297
llm_goals_293,test,47,0.902186930179596,0.8162434715058863
llm_goals_293,test,48,0.9368234872817993,0.9539650091821064
llm_goals_293,test,49,0.8282053470611572,0.7483454415724036
llm_goals_397,test,0,0.34870240092277527,0.6336495795213035
llm_goals_397,test,1,0.3657739460468292,0.5588590528022535
llm_goals_397,test,2,0.3819062113761902,0.8967757860893129
llm_goals_397,test,3,0.31000468134880066,0.5944806240686361
llm_goals_397,test,4,0.19071650505065918,0.2252168546192252
llm_goals_397,test,5,0.4483721852302551,0.4122469495302511
llm_goals_397,test,6,0.3470475375652313,0.6366335121435647
llm_goals_397,test,7,0.4294317662715912,0.8096849879343126
llm_goals_397,test,8,0.30352264642715454,0.2219286497366863
llm_goals_397,test,9,0.24706436693668365,0.3790244200754085
llm_goals_397,test,10,0.24015933275222778,0.5739754985147579
llm_goals_397,test,11,0.2413983792066574,0.5146662099973958
llm_goals_397,test,12,0.46015241742134094,0.7724721370318186
llm_goals_397,test,13,0.2617800831794739,0.5128249643851043
llm_goals_397,test,14,0.433951735496521,0.7029672903171433
llm_goals_397,test,15,0.1917339563369751,0.1755089749954058
llm_goals_397,test,16,0.3536168038845062,0.5780643909290929
llm_goals_397,test,17,0.42297834157943726,0.5651405953323156
llm_goals_397,test,18,0.3396398425102234,0.6692897169839361
llm_goals_397,test,19,0.29814594984054565,0.3010834864311782
llm_goals_397,test,20,0.2901838719844818,0.1723383376025849
llm_goals_397,test,21,0.4259721338748932,0.5552848455448852
llm_goals_397,test,22,0.24481014907360077,0.6247249816867365
llm_goals_397,test,23,0.2396632879972458,0.6892307964539947
llm_goals_397,test,24,0.4590011537075043,0.707193144679384
llm_goals_397,test,25,0.3684018552303314,0.4728503384141321
llm_goals_397,test,26,0.39764025807380676,0.6125340479102231
llm_goals_397,test,27,0.3816690444946289,0.638109206443543
llm_goals_397,test,28,0.4451788365840912,0.6131846340287811
llm_goals_397,test,29,0.3489958941936493,0.3070471542438216
llm_goals_397,test,30,0.32682913541793823,0.5305219388850028
llm_goals_397,test,31,0.47890210151672363,0.5467284805801325
llm_goals_397,test,32,0.2849346101284027,0.6052179554793606
llm_goals_397,test,33,0.34155818819999695,0.6898052709387655
llm_goals_397,test,34,0.2352113574743271,0.4488721547034506
llm_goals_397,test,35,0.37682557106018066,0.8279244269125856
llm_goals_397,test,36,0.31170454621315,0.4830040624014946
llm_goals_397,test,37,0.26639702916145325,0.25459376065388
llm_goals_397,test,38,0.23731981217861176,0.2830348916920439
llm_goals_397,test,39,0.45436516404151917,0.6957841399255769
llm_goals_397,test,40,0.22514840960502625,0.4595589778278475
llm_goals_397,test,41,0.31486862897872925,0.5333267612417034
llm_goals_397,test,42,0.40385136008262634,0.8577821893356561
llm_goals_397,test,43,0.31131571531295776,0.5300958828918404
llm_goals_397,test,44,0.21592992544174194,0.2841438783476365
llm_goals_397,test,45,0.27656400203704834,0.5478091730108666
llm_goals_397,test,46,0.2901138663291931,0.623584984691235
llm_goals_397,test,47,0.39878764748573303,0.5396113255522375
llm_goals_397,test,48,0.4613792896270752,0.6615871853452404
llm_goals_397,test,49,0.33330705761909485,0.4776676395182983
llm_goals_281,test,0,0.8869763612747192,0.7595571331426817
llm_goals_281,test,1,0.9115554094314575,0.9579733612090848
llm_goals_281,test,2,0.863641619682312,0.7309832638356816
llm_goals_281,test,3,0.9373809695243835,0.909583748095366
llm_goals_281,test,4,0.9267447590827942,0.8390255026542476
llm_goals_281,test,5,0.9321655631065369,0.9811318521618418
llm_goals_281,test,6,0.9567320942878723,0.9764572976382064
llm_goals_281,test,7,0.909152090549469,0.7324330753461098
llm_goals_281,test,8,0.9381840229034424,0.7994783462392235
llm_goals_281,test,9,0.9716416597366333,0.9815838934803736
llm_goals_281,test,10,0.9229423999786377,0.9535347237148352
llm_goals_281,test,11,0.9560900330543518,0.9162909963751452
llm_goals_281,test,12,0.8576543927192688,0.3257945040794002
llm_goals_281,test,13,0.9305455684661865,0.9844478965422564
llm_goals_281,test,14,0.9291976690292358,0.927134453638218
llm_goals_281,test,15,0.8868961334228516,0.8836817175051781
llm_goals_281,test,16,0.9234717488288879,0.9699068666000064
llm_goals_281,test,17,0.9310001134872437,0.9397605064907452
llm_goals_281,test,18,0.9563196897506714,0.9597848062242011
llm_goals_281,test,19,0.9080724716186523,0.0774181964240286
llm_goals_281,test,20,0.9230511784553528,0.7915892634199072
llm_goals_281,test,21,0.9380741715431213,0.971953988882578
llm_goals_281,test,22,0.9668547511100769,0.9602834573260726
llm_goals_281,test,23,0.9467114210128784,0.9122855662185028
llm_goals_281,test,24,0.9350681900978088,0.8979990468804274
llm_goals_281,test,25,0.9476428031921387,0.9241587506219392
llm_goals_281,test,26,0.906114399433136,0.9170165050266824
llm_goals_281,test,27,0.9443438053131104,0.9611372957336312
llm_goals_281,test,28,0.9438189268112183,0.87756150859893
llm_goals_281,test,29,0.9312481880187988,0.0434288112667957
llm_goals_281,test,30,0.9537910223007202,0.9342891559515047
llm_goals_281,test,31,0.9235231280326843,0.9743182057709278
llm_goals_281,test,32,0.941023051738739,0.9397307039659378
llm_goals_281,test,33,0.9226258397102356,0.9512294521408392
llm_goals_281,test,34,0.9359976053237915,0.9536682955508284
llm_goals_281,test,35,0.8243107199668884,0.7535714456504239
llm_goals_281,test,36,0.9213883280754089,0.923856973167726
llm_goals_281,test,37,0.9214293360710144,0.0418935402602879
llm_goals_281,test,38,0.9060636162757874,0.7059133681203931
llm_goals_281,test,39,0.8435599207878113,0.3500755803223985
llm_goals_281,test,40,0.9525139927864075,0.9247009790185582
llm_goals_281,test,41,0.9537639617919922,0.9782058597610022
llm_goals_281,test,42,0.8643144965171814,0.7983229743904063
llm_goals_281,test,43,0.9316307306289673,0.975883662329788
llm_goals_281,test,44,0.8625995516777039,0.8750632399664175
llm_goals_281,test,45,0.9192345142364502,0.8910572538163689
llm_goals_281,test,46,0.9051950573921204,0.8908228852175125
llm_goals_281,test,47,0.9152641296386719,0.9549922359097318
llm_goals_281,test,48,0.9350406527519226,0.953490375608375
llm_goals_281,test,49,0.9282435178756714,0.9719706773426644
llm_goals_415,test,0,0.8541042804718018,0.8577433203043316
llm_goals_415,test,1,0.8179971575737,0.9021974832977488
llm_goals_415,test,2,0.8359084725379944,0.8694165318475334
llm_goals_415,test,3,0.6645286679267883,0.82278495857052
llm_goals_415,test,4,0.8210452795028687,0.9068547493871278
llm_goals_415,test,5,0.776516854763031,0.4606612308159887
llm_goals_415,test,6,0.6650353074073792,0.5138990722916633
llm_goals_415,test,7,0.8583585619926453,0.7904330429356278
llm_goals_415,test,8,0.7734270095825195,0.8709577911266214
llm_goals_415,test,9,0.7580764293670654,0.4953228711784456
llm_goals_415,test,10,0.8341782093048096,0.8458883220009739
llm_goals_415,test,11,0.8361110687255859,0.7530689318495781
llm_goals_415,test,12,0.7781969904899597,0.5374074070351774
llm_goals_415,test,13,0.6918448805809021,0.4543814478672013
llm_goals_415,test,14,0.8824878334999084,0.895305392435367
llm_goals_415,test,15,0.8754775524139404,0.8949400812124582
llm_goals_415,test,16,0.8280245065689087,0.8238564098543186
llm_goals_415,test,17,0.8414263725280762,0.7003946285795368
llm_goals_415,test,18,0.8443206548690796,0.8541047516491939
llm_goals_415,test,19,0.814988374710083,0.4356960950136901
llm_goals_415,test,20,0.8961036205291748,0.8781125499856784
llm_goals_415,test,21,0.7362707257270813,0.4662951793594435
llm_goals_415,test,22,0.8672305345535278,0.9133476600123868
llm_goals_415,test,23,0.861822247505188,0.729661283414631
llm_goals_415,test,24,0.8777843117713928,0.8203684726547874
llm_goals_415,test,25,0.8275111317634583,0.8415742531339464
llm_goals_415,test,26,0.8520706295967102,0.9206447035427088
llm_goals_415,test,27,0.7754093408584595,0.8224443724423657
llm_goals_415,test,28,0.7185725569725037,0.7688508117653093
llm_goals_415,test,29,0.7589912414550781,0.4040278781550455
llm_goals_415,test,30,0.8731718063354492,0.8326128082774296
llm_goals_415,test,31,0.7938299179077148,0.5181213462550505
llm_goals_415,test,32,0.7382726073265076,0.7754768797937415
llm_goals_415,test,33,0.7643568515777588,0.8137893713459559
llm_goals_415,test,34,0.8140544295310974,0.8569270432280164
llm_goals_415,test,35,0.8646600842475891,0.7699967690808278
llm_goals_415,test,36,0.9019414782524109,0.921674433414038
llm_goals_415,test,37,0.8397238850593567,0.4660833031516516
llm_goals_415,test,38,0.8564302325248718,0.65152560988785
llm_goals_415,test,39,0.8273094296455383,0.5898309243469293
llm_goals_415,test,40,0.862206220626831,0.8091818666854369
llm_goals_415,test,41,0.823241114616394,0.6069718569678856
llm_goals_415,test,42,0.9098058342933655,0.801905695679187
llm_goals_415,test,43,0.6889539361000061,0.7026985943405766
llm_goals_415,test,44,0.9150057435035706,0.8893336073229712
llm_goals_415,test,45,0.8227876424789429,0.7374717538757577
llm_goals_415,test,46,0.746891975402832,0.7627238714563013
llm_goals_415,test,47,0.862408459186554,0.8843601962700689
llm_goals_415,test,48,0.9040994644165039,0.8760370596393947
llm_goals_415,test,49,0.8352123498916626,0.934119643218544
llm_goals_228,test,0,0.46501481533050537,0.5536142490727757
llm_goals_228,test,1,0.48657774925231934,0.5970781946787781
llm_goals_228,test,2,0.47715410590171814,0.6661069439112477
llm_goals_228,test,3,0.3745863437652588,0.1621815926046939
llm_goals_228,test,4,0.4385414719581604,0.576352949396635
llm_goals_228,test,5,0.3274977207183838,0.3840882428737504
llm_goals_228,test,6,0.35269731283187866,0.3586276503372383
llm_goals_228,test,7,0.5090670585632324,0.6176623614140859
llm_goals_228,test,8,0.5030184388160706,0.6954425570040618
llm_goals_228,test,9,0.3085308074951172,0.259004657946923
llm_goals_228,test,10,0.4566653370857239,0.7034691604166908
llm_goals_228,test,11,0.41759437322616577,0.3667773177860855
llm_goals_228,test,12,0.39014437794685364,0.5441412387152404
llm_goals_228,test,13,0.4631666839122772,0.1798966425933617
llm_goals_228,test,14,0.6227437257766724,0.6634600123551451
llm_goals_228,test,15,0.379191130399704,0.5674040322938352
llm_goals_228,test,16,0.5729643106460571,0.7765365728525085
llm_goals_228,test,17,0.5035513639450073,0.4222217460157005
llm_goals_228,test,18,0.48659536242485046,0.7662538791291613
llm_goals_228,test,19,0.6051055192947388,0.551409534979895
llm_goals_228,test,20,0.373867392539978,0.6118689802554753
llm_goals_228,test,21,0.2624092102050781,0.4397631653224428
llm_goals_228,test,22,0.42634114623069763,0.4973987580331913
llm_goals_228,test,23,0.3870355188846588,0.4235300043058759
llm_goals_228,test,24,0.5524622797966003,0.7459660834986607
llm_goals_228,test,25,0.5632219314575195,0.7296489537631492
llm_goals_228,test,26,0.43481871485710144,0.7553963575905828
llm_goals_228,test,27,0.533842921257019,0.4245395914745872
llm_goals_228,test,28,0.3550812304019928,0.2113718455283501
llm_goals_228,test,29,0.4944137930870056,0.5839822372879856
llm_goals_228,test,30,0.44512516260147095,0.7882484823843936
llm_goals_228,test,31,0.34372279047966003,0.3978078935012989
llm_goals_228,test,32,0.4287792146205902,0.2741357451446661
llm_goals_228,test,33,0.5465041995048523,0.7841096477395639
llm_goals_228,test,34,0.49059629440307617,0.7149417896157899
llm_goals_228,test,35,0.29869917035102844,0.4510400759046654
llm_goals_228,test,36,0.4518401622772217,0.5606685093118066
llm_goals_228,test,37,0.5042917132377625,0.4949700429354497
llm_goals_228,test,38,0.31454601883888245,0.5294251416140277
llm_goals_228,test,39,0.3357203006744385,0.5303236770643239
llm_goals_228,test,40,0.3812980055809021,0.3614923254884982
llm_goals_228,test,41,0.27058497071266174,0.5566797369456766
llm_goals_228,test,42,0.4709511995315552,0.5874469302943671
llm_goals_228,test,43,0.33107417821884155,0.4047568932904402
llm_goals_228,test,44,0.4340558350086212,0.5769778999083791
llm_goals_228,test,45,0.35798853635787964,0.381513250802113
llm_goals_228,test,46,0.33722278475761414,0.3717434970639749
llm_goals_228,test,47,0.3637126088142395,0.1147154724199779
llm_goals_228,test,48,0.510148286819458,0.8181026530840192
llm_goals_228,test,49,0.4894702732563019,0.3646229970196732
llm_goals_236,test,0,0.7900722622871399,0.7150391262778099
llm_goals_236,test,1,0.8449337482452393,0.6861406620593683
llm_goals_236,test,2,0.8223466277122498,0.871267676778662
llm_goals_236,test,3,0.8757316470146179,0.8072499568065914
llm_goals_236,test,4,0.8857331871986389,0.7988170942289826
llm_goals_236,test,5,0.9167410731315613,0.9525319342810132
llm_goals_236,test,6,0.892216145992279,0.922369719315248
llm_goals_236,test,7,0.8461621403694153,0.8816325671024458
llm_goals_236,test,8,0.8580812811851501,0.850525379453481
llm_goals_236,test,9,0.9139590859413147,0.8905094280701752
llm_goals_236,test,10,0.843202531337738,0.8517140753999074
llm_goals_236,test,11,0.8702868819236755,0.568158988781905
llm_goals_236,test,12,0.8297717571258545,0.81539887388713
llm_goals_236,test,13,0.8962129354476929,0.9457094685792072
llm_goals_236,test,14,0.7888959050178528,0.926884118217816
llm_goals_236,test,15,0.8086618185043335,0.6732607492268958
llm_goals_236,test,16,0.8885697722434998,0.9227134615696562
llm_goals_236,test,17,0.8494935035705566,0.6862386995801422
llm_goals_236,test,18,0.8537701964378357,0.9588107473600692
llm_goals_236,test,19,0.8653668165206909,0.4410424607670248
llm_goals_236,test,20,0.8057076930999756,0.7869937367368668
llm_goals_236,test,21,0.8705077767372131,0.9367554776085516
llm_goals_236,test,22,0.8359382748603821,0.6817940522161587
llm_goals_236,test,23,0.8282710313796997,0.6579733541579077
llm_goals_236,test,24,0.8816174864768982,0.9454425582899262
llm_goals_236,test,25,0.8344848155975342,0.8055338192842599
llm_goals_236,test,26,0.8027205467224121,0.9246634247941125
llm_goals_236,test,27,0.8668426275253296,0.5396068227032322
llm_goals_236,test,28,0.877181351184845,0.7982258737433797
llm_goals_236,test,29,0.8668695092201233,0.3578559150563034
llm_goals_236,test,30,0.8199861645698547,0.831663663699925
llm_goals_236,test,31,0.8182535171508789,0.8995406617766696
llm_goals_236,test,32,0.8470464944839478,0.7293236306375612
llm_goals_236,test,33,0.8273318409919739,0.9457866545397724
llm_goals_236,test,34,0.8364954590797424,0.8364083160174887
llm_goals_236,test,35,0.8590178489685059,0.7920782594120231
llm_goals_236,test,36,0.6908992528915405,0.8063141051706565
llm_goals_236,test,37,0.8858789801597595,0.5136290105021059
llm_goals_236,test,38,0.8258740901947021,0.4514706416931497
llm_goals_236,test,39,0.8291875123977661,0.8644759660227708
llm_goals_236,test,40,0.8637059330940247,0.6572924853656194
llm_goals_236,test,41,0.8964783549308777,0.9311078819261616
llm_goals_236,test,42,0.8112434148788452,0.7421276499016344
llm_goals_236,test,43,0.8747326135635376,0.9182581940665174
llm_goals_236,test,44,0.8241971731185913,0.78992920712052
llm_goals_236,test,45,0.8918569087982178,0.7690354024834042
llm_goals_236,test,46,0.8191659450531006,0.7972039589337531
llm_goals_236,test,47,0.7693638801574707,0.7979662948734613
llm_goals_236,test,48,0.8543825149536133,0.9401202207915952
llm_goals_236,test,49,0.8688172698020935,0.6403300609260552
llm_goals_86,test,0,0.8211391568183899,0.848978225488033
llm_goals_86,test,1,0.7960069179534912,0.5667421908216284
llm_goals_86,test,2,0.7667121887207031,0.9666773097569414
llm_goals_86,test,3,0.7772120833396912,0.6816530119880609
llm_goals_86,test,4,0.8150764107704163,0.7288792743080739
llm_goals_86,test,5,0.8004093766212463,0.5964895998637825
llm_goals_86,test,6,0.7543286085128784,0.8656080931523872
llm_goals_86,test,7,0.8252887725830078,0.847071824665876
llm_goals_86,test,8,0.7448119521141052,0.8023766851830423
llm_goals_86,test,9,0.8198525309562683,0.8243871920658875
llm_goals_86,test,10,0.7588164806365967,0.8687054464050711
llm_goals_86,test,11,0.7658075094223022,0.8881319769937132
llm_goals_86,test,12,0.7268943190574646,0.6590020093828339
llm_goals_86,test,13,0.7060166597366333,0.6733084849210463
llm_goals_86,test,14,0.6324606537818909,0.3116476330682434
llm_goals_86,test,15,0.6969985961914062,0.663392413111642
llm_goals_86,test,16,0.7878701090812683,0.6387773046544254
llm_goals_86,test,17,0.8126682043075562,0.8044010404371023
llm_goals_86,test,18,0.8499211668968201,0.8337891051244346
llm_goals_86,test,19,0.7003682255744934,0.6396502923711483
llm_goals_86,test,20,0.7211155295372009,0.788879332405017
llm_goals_86,test,21,0.7330179810523987,0.6394004244150501
llm_goals_86,test,22,0.8514737486839294,0.8626416934726503
llm_goals_86,test,23,0.7984221577644348,0.7951470614108611
llm_goals_86,test,24,0.8099161982536316,0.7989944850297322
llm_goals_86,test,25,0.7297438383102417,0.5568233615310513
llm_goals_86,test,26,0.718487560749054,0.7084484057432834
llm_goals_86,test,27,0.6991020441055298,0.752342212713773
llm_goals_86,test,28,0.7593417763710022,0.7815325258308541
llm_goals_86,test,29,0.7039525508880615,0.5424953082979187
llm_goals_86,test,30,0.7962791323661804,0.623081015497586
llm_goals_86,test,31,0.7084929347038269,0.7577194596401322
llm_goals_86,test,32,0.7922750115394592,0.6601579792051269
llm_goals_86,test,33,0.8609376549720764,0.8711616389038969
llm_goals_86,test,34,0.7985096573829651,0.5900015643097076
llm_goals_86,test,35,0.8139947056770325,0.7298998281488869
llm_goals_86,test,36,0.664451003074646,0.8123467602894
llm_goals_86,test,37,0.8021107316017151,0.6271477319447354
llm_goals_86,test,38,0.7276129722595215,0.7261717872389034
llm_goals_86,test,39,0.7512977719306946,0.6599378021515605
llm_goals_86,test,40,0.8051773309707642,0.8521513981089313
llm_goals_86,test,41,0.8137199282646179,0.7704036243710474
llm_goals_86,test,42,0.7798517346382141,0.8350187493970602
llm_goals_86,test,43,0.7093400955200195,0.7481739555208251
llm_goals_86,test,44,0.719569206237793,0.6095754460637204
llm_goals_86,test,45,0.7995238900184631,0.7827780278235656
llm_goals_86,test,46,0.8108497262001038,0.7216219938712807
llm_goals_86,test,47,0.686565637588501,0.5942280347054933
llm_goals_86,test,48,0.7300572991371155,0.9115690572813184
llm_goals_86,test,49,0.8024742007255554,0.8632650981365884
llm_goals_230,test,0,0.8018460869789124,0.8179351702935589
llm_goals_230,test,1,0.9149779081344604,0.8188182051872285
llm_goals_230,test,2,0.7881067395210266,0.826086635244689
llm_goals_230,test,3,0.8526021242141724,0.8927791723378472
llm_goals_230,test,4,0.7729359865188599,0.5912138130658398
llm_goals_230,test,5,0.7163633704185486,0.5729324801938974
llm_goals_230,test,6,0.7208786606788635,0.5531325232730563
llm_goals_230,test,7,0.7872152924537659,0.8021489969603882
llm_goals_230,test,8,0.7608092427253723,0.581443858272074
llm_goals_230,test,9,0.7592130899429321,0.6239073379929715
llm_goals_230,test,10,0.8182843923568726,0.8046756982507505
llm_goals_230,test,11,0.8368982672691345,0.8313074111593964
llm_goals_230,test,12,0.86920166015625,0.7681956263676517
llm_goals_230,test,13,0.7280395030975342,0.5988426118672923
llm_goals_230,test,14,0.8409392833709717,0.8599042234104695
llm_goals_230,test,15,0.7776268124580383,0.5936393834712063
llm_goals_230,test,16,0.849705159664154,0.8206661307032131
llm_goals_230,test,17,0.9108864068984985,0.8436086722555165
llm_goals_230,test,18,0.854369044303894,0.8499366057691331
llm_goals_230,test,19,0.8272205591201782,0.6149675790828681
llm_goals_230,test,20,0.7541782259941101,0.6266825308309105
llm_goals_230,test,21,0.7143390774726868,0.6164414775805864
llm_goals_230,test,22,0.9103886485099792,0.8343809289495249
llm_goals_230,test,23,0.8107341527938843,0.7837306823394206
llm_goals_230,test,24,0.8776156306266785,0.846372128553261
llm_goals_230,test,25,0.8228924870491028,0.7390370859690565
llm_goals_230,test,26,0.8512123823165894,0.8494265096725975
llm_goals_230,test,27,0.9134640693664551,0.8455606124040723
llm_goals_230,test,28,0.8508319854736328,0.8579674965057287
llm_goals_230,test,29,0.8279092907905579,0.5880964942153284
llm_goals_230,test,30,0.7996892333030701,0.8358535771844652
llm_goals_230,test,31,0.6952724456787109,0.5805894006126754
llm_goals_230,test,32,0.8174787163734436,0.7934818259785719
llm_goals_230,test,33,0.83844393491745,0.834267023337033
llm_goals_230,test,34,0.8005051016807556,0.7892429815800577
llm_goals_230,test,35,0.805772066116333,0.8120109426774391
llm_goals_230,test,36,0.8227776288986206,0.8267144565859352
llm_goals_230,test,37,0.8060332536697388,0.675275568637887
llm_goals_230,test,38,0.8236576318740845,0.601560060077995
llm_goals_230,test,39,0.870607852935791,0.7968490736396183
llm_goals_230,test,40,0.858913004398346,0.8216474607704832
llm_goals_230,test,41,0.7285559177398682,0.5197035792893621
llm_goals_230,test,42,0.7863759398460388,0.8097020966346316
llm_goals_230,test,43,0.694148600101471,0.5719336953223904
llm_goals_230,test,44,0.7746042609214783,0.5981191789043642
llm_goals_230,test,45,0.8416786789894104,0.8736265335179677
llm_goals_230,test,46,0.8353901505470276,0.8487543997753076
llm_goals_230,test,47,0.8549081087112427,0.862138026666209
llm_goals_230,test,48,0.8468077778816223,0.8476904574354545
llm_goals_230,test,49,0.9041564464569092,0.8333969706647004
llm_goals_206,test,0,0.4452988803386688,0.8559101199930208
llm_goals_206,test,1,0.42049363255500793,0.7879802535556255
llm_goals_206,test,2,0.1844203621149063,0.8428787031752032
llm_goals_206,test,3,0.16869236528873444,0.8372089275473125
llm_goals_206,test,4,0.34880149364471436,0.7926284648989284
llm_goals_206,test,5,0.15840241312980652,0.5153650037350492
llm_goals_206,test,6,0.2559617757797241,0.3343649675741765
llm_goals_206,test,7,0.21057309210300446,0.9070251599505484
llm_goals_206,test,8,0.30809518694877625,0.8455585838330733
llm_goals_206,test,9,0.439159095287323,0.6389058176485727
llm_goals_206,test,10,0.14385458827018738,0.1182260710030537
llm_goals_206,test,11,0.4315495491027832,0.7467083406482149
llm_goals_206,test,12,0.23959819972515106,0.4574706710312913
llm_goals_206,test,13,0.07975756376981735,0.0931690538173042
llm_goals_206,test,14,0.40392354130744934,0.6621974660772205
llm_goals_206,test,15,0.28690198063850403,0.494185809495628
llm_goals_206,test,16,0.2508836090564728,0.7829698941017894
llm_goals_206,test,17,0.4333667755126953,0.8449901332087761
llm_goals_206,test,18,0.2126500904560089,0.5445193157731812
llm_goals_206,test,19,0.0489397756755352,0.1852277467567061
llm_goals_206,test,20,0.48344799876213074,0.779411075601452
llm_goals_206,test,21,0.4738370180130005,0.8588447585962018
llm_goals_206,test,22,0.35660022497177124,0.6656724760322136
llm_goals_206,test,23,0.3369237184524536,0.7493727269767156
llm_goals_206,test,24,0.17519107460975647,0.9005759943815377
llm_goals_206,test,25,0.1974167674779892,0.6107508047041867
llm_goals_206,test,26,0.2624392509460449,0.6818652741926582
llm_goals_206,test,27,0.1072247326374054,0.101753575918842
llm_goals_206,test,28,0.26582637429237366,0.7895615709538879
llm_goals_206,test,29,0.26377278566360474,0.6033396275372427
llm_goals_206,test,30,0.3156009614467621,0.5204197590718647
llm_goals_206,test,31,0.5230872631072998,0.552048008783336
llm_goals_206,test,32,0.31973713636398315,0.6983551428844045
llm_goals_206,test,33,0.17294542491436005,0.7920455896978277
llm_goals_206,test,34,0.31811991333961487,0.8229582905649624
llm_goals_206,test,35,0.28126415610313416,0.7513486755861682
llm_goals_206,test,36,0.5509564280509949,0.8837921634473136
llm_goals_206,test,37,0.04343729838728905,0.2323070941836299
llm_goals_206,test,38,0.3062567412853241,0.6051056320867053
llm_goals_206,test,39,0.16920071840286255,0.3355604905336211
llm_goals_206,test,40,0.09348815679550171,0.0412251261013626
llm_goals_206,test,41,0.18625113368034363,0.8154974559604902
llm_goals_206,test,42,0.3884795308113098,0.650940780779249
llm_goals_206,test,43,0.5454878211021423,0.8136300459617928
llm_goals_206,test,44,0.29864221811294556,0.7293123065101965
llm_goals_206,test,45,0.18590860068798065,0.7633236130088205
llm_goals_206,test,46,0.17266689240932465,0.7359003505062409
llm_goals_206,test,47,0.07682981342077255,0.1849221570121216
llm_goals_206,test,48,0.15422323346138,0.5419546416667619
llm_goals_206,test,49,0.3170132339000702,0.7725634685769378
llm_goals_146,test,0,0.39359575510025024,0.4053194295707414
llm_goals_146,test,1,0.5305871367454529,0.77863227297152
llm_goals_146,test,2,0.6794983148574829,0.4040155724720515
llm_goals_146,test,3,0.5232992172241211,0.5871794799134783
llm_goals_146,test,4,0.6830503344535828,0.560196939517166
llm_goals_146,test,5,0.6783779859542847,0.3825763215338447
llm_goals_146,test,6,0.3842698335647583,0.4487256154760436
llm_goals_146,test,7,0.7279307246208191,0.4411791015965565
llm_goals_146,test,8,0.5802988409996033,0.6130303084936348
llm_goals_146,test,9,0.5500755906105042,0.416171525001919
llm_goals_146,test,10,0.7761400938034058,0.8501744885482464
llm_goals_146,test,11,0.6441487669944763,0.4275203540944987
llm_goals_146,test,12,0.5750645995140076,0.1433368859863564
llm_goals_146,test,13,0.5310558676719666,0.3454179305560879
llm_goals_146,test,14,0.6837421655654907,0.3984231289972209
llm_goals_146,test,15,0.6175356507301331,0.7779127849429681
llm_goals_146,test,16,0.5582737326622009,0.3923375995541924
llm_goals_146,test,17,0.7159853577613831,0.7787704480980863
llm_goals_146,test,18,0.5611720681190491,0.4485241820972409
llm_goals_146,test,19,0.7305017113685608,0.3670787385820063
llm_goals_146,test,20,0.6320905089378357,0.5808513911957404
llm_goals_146,test,21,0.46091243624687195,0.348566839291943
llm_goals_146,test,22,0.6180842518806458,0.8201150868695501
llm_goals_146,test,23,0.4459773302078247,0.4340105664556233
llm_goals_146,test,24,0.5638190507888794,0.3779777975457017
llm_goals_146,test,25,0.6523606777191162,0.7064739339171033
llm_goals_146,test,26,0.6086872220039368,0.5068406448496142
llm_goals_146,test,27,0.6950638294219971,0.8190644408840766
llm_goals_146,test,28,0.6113318204879761,0.5180591218776336
llm_goals_146,test,29,0.45661911368370056,0.333344294156815
llm_goals_146,test,30,0.7466292381286621,0.7747827278582075
llm_goals_146,test,31,0.5889121890068054,0.407372660950012
llm_goals_146,test,32,0.5979768633842468,0.410007141610074
llm_goals_146,test,33,0.47315698862075806,0.4871137583409777
llm_goals_146,test,34,0.6437682509422302,0.7239974058432814
llm_goals_146,test,35,0.6439517140388489,0.4717056489178101
llm_goals_146,test,36,0.7391322255134583,0.7518023835632467
llm_goals_146,test,37,0.6076200604438782,0.4299980307715084
llm_goals_146,test,38,0.5709861516952515,0.6853837869126352
llm_goals_146,test,39,0.5027680397033691,0.3821269496784033
llm_goals_146,test,40,0.5150987505912781,0.358534933784248
llm_goals_146,test,41,0.406572163105011,0.3237407407589303
llm_goals_146,test,42,0.7631851434707642,0.465061399698899
llm_goals_146,test,43,0.6237367391586304,0.4315601062358607
llm_goals_146,test,44,0.7666124105453491,0.5556539728312679
llm_goals_146,test,45,0.7687322497367859,0.5920740253668018
llm_goals_146,test,46,0.6087818145751953,0.4871274917312868
llm_goals_146,test,47,0.671068012714386,0.581843033103631
llm_goals_146,test,48,0.6608366966247559,0.3075919216632473
llm_goals_146,test,49,0.8106344938278198,0.7619587682314233
llm_goals_115,test,0,0.6425188183784485,0.4434632286904964
llm_goals_115,test,1,0.7131127119064331,0.6904317046527385
llm_goals_115,test,2,0.5032650232315063,0.6516189539788452
llm_goals_115,test,3,0.5474719405174255,0.4339340266994299
llm_goals_115,test,4,0.7403281331062317,0.3268096695267067
llm_goals_115,test,5,0.7386695742607117,0.5710080860563095
llm_goals_115,test,6,0.7779484987258911,0.5525431491717658
llm_goals_115,test,7,0.620979905128479,0.4917475457547118
llm_goals_115,test,8,0.6023992300033569,0.5222463726327303
llm_goals_115,test,9,0.7522634863853455,0.7214239291876969
llm_goals_115,test,10,0.6568086743354797,0.1423416680133991
llm_goals_115,test,11,0.560935914516449,0.0211501267474188
llm_goals_115,test,12,0.7142970561981201,0.7876465277644278
llm_goals_115,test,13,0.7747724652290344,0.7712913027170795
llm_goals_115,test,14,0.7369701266288757,0.8433526700949283
llm_goals_115,test,15,0.6044343113899231,0.4617767241926875
llm_goals_115,test,16,0.6743440628051758,0.751642873633365
llm_goals_115,test,17,0.7537640333175659,0.5750395750632571
llm_goals_115,test,18,0.6971588730812073,0.731190860777953
llm_goals_115,test,19,0.6560701131820679,0.4010090785721757
llm_goals_115,test,20,0.72298264503479,0.4639274084070101
llm_goals_115,test,21,0.7329993844032288,0.8439438875722727
llm_goals_115,test,22,0.6764328479766846,0.4930168575659444
llm_goals_115,test,23,0.6462683081626892,0.1606455731785788
llm_goals_115,test,24,0.7743133902549744,0.8225050510480415
llm_goals_115,test,25,0.5939698815345764,0.4682683350736841
llm_goals_115,test,26,0.6657041311264038,0.8534070288351345
llm_goals_115,test,27,0.7301197052001953,0.4742317939252941
llm_goals_115,test,28,0.5294566750526428,0.0572278600376891
llm_goals_115,test,29,0.6542552709579468,0.4907303253909411
llm_goals_115,test,30,0.7017902135848999,0.1910564822051387
llm_goals_115,test,31,0.8118933439254761,0.8779074551007869
llm_goals_115,test,32,0.589830219745636,0.504762116367231
llm_goals_115,test,33,0.8469209671020508,0.7961870421429237
llm_goals_115,test,34,0.6158614158630371,0.3117413427150098
llm_goals_115,test,35,0.6856768727302551,0.6062498233633526
llm_goals_115,test,36,0.6144417524337769,0.5465164093133901
llm_goals_115,test,37,0.5118377208709717,0.5868361038548235
llm_goals_115,test,38,0.6558129191398621,0.3403703059415776
llm_goals_115,test,39,0.7098742127418518,0.8219108606932716
llm_goals_115,test,40,0.5247913599014282,0.1035981058362717
llm_goals_115,test,41,0.7127695083618164,0.8823458588361053
llm_goals_115,test,42,0.5123327970504761,0.2803475549795062
llm_goals_115,test,43,0.7304520606994629,0.8513012565900029
llm_goals_115,test,44,0.6434853076934814,0.7679774087348048
llm_goals_115,test,45,0.580403208732605,0.2698108084459203
llm_goals_115,test,46,0.7464240789413452,0.0357215216493676
llm_goals_115,test,47,0.5888521671295166,0.3874775112245213
llm_goals_115,test,48,0.5754453539848328,0.7725214275580398
llm_goals_115,test,49,0.7845688462257385,0.6720584616591353
llm_goals_182,test,0,0.40802234411239624,0.8652063058172594
llm_goals_182,test,1,0.31202754378318787,0.0826947230522371
llm_goals_182,test,2,0.6406647562980652,0.798587619624287
llm_goals_182,test,3,0.6755633354187012,0.1153333326317589
llm_goals_182,test,4,0.6440982222557068,0.8911074983818299
llm_goals_182,test,5,0.467868447303772,0.322673941983326
llm_goals_182,test,6,0.39511099457740784,0.3873713790307967
llm_goals_182,test,7,0.6515709161758423,0.7954637431338407
llm_goals_182,test,8,0.6363833546638489,0.8713126766724444
llm_goals_182,test,9,0.45886850357055664,0.3727719680238343
llm_goals_182,test,10,0.3999599516391754,0.344259508803394
llm_goals_182,test,11,0.5791087746620178,0.826409100211895
llm_goals_182,test,12,0.5795111656188965,0.6995229144237722
llm_goals_182,test,13,0.42647886276245117,0.2983322739239659
llm_goals_182,test,14,0.3953152894973755,0.1121533235192302
llm_goals_182,test,15,0.44977283477783203,0.8530130402661898
llm_goals_182,test,16,0.35438355803489685,0.140642355581753
llm_goals_182,test,17,0.322365403175354,0.0750534295482943
llm_goals_182,test,18,0.2608392536640167,0.1021012312203676
llm_goals_182,test,19,0.6714637875556946,0.7930271065360427
llm_goals_182,test,20,0.6425413489341736,0.8727144576853948
llm_goals_182,test,21,0.38667771220207214,0.3883832635395169
llm_goals_182,test,22,0.3706037104129791,0.0858987886275301
llm_goals_182,test,23,0.5663382411003113,0.8587499834361978
llm_goals_182,test,24,0.33350905776023865,0.1529090764042953
llm_goals_182,test,25,0.4785125255584717,0.3907861244891192
llm_goals_182,test,26,0.26415544748306274,0.0898813936513848
llm_goals_182,test,27,0.5443840026855469,0.0749023763424232
llm_goals_182,test,28,0.5037024021148682,0.1190598459155961
llm_goals_182,test,29,0.49412041902542114,0.7016449528375276
llm_goals_182,test,30,0.4807630777359009,0.3981986561532387
llm_goals_182,test,31,0.4566341042518616,0.4033468376136607
llm_goals_182,test,32,0.6856442093849182,0.8175631740353995
llm_goals_182,test,33,0.20493580400943756,0.1448325966795168
llm_goals_182,test,34,0.4987255036830902,0.3547330626609051
llm_goals_182,test,35,0.46356669068336487,0.823988084949945
llm_goals_182,test,36,0.41301605105400085,0.3920453941320438
llm_goals_182,test,37,0.6505021452903748,0.7340161718404824
llm_goals_182,test,38,0.46340325474739075,0.7157883671582869
llm_goals_182,test,39,0.5066962838172913,0.6255065804950025
llm_goals_182,test,40,0.6106889247894287,0.8448494619425109
llm_goals_182,test,41,0.527564287185669,0.3755363161560536
llm_goals_182,test,42,0.6006482243537903,0.8729538896060706
llm_goals_182,test,43,0.4930153489112854,0.3845820969466132
llm_goals_182,test,44,0.6204966902732849,0.8535886434286102
llm_goals_182,test,45,0.5286952257156372,0.1216040102965252
llm_goals_182,test,46,0.36390915513038635,0.8235719161892086
llm_goals_182,test,47,0.5321910381317139,0.1493568636498045
llm_goals_182,test,48,0.40771931409835815,0.0977336313667494
llm_goals_182,test,49,0.4421783983707428,0.1145731477706443
llm_goals_326,test,0,0.7847309708595276,0.7210679537447465
llm_goals_326,test,1,0.5930368304252625,0.3698897290610674
llm_goals_326,test,2,0.7389282584190369,0.6763902331902123
llm_goals_326,test,3,0.6559911966323853,0.6144636338527515
llm_goals_326,test,4,0.7898126244544983,0.7226604268728406
llm_goals_326,test,5,0.5623865723609924,0.225010837259175
llm_goals_326,test,6,0.6279349327087402,0.2511202974122448
llm_goals_326,test,7,0.7759313583374023,0.682107850479918
llm_goals_326,test,8,0.731614351272583,0.7688388280774076
llm_goals_326,test,9,0.5091253519058228,0.2201601423152862
llm_goals_326,test,10,0.6894823908805847,0.5927527654534117
llm_goals_326,test,11,0.6753029823303223,0.6402653335664261
llm_goals_326,test,12,0.6946346163749695,0.5219165167657188
llm_goals_326,test,13,0.5789473056793213,0.0912598670014219
llm_goals_326,test,14,0.750351071357727,0.6237388815231029
llm_goals_326,test,15,0.7650144100189209,0.58422994911281
llm_goals_326,test,16,0.7166934013366699,0.598158895081766
llm_goals_326,test,17,0.6626491546630859,0.3075615988754216
llm_goals_326,test,18,0.666754961013794,0.5624766832927214
llm_goals_326,test,19,0.7151588201522827,0.5837591950755885
llm_goals_326,test,20,0.768338680267334,0.7023531773229094
llm_goals_326,test,21,0.6112645864486694,0.2231004774292705
llm_goals_326,test,22,0.6042892336845398,0.2981176158853984
llm_goals_326,test,23,0.7668597102165222,0.5947662537107573
llm_goals_326,test,24,0.6990388035774231,0.6704973022514685
llm_goals_326,test,25,0.7347596883773804,0.7548767212573778
llm_goals_326,test,26,0.6739091277122498,0.6431580812900115
llm_goals_326,test,27,0.530316174030304,0.3327881124528037
llm_goals_326,test,28,0.6887086629867554,0.7641554441675951
llm_goals_326,test,29,0.7176430821418762,0.4778539914575542
llm_goals_326,test,30,0.6960781812667847,0.7097294996625062
llm_goals_326,test,31,0.6720806360244751,0.2337370659971392
llm_goals_326,test,32,0.7051307559013367,0.6377238074883209
llm_goals_326,test,33,0.7538363933563232,0.6003066307923431
llm_goals_326,test,34,0.6906871795654297,0.6119511714458287
llm_goals_326,test,35,0.7219927906990051,0.5639635886155634
llm_goals_326,test,36,0.7464237809181213,0.6540181072858835
llm_goals_326,test,37,0.7107611298561096,0.6297099560404894
llm_goals_326,test,38,0.744953989982605,0.5278265562464118
llm_goals_326,test,39,0.6583830714225769,0.5029836143581652
llm_goals_326,test,40,0.6709603667259216,0.5904794288270965
llm_goals_326,test,41,0.5710654854774475,0.2163880989100536
llm_goals_326,test,42,0.8097677826881409,0.6295586948815126
llm_goals_326,test,43,0.607761561870575,0.2120109108404722
llm_goals_326,test,44,0.7891908884048462,0.6940786874714112
llm_goals_326,test,45,0.7078865170478821,0.6451782543850242
llm_goals_326,test,46,0.7317407131195068,0.5818721654926584
llm_goals_326,test,47,0.7149933576583862,0.6937841106268494
llm_goals_326,test,48,0.6652705669403076,0.6520763720762168
llm_goals_326,test,49,0.6101588606834412,0.3675984402598143
