model,Rating,Rating std,Rating alpha,is_english Rating,is_english Rating std,is_english Rating alpha,is_chinese Rating,is_chinese Rating std,is_chinese Rating alpha,is_code Rating,is_code Rating std,is_code Rating alpha,is_hard Rating,is_hard Rating std,is_hard Rating alpha
gpt-4o-2024-05-13,1296.5964790725538,2.276888171532072,"[np.float64(-4.0500205497558), np.float64(4.766997015290144)]",-12.722240748916793,3.109610345362571,"[np.float64(-7.559441031516818), np.float64(4.569957719440065)]",-3.202442298675295,4.462183252954948,"[np.float64(-8.509095030316072), np.float64(8.703259696044128)]",15.460074003059937,3.740272554165186,"[np.float64(-6.181232358508975), np.float64(8.325937999254347)]",13.40388101587258,3.4728795823748912,"[np.float64(-5.209570569390413), np.float64(8.254166667149839)]"
claude-3-5-sonnet-20240620,1285.8415268386227,3.708371434579528,"[np.float64(-7.165066951463132), np.float64(7.289512735063454)]",-29.13808020539193,5.010123311957951,"[np.float64(-11.300886449031562), np.float64(8.841374207822007)]",-19.31571325746969,7.04043553924261,"[np.float64(-13.011679906646144), np.float64(14.643477325919307)]",43.76346862366197,6.179514573678014,"[np.float64(-10.955855168254615), np.float64(12.685097646945195)]",13.80821515097738,5.348033005296337,"[np.float64(-8.969912671174393), np.float64(11.650527718167792)]"
gemini-advanced-0514,1285.0907351859937,2.5245877745600187,"[np.float64(-4.955479650412144), np.float64(4.740138501468209)]",-25.9875274007753,3.525847080774605,"[np.float64(-8.168515435136417), np.float64(5.479943287272793)]",7.538918389841823,4.861382586671252,"[np.float64(-9.359705960079577), np.float64(9.265069172426056)]",2.0693168910370874,4.337955037414525,"[np.float64(-7.614078883378233), np.float64(9.39284925003799)]",2.9036170001804438,3.7145895509413487,"[np.float64(-6.129104616099998), np.float64(8.527207422097499)]"
gemini-1.5-pro-api-0514,1273.4158216015678,2.395001580167996,"[np.float64(-4.226972985977682), np.float64(5.032356142193748)]",-19.652567875734395,3.441946650783224,"[np.float64(-7.7535337736228485), np.float64(5.426741022483796)]",18.96379605552419,4.747355387730503,"[np.float64(-8.543820293277495), np.float64(9.790911260699636)]",10.958201169810192,4.116048965166948,"[np.float64(-6.877872201546774), np.float64(9.244428721185724)]",15.203558007362856,3.607157258868398,"[np.float64(-5.3077648717002806), np.float64(8.112818654912882)]"
claude-3-opus-20240229,1272.83286716387,1.549620612108748,"[np.float64(-2.584123545621651), np.float64(3.598016073081908)]",-39.146776296624246,2.3863426406715336,"[np.float64(-6.484544971992783), np.float64(2.7650156687203165)]",0.6555769350331458,2.8434347003197487,"[np.float64(-5.173335508921111), np.float64(5.926124784707322)]",11.518603612587716,2.712987001587835,"[np.float64(-4.362535451247027), np.float64(6.153169289683407)]",15.213715539238809,2.4654497623463376,"[np.float64(-3.199708186632442), np.float64(6.375558148268647)]"
bard-jan-24-gemini-pro,1271.1681309587368,6.480652132898937,"[np.float64(-12.342688634999831), np.float64(12.848989078765953)]",-48.13066992494291,7.140742545696555,"[np.float64(-14.710050702177618), np.float64(13.143509884274081)]",-24.97364875680349,13.085221829876115,"[np.float64(-25.75919039239866), np.float64(25.21349816628384)]",-10.769904374932835,8.073106668565254,"[np.float64(-13.819559076125016), np.float64(17.411363871069298)]",-45.26718868110007,7.063051965534456,"[np.float64(-12.594767041878178), np.float64(14.985048462782238)]"
gpt-4-1106-preview,1264.6180657259658,2.034429879637865,"[np.float64(-3.632264007833328), np.float64(4.256724653089577)]",-11.10158230387015,2.7544623987271004,"[np.float64(-7.094752999153848), np.float64(3.910531914893964)]",-3.022771894020819,3.87168879321432,"[np.float64(-7.2883385579988085), np.float64(7.9325984048811495)]",11.403962424940588,3.176063767644898,"[np.float64(-5.060244224936312), np.float64(7.591663030546117)]",8.99160095741375,2.8197410624332315,"[np.float64(-4.092925952167775), np.float64(6.90091951334768)]"
gemini-1.5-pro-api-0409-preview,1264.1297764866238,2.367819355508095,"[np.float64(-4.172805398766741), np.float64(5.036255387333313)]",-2.1217695359531468,3.1636847877667247,"[np.float64(-7.635899070122347), np.float64(4.714076277349109)]",4.690909704211835,4.2354637417312935,"[np.float64(-8.89915406632314), np.float64(7.974460540204227)]",-11.768368645338438,3.660873840139096,"[np.float64(-6.395449156538302), np.float64(8.696377872093258)]",-4.142253195770268,3.452780123619985,"[np.float64(-5.15025160391726), np.float64(8.082348312953274)]"
gpt-4-turbo-2024-04-09,1257.7040033511378,1.899942919690239,"[np.float64(-3.2932146317752995), np.float64(4.149145261277454)]",3.012832810170861,2.8841995725301723,"[np.float64(-7.100185871516645), np.float64(4.111758499351707)]",3.765715850363598,3.6911939703936687,"[np.float64(-6.674817320254688), np.float64(7.507741122015644)]",15.087807804662136,3.459768709813998,"[np.float64(-5.574951253606821), np.float64(7.698650108901408)]",11.13562570781905,2.911981507178184,"[np.float64(-4.328137465858086), np.float64(7.060354061156025)]"
gpt-4-0125-preview,1254.8088196009767,1.9964001868947368,"[np.float64(-3.3913401608735967), np.float64(4.255967028811483)]",-5.3295300419024905,2.785956272826388,"[np.float64(-6.850927741058264), np.float64(3.985356921958532)]",1.3623540806159506,3.4330503005804607,"[np.float64(-6.297804323484755), np.float64(7.348380889748075)]",2.375092238466664,3.2412690480174517,"[np.float64(-5.09980620652581), np.float64(7.43584006200768)]",13.997974121917771,2.953095469661501,"[np.float64(-4.259367169203639), np.float64(6.980878858875729)]"
gemini-1.5-flash-api-0514,1243.0982536459585,2.472810119719505,"[np.float64(-4.299514564915626), np.float64(5.019195403919639)]",-21.120562751222486,3.3511485349592336,"[np.float64(-7.678547357684044), np.float64(5.027355248338967)]",8.122593569971286,5.001332755026658,"[np.float64(-9.75815123338646), np.float64(9.67925154046105)]",14.889738880742753,4.00311684798567,"[np.float64(-7.520872487112982), np.float64(8.61901511438904)]",9.34433207981215,3.691713638840981,"[np.float64(-5.945101788935251), np.float64(8.835293618433685)]"
yi-large-preview,1237.1027506619544,2.1964255053581763,"[np.float64(-4.324141972544794), np.float64(4.722751716009952)]",-3.5010550450157374,3.2329136504464486,"[np.float64(-8.169856508691613), np.float64(4.630387016859062)]",37.369537684624405,4.383893486914457,"[np.float64(-8.659276299523043), np.float64(8.633531796153111)]",11.37826363321874,3.956401400806828,"[np.float64(-6.739185201312107), np.float64(9.209762772016031)]",16.67473055144067,3.535839593496115,"[np.float64(-5.3828862808243585), np.float64(8.742425664272965)]"
gemma-2-27b-it,1232.3987249375866,5.949900677007577,"[np.float64(-11.091366955153717), np.float64(11.939188836501216)]",-10.21049830818878,7.489568842748233,"[np.float64(-15.562654737652357), np.float64(13.405910534232374)]",-0.7614849375929321,10.985032838924315,"[np.float64(-20.142998843299456), np.float64(22.908645842163356)]",7.081595774962447,8.810177997889603,"[np.float64(-16.736740026669032), np.float64(18.334344962253926)]",-14.566853295710924,8.034384115414424,"[np.float64(-14.607177999875361), np.float64(16.647775781085038)]"
yi-large,1221.986790129421,4.178621038229871,"[np.float64(-7.135337320201643), np.float64(8.911234955367036)]",-3.304895989685192,5.830849111619188,"[np.float64(-13.208273768515888), np.float64(9.3241770281272)]",8.514261907749907,8.435807722455177,"[np.float64(-16.36790478929138), np.float64(17.853489710287015)]",20.520926518009325,7.624361338848679,"[np.float64(-13.646280526937026), np.float64(16.368573780375836)]",11.006355286147588,6.6027750493414175,"[np.float64(-11.538815998856643), np.float64(13.749943440039143)]"
nemotron-4-340b-instruct,1221.542012620363,3.570372951577848,"[np.float64(-7.058492558428952), np.float64(6.939258449477165)]",-13.151174079568303,4.726144645987191,"[np.float64(-11.095891006864619), np.float64(8.326563656524263)]",7.45002129470337,6.7121358338848784,"[np.float64(-12.666955178300757), np.float64(13.661424649079542)]",-6.142333290069301,6.039949331012656,"[np.float64(-9.889358451167233), np.float64(13.168892217952083)]",9.111377781954864,5.371871561497319,"[np.float64(-8.949801024027575), np.float64(11.843574869032368)]"
claude-3-sonnet-20240229,1219.9661333376998,1.734208627922065,"[np.float64(-3.0638440722830182), np.float64(3.6536282484612457)]",-26.154661490062875,2.5214575895611606,"[np.float64(-6.347892388884912), np.float64(3.373681088333999)]",-14.72467104133045,2.990737128563714,"[np.float64(-5.5941137973774495), np.float64(5.951904461837987)]",26.38643014815442,2.9328786666575395,"[np.float64(-4.194955015491917), np.float64(7.138224710277829)]",6.48060942239388,2.667858659540264,"[np.float64(-3.9158625769701976), np.float64(6.442427460541045)]"
command-r-plus,1213.6816582520341,1.853586231425847,"[np.float64(-3.388946517452723), np.float64(4.008501870900773)]",-17.706034841574994,2.715778803698105,"[np.float64(-6.790963708690832), np.float64(3.7338376832864135)]",5.670137755754955,3.4306230952327423,"[np.float64(-6.337366754709783), np.float64(6.7162686388197965)]",-13.73432328431094,3.1468622994310955,"[np.float64(-4.894457319831513), np.float64(7.664043967714752)]",-6.527131924231143,2.9524745359112625,"[np.float64(-4.368129259632354), np.float64(6.961179781619543)]"
gpt-4-0314,1213.4085467040898,2.399210637552309,"[np.float64(-4.3836977500518515), np.float64(4.9215974445753545)]",-29.243818599603948,3.157277873851373,"[np.float64(-7.7342892657596956), np.float64(4.894738853682313)]",-13.36574494909406,4.275599613374321,"[np.float64(-8.179388430356973), np.float64(8.85730511690279)]",8.690204747900271,3.766486100901905,"[np.float64(-5.5119829953721435), np.float64(8.574612405711344)]",22.961615916270315,3.3644265184001423,"[np.float64(-5.416828538555325), np.float64(7.865910567294895)]"
reka-core-20240501,1211.5830606735778,1.9334609145557973,"[np.float64(-3.361204827343954), np.float64(4.24050184502471)]",-11.107284128011328,2.873302901342281,"[np.float64(-7.101845076051672), np.float64(3.9693828338536026)]",8.713328114687819,4.023813886174006,"[np.float64(-7.670044369270965), np.float64(8.395847930611778)]",-1.5531426625698066,3.463381892386519,"[np.float64(-5.223375271081854), np.float64(8.206593808561962)]",5.514489388790675,3.152931963850772,"[np.float64(-4.895616512186871), np.float64(7.467139756403043)]"
claude-3-haiku-20240307,1209.2415351190941,1.778296043359257,"[np.float64(-3.423835413056395), np.float64(3.8538018827989617)]",-29.68143062796953,2.613101162050602,"[np.float64(-6.371594567349618), np.float64(3.460942615621075)]",-35.85456330054247,2.9311712574811417,"[np.float64(-5.258343153482208), np.float64(6.222658603963879)]",16.344263858780078,2.827508420560748,"[np.float64(-4.087635710909584), np.float64(6.959972355900016)]",9.87051835357817,2.7228100356963245,"[np.float64(-3.8119925301556563), np.float64(6.626856864351926)]"
gemma-2-9b-it,1203.7231940768718,5.634697822979152,"[np.float64(-10.447590330619732), np.float64(11.543110239422276)]",-17.070333575249812,7.379107824637768,"[np.float64(-15.855534087587401), np.float64(14.183668321967826)]",-0.9604763976589504,10.759351946741834,"[np.float64(-22.08817839776642), np.float64(21.353485331284293)]",-13.510087252439453,9.113868443198744,"[np.float64(-16.736916654458845), np.float64(18.548016190526265)]",3.358249579384403,7.857277929194149,"[np.float64(-13.587656419965704), np.float64(16.574782512359565)]"
glm-4-0520,1201.5047317859967,4.979563927215668,"[np.float64(-9.464752799317012), np.float64(10.158955658204832)]",7.690070305685692,6.39823710529914,"[np.float64(-14.014357378855761), np.float64(11.285306664447178)]",48.62713812118381,9.21146310741236,"[np.float64(-17.435011718730514), np.float64(17.75420725681235)]",11.99592835594823,7.63697849336641,"[np.float64(-13.613053426352248), np.float64(15.677017471690547)]",18.963421414084973,6.781109332424665,"[np.float64(-12.0187569285645), np.float64(14.1921621852693)]"
gpt-4-0613,1190.9269351162523,2.028087536117686,"[np.float64(-3.5366696956211854), np.float64(4.407869063518547)]",-22.14608353797701,2.8014588818200727,"[np.float64(-6.803046062232378), np.float64(3.7312554581918818)]",-40.696412635377634,3.596124600881436,"[np.float64(-6.662183417447189), np.float64(7.4432937145542155)]",1.4955383326290903,3.2180930616181094,"[np.float64(-4.897045774703412), np.float64(7.5662824333334555)]",17.663555367844467,2.826424117612764,"[np.float64(-4.246078717008668), np.float64(6.6619136473361085)]"
claude-1,1190.2368418550554,4.45767012944118,"[np.float64(-8.124446319737444), np.float64(8.851108888760109)]",-29.734766464372697,5.236640359021858,"[np.float64(-12.115643417162726), np.float64(8.647853028157755)]",-18.40115000700192,8.856138058358546,"[np.float64(-17.00893433679194), np.float64(17.003438741104496)]",3.743612034748379,6.050476777176656,"[np.float64(-10.578765506083974), np.float64(12.882913106580263)]",-18.948638220673896,5.186245510235905,"[np.float64(-8.399278855488307), np.float64(11.468170341417487)]"
reka-flash-preview-20240611,1187.657793252523,3.9053710481092425,"[np.float64(-7.005051562731069), np.float64(8.20186924994664)]",-15.126671655961816,5.0955196844410375,"[np.float64(-11.187285384849577), np.float64(8.476855858025234)]",-4.692616761238898,7.548191934313396,"[np.float64(-15.034693623426481), np.float64(14.110436788400026)]",7.228137908773498,6.5444028705489234,"[np.float64(-11.426278254769926), np.float64(13.898603669977028)]",-9.93570082416328,5.635829526219236,"[np.float64(-9.901279769621466), np.float64(12.17031949229113)]"
llama-3-70b-instruct,1186.7743361933074,1.4782254967096544,"[np.float64(-2.4263668423363924), np.float64(3.3611322353301603)]",67.34525830918865,2.298493782919058,"[np.float64(-6.278781809729523), np.float64(2.638672427335166)]",-50.89857136585173,2.8277742729264657,"[np.float64(-5.224470915810322), np.float64(5.798969217895163)]",-9.642696905502396,2.6457756636058467,"[np.float64(-4.061009219634361), np.float64(6.467478290416457)]",-0.9072789353874494,2.3640170746032005,"[np.float64(-2.976585858087945), np.float64(6.002685011795032)]"
qwen-max-0428,1186.7660628768192,2.626255681405764,"[np.float64(-4.956831767109179), np.float64(5.434301301821961)]",3.3923689300239728,3.914776750573124,"[np.float64(-9.0971802049845), np.float64(6.227612785418348)]",62.42825973354493,6.191785819732496,"[np.float64(-11.394679722840749), np.float64(12.653066554154819)]",7.97580228285511,4.905632986801403,"[np.float64(-9.066393911974487), np.float64(10.446232912568911)]",11.073258261361156,4.515299727449295,"[np.float64(-8.077636316465503), np.float64(9.489015505872427)]"
qwen2-72b-instruct,1181.910277798138,3.031499762618876,"[np.float64(-5.417375115240247), np.float64(6.403276407575731)]",9.143976426657568,3.961746547734041,"[np.float64(-10.107697140636903), np.float64(6.147349008090657)]",68.48740638852831,5.891025622086968,"[np.float64(-11.197110010786204), np.float64(12.34384094374434)]",-3.3766830144652933,5.162046473976124,"[np.float64(-8.317266247565362), np.float64(11.841491125096432)]",13.44659447280741,4.5504228666259925,"[np.float64(-7.713424127398662), np.float64(9.770364720488944)]"
gemini-pro-dev-api,1181.5370922088264,3.9637685941756975,"[np.float64(-6.99164490065823), np.float64(8.438988776423002)]",-37.76189040263807,4.90952803490405,"[np.float64(-11.602830150276791), np.float64(8.499699224053352)]",-23.00376483799492,7.379898592306464,"[np.float64(-14.138074406495834), np.float64(15.030667053899933)]",-22.642677977412205,5.84887279342864,"[np.float64(-10.224635335860302), np.float64(12.624638290970807)]",-10.65971875462013,5.316338806207645,"[np.float64(-9.439062535430825), np.float64(11.628230140889013)]"
deepseek-coder-v2,1180.7271080491225,4.828707772345773,"[np.float64(-9.07324632058885), np.float64(9.739691429995446)]",-34.09597501769337,6.120520330618407,"[np.float64(-13.56941008150348), np.float64(10.492567917527758)]",15.988601125505571,8.749537243895977,"[np.float64(-17.624163755202623), np.float64(16.9788971347763)]",63.754468842948114,7.513950982602946,"[np.float64(-13.169285204272853), np.float64(16.680132764158444)]",42.70017273898778,6.473538743223294,"[np.float64(-12.090694895256554), np.float64(13.997373121561285)]"
reka-flash-21b-20240226-online,1175.8699343374817,3.686612947614165,"[np.float64(-6.96748543912372), np.float64(7.2640273843394425)]",-11.551729954198914,4.792066050916121,"[np.float64(-10.601308591369321), np.float64(7.446424153301864)]",-9.61825888353403,6.449181559050988,"[np.float64(-12.304641081865466), np.float64(13.311752283004052)]",0.9218860242371923,5.778165588045573,"[np.float64(-10.165520286798467), np.float64(11.889606259310584)]",-2.1736283245451395,5.197535221358802,"[np.float64(-8.73803962492396), np.float64(11.146344602978868)]"
command-r,1174.9816167322833,2.1080509318642497,"[np.float64(-3.523579393619002), np.float64(4.515545531286989)]",-14.611841928317146,2.990154350784448,"[np.float64(-7.371403381606597), np.float64(4.240340880357591)]",12.17070802139847,3.6906818605022353,"[np.float64(-7.357363826624132), np.float64(7.525384096156113)]",-8.622440572582661,3.7042681439890783,"[np.float64(-5.926588655810058), np.float64(8.520158753502795)]",-23.323188082537737,3.2877906296395385,"[np.float64(-4.934387931113008), np.float64(7.333352457274497)]"
reka-flash-21b-20240226,1170.2105696056515,2.875561003326476,"[np.float64(-4.905995198044138), np.float64(5.794636246216442)]",-15.55027078589163,3.850731978178543,"[np.float64(-8.885439249534436), np.float64(5.861551370377413)]",-12.307339582271991,5.604128638960221,"[np.float64(-11.751206405260843), np.float64(10.142069053945802)]",4.882854846439944,4.526252229770429,"[np.float64(-8.065793314730332), np.float64(9.704118697135007)]",-6.1806706044971325,4.360483820734341,"[np.float64(-6.893995846879831), np.float64(9.958348062186412)]"
claude-2.0,1163.8326518012161,5.496315189601659,"[np.float64(-10.210426619832106), np.float64(11.572107920946337)]",-25.59650017800181,6.13671891900986,"[np.float64(-13.700878532945568), np.float64(10.418851444919202)]",-7.842786834255508,11.98733375675186,"[np.float64(-22.979366996071654), np.float64(24.02692901040019)]",11.66406121965312,7.723641320820076,"[np.float64(-14.147769787983798), np.float64(16.1437591326689)]",2.883105137392908,6.484995131517148,"[np.float64(-11.664279139889091), np.float64(13.497905152742183)]"
mistral-large-2402,1163.1155391115408,2.0881657703474543,"[np.float64(-3.7451232843332036), np.float64(4.2777188441939415)]",2.075709074914838,3.001587490356629,"[np.float64(-7.505177101498562), np.float64(4.2932392465615035)]",-32.10906722772737,3.85556655704792,"[np.float64(-7.395367831748885), np.float64(7.8663081249911215)]",9.688847911794388,3.481554399769889,"[np.float64(-5.535590818665703), np.float64(7.6875657588565955)]",20.39659289772264,3.045598223560392,"[np.float64(-4.751476606463921), np.float64(6.914558148598875)]"
gpt-3.5-turbo-0314,1162.0944632429487,10.854958561468704,"[np.float64(-20.677873779361107), np.float64(20.8322464683024)]",-56.30234156496616,11.353820484952136,"[np.float64(-23.72191921196368), np.float64(20.136866778888546)]",9.330512059053891,16.153310627956543,"[np.float64(-32.77767565562267), np.float64(31.856881390114832)]",14.430992879887798,12.66436761583891,"[np.float64(-22.513841892757362), np.float64(26.513327030816193)]",22.495024940046395,10.48256165670177,"[np.float64(-18.859260642117707), np.float64(22.031030430046965)]"
qwen1.5-110b-chat,1161.424739045549,2.592704222180205,"[np.float64(-4.552188550919254), np.float64(5.414295751080772)]",11.88211711681554,3.452336966101203,"[np.float64(-8.135946333333482), np.float64(5.410491027015946)]",58.32743664242218,5.48666165164866,"[np.float64(-10.68355916481088), np.float64(11.088681734876566)]",10.857305374474656,4.838076979139533,"[np.float64(-8.357497134100825), np.float64(10.660909815258913)]",9.66169431512001,4.248984758680309,"[np.float64(-7.388536016315756), np.float64(9.145376914951468)]"
gpt-3.5-turbo-0613,1160.9945569318782,3.265271021169691,"[np.float64(-6.2691109000106735), np.float64(6.5754073758650975)]",-40.93931574192706,3.737249935440246,"[np.float64(-9.112704412505568), np.float64(5.802294151405121)]",-36.8556117772359,7.792108308407141,"[np.float64(-14.675079430976194), np.float64(16.156508238590035)]",21.255480154377675,4.764822509203558,"[np.float64(-8.326995414491225), np.float64(10.233312144211835)]",13.19746589861124,3.9055867688241546,"[np.float64(-6.907071294617458), np.float64(8.796728591701726)]"
claude-2.1,1156.2493551354687,3.141429954774008,"[np.float64(-5.870867865085302), np.float64(6.569593985099573)]",-39.71529788451348,3.8289846956360023,"[np.float64(-9.096703465270945), np.float64(5.842830638918201)]",-45.343980875766434,5.996918001971247,"[np.float64(-11.816356794788348), np.float64(12.179676352752828)]",21.719689448854517,4.477575661952088,"[np.float64(-7.797347668529444), np.float64(9.721150616350101)]",10.297480316191564,3.971431126037258,"[np.float64(-6.035826085923819), np.float64(9.309642238422443)]"
mistral-next,1153.4349645376453,5.242522226405,"[np.float64(-9.97122759519857), np.float64(11.314663332617556)]",-20.187084516321622,6.087553205929221,"[np.float64(-13.07334387207899), np.float64(10.719445979854328)]",-49.66904457970826,10.496076102590026,"[np.float64(-19.42846514416657), np.float64(20.36939613259768)]",7.626697193317165,7.70836149472085,"[np.float64(-13.824783340903553), np.float64(16.219156449204025)]",14.17063837906638,6.281712717570524,"[np.float64(-11.164978060772471), np.float64(13.223553248424317)]"
mistral-medium,1152.6263079727714,3.049636758774468,"[np.float64(-5.573155901246764), np.float64(6.29802387017844)]",8.578270512346105,3.791616588594336,"[np.float64(-8.741005814096685), np.float64(6.0576681537797015)]",-24.144856681857416,5.7199061734082255,"[np.float64(-10.968158882351947), np.float64(11.223560296496135)]",10.516948079016275,4.704612593267992,"[np.float64(-8.068364534526934), np.float64(11.036398686439775)]",4.82206096998789,4.032334497980755,"[np.float64(-6.3543640212370605), np.float64(9.417685504184774)]"
mixtral-8x22b-instruct-v0.1,1152.2020931306643,2.325973211444269,"[np.float64(-4.0954544875289685), np.float64(4.905363612437895)]",5.437339962123975,3.202551144012681,"[np.float64(-7.67247515266717), np.float64(4.8108415751135105)]",-6.891246187176111,4.55874117798566,"[np.float64(-9.153498967021747), np.float64(8.756703015047346)]",8.359286790213796,4.006594493668077,"[np.float64(-6.558531868008863), np.float64(8.912771472742097)]",11.510372696021587,3.598863867418856,"[np.float64(-5.608775485431126), np.float64(8.435141781033291)]"
llama-3-8b-instruct,1149.9218708896858,1.7037724820324809,"[np.float64(-2.871740237311087), np.float64(3.554683885653276)]",47.854916621255136,2.568526829291097,"[np.float64(-6.538516177488482), np.float64(3.453645851334038)]",-41.41793556754733,3.335331448979616,"[np.float64(-6.130452451560458), np.float64(6.99744935149981)]",-4.406018161920126,3.0951363157143006,"[np.float64(-4.763608016659423), np.float64(6.7435760769394255)]",-16.13838623547589,2.738994097064458,"[np.float64(-3.7800301371053475), np.float64(6.535309865868978)]"
glm-4-0116,1148.5107813499383,5.227362528958307,"[np.float64(-10.319833110802847), np.float64(10.239882979872846)]",44.54830445025538,6.507322261339624,"[np.float64(-13.451251491043251), np.float64(11.269132459414116)]",76.10765331551858,10.11967154666686,"[np.float64(-18.964185661896707), np.float64(19.525320626294025)]",11.685867141572894,8.35554759491841,"[np.float64(-15.022646567186756), np.float64(16.519172533008774)]",20.745813856100952,7.36604859243692,"[np.float64(-12.562914854114148), np.float64(16.108135061887555)]"
qwen1.5-72b-chat,1148.3386993293825,2.5225285218608864,"[np.float64(-4.575426792911458), np.float64(5.5640466565296265)]",10.787328183388103,3.330296380100691,"[np.float64(-7.868568739655299), np.float64(4.922996691227556)]",58.39540668043468,4.867305722907882,"[np.float64(-9.498376880143532), np.float64(9.030199062930997)]",19.32490158302827,4.157958193014081,"[np.float64(-6.6446131267627955), np.float64(9.35734520933212)]",-1.189221574965715,3.481512817028924,"[np.float64(-5.360403636836663), np.float64(8.284042792510565)]"
gpt-3.5-turbo-0125,1147.1100538874546,2.119614113582829,"[np.float64(-3.6863638725121746), np.float64(4.565530644130831)]",-38.27238207599195,2.919411139180977,"[np.float64(-6.979991230053361), np.float64(3.955783827944586)]",-45.85863408233492,3.759671315927185,"[np.float64(-7.35929973775341), np.float64(7.107245138690054)]",18.73663333213944,3.429055272260135,"[np.float64(-5.1825375143677626), np.float64(8.347338305816372)]",10.836346170024765,3.18143531109158,"[np.float64(-4.770228440633277), np.float64(7.241183368043844)]"
zephyr-orpo-141b-A35b-v0.1,1143.4877633011536,6.369942389001909,"[np.float64(-12.079673103556843), np.float64(12.561509878322568)]",3.565591649517122,8.170982755306614,"[np.float64(-17.17383468461027), np.float64(13.502371103863332)]",-26.593068313674895,11.235332895280306,"[np.float64(-22.54724791300864), np.float64(21.20634405893731)]",-0.6681081001134184,10.401683258842176,"[np.float64(-19.015240903546534), np.float64(21.529302158966424)]",-2.3210145227521672,9.470020128740714,"[np.float64(-16.88194333215416), np.float64(20.180406538431036)]"
gemini-pro,1139.4330928062504,7.536783689874238,"[np.float64(-15.150950215037255), np.float64(16.155583269046247)]",-6.921593576622075,8.70958125191078,"[np.float64(-18.199579716814867), np.float64(15.439813486721423)]",1.6644779845193145,15.190086302429409,"[np.float64(-29.74200234102923), np.float64(29.665550678899542)]",-6.662438577882884,10.166544902455575,"[np.float64(-18.64676385073339), np.float64(20.690412138100747)]",-22.839013231912325,8.240324556645927,"[np.float64(-14.787183995976081), np.float64(17.388196860241806)]"
claude-instant-1,1133.6018266451458,4.638086798025437,"[np.float64(-8.801044952296479), np.float64(9.061936308755776)]",-12.509599296864907,5.185046241868411,"[np.float64(-11.634619326258385), np.float64(8.51009845868006)]",-15.32579739507833,9.78169025411702,"[np.float64(-19.09299635330719), np.float64(18.67845323446337)]",2.549913471005614,6.399116691441363,"[np.float64(-11.230204633853607), np.float64(13.43210719016458)]",7.466698494649587,5.373816992069794,"[np.float64(-8.62848935888658), np.float64(12.05976917244845)]"
wizardlm-70b,1129.1238423361046,6.601740094639348,"[np.float64(-12.825990744952605), np.float64(13.541904333440925)]",4.673457725331565,7.378467743702159,"[np.float64(-15.5462619292733), np.float64(13.646141062227564)]",-30.27425559448579,14.436600905215924,"[np.float64(-28.148147889122896), np.float64(29.81092184029723)]",-26.05249855233115,9.31040593166776,"[np.float64(-17.50300041128923), np.float64(19.186529525986785)]",-18.53519217084969,7.885315423523101,"[np.float64(-14.63229580478311), np.float64(16.148957578100898)]"
snowflake-arctic-instruct,1126.0853209745628,2.646855405058917,"[np.float64(-4.841793195368609), np.float64(5.577961986859464)]",-12.786406253848211,3.647932354659494,"[np.float64(-8.589670204800095), np.float64(5.57480184839179)]",-3.374074278875064,5.126268554534607,"[np.float64(-9.44602904946553), np.float64(9.689196893043086)]",-10.981739393171226,4.459010889063751,"[np.float64(-7.5183101945000335), np.float64(10.107600907172193)]",-12.825983468253822,3.7725111449144655,"[np.float64(-5.837677610834273), np.float64(8.681775431325454)]"
qwen1.5-32b-chat,1126.0273804714504,3.052466703563048,"[np.float64(-5.657515641880991), np.float64(6.338640139430481)]",5.104091176263998,4.131119041685391,"[np.float64(-9.070753806096366), np.float64(6.806360978695805)]",68.63884806063199,5.544143162378092,"[np.float64(-10.583577557228537), np.float64(11.214321830835857)]",21.81037750112879,5.343745809760248,"[np.float64(-9.544618001131244), np.float64(11.371530841249209)]",7.966753378674111,4.657201516711692,"[np.float64(-8.142205491206651), np.float64(10.028312494654294)]"
yi-1.5-34b-chat,1125.7481962016946,3.13747454081868,"[np.float64(-5.869968844291634), np.float64(6.665630559735291)]",62.57962382382713,4.343328627320814,"[np.float64(-9.7371431236134), np.float64(7.77796077596517)]",102.83449010149958,6.115885598273907,"[np.float64(-11.47097223382248), np.float64(12.193260170772348)]",1.1097603201050228,5.481266253094773,"[np.float64(-9.260019908019066), np.float64(11.827061562562044)]",5.410061149276945,4.858368039430091,"[np.float64(-8.319899522567404), np.float64(10.533141467283)]"
phi-3-medium-4k-instruct,1125.503596715216,3.7863294871687136,"[np.float64(-7.24213160886211), np.float64(7.853233088245133)]",11.813772588639806,5.050409136888196,"[np.float64(-11.127891148259218), np.float64(8.867194472148585)]",-6.544358880891617,6.927677857486844,"[np.float64(-13.284030095838645), np.float64(12.941211894453048)]",4.644308518220606,6.389403896948261,"[np.float64(-11.758688864092935), np.float64(13.289339382133896)]",20.69670977200211,5.5581940975876485,"[np.float64(-9.625933520980755), np.float64(12.131606665772233)]"
tulu-2-dpo-70b,1122.39559319609,6.973043492031771,"[np.float64(-12.988712773803627), np.float64(13.705652253529479)]",-3.679264271227348,7.848427967509023,"[np.float64(-16.926477901121544), np.float64(13.512452192941618)]",-71.94117275014042,14.910690792803694,"[np.float64(-28.71074705419892), np.float64(29.676909509846)]",-6.397083824004204,9.830085225928828,"[np.float64(-17.329958996358634), np.float64(19.908145842160494)]",7.87729777401583,8.127342301687179,"[np.float64(-14.089822718387726), np.float64(17.18238194453346)]"
mixtral-8x7b-instruct-v0.1,1114.0,2.1787985216745858,"[np.float64(-4.04009913554205), np.float64(4.516835904938034)]",25.15863485099822,2.955090950083577,"[np.float64(-7.561080445015683), np.float64(4.172554871359473)]",-37.10913010132825,3.718845450494696,"[np.float64(-6.908739994427442), np.float64(7.296537793432627)]",-3.8763595447465815,3.4716866954283208,"[np.float64(-5.545250689340719), np.float64(8.164925405234952)]",9.913234964238264,3.070390824844112,"[np.float64(-4.492100360288617), np.float64(6.979608714775862)]"
openchat-3.5-0106,1113.8015555486152,4.543924761020591,"[np.float64(-8.123140934542562), np.float64(9.239522401312115)]",-3.41651570278906,5.461351404582127,"[np.float64(-11.9525884708727), np.float64(9.250885310631316)]",-2.591423486537971,7.854620091715655,"[np.float64(-14.583880259287833), np.float64(16.7321934036146)]",17.071729998023145,6.6069112224289706,"[np.float64(-12.017411587937335), np.float64(14.197008114210242)]",-10.947347695519753,5.756153281977053,"[np.float64(-9.939291468012353), np.float64(12.025178046740898)]"
qwen1.5-14b-chat,1112.0550614752551,3.4290835213155293,"[np.float64(-6.506741648775005), np.float64(6.8899467974413255)]",10.472401498349836,4.53125754246043,"[np.float64(-10.312204997745802), np.float64(6.991848033861082)]",56.58136951236024,5.667815334513221,"[np.float64(-10.284400094177599), np.float64(11.268592623398767)]",10.852119964178057,5.762565465342167,"[np.float64(-9.51403337162471), np.float64(12.705001509176581)]",8.207986006105209,4.902385989039685,"[np.float64(-8.461494677406876), np.float64(10.847328324001712)]"
llama2-70b-steerlm-chat,1111.2542089602998,10.10855366070968,"[np.float64(-20.24620427490663), np.float64(20.856079811393556)]",-2.897569677353296,11.15523077061378,"[np.float64(-24.059284687859098), np.float64(20.325690062749057)]",-28.089679509687425,17.545369775879482,"[np.float64(-34.00111337246392), np.float64(36.37793796140318)]",-51.806922147198,12.495164563758635,"[np.float64(-23.40043526601224), np.float64(24.606416008219355)]",-13.210227223797322,11.713059423046422,"[np.float64(-20.700258087117078), np.float64(24.117504151560023)]"
starling-lm-7b-beta,1110.747310224183,3.956895620425831,"[np.float64(-6.913128037088427), np.float64(8.285865206720246)]",18.567424104275663,4.951533241809344,"[np.float64(-11.473555720915865), np.float64(8.439849701157321)]",34.613898123061986,6.28391532744074,"[np.float64(-11.412195909748547), np.float64(12.590108489362564)]",18.024930423539725,5.99915203714933,"[np.float64(-10.832473030005207), np.float64(12.007704116256859)]",1.0380010819477563,5.238312029974688,"[np.float64(-8.768901955498109), np.float64(11.447324570038782)]"
llama-2-70b-chat,1108.021856904185,2.7265968321700025,"[np.float64(-4.5462642096745185), np.float64(5.743031098787469)]",23.940840809921134,3.359166577413295,"[np.float64(-7.701382186212879), np.float64(5.288044172753146)]",-78.42801340422324,5.412207042472726,"[np.float64(-10.556901929076673), np.float64(10.890662136239783)]",-14.703360949943416,4.306466648063384,"[np.float64(-7.717535406072689), np.float64(9.28957265940727)]",-18.16892228043715,3.681509402510275,"[np.float64(-5.980263915784709), np.float64(8.462204379772778)]"
gpt-3.5-turbo-1106,1106.2286797062313,4.638448564538305,"[np.float64(-8.77950621096602), np.float64(9.068392402079098)]",-35.909348772783886,5.4908057552733815,"[np.float64(-12.394814974151888), np.float64(9.209313079477575)]",-62.11289733561488,11.330697744534444,"[np.float64(-21.842842942068316), np.float64(22.644231905533978)]",20.01748246535362,6.717565003897631,"[np.float64(-12.630080427359243), np.float64(13.559832256338412)]",32.935006613127,5.630522040196277,"[np.float64(-9.377233542070357), np.float64(13.038932750477986)]"
vicuna-33b,1105.0461243365712,3.998963974868732,"[np.float64(-7.496173004759612), np.float64(8.477121929582609)]",16.907596245667303,4.641476722862391,"[np.float64(-10.650511054872581), np.float64(7.566405827674748)]",-26.908655070372276,7.862394609028661,"[np.float64(-14.796566672292318), np.float64(16.03120821665752)]",-18.358240192126303,5.912624060161765,"[np.float64(-9.95338716572843), np.float64(12.827634066814966)]",-18.37410334808449,4.914739810119553,"[np.float64(-8.4766509225197), np.float64(10.941634451562539)]"
phi-3-small-8k-instruct,1103.2110118048088,3.5376779289930287,"[np.float64(-6.480599823812554), np.float64(7.202800467516681)]",27.199444491584217,4.87045885546896,"[np.float64(-10.282382536303693), np.float64(8.407861749511902)]",-16.302402706071064,6.4061965437604105,"[np.float64(-12.394999199940411), np.float64(12.813506800150165)]",-4.7402226245595935,6.131837212933475,"[np.float64(-10.151698903019296), np.float64(13.385531953096496)]",13.041834450185972,5.304588267210821,"[np.float64(-8.83265688852788), np.float64(11.650630709735275)]"
openchat-3.5,1101.0301299091864,6.747063177915883,"[np.float64(-12.400508709946052), np.float64(13.821723687306303)]",-5.204182826573839,7.600311844235361,"[np.float64(-16.194737551304545), np.float64(13.363744335419401)]",2.072187168181648,14.164765348317546,"[np.float64(-28.459379925027903), np.float64(26.470288786681053)]",-22.50558176198692,9.065016883094636,"[np.float64(-17.15717239877974), np.float64(19.15596150400305)]",1.6522409847596573,7.317799649378459,"[np.float64(-12.607026887328558), np.float64(15.853922776901394)]"
dbrx-instruct-preview,1100.7359961398254,2.739869874408191,"[np.float64(-4.983755517517238), np.float64(5.7433011980997435)]",25.308819954926427,3.5100187119201824,"[np.float64(-8.29342171888451), np.float64(5.366052916740308)]",-4.08228517434043,4.805538161613813,"[np.float64(-9.447242928943666), np.float64(9.892499815091039)]",16.695324121786854,4.54820228427087,"[np.float64(-7.7679517536148595), np.float64(10.558379273544872)]",4.904951075273545,3.798368674609468,"[np.float64(-5.969806467946047), np.float64(8.68274263309461)]"
yi-34b-chat,1100.7238355755567,4.035818658632629,"[np.float64(-7.407701977559327), np.float64(8.562448806229895)]",36.07224029449352,5.034880494494618,"[np.float64(-12.129911956006339), np.float64(7.7335096550780875)]",94.3155390749361,8.250315258541413,"[np.float64(-16.834568062811), np.float64(16.347674071469186)]",-5.767087011630676,6.199625348795918,"[np.float64(-11.125923703152024), np.float64(12.908494018753597)]",-6.154182172420226,5.579962454374741,"[np.float64(-10.219470331881716), np.float64(12.56313397556445)]"
starling-lm-7b-alpha,1098.590571266095,5.553525951031589,"[np.float64(-10.495000956133254), np.float64(11.315753097178458)]",17.045218528754027,6.357460294873228,"[np.float64(-13.626226537423058), np.float64(11.625996210708177)]",-16.99649415505732,10.02240700187553,"[np.float64(-19.906642760287706), np.float64(19.827615986662515)]",-2.7391667199454037,7.950685396250316,"[np.float64(-13.948470028208744), np.float64(16.388419486020624)]",-11.89718859036402,7.075374332760113,"[np.float64(-13.119733116577144), np.float64(15.045531849217603)]"
gemma-1.1-7b-it,1096.9808513502492,3.048116125860267,"[np.float64(-5.401474443777033), np.float64(6.110460864130346)]",14.350199657166732,4.077358891597979,"[np.float64(-9.06455615765613), np.float64(6.779113627822481)]",-2.074665138009618,5.457192796696644,"[np.float64(-10.300901093247619), np.float64(10.286198305791537)]",3.223500280449098,5.123091706081786,"[np.float64(-9.089403661710152), np.float64(10.901191319206019)]",-9.112551827719646,4.511532687816498,"[np.float64(-6.943430272093908), np.float64(10.212726454406422)]"
pplx-70b-online,1095.021963835251,7.396904848358622,"[np.float64(-15.400292111335602), np.float64(13.418125638631182)]",11.019877952899549,8.28003643617229,"[np.float64(-16.932249933152004), np.float64(16.217337937774513)]",13.185259930397498,15.412900908434324,"[np.float64(-29.240576220759507), np.float64(31.209931850676305)]",-31.675132074618528,9.404183032953108,"[np.float64(-17.35338126921622), np.float64(18.165309302857846)]",-38.53481265164221,8.520990417337345,"[np.float64(-15.199777798568025), np.float64(18.55666130788172)]"
deepseek-llm-67b-chat,1092.4996841287168,8.464244448484626,"[np.float64(-17.582792024007404), np.float64(16.959510646706804)]",0.34869601655917,9.274910466469825,"[np.float64(-19.340910383199247), np.float64(16.844965504152498)]",45.95671727225999,16.966315726590402,"[np.float64(-30.418861927445803), np.float64(34.217864981949575)]",10.297996776835094,11.27135105911461,"[np.float64(-21.16973123518499), np.float64(21.80011608860781)]",-10.283876934912549,10.031509422800443,"[np.float64(-16.930371378754938), np.float64(22.477124310563717)]"
nous-hermes-2-mixtral-8x7b-dpo,1090.487464373319,9.869369198591226,"[np.float64(-18.71572847978223), np.float64(20.502128599446905)]",24.84278848383632,10.77311772859199,"[np.float64(-23.214265788154414), np.float64(18.575053203650985)]",-25.75849583660077,18.580535189781752,"[np.float64(-37.049921664116155), np.float64(35.01341528065676)]",16.935561132831513,11.433410713349929,"[np.float64(-22.265948063102034), np.float64(21.519046908999698)]",-42.4830405314654,9.76584591534844,"[np.float64(-17.347930508501484), np.float64(19.7621217885905)]"
qwen1.5-7b-chat,1086.2058984051657,7.052970258634042,"[np.float64(-13.51331105597842), np.float64(13.663168359924157)]",-4.071561724652349,8.687342491923232,"[np.float64(-17.82777035059229), np.float64(14.84950980349415)]",71.97024373712347,12.280744587157107,"[np.float64(-22.73140669852765), np.float64(25.86247736954698)]",24.22753040194223,10.276658615385095,"[np.float64(-18.48129590525676), np.float64(21.219349371720263)]",-7.9345498240374415,9.037356236528462,"[np.float64(-15.845375329956454), np.float64(18.056614250775773)]"
wizardlm-13b,1082.5628807493504,7.387522547991261,"[np.float64(-14.14709438591467), np.float64(15.188716538959852)]",5.479566599575605,8.198538575716423,"[np.float64(-18.0069053453599), np.float64(13.556889888032728)]",-10.14335337880361,13.748420156359254,"[np.float64(-28.034015257719034), np.float64(25.671661272034655)]",-13.454592828968533,10.589393943546623,"[np.float64(-20.083623577790597), np.float64(21.677217548925093)]",-41.06601957580651,9.059774289013063,"[np.float64(-16.57657919472993), np.float64(17.856137742298017)]"
llama-2-13b-chat,1081.194570866868,4.00445685957165,"[np.float64(-7.709024526035591), np.float64(7.679261477614773)]",11.760184171990309,4.784159705206143,"[np.float64(-10.629532575538713), np.float64(8.179243600592905)]",-58.15489996393533,8.419712844428346,"[np.float64(-15.900271973126692), np.float64(17.43098200558014)]",-8.50066945616004,6.2235035254591935,"[np.float64(-11.026745382366023), np.float64(13.733728166550506)]",-9.867666612042385,5.261838358023082,"[np.float64(-8.304548525364684), np.float64(11.940040496877005)]"
qwen-14b-chat,1081.1674528600427,8.334168129802435,"[np.float64(-15.783970204097614), np.float64(16.348894484911852)]",-35.167143298103184,9.314883512940806,"[np.float64(-18.885726694029437), np.float64(16.64450356633749)]",11.127442069410538,17.186677705116793,"[np.float64(-31.746040965303624), np.float64(35.169874030226644)]",32.63598381182464,11.88233670337462,"[np.float64(-20.989236201579097), np.float64(24.7421440889)]",-17.498437590900398,10.016979226426551,"[np.float64(-19.266774156017135), np.float64(21.098986976234816)]"
vicuna-13b,1077.1888703481493,4.602458949006399,"[np.float64(-8.376669054805689), np.float64(9.243125937426385)]",-15.499066224648878,5.189816481543221,"[np.float64(-10.93401559642897), np.float64(8.912609731020062)]",7.411907045078093,8.740972383770552,"[np.float64(-17.014216594414904), np.float64(16.353954000528034)]",0.6177933687993357,6.515451235192063,"[np.float64(-11.712807924019296), np.float64(13.723283785529286)]",-14.948820045648798,5.582206746995685,"[np.float64(-10.201936355818951), np.float64(12.445352895968526)]"
openhermes-2.5-mistral-7b,1074.698702700591,7.883324429499398,"[np.float64(-14.816663539138744), np.float64(15.605188923468404)]",27.991102066642217,8.732064640472217,"[np.float64(-19.497938482531406), np.float64(15.565746254349616)]",-13.473048234185072,15.60691645128954,"[np.float64(-29.715079742443187), np.float64(30.69640849467126)]",-16.95193549014802,10.81013551898448,"[np.float64(-19.596118306247057), np.float64(22.808817097206695)]",2.9894593580689435,8.95636582188065,"[np.float64(-17.639064244726086), np.float64(17.63193145696512)]"
phi-3-mini-128k-instruct,1071.657111330783,3.283065047264946,"[np.float64(-6.109170126592517), np.float64(6.85169579372382)]",0.4363913648861597,4.262548515414366,"[np.float64(-9.839406789663622), np.float64(6.976025526251922)]",-2.2912710011119604,6.1251282562644604,"[np.float64(-11.86457630380345), np.float64(12.048020995935122)]",-23.45344077784811,5.39480791175015,"[np.float64(-9.195451364462446), np.float64(11.539999239286804)]",0.0072571754650289,4.675413731780484,"[np.float64(-8.093645992980827), np.float64(10.683710348265697)]"
codellama-34b-instruct,1070.3858212151433,6.770114976223381,"[np.float64(-12.837772608791283), np.float64(12.865947979355042)]",-5.029470246527128,7.76212582064765,"[np.float64(-17.31505932146609), np.float64(13.466255101330177)]",-57.10280781314243,15.51767815181997,"[np.float64(-30.454218975302783), np.float64(29.887220816814285)]",4.757856792205663,10.071104961078404,"[np.float64(-18.273991141536307), np.float64(21.091190214970077)]",-13.290022376534235,8.127149964973126,"[np.float64(-13.96086222637362), np.float64(17.514422688284874)]"
phi-3-mini-4k-instruct,1067.5983888167864,3.179780017584995,"[np.float64(-5.6923671945671686), np.float64(6.667427206701177)]",27.63862275988083,4.3228205531979,"[np.float64(-10.00487495721271), np.float64(6.697328732120418)]",-24.07377681569235,6.705886415753114,"[np.float64(-13.1503960577532), np.float64(13.55625415102281)]",10.150661874648309,5.332429251422033,"[np.float64(-9.035437066705361), np.float64(11.897611843573905)]",14.563805707042077,4.9221664173510735,"[np.float64(-8.084246122355061), np.float64(10.538085358034458)]"
solar-10.7b-instruct-v1.0,1063.6726694149295,9.139916700294636,"[np.float64(-17.528740030817517), np.float64(19.09925438659502)]",26.593687115533506,10.249269567509511,"[np.float64(-20.30069728096011), np.float64(18.852145683422762)]",-23.08560777087123,17.333257691506603,"[np.float64(-33.238162648244824), np.float64(33.32089815766615)]",-15.064294098731422,12.336587529659614,"[np.float64(-22.01844998268203), np.float64(26.830192535063524)]",2.4369341154259305,10.23853369824046,"[np.float64(-18.59270296775016), np.float64(20.915728077643852)]"
dolphin-2.2.1-mistral-7b,1059.7792391015405,12.474686947498515,"[np.float64(-24.844553546257885), np.float64(25.14234433189813)]",30.44967646303589,13.379950747616345,"[np.float64(-27.548578466588), np.float64(25.33673391936638)]",15.18452974922337,19.55353197432481,"[np.float64(-37.52997238801295), np.float64(37.02588452438377)]",-31.22092553068278,16.11326189958388,"[np.float64(-28.668362975914086), np.float64(32.1860887279731)]",0.3404418435746458,13.999421965287292,"[np.float64(-23.544075504498988), np.float64(28.349218580627415)]"
vicuna-7b,1058.039652475944,8.41560920247535,"[np.float64(-17.043390540544124), np.float64(16.90730304317117)]",-36.372611556110144,9.281717230434236,"[np.float64(-20.010670706835633), np.float64(15.85960122173961)]",-36.64536608053322,14.297804216558475,"[np.float64(-27.65015927398707), np.float64(28.956024622759514)]",-17.90820216065714,10.55262481675704,"[np.float64(-19.753851774666977), np.float64(20.35228835342557)]",-2.701361945986695,9.082971539678116,"[np.float64(-15.607505162101042), np.float64(18.436001810641354)]"
falcon-180b-chat,1056.3496338580615,15.61422123831672,"[np.float64(-28.235489187405392), np.float64(30.47693950948701)]",3.001075362696963,15.872007690292373,"[np.float64(-32.861108698418924), np.float64(28.25427567453279)]",-22.203219756010697,17.98088405565608,"[np.float64(-34.52854536131509), np.float64(34.69466143932104)]",-4.449517585973962,18.91681609267778,"[np.float64(-35.03137002344118), np.float64(39.904877923555745)]",-24.77278946883953,16.01252558680698,"[np.float64(-29.83162517752973), np.float64(30.484392550737464)]"
mistral-7b-instruct-v0.2,1053.9873384172845,3.80204476587568,"[np.float64(-6.9796306433538575), np.float64(7.782263650137793)]",55.45230604298983,4.743446616645343,"[np.float64(-10.161606798806595), np.float64(7.976399323098036)]",-5.963425025839357,6.980599986869331,"[np.float64(-12.836926928411557), np.float64(14.09672274286828)]",0.0399550558349425,5.747249579692671,"[np.float64(-9.907756358288559), np.float64(13.07538818453701)]",-3.0253783863446726,4.900951145251357,"[np.float64(-9.21503001857921), np.float64(10.560208677011452)]"
zephyr-7b-alpha,1050.666155098179,12.996824105300291,"[np.float64(-26.877150744308665), np.float64(25.3503523083416)]",21.83915082694029,14.024772441893466,"[np.float64(-26.880068636755325), np.float64(27.48627636693218)]",-14.105000896890605,19.15930396637712,"[np.float64(-37.21558929615756), np.float64(38.85196365377341)]",-1.5591813265382028,16.69535144554449,"[np.float64(-31.31630904523652), np.float64(32.48005616303347)]",-18.443335362069725,15.107170759910828,"[np.float64(-26.07075471383645), np.float64(33.43085186467686)]"
zephyr-7b-beta,1049.156779861562,5.905507441691475,"[np.float64(-11.066774098565702), np.float64(11.767895644225518)]",38.55692581472492,6.629019823889078,"[np.float64(-14.699351586234954), np.float64(11.228223812152905)]",-42.30788527833653,12.885151817581388,"[np.float64(-25.062130419445165), np.float64(25.540808410084523)]",-13.296458245310856,8.140574153976377,"[np.float64(-13.925040128463086), np.float64(17.897227577620523)]",-22.18564932932032,6.797388553677196,"[np.float64(-12.408050512169297), np.float64(14.508209540450231)]"
gemma-1.1-2b-it,1044.2682603999913,4.4972748808451,"[np.float64(-8.479577280071453), np.float64(9.627002469539775)]",2.352426023112158,5.601696832031325,"[np.float64(-12.277229561670826), np.float64(9.453154572088042)]",8.893095240780866,8.372290362402179,"[np.float64(-16.170766861682313), np.float64(16.098049540073106)]",24.519485875006897,7.1336582262032255,"[np.float64(-12.661627206934565), np.float64(14.648925781399852)]",-19.427161003459226,6.585660543639783,"[np.float64(-11.81058108077782), np.float64(14.658948480510645)]"
mpt-30b-chat,1040.889626373056,11.039906004969236,"[np.float64(-21.075442460778504), np.float64(21.093452383814792)]",31.248563148542218,12.129418328216204,"[np.float64(-25.38061366471625), np.float64(21.54665377792186)]",-14.487691437608444,19.87487771481841,"[np.float64(-37.56960013886498), np.float64(38.60051071890199)]",-19.830217568566265,15.597861164114985,"[np.float64(-30.863327734363708), np.float64(30.415598083244053)]",12.216299130706831,13.298732122624802,"[np.float64(-23.666228160453496), np.float64(27.93223779611045)]"
codellama-70b-instruct,1038.6296876531576,12.361153516696756,"[np.float64(-23.092782956315887), np.float64(24.802616585052874)]",28.593332180927973,14.485103738987547,"[np.float64(-29.250879812416613), np.float64(26.920701032695415)]",24.911140803491783,18.171502066701755,"[np.float64(-34.62317433834427), np.float64(36.307087261652484)]",-1.0554490257871254,16.731918973662264,"[np.float64(-29.544225039324004), np.float64(34.1661821965557)]",8.225592250390285,15.494098206904509,"[np.float64(-28.713093003860912), np.float64(32.1568239882607)]"
pplx-7b-online,1037.9623333218021,7.995918434523707,"[np.float64(-14.616591171797154), np.float64(15.71083006700519)]",34.64309519544954,9.098680911171904,"[np.float64(-18.851952201853337), np.float64(16.53812336631263)]",21.29503247238764,15.386140673150548,"[np.float64(-27.836292801938185), np.float64(32.74553074316096)]",-21.11899204177015,10.03555465859782,"[np.float64(-19.17788821711217), np.float64(21.165557870923855)]",-17.3371272666952,9.373177193527289,"[np.float64(-15.81229271427237), np.float64(20.065288124616902)]"
llama-2-7b-chat,1036.1455127758754,4.650064155857458,"[np.float64(-8.36205320566296), np.float64(10.274401007722417)]",44.839158915276585,5.404517021074618,"[np.float64(-13.1466342314487), np.float64(8.84609296650845)]",-27.971043021026848,9.357594208276394,"[np.float64(-19.327681449292797), np.float64(18.937399677331843)]",-31.326844862292603,7.129425101781569,"[np.float64(-13.51104968147741), np.float64(14.528428783082923)]",-22.41329746595718,5.859942656881946,"[np.float64(-10.434148183462636), np.float64(12.777755376047516)]"
guanaco-33b,1034.761434127062,11.256171405009749,"[np.float64(-21.704461418499022), np.float64(21.711570304178622)]",29.680577851144164,12.021059559442174,"[np.float64(-24.234325873448356), np.float64(21.963208309933318)]",-17.305929325392004,16.734578965129906,"[np.float64(-31.92970043637385), np.float64(32.88360490025029)]",-52.52080641414874,14.078892607544772,"[np.float64(-27.240896323539967), np.float64(26.412824842981358)]",-12.409169263082129,12.137902642958291,"[np.float64(-22.85196915654468), np.float64(24.634396907983266)]"
gemma-7b-it,1029.2045548608198,5.6752078608549175,"[np.float64(-11.24047878906731), np.float64(11.874540585044315)]",28.054948527447856,6.612216689153507,"[np.float64(-14.10512645382164), np.float64(11.204419972064127)]",37.53774777886909,9.280526500164283,"[np.float64(-16.70582820842774), np.float64(18.75249669177697)]",6.869412940691344,7.868530692836523,"[np.float64(-14.589134959076532), np.float64(15.173306762014285)]",11.224652950747736,7.325460738453973,"[np.float64(-12.685600942300896), np.float64(15.663107674870586)]"
stripedhyena-nous-7b,1027.7893733658645,7.825350774657931,"[np.float64(-13.490357362683653), np.float64(16.08763139627058)]",20.97769635740452,8.743551255065123,"[np.float64(-18.785961840181294), np.float64(15.246281592003186)]",-16.158184668602168,17.583207259777904,"[np.float64(-33.08163598608332), np.float64(34.77377858026243)]",-9.867420254022798,11.156803519740068,"[np.float64(-19.738492994094152), np.float64(22.641600682176474)]",-18.721561624042494,8.85067441702023,"[np.float64(-17.490577852541318), np.float64(17.655358997460226)]"
qwen1.5-4b-chat,1025.911415664198,5.986299515833321,"[np.float64(-10.47905822385087), np.float64(12.903084654604072)]",-22.625236026766192,7.0576699234262215,"[np.float64(-16.038188752102023), np.float64(12.77042132745557)]",35.89483872396798,9.848271128593888,"[np.float64(-19.11700966936958), np.float64(19.450018202555306)]",3.612374663116461,8.63152737100522,"[np.float64(-16.282257289527408), np.float64(17.888731381937113)]",-13.486241822604038,7.674156589758235,"[np.float64(-14.26294110993372), np.float64(15.674053817730877)]"
mistral-7b-instruct,1008.2700641845677,6.339799574426992,"[np.float64(-12.33592249821686), np.float64(12.197160200723829)]",31.75683636019342,7.411121275280059,"[np.float64(-16.167778136541717), np.float64(12.651851864756452)]",-26.218334482609727,13.432574182460998,"[np.float64(-27.8876029466032), np.float64(26.57957216960174)]",0.2111979597931876,8.969624056458542,"[np.float64(-17.765301539326114), np.float64(17.865165489871604)]",-3.72969679105186,7.4995861746353425,"[np.float64(-12.215597602006937), np.float64(16.65308247459398)]"
palm-2,996.9866601513518,7.597127773560904,"[np.float64(-14.080782827957364), np.float64(14.612773482917873)]",43.29727207154819,8.412310015992004,"[np.float64(-17.192133388586022), np.float64(15.437423688515743)]",-69.23088135774631,14.841991556890584,"[np.float64(-29.56588963740421), np.float64(29.36510413154673)]",-18.040853618854324,9.651778745175507,"[np.float64(-17.675240322043148), np.float64(20.227289704992724)]",-0.1862130538219771,8.12311510119366,"[np.float64(-14.598282975397053), np.float64(16.39492394764241)]"
gemma-2b-it,995.4227072942936,7.650569560631834,"[np.float64(-14.068078055960086), np.float64(14.71798454174268)]",19.523027876046385,9.114125534376562,"[np.float64(-19.067987515823418), np.float64(16.454841147956085)]",32.57895225780721,12.715519052773434,"[np.float64(-24.765569286618224), np.float64(25.69206579675127)]",6.964986400500711,11.351380189634458,"[np.float64(-20.377551845197335), np.float64(23.218937664556673)]",-7.204229719976517,9.484254325475677,"[np.float64(-18.492567469835066), np.float64(18.033761838226123)]"
olmo-7b-instruct,994.772862190533,6.825464613028152,"[np.float64(-13.375983397043683), np.float64(13.376186117479165)]",58.94543604871258,7.798083195155859,"[np.float64(-16.37628102373789), np.float64(14.32113752162546)]",54.279323723654166,11.672904346432292,"[np.float64(-21.956621867175166), np.float64(23.12928302695805)]",8.124733179002495,10.453302002110206,"[np.float64(-20.04200600800216), np.float64(20.847368063845238)]",-29.74576575548781,8.652725978302177,"[np.float64(-15.108544323562361), np.float64(17.81178999575345)]"
RWKV-4-Raven-14B,971.4897365668874,9.56928951665897,"[np.float64(-19.47058921893722), np.float64(18.99070477293253)]",-30.04851570427233,10.406795612300158,"[np.float64(-21.92860328931961), np.float64(18.80369424378644)]",-27.569079340605967,14.959827784844396,"[np.float64(-29.46132144184746), np.float64(29.622744444062576)]",-7.269686653981167,11.632694350405906,"[np.float64(-21.682829527083353), np.float64(23.815441468661213)]",-22.808463669875948,10.441214468717405,"[np.float64(-18.653213153491887), np.float64(22.358422102729982)]"
koala-13b,966.8925367204581,8.863594662551838,"[np.float64(-17.743231867967552), np.float64(17.837321421965953)]",31.18768644749332,9.506785045148806,"[np.float64(-20.54737994143138), np.float64(17.714271813017028)]",-43.79363431661037,12.74247021005573,"[np.float64(-24.3848730699382), np.float64(25.205399753892785)]",-4.552841490150045,10.198477710851018,"[np.float64(-20.276952506363575), np.float64(20.149193734450684)]",-35.95961166107503,9.01378823810261,"[np.float64(-15.948411280250852), np.float64(18.096659953971823)]"
alpaca-13b,955.4745919381228,9.90749041205145,"[np.float64(-16.983668543484555), np.float64(20.635127614153816)]",-11.263047124125505,10.777851262780615,"[np.float64(-23.432662112596283), np.float64(18.678365738742073)]",-96.4665747495864,14.467207312598186,"[np.float64(-28.5118945650279), np.float64(27.97736804510673)]",-78.19774634427408,12.584651667257727,"[np.float64(-22.717948815723744), np.float64(26.507399495966013)]",-62.447851158138086,9.720205601250868,"[np.float64(-16.625883987648614), np.float64(19.932621833521416)]"
chatglm3-6b,945.6706371180733,8.686366908160918,"[np.float64(-16.756207626823652), np.float64(16.70673796443657)]",32.55430503977118,9.580365013980757,"[np.float64(-19.842729572378687), np.float64(17.676623257571222)]",112.74168316483951,15.890069107322798,"[np.float64(-32.12776092063835), np.float64(29.975115125756744)]",-6.734080883065393,12.25903420784541,"[np.float64(-22.789287520773588), np.float64(24.270266385321925)]",3.5061439945000585,10.029508545467351,"[np.float64(-18.539422670692797), np.float64(21.178349527000787)]"
mpt-7b-chat,944.1991746646124,10.766593023102146,"[np.float64(-19.967913092431445), np.float64(21.50037105960905)]",8.152120082017458,11.62942856919747,"[np.float64(-24.801427451054455), np.float64(22.07648551691228)]",37.392594015207024,15.790341507810115,"[np.float64(-30.171883810000935), np.float64(30.245420833188643)]",-6.941572976273302,15.057189108059418,"[np.float64(-26.404345548370316), np.float64(30.986836766968604)]",-25.208389735114896,11.481625056292977,"[np.float64(-23.644573274281047), np.float64(23.373332686413757)]"
chatglm2-6b,930.2610007312096,10.596632366083098,"[np.float64(-18.827296272806052), np.float64(20.903404167322833)]",30.20960527593591,11.798170348598935,"[np.float64(-24.24053671857708), np.float64(22.074233102391176)]",66.9857475791915,19.85581657569613,"[np.float64(-37.66616741240229), np.float64(42.019242340055825)]",-37.76758664194029,15.118971611563689,"[np.float64(-27.678218438016998), np.float64(29.941615010675918)]",-2.3444956865076083,13.828272149216373,"[np.float64(-25.40874958322157), np.float64(28.836790492438496)]"
gpt4all-13b-snoozy,924.3764295545761,13.165592517256693,"[np.float64(-26.33587811438815), np.float64(25.812130082691738)]",38.63598602062527,14.075398631555345,"[np.float64(-27.434495328435176), np.float64(27.770284564612812)]",-7.990080030156926,16.59237252618251,"[np.float64(-31.938082855364232), np.float64(30.942945802365138)]",-20.81083989083799,17.035939471199878,"[np.float64(-32.65910515768355), np.float64(32.528177837367814)]",9.535282510797918,13.970290919744802,"[np.float64(-25.51176188669865), np.float64(27.960717571184354)]"
oasst-pythia-12b,911.546994824344,9.088848724841494,"[np.float64(-17.795651808974753), np.float64(18.12652618044035)]",10.014042151308065,9.647261713184427,"[np.float64(-20.43486893417233), np.float64(17.471118712403875)]",-62.931453909193706,13.040151240094826,"[np.float64(-25.143235266030473), np.float64(26.10516234750257)]",-12.230824919882725,11.19857515396876,"[np.float64(-20.558998072728677), np.float64(24.725420025896426)]",-4.336864377246117,9.82401635011034,"[np.float64(-19.48915207741863), np.float64(20.091213458647346)]"
fastchat-t5-3b,878.9077415727118,10.75208347146718,"[np.float64(-20.25668481866046), np.float64(22.211877052569207)]",41.686531615979874,11.194702991420154,"[np.float64(-22.91377412970091), np.float64(20.21517344841218)]",-108.06887460981712,14.072744965179227,"[np.float64(-26.650930791319226), np.float64(28.39093106267599)]",-90.22055955993991,13.04289539233172,"[np.float64(-24.619476613694346), np.float64(27.80909701277517)]",-35.96720513187071,10.979559189221485,"[np.float64(-19.776491805122813), np.float64(23.279403693075174)]"
chatglm-6b,873.7974645403858,10.243025856870595,"[np.float64(-19.044641259235277), np.float64(20.267540300902965)]",11.157811755882998,10.806050858121832,"[np.float64(-22.703029050924897), np.float64(19.283603496471077)]",199.2920010647928,14.90999122037267,"[np.float64(-29.095394993644305), np.float64(29.48552341057936)]",10.943954093078206,12.104186211886828,"[np.float64(-23.863689651051203), np.float64(23.477318971763403)]",18.308826013273844,10.816223031336245,"[np.float64(-20.538856391174207), np.float64(20.771180558257054)]"
dolly-v2-12b,856.4664997826483,11.88003227962277,"[np.float64(-23.999775292278855), np.float64(23.149664473448183)]",-15.362899965472256,12.488692590537564,"[np.float64(-26.39569503657472), np.float64(22.801770121201926)]",7.537752771040457,15.775123687191336,"[np.float64(-31.29953261382352), np.float64(30.738502653636623)]",-58.36869789041042,13.780610989766805,"[np.float64(-24.985172829585267), np.float64(29.57040535181327)]",4.269368557051646,12.396588098718722,"[np.float64(-22.555802664115223), np.float64(25.515931742157054)]"
llama-13b,853.0923121034634,12.840196119160025,"[np.float64(-25.52832177893447), np.float64(25.6790984709653)]",-26.19324784285037,13.550493016006902,"[np.float64(-27.327800091739853), np.float64(25.356038798235762)]",-8.252073762648571,15.639323359673108,"[np.float64(-27.94810573984186), np.float64(32.759469482774)]",-88.77123476716297,14.794315008173918,"[np.float64(-26.400560147087326), np.float64(31.768075649833705)]",-35.73391872192578,14.522984759365686,"[np.float64(-27.542573197680625), np.float64(28.78041402367102)]"
stablelm-tuned-alpha-7b,842.6840112823909,11.122807811915637,"[np.float64(-21.448203872705108), np.float64(21.02124704963694)]",18.64425490270281,11.767277405561336,"[np.float64(-23.304161952775647), np.float64(22.828090339654835)]",30.43960897676234,15.087618075736026,"[np.float64(-28.616608619766996), np.float64(29.078960866992063)]",31.910515414758,13.098979379641044,"[np.float64(-24.325210112382177), np.float64(27.17313063874741)]",-13.421141293632596,11.664331805732235,"[np.float64(-21.34793067766429), np.float64(22.779817534009283)]"
