model,Rating,Rating std,Rating alpha,is_chinese Rating,is_chinese Rating std,is_chinese Rating alpha,is_code Rating,is_code Rating std,is_code Rating alpha,is_english Rating,is_english Rating std,is_english Rating alpha,is_hard Rating,is_hard Rating std,is_hard Rating alpha
gpt-4o-2024-05-13,1296.5964790725538,2.2415334925568247,"[np.float64(-4.247913685577942), np.float64(4.450712579716992)]",-3.202442298675295,4.439760539652228,"[np.float64(-8.170734076588584), np.float64(8.784771314978816)]",15.460074003059937,4.086747074237233,"[np.float64(-5.681621433316192), np.float64(10.353901762928654)]",-12.722240748916793,3.7902309292311824,"[np.float64(-10.308140890761448), np.float64(4.277555373401746)]",13.40388101587258,3.8480908390331194,"[np.float64(-4.796741290684734), np.float64(10.048035240618939)]"
claude-3-5-sonnet-20240620,1285.8415268386225,3.6317123938961657,"[np.float64(-7.372323521044336), np.float64(6.7491964797948185)]",-19.31571325746969,7.033328958675472,"[np.float64(-12.932342822336057), np.float64(14.993133010694265)]",43.763468623661964,6.426263852109224,"[np.float64(-10.573916317867045), np.float64(14.87458957774301)]",-29.13808020539193,5.475863658636105,"[np.float64(-13.923565889480278), np.float64(8.219989095472464)]",13.80821515097738,5.614417583635183,"[np.float64(-8.470082901328949), np.float64(13.590989810000597)]"
gemini-advanced-0514,1285.0907351859937,2.5035687484378473,"[np.float64(-5.069409957758808), np.float64(4.528497969252612)]",7.538918389841823,4.843352061166345,"[np.float64(-9.42766267910297), np.float64(9.181723494140414)]",2.0693168910370874,4.6795427405996755,"[np.float64(-6.896519149830872), np.float64(11.184789223494551)]",-25.9875274007753,4.104195719375006,"[np.float64(-10.668738672999694), np.float64(4.547849252023251)]",2.9036170001804433,4.0577611846903725,"[np.float64(-5.585979894919719), np.float64(10.270769670261554)]"
gemini-1.5-pro-api-0514,1273.4158216015676,2.410161008964023,"[np.float64(-4.467839089781592), np.float64(4.800708068360109)]",18.96379605552419,4.716586428708939,"[np.float64(-8.57911756345656), np.float64(9.994081243430923)]",10.958201169810193,4.4441072634632,"[np.float64(-5.834971197344251), np.float64(11.274047505951392)]",-19.652567875734395,4.062854912752703,"[np.float64(-10.464143585055997), np.float64(4.755857645555974)]",15.203558007362856,3.9928297209740515,"[np.float64(-4.786279397776964), np.float64(10.046440244160689)]"
claude-3-opus-20240229,1272.83286716387,1.50066102997193,"[np.float64(-2.718042936056918), np.float64(2.981228425855761)]",0.6555769350331458,2.7942198905298694,"[np.float64(-5.112964750433674), np.float64(5.951264765699963)]",11.518603612587716,3.166710166073418,"[np.float64(-3.7087630067470787), np.float64(8.390478552507748)]",-39.146776296624246,3.191972249349644,"[np.float64(-9.2489703816603), np.float64(2.536255865912466)]",15.213715539238809,2.9831085951496634,"[np.float64(-2.8726677686345283), np.float64(8.382390870877275)]"
bard-jan-24-gemini-pro,1271.1681309587366,6.398341811164231,"[np.float64(-12.591579668004897), np.float64(12.384940759666279)]",-24.97364875680349,13.097669186954448,"[np.float64(-25.76288293029995), np.float64(25.634231061325423)]",-10.769904374932837,8.252821139798975,"[np.float64(-13.447849961385849), np.float64(18.991240009855602)]",-48.13066992494291,7.457404516948873,"[np.float64(-16.917367823861674), np.float64(11.728415919632702)]",-45.26718868110007,7.222113419447022,"[np.float64(-11.670578889264434), np.float64(16.854846548203533)]"
gpt-4-1106-preview,1264.6180657259656,1.9595777183349092,"[np.float64(-4.0438186488847805), np.float64(3.6604679991537523)]",-3.022771894020819,3.832341206819936,"[np.float64(-7.163910501910774), np.float64(7.970409378364083)]",11.403962424940588,3.5904882617799925,"[np.float64(-4.842257010954295), np.float64(9.328824175264417)]",-11.10158230387015,3.478719642654102,"[np.float64(-9.68109825352057), np.float64(3.4814266904653763)]",8.99160095741375,3.2568924864983457,"[np.float64(-3.4892669843198156), np.float64(8.733488215565329)]"
gemini-1.5-pro-api-0409-preview,1264.1297764866235,2.3324911034859457,"[np.float64(-4.4148123314435), np.float64(4.62427102678862)]",4.690909704211835,4.207111833398503,"[np.float64(-8.473200739458578), np.float64(7.87302358254022)]",-11.768368645338438,4.0086137829140895,"[np.float64(-5.453641978778336), np.float64(10.800536323762906)]",-2.1217695359531468,3.7613683101526294,"[np.float64(-10.07386423403217), np.float64(4.078353510260188)]",-4.142253195770268,3.8476954317450676,"[np.float64(-4.764387764437561), np.float64(9.774559427640623)]"
gpt-4-turbo-2024-04-09,1257.7040033511375,1.863248552125959,"[np.float64(-3.4464485992350546), np.float64(3.817382495992206)]",3.765715850363598,3.657133128390539,"[np.float64(-6.5550013888086625), np.float64(7.430763778160418)]",15.087807804662136,3.885474404591617,"[np.float64(-4.939040312770912), np.float64(9.582200610339964)]",3.0128328101708615,3.5929618167608273,"[np.float64(-9.807423747217767), np.float64(3.616524923326035)]",11.13562570781905,3.3224855495369137,"[np.float64(-3.76439269027955), np.float64(8.915206805281475)]"
gpt-4-0125-preview,1254.8088196009767,1.927040534259937,"[np.float64(-3.646981476449355), np.float64(3.636441746490391)]",1.3623540806159506,3.3979845274201557,"[np.float64(-6.098165569344504), np.float64(7.257950054049061)]",2.3750922384666637,3.651315466532448,"[np.float64(-4.680776878841868), np.float64(9.529890131661581)]",-5.3295300419024905,3.503893839446712,"[np.float64(-9.561616718412292), np.float64(3.4577162984966616)]",13.997974121917771,3.4080078886288585,"[np.float64(-3.8582557319587103), np.float64(9.029509483775161)]"
gemini-1.5-flash-api-0514,1243.0982536459587,2.4279601758707163,"[np.float64(-4.515302244503118), np.float64(4.659486133829205)]",8.122593569971286,4.965416176727236,"[np.float64(-9.70806913127884), np.float64(9.525479315700943)]",14.889738880742751,4.3329635409244105,"[np.float64(-6.682808994436577), np.float64(10.810666230568764)]",-21.120562751222486,3.956933038759403,"[np.float64(-10.543499579759793), np.float64(4.502647934238418)]",9.34433207981215,4.041576511882164,"[np.float64(-5.4708396611138514), np.float64(10.44390873344367)]"
yi-large-preview,1237.1027506619544,2.200929164794468,"[np.float64(-4.377711017810498), np.float64(4.377048468707244)]",37.369537684624405,4.346281473169023,"[np.float64(-8.59362682977143), np.float64(8.75773911124137)]",11.37826363321874,4.243212990961121,"[np.float64(-5.777044602051635), np.float64(11.033715364739617)]",-3.5010550450157374,3.8812150592994645,"[np.float64(-10.644048847721889), np.float64(4.106961547918564)]",16.67473055144067,3.883677258104441,"[np.float64(-4.843612771926562), np.float64(10.464301048169277)]"
gemma-2-27b-it,1232.3987249375868,5.904804293115146,"[np.float64(-11.392496866374131), np.float64(11.671871061337015)]",-0.7614849375929321,10.97621191609611,"[np.float64(-19.628010666735953), np.float64(23.17740857935165)]",7.081595774962447,9.01504071955112,"[np.float64(-15.662582968972373), np.float64(19.894606756833433)]",-10.21049830818878,7.699347358281646,"[np.float64(-17.601291472989548), np.float64(12.166375569559603)]",-14.566853295710924,8.150076510766233,"[np.float64(-13.619878012073944), np.float64(18.026028712107934)]"
yi-large,1221.986790129421,4.208687044299393,"[np.float64(-7.698365754849419), np.float64(8.752021662895913)]",8.514261907749907,8.425150183592983,"[np.float64(-16.537707673787544), np.float64(17.86898899718941)]",20.520926518009325,7.810470048972191,"[np.float64(-12.468983990787564), np.float64(18.2287284566808)]",-3.304895989685192,6.1757716760495756,"[np.float64(-15.451707698694639), np.float64(8.485749887249636)]",11.006355286147587,6.757648329825329,"[np.float64(-10.404996998026116), np.float64(15.350393116138603)]"
nemotron-4-340b-instruct,1221.542012620363,3.568346535978813,"[np.float64(-7.226058786235171), np.float64(6.5003168917583025)]",7.45002129470337,6.694018095249338,"[np.float64(-12.437217069001342), np.float64(13.890373579788957)]",-6.142333290069301,6.279552126696585,"[np.float64(-9.41098990401461), np.float64(14.707334070768846)]",-13.151174079568305,5.194915108079065,"[np.float64(-13.128343263989782), np.float64(7.574305340732009)]",9.111377781954864,5.5951219955314,"[np.float64(-8.477974499734035), np.float64(13.243721790025457)]"
claude-3-sonnet-20240229,1219.9661333376998,1.6848318470478663,"[np.float64(-3.2355326274912386), np.float64(3.334306192679378)]",-14.72467104133045,2.9427684671575776,"[np.float64(-5.466332341637218), np.float64(5.915150311765721)]",26.38643014815442,3.391121082039829,"[np.float64(-3.7186328867658034), np.float64(9.197279711551708)]",-26.154661490062875,3.3002291076565506,"[np.float64(-9.448666459528436), np.float64(2.7921992348650093)]",6.48060942239388,3.1637590157885023,"[np.float64(-3.635311982638197), np.float64(8.339045732335178)]"
command-r-plus,1213.6816582520341,1.8024917249095083,"[np.float64(-3.507632606741936), np.float64(3.8999930272668735)]",5.670137755754955,3.3755702737070474,"[np.float64(-6.378896617331168), np.float64(6.748114431280805)]",-13.73432328431094,3.493324103245534,"[np.float64(-4.3553114631518355), np.float64(9.330834114362213)]",-17.706034841574994,3.4621697030851704,"[np.float64(-9.443842604986425), np.float64(3.3730928194788454)]",-6.527131924231143,3.3761521687665828,"[np.float64(-3.7284694821958375), np.float64(8.95610130632025)]"
gpt-4-0314,1213.4085467040895,2.3227634815734164,"[np.float64(-4.512539627247179), np.float64(4.531121241260962)]",-13.365744949094061,4.243265145048824,"[np.float64(-7.922825042454192), np.float64(8.719659789538493)]",8.690204747900271,4.139267843441569,"[np.float64(-4.931137747738294), np.float64(10.46021245389459)]",-29.243818599603948,3.8096364858502896,"[np.float64(-10.438286055651627), np.float64(4.314559003302719)]",22.961615916270315,3.73308728319981,"[np.float64(-4.633521162274576), np.float64(9.686327198447948)]"
reka-core-20240501,1211.583060673578,1.9385720970835567,"[np.float64(-3.8699863148253826), np.float64(3.6028383147502154)]",8.713328114687819,3.978832943188897,"[np.float64(-7.622607785682895), np.float64(8.132451582943736)]",-1.5531426625698066,3.836166087972497,"[np.float64(-4.643141894056775), np.float64(10.360556678620314)]",-11.107284128011328,3.5631588306012922,"[np.float64(-9.767159075390227), np.float64(3.6567548949685467)]",5.514489388790675,3.5439706270622926,"[np.float64(-4.7054375105300466), np.float64(9.330512553491854)]"
claude-3-haiku-20240307,1209.241535119094,1.7500767942056095,"[np.float64(-3.4301979413287427), np.float64(3.493118459836978)]",-35.85456330054247,2.9003357638033767,"[np.float64(-5.204571071610136), np.float64(6.031939389505105)]",16.344263858780078,3.2820918376817247,"[np.float64(-3.5866950240422106), np.float64(9.035647354364897)]",-29.68143062796953,3.3629806399666786,"[np.float64(-9.159367731143096), np.float64(3.24085887214461)]",9.87051835357817,3.162713136533897,"[np.float64(-3.3723870653602965), np.float64(8.377684356677165)]"
gemma-2-9b-it,1203.7231940768716,5.587855777672399,"[np.float64(-10.732068679897111), np.float64(10.978024300267634)]",-0.9604763976589503,10.764493280262664,"[np.float64(-22.083910910426155), np.float64(21.35943630369801)]",-13.510087252439451,9.265773954425441,"[np.float64(-16.25350889103288), np.float64(20.273894063877957)]",-17.070333575249812,7.646462149714238,"[np.float64(-17.53408105255256), np.float64(12.615903967300987)]",3.358249579384403,8.007094839056737,"[np.float64(-12.580389906727225), np.float64(18.596095975545836)]"
glm-4-0520,1201.5047317859967,4.905782437509902,"[np.float64(-9.567714584157557), np.float64(9.95201877868135)]",48.62713812118381,9.210212444847635,"[np.float64(-17.657742626506895), np.float64(17.84438505359788)]",11.99592835594823,7.840088484772677,"[np.float64(-12.45710068490061), np.float64(17.35282161223894)]",7.690070305685692,6.6733768941412785,"[np.float64(-15.796725484304803), np.float64(10.844043166934714)]",18.963421414084973,6.945909261967087,"[np.float64(-11.118936283677662), np.float64(15.70630466469081)]"
gpt-4-0613,1190.926935116252,1.9214132383517455,"[np.float64(-3.7294393027700607), np.float64(3.827745862054144)]",-40.696412635377634,3.568960766393895,"[np.float64(-6.512429988571576), np.float64(7.581524011572149)]",1.4955383326290905,3.587611629891626,"[np.float64(-4.165336651096988), np.float64(9.515690858558369)]",-22.14608353797701,3.5430844279459768,"[np.float64(-9.452183532701396), np.float64(3.620260143649933)]",17.663555367844467,3.2809852478494648,"[np.float64(-3.73308939554758), np.float64(8.471276156205413)]"
claude-1,1190.2368418550554,4.321652569853277,"[np.float64(-8.235706753027443), np.float64(8.315490735913272)]",-18.401150007001917,8.867309659951301,"[np.float64(-16.996019029419756), np.float64(16.911606454542074)]",3.743612034748379,6.246185895388048,"[np.float64(-9.404669747250994), np.float64(14.797809421484931)]",-29.734766464372697,5.686228926285095,"[np.float64(-14.493533259034121), np.float64(7.925436768958221)]",-18.948638220673896,5.444274816805506,"[np.float64(-7.615773682848818), np.float64(13.289473808996402)]"
reka-flash-preview-20240611,1187.6577932525233,3.9155735726091674,"[np.float64(-7.566720679803211), np.float64(7.675779245801095)]",-4.692616761238898,7.537126733258233,"[np.float64(-14.954498753745273), np.float64(14.145542647774363)]",7.2281379087734985,6.729946825415923,"[np.float64(-10.550208859086176), np.float64(15.395770126960098)]",-15.126671655961816,5.534422128824568,"[np.float64(-13.787181082131859), np.float64(7.344356726974331)]",-9.935700824163279,5.87447214038622,"[np.float64(-8.978137443534411), np.float64(13.779373233630727)]"
llama-3-70b-instruct,1186.7743361933071,1.4760041215305528,"[np.float64(-2.676472339443535), np.float64(3.0107981153330456)]",-50.89857136585173,2.7705447905129392,"[np.float64(-5.039217341868429), np.float64(5.705959655992693)]",-9.642696905502396,3.0952799061774745,"[np.float64(-3.3440664405940765), np.float64(8.585200176809446)]",67.34525830918865,3.0831397016192787,"[np.float64(-8.969095728994745), np.float64(2.309377035165525)]",-0.9072789353874494,2.8655932346511097,"[np.float64(-2.529917134184725), np.float64(7.824739257865716)]"
qwen-max-0428,1186.766062876819,2.649636497579791,"[np.float64(-5.069326973243278), np.float64(5.380749712520583)]",62.428259733544934,6.1741463717038805,"[np.float64(-11.309970990643833), np.float64(12.863152363494244)]",7.97580228285511,5.141502736812999,"[np.float64(-7.904713029572932), np.float64(12.283381492150022)]",3.3923689300239728,4.4458413228775155,"[np.float64(-11.581634042835034), np.float64(5.40673493571514)]",11.073258261361156,4.827821122315313,"[np.float64(-7.122944023145736), np.float64(11.155366944286513)]"
qwen2-72b-instruct,1181.910277798138,3.004114711033279,"[np.float64(-6.0407276900102715), np.float64(6.001220862317496)]",68.48740638852831,5.848758001924165,"[np.float64(-11.039721149673738), np.float64(11.928371265635661)]",-3.3766830144652933,5.40856709630486,"[np.float64(-7.606793170518397), np.float64(13.430968034975425)]",9.143976426657568,4.479211690030585,"[np.float64(-12.888914400759356), np.float64(5.443631743898072)]",13.44659447280741,4.81819072473617,"[np.float64(-6.794788385914421), np.float64(11.643517002233528)]"
gemini-pro-dev-api,1181.5370922088268,3.921011803571061,"[np.float64(-7.208350703247788), np.float64(8.13761688809268)]",-23.003764837994922,7.371310396544503,"[np.float64(-14.039820112918068), np.float64(15.063774692832034)]",-22.642677977412205,6.070957360674889,"[np.float64(-9.249062194431927), np.float64(14.309044549238017)]",-37.76189040263807,5.291760702948955,"[np.float64(-13.59481193127792), np.float64(7.224657254989452)]",-10.65971875462013,5.568490109830499,"[np.float64(-8.677107151635166), np.float64(13.313925779900007)]"
deepseek-coder-v2,1180.7271080491228,4.785202869200118,"[np.float64(-9.253368882515815), np.float64(9.36735503885302)]",15.988601125505573,8.741221168337544,"[np.float64(-17.488425197463172), np.float64(17.27510951590671)]",63.754468842948114,7.7272500227971594,"[np.float64(-11.513941733655074), np.float64(18.107979261626937)]",-34.09597501769337,6.492522421699627,"[np.float64(-15.641880003631446), np.float64(9.628056143953756)]",42.70017273898778,6.629190500121971,"[np.float64(-10.964787492211777), np.float64(15.540194220742038)]"
reka-flash-21b-20240226-online,1175.869934337482,3.6430389170067548,"[np.float64(-7.068064448188807), np.float64(6.889138667379484)]",-9.61825888353403,6.421813986166081,"[np.float64(-12.141933941933146), np.float64(13.310487142525966)]",0.9218860242371923,5.945482115885739,"[np.float64(-9.430169471669933), np.float64(13.44243325095746)]",-11.551729954198914,5.24954375426209,"[np.float64(-13.293048969498136), np.float64(6.870230832077697)]",-2.1736283245451395,5.473175869578267,"[np.float64(-8.065025998283456), np.float64(13.025394218291721)]"
command-r,1174.9816167322833,2.078761434415879,"[np.float64(-3.925211162080359), np.float64(4.180864030926159)]",12.17070802139847,3.661075864223399,"[np.float64(-6.799822019901333), np.float64(7.511630653437931)]",-8.622440572582661,4.035113632758069,"[np.float64(-5.33355424775101), np.float64(10.580347930461762)]",-14.611841928317146,3.6730847718953776,"[np.float64(-9.920672762998878), np.float64(3.9460037449503123)]",-23.323188082537737,3.661181605773428,"[np.float64(-4.6173880485790235), np.float64(9.270298783290674)]"
reka-flash-21b-20240226,1170.2105696056515,2.8309638058978983,"[np.float64(-5.208891159079258), np.float64(5.569011843486123)]",-12.307339582271991,5.571551648920231,"[np.float64(-11.723269921631632), np.float64(10.14210560058165)]",4.882854846439944,4.827644493266471,"[np.float64(-7.22241932897556), np.float64(11.48478830870809)]",-15.55027078589163,4.403987379748476,"[np.float64(-11.724923782180099), np.float64(5.237927633355859)]",-6.1806706044971325,4.628706573697374,"[np.float64(-6.38865861907375), np.float64(11.695342844404214)]"
claude-2.0,1163.8326518012163,5.37431717594822,"[np.float64(-10.431992484914872), np.float64(10.835084406440728)]",-7.8427868342555085,11.995732718401193,"[np.float64(-23.067140877290804), np.float64(24.128779430922375)]",11.66406121965312,7.861177971577695,"[np.float64(-13.021028189371403), np.float64(17.642492277246536)]",-25.596500178001815,6.4569082461063765,"[np.float64(-15.844990133078944), np.float64(9.864658903208582)]",2.883105137392908,6.688123708404055,"[np.float64(-10.943897272119877), np.float64(15.144549563427253)]"
mistral-large-2402,1163.1155391115408,2.0643160728511263,"[np.float64(-4.205368842468488), np.float64(4.018411391082054)]",-32.10906722772737,3.8218123435873586,"[np.float64(-7.0833113430680115), np.float64(7.71826404694562)]",9.688847911794388,3.8439748079421627,"[np.float64(-5.252632278362471), np.float64(9.895617456470502)]",2.075709074914838,3.7002402400366368,"[np.float64(-10.049241501862285), np.float64(3.7954003395694498)]",20.396592897722638,3.461772854912288,"[np.float64(-4.24639889441988), np.float64(8.97396846262134)]"
gpt-3.5-turbo-0314,1162.0944632429487,10.756464602093905,"[np.float64(-21.00042646818133), np.float64(19.467203912330206)]",9.330512059053891,16.156414773894955,"[np.float64(-32.4125472310329), np.float64(31.933614493674366)]",14.430992879887798,12.780715639064372,"[np.float64(-21.863272115492727), np.float64(27.548446503164058)]",-56.302341564966156,11.479518103484429,"[np.float64(-25.045276124772446), np.float64(19.34263039237554)]",22.495024940046395,10.58756737279936,"[np.float64(-18.165921459072653), np.float64(22.745725917284524)]"
qwen1.5-110b-chat,1161.424739045549,2.5737646513753742,"[np.float64(-4.989359513570662), np.float64(4.8974836125064485)]",58.32743664242218,5.455885553225559,"[np.float64(-10.530141157986328), np.float64(11.094417747474047)]",10.857305374474656,5.105141056338187,"[np.float64(-7.801993902357996), np.float64(12.46753527560147)]",11.88211711681554,4.006529357934848,"[np.float64(-11.000703535246274), np.float64(4.923165754861862)]",9.66169431512001,4.608566020080501,"[np.float64(-7.24481590262438), np.float64(10.779718680249648)]"
gpt-3.5-turbo-0613,1160.994556931878,3.148052419922541,"[np.float64(-6.272456195404857), np.float64(6.198447924927223)]",-36.8556117772359,7.797158266958209,"[np.float64(-14.49757167289864), np.float64(16.222039738095454)]",21.255480154377675,5.003968666256019,"[np.float64(-7.886364080944379), np.float64(12.068096490269086)]",-40.93931574192706,4.216662522953708,"[np.float64(-11.25259372080728), np.float64(4.905608576930852)]",13.19746589861124,4.236408578627246,"[np.float64(-5.968619938204002), np.float64(10.500070766657824)]"
claude-2.1,1156.2493551354687,3.015262713229979,"[np.float64(-5.670107735851616), np.float64(6.278153269715176)]",-45.343980875766434,5.994501650027561,"[np.float64(-11.815775524911466), np.float64(12.225765785012882)]",21.719689448854513,4.786097441921554,"[np.float64(-6.939554223744171), np.float64(11.792008316785243)]",-39.715297884513475,4.396966599258975,"[np.float64(-11.648772815307105), np.float64(5.281750147335046)]",10.297480316191564,4.324143362753592,"[np.float64(-5.29886918682954), np.float64(10.839427683381933)]"
mistral-next,1153.434964537645,5.217817461568683,"[np.float64(-10.642044412399628), np.float64(10.981311463756356)]",-49.66904457970826,10.49799377255734,"[np.float64(-19.446303281619244), np.float64(20.18259848027278)]",7.626697193317165,7.870847474171693,"[np.float64(-13.219191380770384), np.float64(17.66831809209304)]",-20.187084516321622,6.438049202925447,"[np.float64(-15.211139930934042), np.float64(9.688530614166737)]",14.17063837906638,6.499504271471215,"[np.float64(-10.392188950595473), np.float64(14.913496988809248)]"
mistral-medium,1152.6263079727714,3.0054451187003655,"[np.float64(-6.004802049416867), np.float64(6.126067248456593)]",-24.144856681857416,5.693033398122636,"[np.float64(-10.651165033809553), np.float64(11.132215629259267)]",10.516948079016275,5.020061866064139,"[np.float64(-7.189347478240956), np.float64(13.151455906363033)]",8.578270512346105,4.321074768189053,"[np.float64(-11.441390019476227), np.float64(5.165616228125163)]",4.82206096998789,4.36576364116829,"[np.float64(-5.770668393713542), np.float64(11.364973931515394)]"
mixtral-8x22b-instruct-v0.1,1152.2020931306643,2.2850978124428862,"[np.float64(-4.5024639894722895), np.float64(4.86579287611994)]",-6.891246187176111,4.529564675221675,"[np.float64(-8.985819423850748), np.float64(8.618539626423928)]",8.359286790213796,4.36111659416828,"[np.float64(-5.911323528088269), np.float64(10.889070249537998)]",5.437339962123975,3.848553000616418,"[np.float64(-10.321320287915356), np.float64(4.25962085091873)]",11.510372696021589,3.9330480684827336,"[np.float64(-4.641098026643542), np.float64(10.274789150332591)]"
llama-3-8b-instruct,1149.921870889686,1.729890511736415,"[np.float64(-3.426859433057416), np.float64(3.49745699526693)]",-41.41793556754733,3.285188887161278,"[np.float64(-5.969480579718372), np.float64(6.907344713084825)]",-4.406018161920126,3.5024279123341633,"[np.float64(-4.129271451285532), np.float64(8.680935728999195)]",47.854916621255136,3.329317218566492,"[np.float64(-9.335556214146443), np.float64(3.288569420525043)]",-16.13838623547589,3.2153093177253993,"[np.float64(-3.4260697537671234), np.float64(8.453400374908814)]"
glm-4-0116,1148.510781349938,5.186154380536958,"[np.float64(-10.743275424735884), np.float64(9.248815969281168)]",76.10765331551858,10.108677265574778,"[np.float64(-18.69693146580721), np.float64(19.51731180048101)]",11.685867141572894,8.507930853139653,"[np.float64(-13.98188369260703), np.float64(18.46553976104496)]",44.54830445025538,6.842096329564359,"[np.float64(-15.831308325974064), np.float64(10.527261696156899)]",20.745813856100952,7.518698850656833,"[np.float64(-11.607519055445032), np.float64(17.43586739111031)]"
qwen1.5-72b-chat,1148.3386993293825,2.4656661539766414,"[np.float64(-4.586956281288849), np.float64(5.392985035645552)]",58.39540668043468,4.852503554306909,"[np.float64(-9.160821722998044), np.float64(8.988410921970932)]",19.32490158302827,4.457177573242626,"[np.float64(-6.32655686033582), np.float64(11.507171968681122)]",10.787328183388105,3.9264031956882173,"[np.float64(-10.567726734673805), np.float64(4.722883537854694)]",-1.189221574965715,3.842514339711322,"[np.float64(-4.6758308847741645), np.float64(10.238422029494469)]"
gpt-3.5-turbo-0125,1147.1100538874543,2.0411144056874035,"[np.float64(-3.83042209567202), np.float64(4.096561386154917)]",-45.858634082334916,3.7510755325994682,"[np.float64(-7.145479246289781), np.float64(7.322077770613518)]",18.736633332139437,3.7724762049971727,"[np.float64(-4.505439400599961), np.float64(10.239889107031225)]",-38.27238207599195,3.6557426310652774,"[np.float64(-10.084049123585153), np.float64(3.473241355554798)]",10.836346170024765,3.61500253635323,"[np.float64(-4.406840195496475), np.float64(9.22711418629016)]"
zephyr-orpo-141b-A35b-v0.1,1143.4877633011538,6.281815263273838,"[np.float64(-12.190894753990051), np.float64(11.74349402839266)]",-26.593068313674898,11.21190694890383,"[np.float64(-22.206307137437864), np.float64(21.332772380616795)]",-0.6681081001134184,10.52973113872176,"[np.float64(-18.91253665614035), np.float64(22.46615827258224)]",3.565591649517122,8.430696139685919,"[np.float64(-19.361468434537443), np.float64(12.150264856313143)]",-2.3210145227521672,9.602292985934215,"[np.float64(-16.169422518708593), np.float64(21.515780877118438)]"
gemini-pro,1139.4330928062504,7.403297238649309,"[np.float64(-14.628907463324367), np.float64(14.427517689508704)]",1.6644779845193143,15.198974920422827,"[np.float64(-29.52779579755211), np.float64(29.86217877647771)]",-6.662438577882884,10.29887415270665,"[np.float64(-17.31744378565259), np.float64(22.168957937783553)]",-6.921593576622075,8.929807711532021,"[np.float64(-20.413691876905137), np.float64(14.63858769418622)]",-22.839013231912325,8.364431461990831,"[np.float64(-14.013038362479415), np.float64(18.67451359652665)]"
claude-instant-1,1133.6018266451456,4.496625770755714,"[np.float64(-8.722687702197618), np.float64(8.102129475985976)]",-15.32579739507833,9.784757864055141,"[np.float64(-19.017067693926272), np.float64(18.792877160286473)]",2.549913471005614,6.578783740038747,"[np.float64(-10.668885061339463), np.float64(14.6961046467216)]",-12.509599296864907,5.574663711104886,"[np.float64(-13.99139229306932), np.float64(8.094497922652623)]",7.466698494649587,5.631874008034111,"[np.float64(-7.753158228445996), np.float64(13.303502381799523)]"
wizardlm-70b,1129.1238423361046,6.516399681543557,"[np.float64(-13.045274868086608), np.float64(12.664382559477644)]",-30.27425559448579,14.445840988569048,"[np.float64(-28.018524460627482), np.float64(29.98981022684411)]",-26.05249855233115,9.417353063431074,"[np.float64(-16.346338458086322), np.float64(20.134077670299618)]",4.6734577253315654,7.602771673940611,"[np.float64(-17.282113321117205), np.float64(12.791672051014768)]",-18.53519217084969,8.019723183085695,"[np.float64(-13.60921372550912), np.float64(17.685233694345165)]"
snowflake-arctic-instruct,1126.0853209745628,2.6779410245007407,"[np.float64(-5.014089762396907), np.float64(5.299899068196737)]",-3.3740742788750637,5.093993090120392,"[np.float64(-9.44129926710389), np.float64(9.754144430518968)]",-10.981739393171226,4.743159391917013,"[np.float64(-6.712872453056145), np.float64(12.116410674786474)]",-12.786406253848213,4.194594535276771,"[np.float64(-11.091810929879598), np.float64(5.150122572020749)]",-12.825983468253822,4.123520870492974,"[np.float64(-5.667005588244748), np.float64(10.673094419628312)]"
qwen1.5-32b-chat,1126.0273804714507,3.0138239307515873,"[np.float64(-5.968922740111566), np.float64(5.842214343148953)]",68.63884806063199,5.52556877233754,"[np.float64(-10.392682885840628), np.float64(11.139264112047798)]",21.81037750112879,5.59305275229386,"[np.float64(-8.854352410659732), np.float64(13.276234434282072)]",5.104091176263998,4.651360918467935,"[np.float64(-11.715716699805487), np.float64(5.754560947109778)]",7.966753378674111,4.918250400976697,"[np.float64(-7.083696956190179), np.float64(11.749581824370967)]"
yi-1.5-34b-chat,1125.7481962016946,3.1430592804414292,"[np.float64(-5.938335070468838), np.float64(6.4330595054416335)]",102.83449010149958,6.083052809713152,"[np.float64(-11.401291237363893), np.float64(12.117156727282719)]",1.1097603201050228,5.734844484476473,"[np.float64(-8.88357079723776), np.float64(13.481809879765663)]",62.57962382382713,4.8787043769286615,"[np.float64(-12.174669198214538), np.float64(6.853932545661053)]",5.410061149276945,5.144873045190008,"[np.float64(-7.618429643433785), np.float64(12.56005514602816)]"
phi-3-medium-4k-instruct,1125.5035967152162,3.771069260584023,"[np.float64(-7.420294347233266), np.float64(7.265624165983354)]",-6.544358880891617,6.905271529350809,"[np.float64(-13.027687216169458), np.float64(12.984106433207025)]",4.644308518220606,6.652249722929761,"[np.float64(-11.068335311462619), np.float64(15.216472335032297)]",11.813772588639806,5.490357438531029,"[np.float64(-13.76255060894596), np.float64(7.902548373277506)]",20.69670977200211,5.772117617229225,"[np.float64(-8.574775708580994), np.float64(13.480924839678394)]"
tulu-2-dpo-70b,1122.39559319609,6.91247684007889,"[np.float64(-13.666185861972053), np.float64(13.410704694566903)]",-71.94117275014042,14.923779282255506,"[np.float64(-28.67158233382444), np.float64(29.706817634385217)]",-6.397083824004204,9.933150197981515,"[np.float64(-16.28233696450558), np.float64(21.142720306081713)]",-3.679264271227348,8.107523399360268,"[np.float64(-18.97314383461823), np.float64(12.370980870600642)]",7.87729777401583,8.270836871274538,"[np.float64(-12.90699314845197), np.float64(18.564467060702576)]"
mixtral-8x7b-instruct-v0.1,1114.0,2.0898587007391694,"[np.float64(-3.7607521504301076), np.float64(4.080816163232612)]",-37.10913010132825,3.7039968736505076,"[np.float64(-6.905823967170949), np.float64(7.448425128031204)]",-3.8763595447465815,3.874534917334335,"[np.float64(-4.763853870661651), np.float64(10.165481379537216)]",25.15863485099822,3.6138690496931254,"[np.float64(-10.168594588540827), np.float64(3.426302347386194)]",9.913234964238264,3.474192114433203,"[np.float64(-4.034051981749229), np.float64(9.31899223921355)]"
openchat-3.5-0106,1113.8015555486154,4.46533376078441,"[np.float64(-8.367912031256765), np.float64(8.731645273316644)]",-2.591423486537971,7.850459454102149,"[np.float64(-14.435286741528923), np.float64(16.97662878605491)]",17.071729998023145,6.829204283954318,"[np.float64(-11.473251871941176), np.float64(15.899250132596464)]",-3.41651570278906,5.832543182074718,"[np.float64(-14.459080226362971), np.float64(8.504670179093752)]",-10.947347695519753,5.977464794696907,"[np.float64(-9.355256865623598), np.float64(14.040255630795972)]"
qwen1.5-14b-chat,1112.0550614752551,3.348998884453233,"[np.float64(-6.541487102703513), np.float64(6.363199438552556)]",56.58136951236024,5.642821316503038,"[np.float64(-10.3095050464715), np.float64(11.242531896847154)]",10.852119964178055,5.953505513465173,"[np.float64(-8.97878809910755), np.float64(14.205157524385235)]",10.472401498349836,4.986641052006438,"[np.float64(-12.471986156197737), np.float64(6.6056184891248435)]",8.207986006105209,5.167628006334244,"[np.float64(-8.062273593449909), np.float64(12.398835950235792)]"
llama2-70b-steerlm-chat,1111.2542089602998,10.07868617211126,"[np.float64(-20.245220816675555), np.float64(20.633213546426305)]",-28.089679509687425,17.555833341711075,"[np.float64(-33.84274945784733), np.float64(36.68130011494577)]",-51.806922147197994,12.571341880057595,"[np.float64(-21.57889075407252), np.float64(25.79846247164521)]",-2.897569677353296,11.343157333894498,"[np.float64(-25.718487276786153), np.float64(19.41051706394836)]",-13.210227223797322,11.868121633638838,"[np.float64(-19.737597340626273), np.float64(25.023386921693355)]"
starling-lm-7b-beta,1110.747310224183,3.899336501081589,"[np.float64(-7.065139404769752), np.float64(7.690302143579174)]",34.613898123061986,6.257414303113865,"[np.float64(-11.295189429888083), np.float64(12.650676895140116)]",18.024930423539725,6.228911919386931,"[np.float64(-10.454286442065989), np.float64(13.747138870811558)]",18.567424104275666,5.386919493295239,"[np.float64(-14.056511141444396), np.float64(7.593357038012432)]",1.0380010819477565,5.481792391142863,"[np.float64(-7.674485133274395), np.float64(13.001318832719981)]"
llama-2-70b-chat,1108.021856904185,2.6995863838892893,"[np.float64(-4.88794153429194), np.float64(5.77208285787151)]",-78.42801340422324,5.394854771550006,"[np.float64(-10.442954278072136), np.float64(10.838059836074336)]",-14.703360949943416,4.545180596594105,"[np.float64(-6.966931126211629), np.float64(11.127703432347609)]",23.940840809921138,3.861583405117661,"[np.float64(-10.31972949903967), np.float64(4.668316523292766)]",-18.16892228043715,4.032182766345007,"[np.float64(-5.80727980839449), np.float64(10.301550288007935)]"
gpt-3.5-turbo-1106,1106.2286797062316,4.527913626221944,"[np.float64(-8.827844399605965), np.float64(8.650554051315567)]",-62.11289733561488,11.342352777707958,"[np.float64(-21.840460961588235), np.float64(22.623615699891737)]",20.01748246535362,6.890420905124763,"[np.float64(-11.537757777966778), np.float64(15.275994749384147)]",-35.909348772783886,5.882899267421972,"[np.float64(-14.735304262315957), np.float64(8.60018520864788)]",32.935006613127,5.895923587997598,"[np.float64(-8.402248521736098), np.float64(14.703045842050642)]"
vicuna-33b,1105.0461243365712,3.923338978213643,"[np.float64(-7.867777938949075), np.float64(8.230195477828829)]",-26.908655070372273,7.858822608913538,"[np.float64(-14.448706540738), np.float64(16.32225900854471)]",-18.358240192126306,6.099325245344445,"[np.float64(-8.861133728873853), np.float64(14.085989195244085)]",16.907596245667303,5.023412426517893,"[np.float64(-12.946069873103141), np.float64(6.5521019439312145)]",-18.37410334808449,5.19601526141571,"[np.float64(-7.756962693314497), np.float64(12.607814973264478)]"
phi-3-small-8k-instruct,1103.211011804809,3.4966445688039536,"[np.float64(-6.6721849681052845), np.float64(6.971602625103515)]",-16.302402706071064,6.403240251558434,"[np.float64(-11.983236519202997), np.float64(12.916662960582244)]",-4.7402226245595935,6.407490801404366,"[np.float64(-9.37723577621872), np.float64(15.120627470181855)]",27.199444491584217,5.2742308819138515,"[np.float64(-12.586597078870483), np.float64(7.60516621263076)]",13.041834450185972,5.526433509457537,"[np.float64(-8.00385848284733), np.float64(13.546237467667186)]"
openchat-3.5,1101.0301299091861,6.685366660453455,"[np.float64(-12.879740889749883), np.float64(13.155439258307524)]",2.0721871681816477,14.172844095122288,"[np.float64(-28.30022306768744), np.float64(26.426447362199184)]",-22.50558176198692,9.192888148660112,"[np.float64(-16.227899259159123), np.float64(20.45583347797034)]",-5.204182826573839,7.808715420152498,"[np.float64(-18.347755566143118), np.float64(12.120619090107706)]",1.6522409847596573,7.438871087442514,"[np.float64(-11.635721079047144), np.float64(17.224456841700746)]"
dbrx-instruct-preview,1100.7359961398251,2.739097082454577,"[np.float64(-5.339639235329287), np.float64(5.250469579701985)]",-4.08228517434043,4.774704766667501,"[np.float64(-9.39177868287384), np.float64(9.646216125138675)]",16.695324121786854,4.8377723536001005,"[np.float64(-6.827928777223047), np.float64(12.427299257786963)]",25.308819954926427,4.024665963462786,"[np.float64(-10.7750245642953), np.float64(4.903314217401874)]",4.904951075273545,4.093405103872824,"[np.float64(-5.363348199520131), np.float64(10.505235736942614)]"
yi-34b-chat,1100.7238355755567,4.003594193208227,"[np.float64(-7.926892444171472), np.float64(8.119828041240226)]",94.3155390749361,8.240335600496172,"[np.float64(-16.546865598092083), np.float64(16.456726358130283)]",-5.767087011630676,6.338488931470144,"[np.float64(-10.015340414132396), np.float64(14.499400722783204)]",36.07224029449352,5.4230924660845,"[np.float64(-14.421085409320966), np.float64(7.501502396329556)]",-6.154182172420226,5.860442571628614,"[np.float64(-9.75156824042697), np.float64(14.031042503119709)]"
starling-lm-7b-alpha,1098.5905712660951,5.449989598724078,"[np.float64(-10.400806216365936), np.float64(11.011257110195857)]",-16.99649415505732,10.031293507923564,"[np.float64(-19.906642760287706), np.float64(20.03024178703486)]",-2.7391667199454033,8.07673175629795,"[np.float64(-12.634102202992288), np.float64(18.131127855185643)]",17.045218528754027,6.666832950623095,"[np.float64(-15.83798277923443), np.float64(10.529385362068055)]",-11.89718859036402,7.236417384219512,"[np.float64(-11.937437870735474), np.float64(16.326291159433776)]"
gemma-1.1-7b-it,1096.9808513502494,3.0311540399828796,"[np.float64(-5.967753539234764), np.float64(6.002283441203417)]",-2.074665138009618,5.44890913103645,"[np.float64(-10.18274529775846), np.float64(10.43072879737023)]",3.2235002804490986,5.386988492121775,"[np.float64(-8.492977377039693), np.float64(12.662317936277889)]",14.350199657166732,4.590231216553044,"[np.float64(-11.440775402669201), np.float64(5.882005027757392)]",-9.112551827719646,4.819331626796086,"[np.float64(-6.477043686548086), np.float64(12.15599680629289)]"
pplx-70b-online,1095.021963835251,7.292222350428084,"[np.float64(-15.408928634374206), np.float64(12.761950230782304)]",13.185259930397498,15.435142564103572,"[np.float64(-29.053570266686357), np.float64(31.36007662626846)]",-31.675132074618528,9.497921710459266,"[np.float64(-15.980664062979088), np.float64(19.886942655023393)]",11.019877952899549,8.563583590807196,"[np.float64(-19.362872404131213), np.float64(15.245668128990875)]",-38.53481265164221,8.672574070143591,"[np.float64(-13.836184606276934), np.float64(20.29724952806361)]"
deepseek-llm-67b-chat,1092.4996841287166,8.331481781666787,"[np.float64(-17.18725058774521), np.float64(15.779807955111437)]",45.95671727225999,16.982563297359317,"[np.float64(-30.38535942397749), np.float64(34.44484296626763)]",10.297996776835094,11.374530323032277,"[np.float64(-20.22051346112565), np.float64(22.948485065694996)]",0.34869601655917004,9.476179125296598,"[np.float64(-21.616875280140388), np.float64(16.374801463714277)]",-10.283876934912547,10.174397174848265,"[np.float64(-15.989684971823381), np.float64(23.88712994328803)]"
nous-hermes-2-mixtral-8x7b-dpo,1090.4874643733192,9.801499090528806,"[np.float64(-18.57009993655356), np.float64(20.21072916472258)]",-25.75849583660077,18.58510173778556,"[np.float64(-37.05950011177323), np.float64(35.41077390891314)]",16.935561132831513,11.538161456163952,"[np.float64(-21.496910106347617), np.float64(23.025708625110077)]",24.842788483836316,10.949131361726538,"[np.float64(-24.82781242800749), np.float64(18.348464103009825)]",-42.4830405314654,9.893857991246357,"[np.float64(-16.526598020344903), np.float64(21.469839563506817)]"
qwen1.5-7b-chat,1086.2058984051655,7.003512119837847,"[np.float64(-13.68863119053617), np.float64(13.351860025051792)]",71.97024373712347,12.285112582306832,"[np.float64(-22.698321843781372), np.float64(26.12850870477199)]",24.22753040194223,10.455853564951003,"[np.float64(-18.010582830121326), np.float64(22.5397440040257)]",-4.071561724652349,8.917941492874021,"[np.float64(-19.667692953558415), np.float64(13.715908584311123)]",-7.9345498240374415,9.17423206924592,"[np.float64(-15.294470628859635), np.float64(19.644761333598154)]"
wizardlm-13b,1082.5628807493504,7.2467819241344875,"[np.float64(-14.101539930120907), np.float64(14.84515186305066)]",-10.14335337880361,13.76255829536688,"[np.float64(-27.519789160405853), np.float64(26.091232981548657)]",-13.454592828968531,10.722139245611254,"[np.float64(-19.10913235375521), np.float64(23.03114885794013)]",5.479566599575605,8.383243277098398,"[np.float64(-19.592799388603535), np.float64(12.375917211548146)]",-41.06601957580651,9.202811021112444,"[np.float64(-15.907018081658112), np.float64(19.580868383120105)]"
llama-2-13b-chat,1081.1945708668682,3.93244956955135,"[np.float64(-7.867686202070445), np.float64(7.310328375989911)]",-58.15489996393533,8.41072973889463,"[np.float64(-15.908041694504128), np.float64(17.313536472933357)]",-8.50066945616004,6.458390055847515,"[np.float64(-10.425110756246454), np.float64(14.987803353310166)]",11.760184171990309,5.20512853566337,"[np.float64(-12.652626555148421), np.float64(7.333315003347536)]",-9.867666612042383,5.481589937420523,"[np.float64(-7.6897295845057485), np.float64(12.928714877991213)]"
qwen-14b-chat,1081.1674528600424,8.204188871518292,"[np.float64(-16.083381828730808), np.float64(15.285217892130731)]",11.127442069410538,17.191468097951613,"[np.float64(-31.653157382365233), np.float64(35.29821423324276)]",32.63598381182464,11.96128911047287,"[np.float64(-20.385407234113366), np.float64(25.183972512074142)]",-35.167143298103184,9.534487884909906,"[np.float64(-20.274757004110825), np.float64(16.16919275726027)]",-17.498437590900398,10.169645797815782,"[np.float64(-17.915493974894012), np.float64(22.043752955594602)]"
vicuna-13b,1077.188870348149,4.489892309602059,"[np.float64(-8.91149341305686), np.float64(8.725242620495465)]",7.4119070450780935,8.742688129136521,"[np.float64(-16.89749219617601), np.float64(16.54146601766135)]",0.6177933687993357,6.68002854552671,"[np.float64(-10.719857620433944), np.float64(15.209213490766285)]",-15.499066224648878,5.528726014343603,"[np.float64(-12.873410778991975), np.float64(8.168606183866153)]",-14.948820045648798,5.828652284502331,"[np.float64(-9.143790609284817), np.float64(13.795202719667735)]"
openhermes-2.5-mistral-7b,1074.6987027005907,7.774620899676282,"[np.float64(-14.768230696987985), np.float64(15.06364484390383)]",-13.473048234185073,15.61361394797932,"[np.float64(-29.464685241494536), np.float64(31.018778713210025)]",-16.95193549014802,10.895989635765396,"[np.float64(-18.41369908363679), np.float64(23.845234287481215)]",27.991102066642217,8.923413701925266,"[np.float64(-20.88579831989534), np.float64(14.653758275951319)]",2.9894593580689435,9.07725035084509,"[np.float64(-16.78234576764079), np.float64(19.703159666642158)]"
phi-3-mini-128k-instruct,1071.6571113307828,3.2532436739482082,"[np.float64(-6.291423054928373), np.float64(6.2448028501446515)]",-2.2912710011119604,6.092464162641127,"[np.float64(-11.565509778451151), np.float64(12.119120881387)]",-23.45344077784811,5.667081970169167,"[np.float64(-8.66114164470795), np.float64(13.441853240168218)]",0.43639136488615976,4.71708576888812,"[np.float64(-11.864557567938324), np.float64(6.3689790746310715)]",0.007257175465028916,4.926025014257316,"[np.float64(-6.8834587288708065), np.float64(12.680488046163184)]"
codellama-34b-instruct,1070.3858212151436,6.668478658526783,"[np.float64(-13.149779039862779), np.float64(11.984597351021876)]",-57.10280781314243,15.531452189688256,"[np.float64(-30.59713941487179), np.float64(29.768198809264725)]",4.757856792205663,10.165180920239381,"[np.float64(-17.34605698787179), np.float64(22.427005659211098)]",-5.029470246527128,7.996603644225415,"[np.float64(-19.072738152846394), np.float64(12.254025530267786)]",-13.290022376534235,8.314581475625936,"[np.float64(-13.196272120539993), np.float64(19.253989507145594)]"
phi-3-mini-4k-instruct,1067.5983888167866,3.1394435751358323,"[np.float64(-5.698596765816546), np.float64(6.182991864626956)]",-24.07377681569235,6.681510517190205,"[np.float64(-13.04615370996734), np.float64(13.545367840490325)]",10.150661874648309,5.499832188178536,"[np.float64(-8.026153979548651), np.float64(13.510091609656337)]",27.63862275988083,4.839438542469562,"[np.float64(-12.493729718555176), np.float64(6.160260620303653)]",14.563805707042075,5.2314846869776135,"[np.float64(-7.509231780413849), np.float64(12.168806667217675)]"
solar-10.7b-instruct-v1.0,1063.6726694149293,9.047914691141976,"[np.float64(-17.774369456540626), np.float64(18.21819492993518)]",-23.08560777087123,17.343793015191245,"[np.float64(-33.29712496737552), np.float64(33.361845913741675)]",-15.064294098731422,12.42641371398754,"[np.float64(-21.312760515389066), np.float64(27.692815962871137)]",26.593687115533502,10.434826879114345,"[np.float64(-22.00347504339884), np.float64(17.90394563460449)]",2.4369341154259305,10.32262227479316,"[np.float64(-17.29666542650755), np.float64(22.383603185914442)]"
dolphin-2.2.1-mistral-7b,1059.7792391015405,12.338431327020105,"[np.float64(-24.5153546116112), np.float64(25.140646952226916)]",15.18452974922337,19.56838649434266,"[np.float64(-37.30998931156828), np.float64(37.11335349054276)]",-31.22092553068278,16.159791616214168,"[np.float64(-28.041226422920815), np.float64(33.47466004031904)]",30.44967646303589,13.494944150469456,"[np.float64(-29.36691029674112), np.float64(24.607255135019134)]",0.34044184357464585,14.05062344586174,"[np.float64(-23.194312077124298), np.float64(29.82204515700886)]"
vicuna-7b,1058.039652475944,8.285259406778355,"[np.float64(-16.66780791366864), np.float64(16.13545891379499)]",-36.64536608053322,14.31145554912841,"[np.float64(-27.559906762630753), np.float64(28.831727722564523)]",-17.90820216065714,10.667366970471338,"[np.float64(-18.770245222006913), np.float64(21.40815393324756)]",-36.372611556110144,9.48954031175137,"[np.float64(-21.465912543067972), np.float64(15.653084735137192)]",-2.701361945986695,9.213166499643442,"[np.float64(-14.60849792896306), np.float64(19.34196608976751)]"
falcon-180b-chat,1056.3496338580617,15.494331018293643,"[np.float64(-29.400074648156078), np.float64(29.738401805233252)]",-22.203219756010697,17.991504659968797,"[np.float64(-34.48058220507559), np.float64(35.12385260187714)]",-4.449517585973962,18.94206283154635,"[np.float64(-34.047911181209685), np.float64(40.621366681922815)]",3.001075362696963,15.923258572148743,"[np.float64(-34.187074293768774), np.float64(26.828235304805677)]",-24.77278946883953,16.087263273479508,"[np.float64(-28.41845825125948), np.float64(32.12488420230834)]"
mistral-7b-instruct-v0.2,1053.9873384172843,3.8225039012791577,"[np.float64(-7.402246838946667), np.float64(7.9107486866912495)]",-5.963425025839357,6.971763932812074,"[np.float64(-12.663975329127673), np.float64(14.29851235535731)]",0.03995505583494258,5.950785681738284,"[np.float64(-8.98534868084869), np.float64(14.145410128557588)]",55.452306042989825,5.186809919001123,"[np.float64(-12.64605669729604), np.float64(7.3690280125325245)]",-3.0253783863446726,5.209202398887985,"[np.float64(-8.318026497609864), np.float64(12.386492761284195)]"
zephyr-7b-alpha,1050.666155098179,12.89236045576876,"[np.float64(-26.781579865876893), np.float64(24.363316437837057)]",-14.105000896890605,19.167557418698983,"[np.float64(-37.09177695884492), np.float64(39.080524838855965)]",-1.5591813265382028,16.769267065711535,"[np.float64(-30.50536447688372), np.float64(33.578098242361534)]",21.83915082694029,14.117362935337708,"[np.float64(-28.735013523144296), np.float64(26.65868831026714)]",-18.443335362069725,15.171546767197247,"[np.float64(-25.77220144793347), np.float64(34.75609814826462)]"
zephyr-7b-beta,1049.156779861562,5.782649046201904,"[np.float64(-10.957854790557803), np.float64(11.384571327864023)]",-42.30788527833653,12.906504782319416,"[np.float64(-24.80949054245703), np.float64(25.74356493359386)]",-13.296458245310856,8.240702109956752,"[np.float64(-12.883319046821441), np.float64(18.73457457513199)]",38.55692581472492,6.9206336799481605,"[np.float64(-16.49029922012958), np.float64(10.726079963760586)]",-22.18564932932032,6.968016580277006,"[np.float64(-11.540018852124616), np.float64(15.793930863932566)]"
gemma-1.1-2b-it,1044.268260399991,4.4302244918368965,"[np.float64(-8.615266837256286), np.float64(9.01971337359555)]",8.893095240780866,8.374469774913772,"[np.float64(-15.962546389022563), np.float64(16.347560655396784)]",24.519485875006897,7.337437754830583,"[np.float64(-12.278900306643589), np.float64(16.382922054397326)]",2.352426023112158,5.934456558701041,"[np.float64(-15.192013588586892), np.float64(8.402067556988786)]",-19.427161003459226,6.707029480522126,"[np.float64(-10.973435662123535), np.float64(15.962578527681963)]"
mpt-30b-chat,1040.8896263730558,10.909404308844532,"[np.float64(-21.341906884567152), np.float64(20.487011103730765)]",-14.487691437608445,19.885886981717654,"[np.float64(-37.25576157950897), np.float64(38.92182557091104)]",-19.830217568566265,15.669089045969432,"[np.float64(-30.039983595251083), np.float64(31.77396746665742)]",31.248563148542218,12.271879147342702,"[np.float64(-27.17691382819126), np.float64(20.758767684794535)]",12.216299130706833,13.397056572077135,"[np.float64(-22.5278132336493), np.float64(29.149587952477855)]"
codellama-70b-instruct,1038.6296876531578,12.28511060288376,"[np.float64(-23.21367791263424), np.float64(24.207428612188096)]",24.911140803491783,18.183495138412834,"[np.float64(-34.2285893035467), np.float64(36.54487134576232)]",-1.0554490257871254,16.80394373848369,"[np.float64(-28.932296468516565), np.float64(35.965429568510245)]",28.593332180927973,14.54742927503815,"[np.float64(-30.27018149688017), np.float64(26.114877395451956)]",8.225592250390285,15.544199243138301,"[np.float64(-28.574607347777267), np.float64(33.281672534551184)]"
pplx-7b-online,1037.9623333218024,7.847406919183554,"[np.float64(-14.577905309866537), np.float64(15.230075986061934)]",21.29503247238764,15.392791378699936,"[np.float64(-27.840450004816525), np.float64(32.746237248870784)]",-21.11899204177015,10.192566628271628,"[np.float64(-18.32456813640211), np.float64(22.66068430833794)]",34.64309519544954,9.336561542413886,"[np.float64(-20.969302643116585), np.float64(15.601560936440823)]",-17.3371272666952,9.489015464917928,"[np.float64(-15.374374228328008), np.float64(21.64395234719594)]"
llama-2-7b-chat,1036.1455127758754,4.551931383955326,"[np.float64(-8.572778102208531), np.float64(9.27758299959396)]",-27.971043021026848,9.34918320812627,"[np.float64(-19.22924198855469), np.float64(18.953107968690798)]",-31.326844862292603,7.341673677617038,"[np.float64(-12.260840109436845), np.float64(16.375075006908247)]",44.839158915276585,5.789296476328521,"[np.float64(-15.301242021428948), np.float64(7.880742039046545)]",-22.41329746595718,6.050249379651856,"[np.float64(-9.55669354073312), np.float64(14.395397463942231)]"
guanaco-33b,1034.7614341270619,11.094681893881203,"[np.float64(-21.649795518802875), np.float64(20.733594086399194)]",-17.305929325392004,16.75184142067981,"[np.float64(-31.698333246781566), np.float64(33.11961909206988)]",-52.520806414148744,14.15693014636792,"[np.float64(-26.99161932293931), np.float64(28.124937940726802)]",29.680577851144164,12.176386952739652,"[np.float64(-25.930050330027214), np.float64(21.70594144812498)]",-12.409169263082129,12.196492673721067,"[np.float64(-21.38435728609322), np.float64(25.675014564714324)]"
gemma-7b-it,1029.2045548608198,5.588550666772404,"[np.float64(-11.02120119805204), np.float64(11.313702205005484)]",37.53774777886909,9.270659784386975,"[np.float64(-16.546990961375677), np.float64(18.749963314406926)]",6.869412940691344,8.018520634088416,"[np.float64(-13.580269491387671), np.float64(16.740840771705475)]",28.054948527447856,6.9374125995035,"[np.float64(-16.51525229501634), np.float64(10.322392119154134)]",11.224652950747737,7.509796412431259,"[np.float64(-11.617435922438817), np.float64(17.509617031813747)]"
stripedhyena-nous-7b,1027.7893733658643,7.77232981556205,"[np.float64(-13.627398600022161), np.float64(15.331650208743213)]",-16.158184668602168,17.598164562384692,"[np.float64(-32.90745234518532), np.float64(35.19348517023006)]",-9.867420254022798,11.19115157185734,"[np.float64(-18.636874362398633), np.float64(24.21941085288075)]",20.97769635740452,8.915163719801608,"[np.float64(-20.522607597474334), np.float64(14.14291070893296)]",-18.721561624042494,9.045521463265853,"[np.float64(-16.193397306388807), np.float64(19.17512396490494)]"
qwen1.5-4b-chat,1025.911415664198,5.909612097570807,"[np.float64(-10.661052506540045), np.float64(13.034697739441413)]",35.89483872396798,9.846717496978728,"[np.float64(-18.987022328757043), np.float64(19.558292837204153)]",3.6123746631164604,8.801323490252898,"[np.float64(-15.420801227674483), np.float64(19.305589567948594)]",-22.625236026766192,7.357725664495877,"[np.float64(-18.244349961498386), np.float64(11.936325009457622)]",-13.486241822604038,7.842998115183293,"[np.float64(-13.163162230766243), np.float64(17.167148975492992)]"
mistral-7b-instruct,1008.2700641845677,6.274534245206985,"[np.float64(-12.552964936429476), np.float64(11.938347719729109)]",-26.218334482609727,13.448778233355643,"[np.float64(-27.62776191711747), np.float64(26.92451050621156)]",0.21119795979318765,9.122015951661362,"[np.float64(-16.83115884271406), np.float64(19.169917847633446)]",31.75683636019342,7.65880070471035,"[np.float64(-18.019211843003557), np.float64(11.47953382266667)]",-3.72969679105186,7.6444485301311005,"[np.float64(-11.563994368498864), np.float64(18.11949442456462)]"
palm-2,996.9866601513518,7.512538640682342,"[np.float64(-14.43720607851617), np.float64(14.596341768068896)]",-69.23088135774631,14.848157180693418,"[np.float64(-29.563218504587), np.float64(29.37322221479218)]",-18.040853618854324,9.780123503803287,"[np.float64(-16.597466566296482), np.float64(21.70554585913001)]",43.29727207154819,8.644224674591849,"[np.float64(-18.880882587686585), np.float64(14.375072402295448)]",-0.18621305382197714,8.240786201765564,"[np.float64(-13.156225600235365), np.float64(17.886717266942675)]"
gemma-2b-it,995.4227072942936,7.575096272291702,"[np.float64(-14.665455063125364), np.float64(13.92492942372121)]",32.57895225780721,12.723922555684457,"[np.float64(-24.554372696457335), np.float64(26.13049117583786)]",6.9649864005007105,11.483515299642603,"[np.float64(-19.687620842538415), np.float64(25.010107714876906)]",19.523027876046385,9.363567071748296,"[np.float64(-20.923698489710844), np.float64(15.21180797662825)]",-7.204229719976517,9.65410805688685,"[np.float64(-17.113269791119777), np.float64(19.855295012189032)]"
olmo-7b-instruct,994.772862190533,6.76405502494153,"[np.float64(-13.278059912968047), np.float64(13.358042762848413)]",54.279323723654166,11.660122258827723,"[np.float64(-21.778925612231042), np.float64(23.65337736178857)]",8.124733179002495,10.596890769951383,"[np.float64(-18.72116624779091), np.float64(21.865300923610903)]",58.94543604871258,8.035164607824884,"[np.float64(-18.296715415467503), np.float64(12.62930277762193)]",-29.74576575548781,8.812865514030122,"[np.float64(-13.98151209830137), np.float64(18.803204236930874)]"
RWKV-4-Raven-14B,971.4897365668876,9.435874816014419,"[np.float64(-19.535032351970585), np.float64(17.575667420113632)]",-27.569079340605967,14.973716516984306,"[np.float64(-29.626543974273687), np.float64(30.033788082732002)]",-7.269686653981167,11.718323287684592,"[np.float64(-21.316045174703188), np.float64(24.509365360905534)]",-30.048515704272326,10.511719970187094,"[np.float64(-23.42716843125246), np.float64(17.46445101771299)]",-22.808463669875948,10.58898614169807,"[np.float64(-17.615503639262087), np.float64(23.62388016399005)]"
koala-13b,966.8925367204583,8.702153540464932,"[np.float64(-17.386679554344028), np.float64(16.63233634576204)]",-43.79363431661037,12.752042865265487,"[np.float64(-23.99990033971416), np.float64(25.531654116089534)]",-4.552841490150045,10.31244075612573,"[np.float64(-18.871865789513645), np.float64(21.78563421270561)]",31.18768644749332,9.625897204279354,"[np.float64(-22.02941064220613), np.float64(16.580685231004537)]",-35.95961166107503,9.146286944041076,"[np.float64(-15.029950730930508), np.float64(19.5376332075003)]"
alpaca-13b,955.474591938123,9.732021402292647,"[np.float64(-17.68306934571308), np.float64(19.803162826839184)]",-96.46657474958639,14.479280031746734,"[np.float64(-28.371449436051634), np.float64(28.124614449608842)]",-78.19774634427408,12.66608151749176,"[np.float64(-21.773917938357982), np.float64(27.922609709679946)]",-11.263047124125505,10.91052249480845,"[np.float64(-24.874925036050346), np.float64(18.184282711958623)]",-62.447851158138086,9.838303275862602,"[np.float64(-15.28806863955586), np.float64(21.27404785274195)]"
chatglm3-6b,945.6706371180735,8.592716543185894,"[np.float64(-17.048949192721125), np.float64(16.286069775796705)]",112.74168316483953,15.89695835413313,"[np.float64(-32.119796992696834), np.float64(29.976970311320287)]",-6.734080883065393,12.363628488084734,"[np.float64(-21.641606486230927), np.float64(25.328084051794402)]",32.55430503977118,9.811539695680219,"[np.float64(-21.999562777235845), np.float64(17.276528938918133)]",3.5061439945000585,10.132917919040315,"[np.float64(-17.781313985044644), np.float64(21.794751614851936)]"
mpt-7b-chat,944.1991746646127,10.635347655885527,"[np.float64(-20.270312006680456), np.float64(20.755059361908025)]",37.392594015207024,15.804614384045262,"[np.float64(-30.05995867665993), np.float64(30.502870730387063)]",-6.941572976273302,15.133822444010216,"[np.float64(-25.998469753756588), np.float64(31.239419224353558)]",8.152120082017458,11.749272425604108,"[np.float64(-25.677813907529437), np.float64(20.60864960063615)]",-25.208389735114896,11.564497886428997,"[np.float64(-22.585249277069522), np.float64(24.660154278045173)]"
chatglm2-6b,930.2610007312096,10.477165714383531,"[np.float64(-19.211366082243785), np.float64(20.701914810945027)]",66.9857475791915,19.857140970737042,"[np.float64(-37.787505922978596), np.float64(42.223765904623626)]",-37.767586641940284,15.177330194616626,"[np.float64(-27.107325617510647), np.float64(30.963934539124864)]",30.20960527593591,11.955036227957423,"[np.float64(-25.24659689608737), np.float64(21.237101254205726)]",-2.3444956865076083,13.93318074262025,"[np.float64(-24.81867195544269), np.float64(30.322302860376098)]"
gpt4all-13b-snoozy,924.3764295545761,13.035783672414466,"[np.float64(-27.329902578467227), np.float64(25.12906858997644)]",-7.990080030156926,16.595369841986727,"[np.float64(-31.93112685033272), np.float64(31.022471315673286)]",-20.81083989083799,17.082702511210467,"[np.float64(-32.19562557195579), np.float64(33.06966473170049)]",38.63598602062527,14.206860503819785,"[np.float64(-29.186767791203323), np.float64(26.607362333122836)]",9.535282510797918,14.047524679431973,"[np.float64(-24.227708822507623), np.float64(28.90334025360412)]"
oasst-pythia-12b,911.5469948243444,8.94930621333204,"[np.float64(-18.08105926090775), np.float64(17.646767698368876)]",-62.931453909193706,13.052248566822668,"[np.float64(-25.136821398930188), np.float64(26.26785815904202)]",-12.230824919882723,11.299356258434024,"[np.float64(-20.049139335822137), np.float64(26.11660271112548)]",10.014042151308065,9.780349951145178,"[np.float64(-21.214965795147556), np.float64(16.617627202313276)]",-4.336864377246117,9.914991808005544,"[np.float64(-18.388899839228067), np.float64(21.285642818307814)]"
fastchat-t5-3b,878.9077415727115,10.596691523708097,"[np.float64(-19.612620524671343), np.float64(20.81325140224226)]",-108.06887460981712,14.093133574613573,"[np.float64(-26.591507572854397), np.float64(28.597978770628984)]",-90.22055955993991,13.109913418906867,"[np.float64(-24.473513550433523), np.float64(28.808238289001977)]",41.686531615979874,11.275223235682295,"[np.float64(-24.778678117254714), np.float64(18.612847449075197)]",-35.96720513187071,11.075601492202189,"[np.float64(-18.646456415862936), np.float64(23.611816232914506)]"
chatglm-6b,873.7974645403856,10.107586515912212,"[np.float64(-19.11213700075257), np.float64(19.26854132226231)]",199.29200106479277,14.91574271798056,"[np.float64(-28.66509614596714), np.float64(29.809499725454742)]",10.943954093078206,12.203904875178118,"[np.float64(-23.12053423234984), np.float64(24.361006579145638)]",11.157811755882998,10.933716232870916,"[np.float64(-24.306227123546606), np.float64(18.145343744211775)]",18.308826013273844,10.915098625872567,"[np.float64(-19.474789587990728), np.float64(22.36670834993362)]"
dolly-v2-12b,856.4664997826485,11.752016378092183,"[np.float64(-24.172404807977273), np.float64(22.717956501981575)]",7.537752771040457,15.789522424797843,"[np.float64(-30.883410181323878), np.float64(31.116198202696186)]",-58.368697890410424,13.877655876147005,"[np.float64(-24.20650216157737), np.float64(30.770690663616925)]",-15.362899965472257,12.550933636863874,"[np.float64(-27.63635866519375), np.float64(21.89898777967756)]",4.269368557051646,12.508609158056467,"[np.float64(-21.976325530404544), np.float64(26.202256288321145)]"
llama-13b,853.0923121034634,12.655769796375903,"[np.float64(-25.806625052522463), np.float64(23.994862018443314)]",-8.252073762648571,15.642862057254789,"[np.float64(-27.65765187509709), np.float64(33.18216267898774)]",-88.77123476716297,14.854872969458624,"[np.float64(-25.148044917602483), np.float64(32.56759720030931)]",-26.193247842850372,13.626600665250615,"[np.float64(-28.64602300030401), np.float64(24.40204079082522)]",-35.73391872192578,14.588201973083397,"[np.float64(-27.036691330142105), np.float64(28.9058564349724)]"
stablelm-tuned-alpha-7b,842.6840112823911,10.954165538074847,"[np.float64(-22.162452272019095), np.float64(19.582760569291167)]",30.43960897676234,15.10463260997657,"[np.float64(-28.54031025332475), np.float64(29.091311544255475)]",31.910515414758,13.173544175123745,"[np.float64(-23.862802848734322), np.float64(27.858152527674335)]",18.64425490270281,11.823481786804702,"[np.float64(-24.686758730600662), np.float64(22.170677751903575)]",-13.421141293632596,11.738699349628583,"[np.float64(-20.547251906312447), np.float64(24.034822092926724)]"
