,model,Rating,Rating std,Rating alpha
gpt-4-0125-preview,gpt-4-0125-preview,1668.266853448802,9.742611115275245,"[np.float64(-11.101648657773922), np.float64(26.095184037905938)]"
gpt-4-1106-preview,gpt-4-1106-preview,1667.2978262864967,5.687309911849063,"[np.float64(-3.4458019283038084), np.float64(19.429356651589615)]"
gpt-4-0314,gpt-4-0314,1622.3248280259106,6.076024870285936,"[np.float64(-3.603888434741748), np.float64(20.342074784943406)]"
gpt-4-0613,gpt-4-0613,1605.6107183985216,5.423708079930035,"[np.float64(-3.0571839002959678), np.float64(18.883273755423716)]"
qwen1.5-72b-chat,qwen1.5-72b-chat,1572.8475645744813,12.838945897804303,"[np.float64(-18.709511328725966), np.float64(33.52128465919941)]"
claude-1,claude-1,1567.6774389765176,6.088368446794718,"[np.float64(-3.8815200942540287), np.float64(20.37504160024264)]"
mistral-medium,mistral-medium,1566.974588363003,6.472944407213421,"[np.float64(-4.073005658747661), np.float64(21.30612077668843)]"
gpt-3.5-turbo-0314,gpt-3.5-turbo-0314,1560.895402013349,10.279048987145847,"[np.float64(-11.328669184721548), np.float64(28.27185826302116)]"
gpt-3.5-turbo-0125,gpt-3.5-turbo-0125,1559.4321473348039,10.4018414209795,"[np.float64(-11.604062883307051), np.float64(28.128303693399857)]"
claude-2.0,claude-2.0,1558.188912898808,7.197141139715028,"[np.float64(-5.7079403291882045), np.float64(22.156033575034144)]"
gemini-pro,gemini-pro,1553.0256711981058,8.546266357446251,"[np.float64(-8.128108760886334), np.float64(25.05662543585595)]"
gpt-3.5-turbo-0613,gpt-3.5-turbo-0613,1548.8324718771805,5.144128689054424,"[np.float64(-1.4796802730193122), np.float64(18.39982253176595)]"
claude-2.1,claude-2.1,1548.4718674071464,5.67233924737744,"[np.float64(-3.2600303984006587), np.float64(18.5377037593953)]"
gemini-pro-dev-api,gemini-pro-dev-api,1542.2280761527459,8.694187575042804,"[np.float64(-8.615179750999232), np.float64(26.117170254366783)]"
mixtral-8x7b-instruct-v0.1,mixtral-8x7b-instruct-v0.1,1537.4975793547096,6.393717233306951,"[np.float64(-4.8454911095118405), np.float64(20.242417602187743)]"
claude-instant-1,claude-instant-1,1537.4296363985138,5.9602469360574695,"[np.float64(-3.483885527792154), np.float64(19.473274340987928)]"
gpt-3.5-turbo-1106,gpt-3.5-turbo-1106,1530.2514208505393,6.473683838855401,"[np.float64(-3.722387922116013), np.float64(22.04256719776413)]"
yi-34b-chat,yi-34b-chat,1528.6952989295166,8.527223689161849,"[np.float64(-9.741394461745585), np.float64(24.592009503816143)]"
wizardlm-70b,wizardlm-70b,1525.4715558987612,8.213901530671318,"[np.float64(-7.12003659029142), np.float64(24.62032742904489)]"
vicuna-33b,vicuna-33b,1522.6770039996998,6.186924107998561,"[np.float64(-4.133856812201657), np.float64(20.101759920283257)]"
starling-lm-7b-alpha,starling-lm-7b-alpha,1521.071173440691,9.772346454929968,"[np.float64(-10.18595354089257), np.float64(28.37156423708916)]"
openchat-3.5,openchat-3.5,1517.5669508465344,8.183162176686416,"[np.float64(-8.437649870457335), np.float64(24.316674789558874)]"
nous-hermes-2-mixtral-8x7b-dpo,nous-hermes-2-mixtral-8x7b-dpo,1515.4262999271778,16.986907399510894,"[np.float64(-27.54425239777811), np.float64(39.72067784341334)]"
openhermes-2.5-mistral-7b,openhermes-2.5-mistral-7b,1511.3779323655763,10.119105320691428,"[np.float64(-11.54326689686809), np.float64(27.719738807378917)]"
deepseek-llm-67b-chat,deepseek-llm-67b-chat,1510.58924970859,10.851752613067164,"[np.float64(-13.816485537628296), np.float64(28.89345029733363)]"
llama-2-70b-chat,llama-2-70b-chat,1510.3357595505404,6.532250241365147,"[np.float64(-4.700821250684157), np.float64(20.691950017213912)]"
pplx-70b-online,pplx-70b-online,1509.518817693548,8.793338369711691,"[np.float64(-9.862575363169753), np.float64(25.580016624448035)]"
tulu-2-dpo-70b,tulu-2-dpo-70b,1505.1908164710767,8.8482854367726,"[np.float64(-9.398439812940524), np.float64(24.907347983276622)]"
solar-10.7b-instruct-v1.0,solar-10.7b-instruct-v1.0,1502.1188279700116,10.807710099245071,"[np.float64(-12.699634795442762), np.float64(28.278527903577924)]"
openchat-3.5-0106,openchat-3.5-0106,1497.5425516582288,19.56230681123969,"[np.float64(-30.394226878868494), np.float64(46.955487817840094)]"
dolphin-2.2.1-mistral-7b,dolphin-2.2.1-mistral-7b,1492.5216132329413,15.9832791851144,"[np.float64(-23.424493208698777), np.float64(39.56949559946929)]"
wizardlm-13b,wizardlm-13b,1491.732768921781,8.302627214266929,"[np.float64(-8.882271267278156), np.float64(24.131850445108057)]"
llama2-70b-steerlm-chat,llama2-70b-steerlm-chat,1485.5713457019972,11.78450957811821,"[np.float64(-13.666397241963978), np.float64(31.41601961340689)]"
mpt-30b-chat,mpt-30b-chat,1479.60158428111,12.351207698861487,"[np.float64(-16.390196605969095), np.float64(30.67658393039278)]"
mistral-7b-instruct-v0.2,mistral-7b-instruct-v0.2,1474.807662188122,31.016684140271476,"[np.float64(-48.44555738687245), np.float64(74.10779588107198)]"
codellama-34b-instruct,codellama-34b-instruct,1473.7478152984588,8.825200931815765,"[np.float64(-9.10621155674039), np.float64(25.52734910361687)]"
vicuna-13b,vicuna-13b,1472.4245925783484,6.631011069460417,"[np.float64(-4.421097349142201), np.float64(21.26789406177977)]"
falcon-180b-chat,falcon-180b-chat,1469.0924724595177,18.12809022917895,"[np.float64(-27.91464433981764), np.float64(41.24150494562764)]"
zephyr-7b-beta,zephyr-7b-beta,1467.8867409785933,6.939529590443485,"[np.float64(-5.057274441346635), np.float64(21.975151374736924)]"
pplx-7b-online,pplx-7b-online,1466.8012251873308,9.453638381188108,"[np.float64(-10.565422578169091), np.float64(26.190204772811285)]"
qwen-14b-chat,qwen-14b-chat,1460.3228019868334,9.649965008043836,"[np.float64(-10.062315261548292), np.float64(26.473607812749833)]"
palm-2,palm-2,1459.7727301414466,8.114115294143032,"[np.float64(-7.484635999294824), np.float64(23.88128127405662)]"
zephyr-7b-alpha,zephyr-7b-alpha,1459.3805909282726,14.457295292429055,"[np.float64(-18.336391533078995), np.float64(38.549245204772205)]"
stripedhyena-nous-7b,stripedhyena-nous-7b,1458.9661752405293,10.588164763325313,"[np.float64(-12.304604466400633), np.float64(29.5194754156621)]"
guanaco-33b,guanaco-33b,1458.9343687361259,11.688559229575576,"[np.float64(-15.235423124847102), np.float64(31.283423451906174)]"
qwen1.5-7b-chat,qwen1.5-7b-chat,1457.561250933198,20.03486980506103,"[np.float64(-29.91799732975437), np.float64(47.14084823521239)]"
llama-2-7b-chat,llama-2-7b-chat,1454.5705425007388,7.8349390145129085,"[np.float64(-7.406340253268354), np.float64(23.477733812072074)]"
llama-2-13b-chat,llama-2-13b-chat,1454.285721802181,6.947277872586048,"[np.float64(-5.002809670043234), np.float64(21.511055351198593)]"
qwen1.5-4b-chat,qwen1.5-4b-chat,1446.8581224186453,19.86988849622011,"[np.float64(-31.53728227326542), np.float64(47.77404074604692)]"
mistral-7b-instruct,mistral-7b-instruct,1439.519249909337,8.161864526284923,"[np.float64(-8.983042981531526), np.float64(23.61218216631937)]"
vicuna-7b,vicuna-7b,1432.026543355833,8.3645800423771,"[np.float64(-9.45988266348013), np.float64(23.772491294414067)]"
koala-13b,koala-13b,1410.6617674168663,8.9352587803064,"[np.float64(-8.478639563308207), np.float64(26.65735685492814)]"
chatglm3-6b,chatglm3-6b,1394.1908312220808,10.749791666672516,"[np.float64(-11.53116033509059), np.float64(29.574957189744282)]"
gpt4all-13b-snoozy,gpt4all-13b-snoozy,1388.0060766391553,15.027731289931053,"[np.float64(-18.733703440738736), np.float64(37.89363742322871)]"
alpaca-13b,alpaca-13b,1387.1892588052706,9.54569274241526,"[np.float64(-10.361701752674207), np.float64(27.337323992405572)]"
mpt-7b-chat,mpt-7b-chat,1370.8948101137719,10.70012254626444,"[np.float64(-10.563522848532784), np.float64(29.73767375494458)]"
RWKV-4-Raven-14B,RWKV-4-Raven-14B,1350.2759191585967,10.211746838474282,"[np.float64(-12.029811949515079), np.float64(28.735315778177437)]"
chatglm2-6b,chatglm2-6b,1346.876489982551,13.578885101187701,"[np.float64(-17.130862613009867), np.float64(34.48645644223984)]"
oasst-pythia-12b,oasst-pythia-12b,1341.76889287809,9.323265204031525,"[np.float64(-8.799980552110128), np.float64(26.21169213782582)]"
fastchat-t5-3b,fastchat-t5-3b,1317.1309086517501,10.801279030481012,"[np.float64(-11.80006091691962), np.float64(29.519272626344673)]"
chatglm-6b,chatglm-6b,1288.066657017766,10.39644263713202,"[np.float64(-11.664517533079106), np.float64(28.41006483915453)]"
dolly-v2-12b,dolly-v2-12b,1279.8589385167888,11.951306861012759,"[np.float64(-14.120429825477686), np.float64(31.386184261959215)]"
llama-13b,llama-13b,1279.1455952787353,14.346954392239024,"[np.float64(-19.582666797266484), np.float64(37.58695529874035)]"
stablelm-tuned-alpha-7b,stablelm-tuned-alpha-7b,1260.7114316290686,12.68355031583801,"[np.float64(-17.47468928311082), np.float64(32.99101774790893)]"
