,model,Rating,Rating std,Rating alpha
gpt-4-0125-preview,gpt-4-0125-preview,1668.2679041008353,9.74262783660207,"[np.float64(-11.101927982878124), np.float64(26.095335414199553)]"
gpt-4-1106-preview,gpt-4-1106-preview,1667.299186855474,5.687330617048279,"[np.float64(-3.446296215666507), np.float64(19.43070870311294)]"
gpt-4-0314,gpt-4-0314,1622.325151823427,6.0760285396225635,"[np.float64(-3.6035559032479796), np.float64(20.34240364797938)]"
gpt-4-0613,gpt-4-0613,1605.6112413101805,5.423723360736149,"[np.float64(-3.055911656580065), np.float64(18.88510684178982)]"
qwen1.5-72b-chat,qwen1.5-72b-chat,1572.8481125259202,12.838910465528242,"[np.float64(-18.707492679400957), np.float64(33.52182526368915)]"
claude-1,claude-1,1567.6780126597105,6.088390003178541,"[np.float64(-3.880829232128235), np.float64(20.375636467317918)]"
mistral-medium,mistral-medium,1566.9756553982174,6.472936460312221,"[np.float64(-4.0716156494645475), np.float64(21.30026952859862)]"
gpt-3.5-turbo-0314,gpt-3.5-turbo-0314,1560.8944378590616,10.279128589639049,"[np.float64(-11.329633339008979), np.float64(28.270880339468704)]"
gpt-3.5-turbo-0125,gpt-3.5-turbo-0125,1559.4320692563495,10.401872683733025,"[np.float64(-11.603737111468945), np.float64(28.128257398977212)]"
claude-2.0,claude-2.0,1558.18971692292,7.197165143364224,"[np.float64(-5.707136305076119), np.float64(22.15708345325379)]"
gemini-pro,gemini-pro,1553.0256816742378,8.546283873310006,"[np.float64(-8.128088660424964), np.float64(25.05812259242225)]"
gpt-3.5-turbo-0613,gpt-3.5-turbo-0613,1548.832894036152,5.144148186083941,"[np.float64(-1.479240191459894), np.float64(18.40024469073751)]"
claude-2.1,claude-2.1,1548.4723671812958,5.672322773172603,"[np.float64(-3.2601338978761305), np.float64(18.538206134446455)]"
gemini-pro-dev-api,gemini-pro-dev-api,1542.2286163530853,8.694183128319812,"[np.float64(-8.61462331983148), np.float64(26.11764540420745)]"
mixtral-8x7b-instruct-v0.1,mixtral-8x7b-instruct-v0.1,1537.4983091337076,6.3937115576205334,"[np.float64(-4.844131712673061), np.float64(20.243555187451648)]"
claude-instant-1,claude-instant-1,1537.4299750819453,5.960254483403454,"[np.float64(-3.483948264767605), np.float64(19.474202280390728)]"
gpt-3.5-turbo-1106,gpt-3.5-turbo-1106,1530.251350794438,6.473683770435325,"[np.float64(-3.719769798862444), np.float64(22.042531597173365)]"
yi-34b-chat,yi-34b-chat,1528.696369783194,8.52725427593974,"[np.float64(-9.740323608068138), np.float64(24.59252719924484)]"
wizardlm-70b,wizardlm-70b,1525.4723344094652,8.213914823059927,"[np.float64(-7.12035572426862), np.float64(24.62046515108068)]"
vicuna-33b,vicuna-33b,1522.67744890325,6.186948137286708,"[np.float64(-4.133158732285665), np.float64(20.101395978286064)]"
starling-lm-7b-alpha,starling-lm-7b-alpha,1521.0721027341265,9.772326015373695,"[np.float64(-10.185016173902795), np.float64(28.37261023897895)]"
openchat-3.5,openchat-3.5,1517.5670290531064,8.183199806542486,"[np.float64(-8.437541165000539), np.float64(24.319432193593684)]"
nous-hermes-2-mixtral-8x7b-dpo,nous-hermes-2-mixtral-8x7b-dpo,1515.4272945131872,16.986912303928403,"[np.float64(-27.5427747655624), np.float64(39.72166138791181)]"
openhermes-2.5-mistral-7b,openhermes-2.5-mistral-7b,1511.378423987061,10.119099324742459,"[np.float64(-11.542775275383292), np.float64(27.71701268343213)]"
deepseek-llm-67b-chat,deepseek-llm-67b-chat,1510.5898148709368,10.851773035000164,"[np.float64(-13.815179708112964), np.float64(28.89401545968053)]"
llama-2-70b-chat,llama-2-70b-chat,1510.3338373602583,6.532352092237585,"[np.float64(-4.702743440966287), np.float64(20.69243988437347)]"
pplx-70b-online,pplx-70b-online,1509.5193555662402,8.793377045201597,"[np.float64(-9.861497770657706), np.float64(25.581641164192206)]"
tulu-2-dpo-70b,tulu-2-dpo-70b,1505.1918023446713,8.848273585554733,"[np.float64(-9.397465787198144), np.float64(24.90756612341147)]"
solar-10.7b-instruct-v1.0,solar-10.7b-instruct-v1.0,1502.1191723931427,10.807712562988979,"[np.float64(-12.698422886866183), np.float64(28.279742872301313)]"
openchat-3.5-0106,openchat-3.5-0106,1497.542706227306,19.56224641906781,"[np.float64(-30.392715710869197), np.float64(46.956735984500256)]"
dolphin-2.2.1-mistral-7b,dolphin-2.2.1-mistral-7b,1492.5223002044459,15.983288105618268,"[np.float64(-23.423551016422152), np.float64(39.57078810437429)]"
wizardlm-13b,wizardlm-13b,1491.7335100754583,8.302602335109679,"[np.float64(-8.882205928032363), np.float64(24.127660901247964)]"
llama2-70b-steerlm-chat,llama2-70b-steerlm-chat,1485.5723817619612,11.784523397669746,"[np.float64(-13.66570055997795), np.float64(31.417036962206566)]"
mpt-30b-chat,mpt-30b-chat,1479.618848471302,12.35214110835709,"[np.float64(-16.4056197094003), np.float64(30.688208238604375)]"
mistral-7b-instruct-v0.2,mistral-7b-instruct-v0.2,1474.8081904022258,31.016606437809685,"[np.float64(-48.44341838184505), np.float64(74.1083240951757)]"
codellama-34b-instruct,codellama-34b-instruct,1473.7485363937842,8.825212813927232,"[np.float64(-9.105069901483148), np.float64(25.528021066671954)]"
vicuna-13b,vicuna-13b,1472.4246076684826,6.631036469578669,"[np.float64(-4.421181487366084), np.float64(21.267935806016794)]"
falcon-180b-chat,falcon-180b-chat,1469.0926233244268,18.1280897404655,"[np.float64(-27.914493474908568), np.float64(41.24166507775885)]"
zephyr-7b-beta,zephyr-7b-beta,1467.8876084594506,6.93951917642739,"[np.float64(-5.056406960489312), np.float64(21.975959334629806)]"
pplx-7b-online,pplx-7b-online,1466.801825632188,9.453606961427273,"[np.float64(-10.56469584413935), np.float64(26.190883953361435)]"
qwen-14b-chat,qwen-14b-chat,1460.3228153454813,9.650001385346192,"[np.float64(-10.061203770790598), np.float64(26.47477719211861)]"
palm-2,palm-2,1459.772776547567,8.114118819780595,"[np.float64(-7.484256041063645), np.float64(23.881551745917022)]"
zephyr-7b-alpha,zephyr-7b-alpha,1459.3811217556672,14.457264679504794,"[np.float64(-18.335982034959898), np.float64(38.54952779852874)]"
stripedhyena-nous-7b,stripedhyena-nous-7b,1458.966648426012,10.588145363024104,"[np.float64(-12.30299854702298), np.float64(29.519959784507364)]"
guanaco-33b,guanaco-33b,1458.9353046163271,11.68857930119704,"[np.float64(-15.234578283301516), np.float64(31.2829564809972)]"
qwen1.5-7b-chat,qwen1.5-7b-chat,1457.5618006836612,20.03485747420952,"[np.float64(-29.917304702710453), np.float64(47.141455953532386)]"
llama-2-7b-chat,llama-2-7b-chat,1454.571754408712,7.834950721492339,"[np.float64(-7.406014166304885), np.float64(23.478951127166283)]"
llama-2-13b-chat,llama-2-13b-chat,1454.286642756105,6.947301317278022,"[np.float64(-5.0036181912876145), np.float64(21.510272843951725)]"
qwen1.5-4b-chat,qwen1.5-4b-chat,1446.858049204288,19.869956305915235,"[np.float64(-31.53553134667186), np.float64(47.77588333307426)]"
mistral-7b-instruct,mistral-7b-instruct,1439.5194038131415,8.161864629793191,"[np.float64(-8.983152828982384), np.float64(23.612989770567538)]"
vicuna-7b,vicuna-7b,1432.0268657092977,8.364592817024278,"[np.float64(-9.46018727360729), np.float64(23.773215276379915)]"
koala-13b,koala-13b,1410.6614463362075,8.935375826976337,"[np.float64(-8.480377131250634), np.float64(26.654071849969796)]"
chatglm3-6b,chatglm3-6b,1394.1911432488419,10.749773271042324,"[np.float64(-11.530848308329496), np.float64(29.576657892522917)]"
gpt4all-13b-snoozy,gpt4all-13b-snoozy,1388.006085891246,15.027676332343681,"[np.float64(-18.73452920112709), np.float64(37.89364667531936)]"
alpaca-13b,alpaca-13b,1387.1878462245686,9.545763595615359,"[np.float64(-10.359980250583249), np.float64(27.33652798689809)]"
mpt-7b-chat,mpt-7b-chat,1370.8946717334281,10.700157996719462,"[np.float64(-10.563705466268402), np.float64(29.737528496046025)]"
RWKV-4-Raven-14B,RWKV-4-Raven-14B,1350.275955656904,10.211882561858118,"[np.float64(-12.029796608876723), np.float64(28.735445554479384)]"
chatglm2-6b,chatglm2-6b,1346.8767747271547,13.578870605453849,"[np.float64(-17.128962503891216), np.float64(34.486741186843574)]"
oasst-pythia-12b,oasst-pythia-12b,1341.768472788215,9.32329123821869,"[np.float64(-8.800727014309587), np.float64(26.191352137469266)]"
fastchat-t5-3b,fastchat-t5-3b,1317.1309589281313,10.801275800266588,"[np.float64(-11.80078506865243), np.float64(29.51931160634217)]"
chatglm-6b,chatglm-6b,1288.0665657139111,10.396300844250328,"[np.float64(-11.665897206946738), np.float64(28.409930173224666)]"
dolly-v2-12b,dolly-v2-12b,1279.8573304314034,11.951429814747016,"[np.float64(-14.120021380040043), np.float64(31.387554106089283)]"
llama-13b,llama-13b,1279.1440703571727,14.346960462910667,"[np.float64(-19.582200519172375), np.float64(37.58553325549565)]"
stablelm-tuned-alpha-7b,stablelm-tuned-alpha-7b,1260.711240585445,12.683557792851508,"[np.float64(-17.475303906891895), np.float64(32.99078141871587)]"
