,model,Rating,Rating std,Rating alpha
openai/gpt-4-turbo-2024-04-09,openai/gpt-4-turbo-2024-04-09,1594.5247913482167,3.430161039649843,"[np.float64(-7.858821387663511), np.float64(4.871098422353725)]"
openai/gpt-4o-2024-05-13,openai/gpt-4o-2024-05-13,1592.8181932157677,4.95735029764325,"[np.float64(-11.460505869222288), np.float64(7.798930973806364)]"
google/gemini-1.5-pro,google/gemini-1.5-pro,1588.0672149948186,4.962916840764088,"[np.float64(-11.234572278487349), np.float64(7.970255765240381)]"
anthropic/claude-3-opus-20240229,anthropic/claude-3-opus-20240229,1579.6573652859406,4.727061303251267,"[np.float64(-10.359807506360994), np.float64(7.9601635221577)]"
openai/gpt-4-0125-preview,openai/gpt-4-0125-preview,1572.3195921618594,4.763782012763351,"[np.float64(-10.928272151111742), np.float64(7.43017887566225)]"
meta-llama/Meta-Llama-3-70B-Instruct,meta-llama/Meta-Llama-3-70B-Instruct,1570.8551322317987,4.695234050176468,"[np.float64(-10.249474937170817), np.float64(7.518438347651681)]"
reka/reka-core-20240501,reka/reka-core-20240501,1559.1182898899358,4.649979342220784,"[np.float64(-10.676402743407607), np.float64(7.547206166244678)]"
google/gemini-1.5-flash,google/gemini-1.5-flash,1554.288253980753,4.846377424101134,"[np.float64(-10.521028625134704), np.float64(7.9613229285751)]"
deepseek/deepseekv2-chat,deepseek/deepseekv2-chat,1551.6646855189836,4.792311888956928,"[np.float64(-10.483469043145533), np.float64(8.025112235130564)]"
yi/yi-large,yi/yi-large,1550.7465619059044,4.680064018444073,"[np.float64(-10.261134952641214), np.float64(7.643439715758859)]"
anthropic/claude-3-sonnet-20240229,anthropic/claude-3-sonnet-20240229,1547.2828381964262,4.712359084008433,"[np.float64(-10.545100584544343), np.float64(7.488754646839652)]"
princeton-nlp/Llama-3-Instruct-8B-SimPO,princeton-nlp/Llama-3-Instruct-8B-SimPO,1546.4873465215296,5.021249581544172,"[np.float64(-10.731768286287206), np.float64(8.74471264664703)]"
chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO,chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO,1546.3270356903454,4.567396501852726,"[np.float64(-10.116235601604785), np.float64(7.008534575200201)]"
Qwen/Qwen1.5-72B-Chat,Qwen/Qwen1.5-72B-Chat,1545.246858565586,3.99902393443602,"[np.float64(-9.066922890113801), np.float64(6.034445731804453)]"
01-ai/Yi-1.5-34B-Chat,01-ai/Yi-1.5-34B-Chat,1538.3149981585238,4.824583756868101,"[np.float64(-10.603343560326948), np.float64(7.664372805489165)]"
Qwen/Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct,1529.9723085174096,4.881702881179248,"[np.float64(-11.300072762789114), np.float64(7.676875330645316)]"
cohere/command-r-plus,cohere/command-r-plus,1515.725748293007,4.750301919072276,"[np.float64(-10.760313936721786), np.float64(7.931170973949065)]"
anthropic/claude-3-haiku-20240307,anthropic/claude-3-haiku-20240307,1513.7981027943465,3.4231075460365865,"[np.float64(-7.773048670902426), np.float64(4.956895447443912)]"
mistral/mistral-large-2402,mistral/mistral-large-2402,1510.9915845384844,4.852950980124614,"[np.float64(-10.198503648511632), np.float64(8.000995879830043)]"
reka/reka-flash-20240226,reka/reka-flash-20240226,1507.5686116267113,4.659071448259708,"[np.float64(-10.434088895925925), np.float64(7.619084128386021)]"
01-ai/Yi-1.5-9B-Chat,01-ai/Yi-1.5-9B-Chat,1497.893485343854,4.676045656889862,"[np.float64(-10.674626795603672), np.float64(7.57898664437721)]"
chujiezheng/Starling-LM-7B-beta-ExPO,chujiezheng/Starling-LM-7B-beta-ExPO,1488.3754819600995,4.931997936599894,"[np.float64(-10.73056089063516), np.float64(8.826559336652053)]"
ZhangShenao/SELM-Zephyr-7B-iter-3,ZhangShenao/SELM-Zephyr-7B-iter-3,1487.7729361885415,4.9249898837927875,"[np.float64(-10.736519846059764), np.float64(7.614241601013873)]"
mistralai/Mixtral-8x7B-Instruct-v0.1,mistralai/Mixtral-8x7B-Instruct-v0.1,1480.668635403707,4.815048879130478,"[np.float64(-10.400890630576896), np.float64(7.655414989596238)]"
Nexusflow/Starling-LM-7B-beta,Nexusflow/Starling-LM-7B-beta,1480.3572920572428,4.89113818541262,"[np.float64(-10.81218792960226), np.float64(7.525839639263268)]"
cohere/command-r,cohere/command-r,1479.9252297236903,4.718815965422528,"[np.float64(-10.835782698585035), np.float64(7.321326409646417)]"
meta-llama/Meta-Llama-3-8B-Instruct,meta-llama/Meta-Llama-3-8B-Instruct,1475.478186462899,4.8663078796420285,"[np.float64(-10.621499527110927), np.float64(8.018269345892804)]"
databricks/dbrx-instruct@together,databricks/dbrx-instruct@together,1474.8613862728928,5.020961643010085,"[np.float64(-11.042734175390024), np.float64(8.195819131083454)]"
openai/gpt-3.5-turbo-0125,openai/gpt-3.5-turbo-0125,1466.3840111885224,5.18856172080758,"[np.float64(-11.352634145680213), np.float64(8.576694814213852)]"
NousResearch/Hermes-2-Theta-Llama-3-8B,NousResearch/Hermes-2-Theta-Llama-3-8B,1465.1632342419462,4.958839993188357,"[np.float64(-10.683414223123464), np.float64(8.412366547514921)]"
NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO,NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO,1457.9597280760077,5.129604413480994,"[np.float64(-11.302718753251384), np.float64(9.183898355088104)]"
allenai/tulu-2-dpo-70b,allenai/tulu-2-dpo-70b,1453.559596674668,4.970961765495117,"[np.float64(-11.202257944873963), np.float64(7.859395913500975)]"
mistralai/Mistral-7B-Instruct-v0.2,mistralai/Mistral-7B-Instruct-v0.2,1453.2765381854456,5.019888092288981,"[np.float64(-11.41651531783259), np.float64(8.027900787862336)]"
Qwen/Qwen1.5-7B-Chat@together,Qwen/Qwen1.5-7B-Chat@together,1445.6749093283509,5.056587903772635,"[np.float64(-10.89441828053009), np.float64(8.708564020271979)]"
reka/reka-edge,reka/reka-edge,1444.8384856206542,5.039245059019077,"[np.float64(-11.049103704272511), np.float64(8.35156585429695)]"
m-a-p/neo_7b_instruct_v0.1,m-a-p/neo_7b_instruct_v0.1,1436.9002757832477,5.0203280344222065,"[np.float64(-11.482700245778688), np.float64(8.344851054290302)]"
microsoft/Phi-3-medium-128k-instruct,microsoft/Phi-3-medium-128k-instruct,1436.852942878807,5.1571030448576405,"[np.float64(-11.489734963732417), np.float64(8.13038810557623)]"
meta-llama/Llama-2-70b-chat-hf,meta-llama/Llama-2-70b-chat-hf,1430.2171129022881,3.510656976807257,"[np.float64(-7.6029506190720895), np.float64(5.427618480948013)]"
microsoft/Phi-3-mini-128k-instruct,microsoft/Phi-3-mini-128k-instruct,1427.1500361658639,5.368643212322316,"[np.float64(-11.486063887382215), np.float64(9.566752235973809)]"
01-ai/Yi-1.5-6B-Chat,01-ai/Yi-1.5-6B-Chat,1424.9267733667284,5.236511827644796,"[np.float64(-11.667525326942268), np.float64(8.913069553414061)]"
Magpie-Align/Llama-3-8B-Magpie-Pro-SFT-v0.1,Magpie-Align/Llama-3-8B-Magpie-Pro-SFT-v0.1,1412.3133559902913,7.536508069302527,"[np.float64(-15.805794569665977), np.float64(12.433150795916163)]"
google/gemma-7b-it,google/gemma-7b-it,1369.6687078012028,5.614163503718943,"[np.float64(-11.74912157587596), np.float64(9.596564698272005)]"
meta-llama/Llama-2-7b-chat-hf,meta-llama/Llama-2-7b-chat-hf,1367.403284379639,5.207098291641338,"[np.float64(-10.84077802975753), np.float64(8.625766230730505)]"
google/gemma-2b-it,google/gemma-2b-it,1292.8253343953236,6.438953230088211,"[np.float64(-13.667954457092492), np.float64(11.594502806642367)]"
