,win_rate,standard_error,n_wins,n_wins_base,n_draws,n_total,discrete_win_rate,mode,avg_length
gpt4_turbo,50.0,0.0,0.0,0.0,805.0,805.0,50.0,minimal,2049.0
Yi-34B-Chat,35.3416149068323,1.684737662232649,284.0,520.0,1.0,805.0,35.3416149068323,minimal,2123.0
gpt4,20.0,1.4024527464425107,158.0,641.0,6.0,805.0,20.0,minimal,1365.0
Mixtral-8x7B-Instruct-v0.1,19.937888198757765,1.4076743478646596,160.0,644.0,1.0,805.0,19.937888198757765,minimal,1465.0
cohere,17.267080745341616,1.3329707204687145,139.0,666.0,0.0,805.0,17.267080745341616,minimal,1983.0
gemini-pro,17.039800995024876,1.320962077510413,135.0,665.0,4.0,804.0,17.039800995024876,minimal,1315.0
tulu-2-dpo-70b,16.956521739130434,1.3219459096391208,136.0,668.0,1.0,805.0,16.956521739130434,minimal,1418.0
Mistral-7B-Instruct-v0.2,15.527950310559005,1.2772783712147573,125.0,680.0,0.0,805.0,15.527950310559005,minimal,1676.0
llama-2-70b-chat-hf,15.15527950310559,1.2646383931934608,122.0,683.0,0.0,805.0,15.15527950310559,minimal,1790.0
vicuna-33b-v1.3,13.354037267080743,1.1948045933988172,106.0,696.0,3.0,805.0,13.354037267080743,minimal,1479.0
claude-2.1,12.919254658385093,1.1796402736033835,103.0,700.0,2.0,805.0,12.919254658385093,minimal,1096.0
alpaca-7b,2.422360248447205,0.5314153465076921,18.0,784.0,3.0,805.0,2.422360248447205,minimal,396.0
