,win_rate,standard_error,n_wins,n_wins_base,n_draws,n_total,discrete_win_rate,mode,avg_length
gpt4_turbo,50.0,0.0,0,0,805,805,50.0,minimal,2049
Yi-34B-Chat,34.84472049689441,1.6792603475419352,280,524,1,805,34.84472049689441,minimal,2123
gpt4,25.093167701863354,1.521412738876177,199,600,6,805,25.093167701863354,minimal,1365
Mixtral-8x7B-Instruct-v0.1,22.795031055900623,1.4781930926858895,183,621,1,805,22.795031055900623,minimal,1465
gemini-pro,20.372670807453417,1.4150044409806857,162,639,4,805,20.372670807453417,minimal,1315
Mistral-7B-Instruct-v0.2,19.627329192546583,1.4007375757958205,158,647,0,805,19.627329192546583,minimal,1676
cohere,19.25465838509317,1.3905884653013636,155,650,0,805,19.25465838509317,minimal,1983
tulu-2-dpo-70b,17.950310559006212,1.3520354181685712,144,660,1,805,17.950310559006212,minimal,1418
llama-2-70b-chat-hf,17.018633540372672,1.325331774127577,137,668,0,805,17.018633540372672,minimal,1790
claude-2.1,15.031055900621118,1.2572979835605944,120,683,2,805,15.031055900621118,minimal,1096
