,win_rate,standard_error,mode,avg_length,n_wins,n_wins_base,n_draws,n_total,discrete_win_rate,length_controlled_winrate
gpt4_1106_preview,50.0,0.0,minimal,2049,0,0,805,805,50.0,50.0
claude-3-opus-20240229,32.94723294723295,1.684820086378744,minimal,1388,255,520,2,777,32.94723294723295,47.450744462524334
mistral-large-2402,28.045515394912982,1.6433528001064484,minimal,1362,209,537,1,747,28.045515394912982,45.47989179343149
gpt4_0314,25.32383419689119,1.559413543118163,minimal,1371,193,574,5,772,25.32383419689119,40.88989260514661
gpt4_0613,18.14044213263979,1.3859486666304015,verified,1140,138,628,3,769,18.14044213263979,34.303198292783584
gpt-3.5-turbo-1106,11.558441558441558,1.1456138845300945,minimal,796,87,679,4,770,11.558441558441558,28.929334810369056
