\begin{tabular}{llllllllllllllllllllllllllllrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr}
\toprule
game & \multicolumn{3}{r}{sea\_battle} & \multicolumn{3}{r}{two\_rooms\_and\_a\_boom} & \multicolumn{3}{r}{are\_you\_the\_traitor} & \multicolumn{3}{r}{air\_land\_sea} & \multicolumn{3}{r}{santorini} & \multicolumn{3}{r}{hive} & \multicolumn{3}{r}{pit} & \multicolumn{3}{r}{arctic\_scavengers} & \multicolumn{3}{r}{codenames} & gpt4 & gpt4-cot & random & gpt3 & gpt3-cot & gpt3 & random & gpt4 & gpt3-cot & gpt4-cot & rap & gpt3 & random & gpt4 & gpt3-cot & gpt4-cot & rap & gpt3 & random & gpt4 & gpt3-cot & gpt4-cot & gpt3 & random & gpt4 & gpt3-cot & gpt4-cot & rap & gpt3 & random & gpt4 & gpt3-cot & gpt4-cot & gpt3 & random & gpt4 & gpt4-cot & gpt3-cot & rap & gpt3 & random & gpt3-cot & gpt4 & gpt4-cot & rap & gpt3 & random & gpt3-cot & gpt4-cot & gpt4 & rap \\
metric & \# matches & P(win) & rating & \# matches & P(win) & rating & \# matches & P(win) & rating & \# matches & P(win) & rating & \# matches & P(win) & rating & \# matches & P(win) & rating & \# matches & P(win) & rating & \# matches & P(win) & rating & \# matches & P(win) & rating & ('sea\_battle', '\# matches') & ('sea\_battle', '\# matches') & ('sea\_battle', '\# matches') & ('sea\_battle', '\# matches') & ('sea\_battle', '\# matches') & ('two\_rooms\_and\_a\_boom', '\# matches') & ('two\_rooms\_and\_a\_boom', '\# matches') & ('two\_rooms\_and\_a\_boom', '\# matches') & ('two\_rooms\_and\_a\_boom', '\# matches') & ('two\_rooms\_and\_a\_boom', '\# matches') & ('two\_rooms\_and\_a\_boom', '\# matches') & ('are\_you\_the\_traitor', '\# matches') & ('are\_you\_the\_traitor', '\# matches') & ('are\_you\_the\_traitor', '\# matches') & ('are\_you\_the\_traitor', '\# matches') & ('are\_you\_the\_traitor', '\# matches') & ('are\_you\_the\_traitor', '\# matches') & ('air\_land\_sea', '\# matches') & ('air\_land\_sea', '\# matches') & ('air\_land\_sea', '\# matches') & ('air\_land\_sea', '\# matches') & ('air\_land\_sea', '\# matches') & ('santorini', '\# matches') & ('santorini', '\# matches') & ('santorini', '\# matches') & ('santorini', '\# matches') & ('santorini', '\# matches') & ('santorini', '\# matches') & ('hive', '\# matches') & ('hive', '\# matches') & ('hive', '\# matches') & ('hive', '\# matches') & ('hive', '\# matches') & ('pit', '\# matches') & ('pit', '\# matches') & ('pit', '\# matches') & ('pit', '\# matches') & ('pit', '\# matches') & ('pit', '\# matches') & ('arctic\_scavengers', '\# matches') & ('arctic\_scavengers', '\# matches') & ('arctic\_scavengers', '\# matches') & ('arctic\_scavengers', '\# matches') & ('arctic\_scavengers', '\# matches') & ('arctic\_scavengers', '\# matches') & ('codenames', '\# matches') & ('codenames', '\# matches') & ('codenames', '\# matches') & ('codenames', '\# matches') & ('codenames', '\# matches') & ('codenames', '\# matches') \\
\midrule
random & NaN & 0.176086 & 1.649150 & NaN & 0.233609 & 0.362798 & NaN & 0.001342 & -2.027480 & NaN & 0.000453 & -2.930012 & NaN & 0.102551 & -0.299480 & NaN & 0.166667 & 0.000000 & NaN & 0.196763 & 0.363366 & NaN & 0.002919 & -0.918040 & NaN & 0.184672 & 0.177621 & 23 & 16 & 25 & 14 & 14 & 14 & 25 & 12 & 14 & 12 & 3 & 14 & 24 & 12 & 14 & 11 & 3 & 5 & 18 & 5 & 5 & 3 & 5 & 22 & 6 & 5 & 4 & 2 & 5 & 18 & 5 & 5 & 3 & 6 & 16 & 3 & 3 & 3 & 1 & 11 & 25 & 6 & 7 & 4 & 1 & 14 & 23 & 14 & 11 & 11 & 3 \\
gpt3 & NaN & 0.218267 & 1.863896 & NaN & 0.138223 & -0.161983 & NaN & 0.002320 & -1.480240 & NaN & 0.000758 & -2.415343 & NaN & 0.441656 & 1.160695 & NaN & 0.166667 & 0.000000 & NaN & 0.130748 & -0.045360 & NaN & 0.000836 & -2.168174 & NaN & 0.126982 & -0.196915 & 23 & 16 & 25 & 14 & 14 & 14 & 25 & 12 & 14 & 12 & 3 & 14 & 24 & 12 & 14 & 11 & 3 & 5 & 18 & 5 & 5 & 3 & 5 & 22 & 6 & 5 & 4 & 2 & 5 & 18 & 5 & 5 & 3 & 6 & 16 & 3 & 3 & 3 & 1 & 11 & 25 & 6 & 7 & 4 & 1 & 14 & 23 & 14 & 11 & 11 & 3 \\
gpt3-cot & NaN & 0.082575 & 0.891879 & NaN & 0.174814 & 0.072871 & NaN & 0.038395 & 1.326045 & NaN & 0.003553 & -0.870179 & NaN & 0.156933 & 0.125980 & NaN & 0.166667 & 0.000000 & NaN & 0.239953 & 0.561812 & NaN & 0.000937 & -2.054731 & NaN & 0.214074 & 0.325360 & 23 & 16 & 25 & 14 & 14 & 14 & 25 & 12 & 14 & 12 & 3 & 14 & 24 & 12 & 14 & 11 & 3 & 5 & 18 & 5 & 5 & 3 & 5 & 22 & 6 & 5 & 4 & 2 & 5 & 18 & 5 & 5 & 3 & 6 & 16 & 3 & 3 & 3 & 1 & 11 & 25 & 6 & 7 & 4 & 1 & 14 & 23 & 14 & 11 & 11 & 3 \\
gpt4 & NaN & 0.000008 & -8.361116 & NaN & 0.135981 & -0.178332 & NaN & 0.001284 & -2.072191 & NaN & 0.002218 & -1.341625 & NaN & 0.102559 & -0.299395 & NaN & 0.166667 & 0.000000 & NaN & 0.048227 & -1.042721 & NaN & 0.007509 & 0.026857 & NaN & 0.124711 & -0.214965 & 23 & 16 & 25 & 14 & 14 & 14 & 25 & 12 & 14 & 12 & 3 & 14 & 24 & 12 & 14 & 11 & 3 & 5 & 18 & 5 & 5 & 3 & 5 & 22 & 6 & 5 & 4 & 2 & 5 & 18 & 5 & 5 & 3 & 6 & 16 & 3 & 3 & 3 & 1 & 11 & 25 & 6 & 7 & 4 & 1 & 14 & 23 & 14 & 11 & 11 & 3 \\
gpt4-cot & NaN & 0.353913 & 2.347226 & NaN & 0.192870 & 0.171164 & NaN & 0.007708 & -0.279596 & NaN & 0.826306 & 4.578925 & NaN & 0.101455 & -0.310225 & NaN & 0.166667 & 0.000000 & NaN & 0.314193 & 0.831374 & NaN & 0.009082 & 0.217097 & NaN & 0.081384 & -0.641785 & 23 & 16 & 25 & 14 & 14 & 14 & 25 & 12 & 14 & 12 & 3 & 14 & 24 & 12 & 14 & 11 & 3 & 5 & 18 & 5 & 5 & 3 & 5 & 22 & 6 & 5 & 4 & 2 & 5 & 18 & 5 & 5 & 3 & 6 & 16 & 3 & 3 & 3 & 1 & 11 & 25 & 6 & 7 & 4 & 1 & 14 & 23 & 14 & 11 & 11 & 3 \\
rap & NaN & 0.169151 & 1.608964 & NaN & 0.124504 & -0.266517 & NaN & 0.948950 & 4.533463 & NaN & 0.166713 & 2.978233 & NaN & 0.094847 & -0.377575 & NaN & 0.166667 & 0.000000 & NaN & 0.070117 & -0.668472 & NaN & 0.978717 & 4.896990 & NaN & 0.268177 & 0.550684 & 23 & 16 & 25 & 14 & 14 & 14 & 25 & 12 & 14 & 12 & 3 & 14 & 24 & 12 & 14 & 11 & 3 & 5 & 18 & 5 & 5 & 3 & 5 & 22 & 6 & 5 & 4 & 2 & 5 & 18 & 5 & 5 & 3 & 6 & 16 & 3 & 3 & 3 & 1 & 11 & 25 & 6 & 7 & 4 & 1 & 14 & 23 & 14 & 11 & 11 & 3 \\
\bottomrule
\end{tabular}
