% Score table: one row per agent, an "Overall" column followed by nine
% per-column scores (ALS ... SB). Requires booktabs for the rule commands.
% Bold / underline appear to mark the best / second-best value in each
% column -- TODO confirm against the caption.
% Negative values use `$-$` so they render with a true minus sign rather
% than a text-mode hyphen; the digits stay in the text font.
\begin{tabular}{lcccccccccc}
\toprule
% NOTE(review): the "Score" header spans columns 3-11, while the rule below
% spans 2-11 (it also covers "Overall"). Confirm which grouping is intended.
Agent &  & \multicolumn{9}{c}{Score} \\
\cmidrule(lr){2-11}
 &  Overall & ALS & ARC & AYT & CN & HV & PT & SN & TRB & SB \\
\midrule
random & $-$0.50 & 1.07 & \textbf{0.48} & $-$2.52 & $-$2.67 & $-$1.15 & \underline{0.63} & 0.37 & $-$0.79 & 0.05 \\
% NOTE(review): CN 1.26 here is not underlined although gpt-4-rap's CN 1.26 is
% -- possibly a rounding artefact; verify against the unrounded data.
human & \textbf{1.76} & \underline{1.49} & \underline{0.45} & 1.92 & 1.26 & \textbf{3.63} & \textbf{1.29} & $-$0.89 & \underline{1.70} & \textbf{1.25} \\
gpt-3 & $-$0.48 & 1.26 & $-$0.05 & $-$1.84 & $-$2.06 & \underline{1.27} & \underline{0.63} & $-$0.01 & $-$2.51 & $-$0.41 \\
gpt-3-cot & 0.06 & 0.03 & 0.22 & \underline{2.42} & 0.45 & $-$0.44 & \underline{0.63} & \underline{0.53} & $-$2.76 & 0.26 \\
gpt-4 & $-$0.89 & $-$7.38 & $-$0.12 & $-$2.73 & $-$0.65 & $-$1.31 & $-$4.42 & $-$0.08 & 0.62 & $-$1.40 \\
% NOTE(review): PT 0.63 here is not underlined although every other 0.63 in
% the PT column is -- verify against the unrounded data.
gpt-4-cot & \underline{0.16} & \textbf{2.13} & 0.27 & $-$0.19 & \textbf{2.41} & $-$1.13 & 0.63 & $-$0.53 & 1.22 & \underline{0.62} \\
gpt-4-rap & $-$0.10 & 1.41 & $-$1.25 & \textbf{2.94} & \underline{1.26} & $-$0.86 & \underline{0.63} & \textbf{0.62} & \textbf{2.51} & $-$0.37 \\
\bottomrule
\end{tabular}
