% Score table: one row per agent; an "Overall" column followed by nine
% further score columns (11 columns total: 1 row label + 10 scores).
% Within each score column, \textbf marks the highest value and \underline
% the second-highest; ties on the second-highest value are all underlined.
% NOTE(review): NaN presumably means no score is available for that
% agent/column pair -- confirm, and consider rendering it as "--".
\begin{tabular}{lcccccccccc}
\toprule
% "Score" spans columns 2-11 so the group heading covers exactly the columns
% under the \cmidrule below (previously it spanned 3-11 while the rule ran 2-11).
Agent & \multicolumn{10}{c}{Score} \\
\cmidrule(lr){2-11}
 &  Overall & ALS & ARC & AYT & CN & HV & PT & SN & TRB & SB \\
\midrule
random & 0.49 & 0.72 & \textbf{0.60} & 0.25 & 0.18 & 0.41 & \underline{0.50} & 0.56 & 0.52 & \underline{0.58} \\
human & \textbf{0.85} & \textbf{1.00} & NaN & NaN & NaN & \textbf{1.00} & \textbf{1.00} & 0.43 & NaN & \textbf{0.78} \\
gpt-3 & 0.48 & 0.64 & 0.43 & 0.43 & 0.63 & \underline{0.80} & \underline{0.50} & 0.47 & 0.27 & 0.40 \\
gpt-3-cot & 0.60 & 0.43 & \underline{0.50} & \underline{0.93} & \underline{0.89} & 0.60 & \underline{0.50} & \textbf{0.61} & 0.33 & 0.55 \\
gpt-4 & 0.31 & 0.00 & 0.42 & 0.33 & 0.83 & 0.33 & 0.31 & 0.42 & 0.71 & 0.20 \\
gpt-4-cot & 0.60 & \underline{0.81} & \underline{0.50} & 0.64 & \textbf{1.00} & 0.50 & \underline{0.50} & 0.37 & \underline{0.75} & 0.51 \\
gpt-4-rap & \underline{0.62} & NaN & 0.33 & \textbf{1.00} & NaN & 0.50 & NaN & \underline{0.58} & \textbf{1.00} & 0.26 \\
\bottomrule
\end{tabular}
