% Main results table: accuracy (%) per model on five spatial-reasoning tasks.
% Columns 4-6 are grouped under "Spatial Relationship Prediction".
% Each model cell is "<value>{\scriptsize$\pm$<spread>}".
% NOTE(review): the meaning of the +/- figure (std. dev.? over how many runs?)
% is not stated in this file -- consider saying so in the caption.
% Requires: booktabs, tabularx, multirow, makecell.
\begin{table*}[t]
\centering
\setlength{\tabcolsep}{8pt} % Column padding; adjust as needed.
% \renewcommand{\arraystretch}{1.2} % Optionally increase the row height slightly for the \scriptsize parts.
% tabularx only stretches to the requested width through X columns, so the
% Model column is X and absorbs the slack; the numeric columns keep their
% natural width.
\begin{tabularx}{\textwidth}{@{}Xlllll@{}}
\toprule
\multirow{2}{*}{\textbf{Model}}
  & \multirow{2}{*}{\makecell[c]{\textbf{Pattern}\\\textbf{Prediction}}}
  & \multirow{2}{*}{\makecell[c]{\textbf{Multi-step}\\\textbf{Spatial}\\\textbf{Reasoning}}}
  & \multicolumn{3}{c}{\textbf{Spatial Relationship Prediction}} \\
\cmidrule(lr){4-6}
& & & \makecell[c]{\textbf{Spatial Pose}\\\textbf{Localization}}
  & \makecell[c]{\textbf{Layering}\\\textbf{Relationship}}
  & \makecell[c]{\textbf{Geometric}\\\textbf{Change}} \\
\midrule
\multicolumn{6}{c}{\textit{Open-source Models}} \\
\midrule
% \scriptsize is a declaration, so it is scoped with an explicit group.
MiniCPM-o 2.6 & 26.99{\scriptsize$\pm0.42$} & 30.11{\scriptsize$\pm1.54$} & 28.98{\scriptsize$\pm0.88$} & 30.50{\scriptsize$\pm1.00$} & 23.75{\scriptsize$\pm0.09$} \\
LLaVA-1.5-7B & 27.23{\scriptsize$\pm1.47$} & 29.05{\scriptsize$\pm1.90$} & 29.06{\scriptsize$\pm2.71$} & 30.94{\scriptsize$\pm0.97$} & 25.51{\scriptsize$\pm0.57$} \\
DeepSeek-VL2 & 28.40{\scriptsize$\pm0.07$} & 30.01{\scriptsize$\pm0.06$} & 26.71{\scriptsize$\pm1.40$} & 29.05{\scriptsize$\pm0.23$} & 24.30{\scriptsize$\pm1.10$} \\
NVILA-15B & 28.33{\scriptsize$\pm1.09$} & 32.51{\scriptsize$\pm0.90$} & 30.60{\scriptsize$\pm1.22$} & 31.00{\scriptsize$\pm1.53$} & 26.48{\scriptsize$\pm0.76$} \\
VideoLLaMA3-7B & 29.01{\scriptsize$\pm1.23$} & 30.86{\scriptsize$\pm0.14$} & 29.06{\scriptsize$\pm0.02$} & 28.74{\scriptsize$\pm1.04$} & 27.80{\scriptsize$\pm0.35$} \\
Qwen2.5-VL-7B & 28.40{\scriptsize$\pm0.82$} & 31.51{\scriptsize$\pm0.30$} & 28.43{\scriptsize$\pm0.08$} & 28.05{\scriptsize$\pm0.04$} & 28.83{\scriptsize$\pm0.72$} \\
Qwen2.5-VL-32B & 34.15{\scriptsize$\pm0.39$} & 36.82{\scriptsize$\pm0.48$} & 33.51{\scriptsize$\pm0.99$} & 32.59{\scriptsize$\pm0.48$} & 30.51{\scriptsize$\pm0.15$} \\
Qwen2.5-VL-72B & 36.29{\scriptsize$\pm0.11$} & \textbf{39.10}{\scriptsize$\pm0.88$} & 35.68{\scriptsize$\pm1.69$} & \textbf{38.04}{\scriptsize$\pm0.70$} & 31.89{\scriptsize$\pm0.85$} \\
InternVL2.5-78B & \textbf{36.76}{\scriptsize$\pm0.75$} & 38.55{\scriptsize$\pm0.08$} & \textbf{38.01}{\scriptsize$\pm0.11$} & 37.66{\scriptsize$\pm0.13$} & \textbf{32.48}{\scriptsize$\pm0.48$} \\
\midrule
\multicolumn{6}{c}{\textit{Closed-source Models}} \\
\midrule
% NOTE(review): the Pattern Prediction underline was on Gemini2.5-Pro (42.68),
% but GPT-4o (42.71) is higher, so the marker now follows the numbers.
% Please confirm both values against the raw results.
Claude-3.5-Sonnet & 35.89{\scriptsize$\pm1.47$} & 45.07{\scriptsize$\pm0.64$} & 39.55{\scriptsize$\pm0.63$} & 40.19{\scriptsize$\pm0.11$} & 39.73{\scriptsize$\pm0.10$} \\
GPT-4o & \underline{42.71}{\scriptsize$\pm0.66$} & 51.81{\scriptsize$\pm0.48$} & 48.24{\scriptsize$\pm1.73$} & \underline{50.42}{\scriptsize$\pm0.59$} & 46.72{\scriptsize$\pm0.50$} \\
Gemini2.5-Flash & 35.01{\scriptsize$\pm0.16$} & 48.92{\scriptsize$\pm0.13$} & 40.15{\scriptsize$\pm0.60$} & 39.91{\scriptsize$\pm1.09$} & 40.01{\scriptsize$\pm1.63$} \\
Gemini2.5-Pro & 42.68{\scriptsize$\pm0.14$} & \underline{53.45}{\scriptsize$\pm0.74$} & \underline{49.06}{\scriptsize$\pm0.07$} & 47.68{\scriptsize$\pm0.07$} & \underline{47.10}{\scriptsize$\pm0.82$} \\
\midrule
\multicolumn{6}{c}{\textit{Human Performance}} \\
\midrule
% Human rows carry no +/- figure in the source data.
Human (common) & 51.18 & 88.52 & 55.12 & 50.55 & 50.15 \\
Human (expert) & 98.45 & 100.00 & 96.44 & 92.10 & 85.38 \\
\bottomrule
\end{tabularx}
\caption{Accuracy (\%) of various MLLMs on different spatial reasoning tasks. Bold and underlined values indicate the best performance among open-source models and among all models, respectively.}
\label{main_1}
\end{table*}