% \begin{table*}[t]
% \centering
% \footnotesize
% \setlength{\tabcolsep}{6pt}
% \caption{Accuracies for the adversarial experiments. Models highlighted in gray are general-purpose, while the others are domain-specific. The best result in each column is in \textbf{bold}, the second-best \underline{underlined}. Evaluation conditions: \textit{Base}~=~clean baseline, \textit{Cap}~=~captions embedded in image, \textit{Pmt}~=~captions in prompt, \textit{Syc}~=~sycophancy, \textit{Leg}~=~legends in image.}
% \label{tab:robustness}
% \vspace{4pt}
% \begin{tabular}{lc|ccc:c|cccc}
% \toprule
% & \multicolumn{4}{c@{\hspace{20pt}}}{\textbf{Classification}} & \multicolumn{5}{c@{}}{\textbf{Detection}} \\
% \cmidrule(lr){2-5} \cmidrule(l){6-10}
% \textbf{Model} & \textit{Base} & \textit{Cap} & \textit{Pmt} & \textit{Syc} & \textit{Base} & \textit{Cap} & \textit{Pmt} & \textit{Syc} & \textit{Leg} \\
% \midrule
% Random & 41.67 & \multicolumn{3}{c}{41.67} & 25.00 & \multicolumn{4}{c}{25.00} \\\hdashline[0.5pt/2pt]
% \rowcolor{gray!10} GLM\_4.5V & \underline{69.9} & 0.2 & 8.5 & 20.7 & 79.1 & 3.8 & 12.6 & 20.5 & 14.2 \\
% \rowcolor{gray!10} Kimi-VL-A3B-Instruct & 55.6 & 0.0 & 0.6 & 11.5 & 74.8 & 4.2 & 7.5 & 10.0 & 20.5 \\
% \rowcolor{gray!10} MiMo-VL-7B & 53.0 & 0.5 & 1.8 & 6.7 & 63.6 & 0.9 & 1.5 & 9.0 & 1.6 \\
% \rowcolor{gray!10} Qwen2-VL-7B-Instruct & 51.6 & 0.2 & 2.4 & 10.1 & 64.1 & 16.4 & 8.5 & 18.0 & 22.3 \\
% \rowcolor{gray!10} Qwen2-VL-72B-Instruct & 59.4 & 1.1 & 5.8 & 35.8 & 78.0 & 6.3 & 5.8 & 20.8 & 8.6 \\
% \rowcolor{gray!10} Qwen3-VL-8B-Instruct & 67.2 & 0.3 & 8.4 & 28.2 & 79.7 & 19.5 & 19.8 & 35.7 & 16.2 \\
% \rowcolor{gray!10} Qwen3-VL-30B-A3B-Instruct & 67.2 & 0.1 & 13.0 & 29.5 & 79.3 & 20.9 & 37.2 & 34.4 & 26.1 \\
% Chiron-o1-8B & 48.6 & 5.7 & 8.7 & 54.8 & 58.2 & 18.8 & 8.7 & 31.7 & 23.9 \\
% HuatuoGPT-Vision-7B & 56.6 & \underline{21.3} & 7.0 & 30.6 & 51.6 & 25.3 & 0.7 & 5.1 & \underline{42.5} \\
% HuatuoGPT-Vision-34B & 59.1 & \textbf{22.7} & 10.1 & 15.0 & 55.1 & 25.3 & 2.4 & 6.2 & 35.2 \\
% Lingshu-7B & \textbf{78.8} & 1.2 & 18.8 & 45.4 & 77.0 & 4.7 & 8.6 & 22.5 & 17.2 \\
% Lingshu-32B & 68.9 & 2.6 & 29.9 & 56.8 & 76.6 & 7.8 & 12.9 & 35.0 & 22.3 \\
% Hulu-Med-7B & 61.2 & 1.3 & 0.0 & 18.6 & 68.0 & 1.8 & 0.1 & 5.0 & 25.5 \\
% Hulu-Med-32B & 68.4 & 1.3 & 4.1 & 35.4 & 71.6 & 3.0 & 8.2 & 21.8 & 14.8 \\
% \noalign{\vskip 0.5ex}
% \hdashline[0.5pt/2pt]
% \noalign{\vskip 0.5ex}
% Aloe-Vision-7B & 59.6 & 2.3 & 15.0 & 41.6 & 68.0 & \underline{65.9} & 10.2 & 26.5 & 34.5 \\
% Aloe-Vision-7B-AR & 65.3 & 13.7 & \underline{43.8} & 50.2 & 75.4 & \textbf{73.5} & \textbf{57.9} & \textbf{67.7} & \textbf{57.9} \\
% Aloe-Vision-72B & 63.1 & 3.5 & 8.1 & \textbf{75.0} & 71.6 & 3.9 & 2.9 & 20.1 & 9.2 \\
% Aloe-Vision-72B-AR & 66.7 & 16.0 & \textbf{51.0} & \underline{63.5} & 80.9 & 31.2 & \underline{57.4} & \underline{54.6} & 32.7 \\

% \bottomrule
% \end{tabular}
% \end{table*}


% ---------------------------------------------------------------------------
% Table: adversarial-robustness accuracies (\label{tab:robustness}).
%
% Markup used below: \toprule/\midrule/\cmidrule/\bottomrule (booktabs),
% \hdashline and the `:` column separator (arydshln), \rowcolor (colortbl).
%
% Column layout (10 columns):
%   1      Model name
%   2-5    Classification: Base | Cap, Pmt, Syc
%   6-10   Detection:      Base | Cap, Pmt, Syc, Leg
% A solid rule follows each Base column; a dashed rule separates the two tasks.
%
% Header abbreviations (as defined in the earlier caption draft kept in the
% comments above this table): Base = clean baseline, Cap = captions embedded
% in image, Pmt = captions in prompt, Syc = sycophancy, Leg = legends in image.
%
% Marking rule (matches the caption): best value per column in \textbf,
% second-best in \underline. Cells that tie at the printed precision are all
% marked, so a reader never sees two equal numbers with only one highlighted.
% NOTE(review): ties were judged on the rounded values shown here -- confirm
% against the unrounded results if they break the tie.
% ---------------------------------------------------------------------------
\begin{table*}[t]
\centering
\footnotesize
\setlength{\tabcolsep}{6pt}
\caption{Accuracies for the adversarial experiments. Models highlighted in gray are general-purpose, while the others are domain-specific. The best result in each column is in \textbf{bold}, the second-best \underline{underlined}.}
\label{tab:robustness}
\begin{tabular}{lc|ccc:c|cccc}
\toprule
% Task headers span their column groups: columns 2-5 and 6-10.
& \multicolumn{4}{c}{\textbf{Classification}} & \multicolumn{5}{c}{\textbf{Detection}} \\
\cmidrule(lr){2-5} \cmidrule(l){6-10}
\textbf{Model} & \textit{Base} & \textit{Cap} & \textit{Pmt} & \textit{Syc}
& \textit{Base} & \textit{Cap} & \textit{Pmt} & \textit{Syc} & \textit{Leg} \\
\midrule
% Random baseline: a single value spans all non-Base columns of each task.
Random & 41.67 & \multicolumn{3}{c}{41.67} & 25.00 & \multicolumn{4}{c}{25.00} \\\hdashline[0.5pt/2pt]
% GPT-5.2 sits in its own group; its Classification/Base value ties with
% GLM\_4.5V for second place, so both cells are underlined.
GPT-5.2 & \underline{70.2} & 6.4 & 10.9 & 38.5 & 70.9 & 14.3 & 5.3 & 9.7 & 8.7 \\
\hdashline[0.5pt/2pt]
\noalign{\vskip 1ex}
% Gray rows: general-purpose models (see caption).
\rowcolor{gray!10} GLM\_4.5V & \underline{70.2} & 0.3 & 8.6 & 21.0 & 80.0 & 3.8 & 12.6 & 20.2 & 14.4 \\
\rowcolor{gray!10} Kimi-VL-A3B-Instruct & 55.6 & 0.0 & 0.6 & 11.5 & 75.8 & 4.2 & 7.4 & 10.0 & 21.1 \\
\rowcolor{gray!10} MiMo-VL-7B & 54.6 & 0.3 & 2.2 & 6.4 & 63.9 & 0.9 & 1.5 & 8.9 & 2.3 \\
\rowcolor{gray!10} Qwen2-VL-7B & 51.6 & 0.2 & 2.4 & 10.1 & 65.1 & 16.5 & 8.5 & 17.6 & 22.6 \\
\rowcolor{gray!10} Qwen2-VL-72B & 59.4 & 1.0 & 5.8 & 35.6 & 78.4 & 6.3 & 5.7 & 20.5 & 8.7 \\
% The two Qwen3-VL rows tie at 81.0 for second place in Detection/Base.
\rowcolor{gray!10} Qwen3-VL-8B-Instruct & 67.6 & 0.3 & 8.6 & 28.2 & \underline{81.0} & 19.5 & 19.7 & 35.7 & 16.4 \\
\rowcolor{gray!10} Qwen3-VL-30B-A3B-Instruct & 66.8 & 0.1 & 13.2 & 28.9 & \underline{81.0} & 20.8 & 37.1 & 34.3 & 26.2 \\
% Unshaded rows: domain-specific models (see caption).
Chiron-o1-8B & 48.9 & 5.8 & 8.6 & 54.6 & 57.4 & 19.2 & 8.6 & 33.3 & 24.4 \\
HuatuoGPT-Vision-7B & 57.3 & \underline{21.4} & 7.2 & 30.7 & 51.9 & 25.5 & 0.7 & 5.2 & \underline{41.6} \\
HuatuoGPT-Vision-34B & 59.1 & \textbf{22.7} & 10.1 & 14.9 & 55.9 & 25.6 & 2.3 & 6.3 & 36.6 \\
Lingshu-7B & \textbf{78.8} & 1.2 & 18.8 & 45.4 & 78.1 & 4.7 & 8.6 & 22.3 & 17.8 \\
Lingshu-32B & 68.9 & 2.7 & 29.8 & 56.9 & 77.7 & 7.8 & 12.6 & 34.8 & 22.7 \\
Hulu-Med-7B & 61.2 & 1.3 & 0.0 & 18.6 & 68.9 & 1.8 & 0.1 & 5.0 & 26.3 \\
Hulu-Med-32B & 68.4 & 1.3 & 4.1 & 35.4 & 73.3 & 3.0 & 8.2 & 21.8 & 15.3 \\
% Dashed separator with a little padding above and below, then the
% Aloe-Vision variants as the final group.
\noalign{\vskip 0.5ex}
\hdashline[0.5pt/2pt]
\noalign{\vskip 0.5ex}
Aloe-Vision-7B-S1 & 59.6 & 2.3 & 15.0 & 41.6 & 69.1 & \underline{66.4} & 10.3 & 26.3 & 34.4 \\
Aloe-Vision-7B-S2 & 65.3 & 13.7 & \underline{43.8} & 50.2 & 76.9 & \textbf{75.3} & \textbf{58.5} & \textbf{68.6} & \textbf{58.0} \\
Aloe-Vision-72B-S1 & 63.0 & 3.5 & 8.2 & \textbf{75.0} & 72.5 & 4.0 & 2.9 & 19.7 & 9.4 \\
Aloe-Vision-72B-S2 & 66.6 & 16.2 & \textbf{51.0} & \underline{63.5} & \textbf{82.0} & 31.0 & \underline{57.0} & \underline{54.3} & 33.1 \\
\bottomrule
\end{tabular}
\end{table*}
