% \begin{tabular}{lcc}
% \toprule
% \textbf{Model} & \textbf{Malignant vs.\ Benign} & \textbf{Melanocytic vs.\ Non-melanocytic} \\
% \midrule
% red-box (pretrained)   & \textbf{0.784} & 0.912 \\
% TBM--All Tools (val on all)      & 0.748          & 0.911 \\
% TBM--VLM Stochastic              & 0.765          & \textbf{0.918} \\
% TBM--VLM Exact                   & 0.749          & 0.904 \\
% TBM--VLM Global                  & 0.738          & 0.907 \\
% TBM--Random                      & 0.727          & 0.901 \\
% Y-Net                            & 0.658          & 0.866 \\
% VLM (MedGemma, zero-shot)        & 0.488          & 0.499 \\
% \bottomrule
% \end{tabular}


% Results table: one row per model, one numeric column per benchmark
% (Camelyon17, ISIC-BM, ISIC-MN). Four columns in total: the model name
% (left-aligned) followed by three centred score columns.
% Row groups, separated by \midrule:
%   1. vision-language / program-synthesis baselines,
%   2. trained baselines,
%   3. \frameworkabbr{} and its ablations.
% NOTE(review): \textbf appears to mark the best score per column and
% \underline the second best -- confirm against the caption.
\begin{tabular}{lccc}
\toprule
\bfseries Model & \bfseries \makecell[c]{Camelyon17} & \bfseries \makecell[c]{ISIC-BM} & \bfseries \makecell[c]{ISIC-MN} \\
\midrule
Gemma~\cite{team2025gemma} & 50.0 & 49.5 & 50.4 \\
Gemma w/ Tool Prompts & 50.9 & 47.1 & 48.8 \\
MedGemma~\cite{sellergren2025medgemma} & 50.0 & 44.4 & 48.7 \\
MedGemma w/ Tool Prompts & 50.0 & 46.8 & 50.0 \\
VisProg~\cite{gupta2023visual} & 50.4 & 50.0 & 50.0 \\
LlavaMed~\cite{li2023llavamed} & 50.0 & 49.4 & 50.0 \\
% ViperGPT~\cite{suris2023vipergpt} &  &   &  \\
\midrule
EfficientNet~\cite{tan2019efficientnet} & 88.6 & \textbf{78.4} & \underline{91.2} \\
Y-Net~\cite{mehta2018net} & 88.2 & 65.8 & 86.6 \\
LlavaMed FT & 66.2 & 51.5 & 58.0 \\
\midrule
\frameworkabbr{} (ours) & \textbf{92.3} & \underline{77.5} & \textbf{91.7} \\
% \frameworkabbr{} without perturbation  & 90.2  & 0.749  & 0.904 \\
% \frameworkabbr{} without VLM & 92.1 & 0.748  & 0.911 \\
% Ablation rows report signed differences (presumably relative to the
% \frameworkabbr{} row above -- TODO confirm); math mode gives a true minus sign.
$\rightarrow$ without perturbation ($\alpha = 1$) & $-2.1$ & $-2.1$ & $-1.4$ \\
$\rightarrow$ with all modality-specific tools & \underline{$-0.2$} & $-2.2$ & $-0.7$ \\
% $\TBMl$--VLM Perturbed  & 89.2 & 0.685 & 0.838 \\
% $\TBMl$--VLM Exact & 89.0  & 0.667  & 0.836 \\
% $\TBMl$      & 90.2  & 0.673 & 0.868 \\
\bottomrule
\end{tabular}