
\begin{table}[htp]
    % TCGA benchmark table: one row per MIL method, and for each
    % task / feature-extractor pair an AUC column and an AUPC column,
    % followed by a per-method average over the six pairs.
    %
    % Highlighting convention used in the twelve task columns:
    %   \textbf{...}    best value of the column (ties are all bold),
    %   \underline{...} second-best value of the column.
    % The Average columns carry no highlighting.
    %
    % \our and \ours are defined outside this table.
    % NOTE(review): \our presumably shades/highlights the cells of the
    % proposed variants and \ours presumably prints the method name -- confirm
    % against the preamble.
    \caption{\textbf{TCGA Benchmark: Performance vs Attention Interpretability.} Comparison of classification performance (AUC $\uparrow$) and attention faithfulness (AUPC $\downarrow$) across three tasks (BRCA subtyping, NSCLC subtyping, LUAD TP53 mutation prediction) and two feature extractors (ResNet50, UNI). For each task and feature extractor, the best result is shown in \textbf{bold} and the second best is \underline{underlined}.}
    \label{tab:tcga_scores_rebuttal}
    \centering
    % The trailing % after the opening brace and after \end{tabular} keeps the
    % line ends from becoming spaces inside the box that \resizebox scales.
    \resizebox{\textwidth}{!}{%
    \begin{tabular}{lcccccccccccccc}
        \hline
        % Header row 1: task - feature extractor, each spanning an AUC/AUPC pair.
        & \multicolumn{2}{c}{\textbf{BRCA - ResNet50}} & \multicolumn{2}{c}{\textbf{BRCA - UNI}}
        & \multicolumn{2}{c}{\textbf{NSCLC - ResNet50}} & \multicolumn{2}{c}{\textbf{NSCLC - UNI}}
        & \multicolumn{2}{c}{\textbf{LUAD - ResNet50}} & \multicolumn{2}{c}{\textbf{LUAD - UNI}}
        & \multicolumn{2}{c}{\textbf{Average}} \\[-0.3em]
        % Header row 2: metric names with the direction of improvement.
        & \textbf{AUC} ($\uparrow$) & \textbf{AUPC} ($\downarrow$)
        & \textbf{AUC} ($\uparrow$) & \textbf{AUPC} ($\downarrow$)
        & \textbf{AUC} ($\uparrow$) & \textbf{AUPC} ($\downarrow$)
        & \textbf{AUC} ($\uparrow$) & \textbf{AUPC} ($\downarrow$)
        & \textbf{AUC} ($\uparrow$) & \textbf{AUPC} ($\downarrow$)
        & \textbf{AUC} ($\uparrow$) & \textbf{AUPC} ($\downarrow$)
        & \textbf{AUC} ($\uparrow$) & \textbf{AUPC} ($\downarrow$)
        \\ \hline
        % ---- Baselines ------------------------------------------------------
        % MeanMIL and MaxMIL report no AUPC (N/A).
        % NOTE(review): presumably because these poolings have no attention
        % scores to evaluate -- confirm.
        MeanMIL
        & 89.0±3.8 & N/A
        & 93.2±2.4 & N/A
        & 91.1±3.0 & N/A
        & 96.9±1.3 & N/A
        & 66.9±6.5 & N/A
        & 74.5±6.3 & N/A
        & 85.3 & N/A \\
        MaxMIL
        & 86.9±2.6 & N/A
        & \textbf{95.4±1.5} & N/A
        & \underline{94.4±1.6} & N/A
        & 97.5±1.0 & N/A
        & 61.4±9.2 & N/A
        & \underline{76.0±5.4} & N/A
        & 85.3 & N/A \\
        DSMIL %~\cite{li2021dualstreammultipleinstancelearning}
        & 88.4±3.0 & 84.0±17.3
        & 94.1±1.6 & \textbf{35.1±26.8}
        & 93.6±2.5 & 65.4±32.4
        & 97.4±1.1 & \textbf{35.6±28.6}
        & 67.8±6.3 & 80.4±18.8
        & 66.9±4.7 & \textbf{56.8±29.0}
        & 84.7 & 59.5 \\
        % NOTE(review): the two LUAD AUPC cells of AddMIL (85.3±27.8, 81.9±25.3)
        % are identical to those of ACMIL below -- verify against raw results.
        AddMIL %~\cite{javed2022additivemilintrinsicallyinterpretable}
        & 88.0±2.0 & 72.4±28.0
        & 93.7±2.6 & 71.7±33.8
        & 92.0±2.3 & \textbf{56.5±48.8}
        & 94.6±3.0 & \underline{62.6±41.6}
        & 63.4±3.8 & 85.3±27.8
        & 73.3±3.9 & 81.9±25.3
        & 84.2 & 71.7 \\
        MHIM %~\cite{mhim-mil}
        & 90.0±1.9 & 86.4±20.9
        & 94.2±1.1 & 73.5±23.9
        & \textbf{94.8±1.5} & 67.9±33.0
        & \textbf{97.9±0.9} & 77.9±26.0
        & 68.0±3.1 & 78.8±20.2
        & 73.8±3.7 & 77.3±26.9
        & 86.4 & 76.9 \\
        \hline
        % ---- ABMIL and the \ours row ----------------------------------------
        ABMIL %~\cite{ilse2018attentionbaseddeepmultipleinstance}
        & 89.2±2.6 & 85.5±22.4
        & \underline{95.3±1.7} & 76.1±25.3
        & 93.4±1.7 & 65.9±42.7
        & 97.6±1.0 & 70.7±33.6
        & \underline{68.2±5.6} & 78.0±24.8
        & 74.0±4.3 & 80.9±26.2
        & 86.3 & 76.2 \\
        \our \ours
        & \our \underline{90.1±2.8} & \our \underline{68.8±16.5}
        & \our 94.5±1.2 & \our \underline{71.2±21.8}
        & \our 91.6±2.4 & \our 62.8±23.2
        & \our 96.5±1.6 & \our 64.0±23.7
        & \our \textbf{69.1±5.1} & \our \underline{73.3±20.4}
        & \our \textbf{77.5±2.5} & \our \underline{73.1±24.0}
        & \our 86.6 & \our 68.8 \\
        \hline
        % ---- CLAM and CIA-CLAM ----------------------------------------------
        % CLAM's NSCLC - UNI AUC (97.9) ties the column best, so it is bold like
        % the other 97.9 entries (MHIM, ACMIL).
        CLAM %~\cite{clam}
        & 89.4±1.8 & 84.9±21.9
        & 94.5±2.3 & 73.1±25.5
        & 94.1±1.9 & 64.6±44.3
        & \textbf{97.9±0.8} & 74.1±29.0
        & 65.5±6.2 & 76.5±34.5
        & 74.0±3.0 & 78.5±26.1
        & 85.9 & 75.3 \\
        % NOTE(review): CIA-CLAM and CIA-ACMIL share the same NSCLC - ResNet50
        % AUC cell (92.0±3.2) and the same AUPC deviation (18.0) -- verify.
        \our CIA-CLAM
        & \our 85.9±6.3 & \our \textbf{62.5±23.8}
        & \our 94.9±1.7 & \our 72.8±21.7
        & \our 92.0±3.2 & \our 63.7±18.0
        & \our \underline{97.8±1.0} & \our 72.2±23.3
        & \our 67.1±5.3 & \our 74.9±15.6
        & \our 75.3±4.4 & \our 73.9±22.9
        & \our 85.5 & \our 70.0 \\
        \hline
        % ---- ACMIL and CIA-ACMIL --------------------------------------------
        ACMIL %~\cite{zhang2024attentionchallengingmultipleinstancelearning}
        & 88.6±2.8 & 69.2±23.4
        & 94.4±2.9 & 82.8±22.0
        & 93.8±1.8 & 62.5±46.2
        & \textbf{97.9±0.8} & 73.8±27.7
        & 67.5±5.0 & 85.3±27.8
        & 75.5±7.4 & 81.9±25.3
        & 86.3 & 75.9 \\
        \our CIA-ACMIL
        & \our \textbf{90.3±1.4} & \our 69.5±26.7
        & \our 94.9±2.3 & \our 81.1±19.6
        & \our 92.0±3.2 & \our \underline{61.7±18.0}
        & \our 97.3±1.8 & \our 68.8±26.9
        & \our 67.0±2.8 & \our \textbf{71.6±21.9}
        & \our 75.7±6.9 & \our 75.8±24.9
        & \our 86.2 & \our 71.4 \\
        \hline
    \end{tabular}%
    }
\end{table}
