% Table: Dice scores for different context pools and context selection
% strategies on BUS-BRA and CAMUS, with significance markers relative to
% the L2-distance baseline row.
\begin{table}[htbp]
\centering
% Scale the tabular to half the column width, keeping the aspect ratio (`!`).
\resizebox{0.5\columnwidth}{!}{%
\begin{tabular}{l l c c c c}
\toprule
Context Pool & Selection Strategy & \multicolumn{2}{c}{BUS-BRA} & \multicolumn{2}{c}{CAMUS} \\
 & & Dice & Sig. & Dice & Sig. \\
\midrule
% Baseline row: no significance marker, shown as an en-dash (ASCII `--`
% ligature instead of a raw Unicode dash, so it compiles on any engine).
Full Set & L2 Distance & 0.911 & -- & 0.940 & -- \\
10\% Subset & Random  & 0.910 & $\dagger$  & 0.936 & $\dagger$ \\
Full Set & SSIM  & 0.902 & $\ddagger$ & 0.937 & $\dagger$ \\
\bottomrule
\end{tabular}%
}
\caption{Robustness of segmentation performance to context pool size and selection method. Minimal Dice score degradation is observed when using only 10\% of the training set or different context selection metrics (L2 vs. SSIM). BUS-BRA~\cite{WilfridoGomezFlores.2023} results are stratified by scanner. Significance levels: $\dagger$ $p < 0.05$, $\ddagger$ $p < 0.01$. We compute the $p$-value for Dice score significance against the L2 distance selection strategy using Wilcoxon signed-rank pairwise tests with Bonferroni correction.}
% \label must follow \caption: the caption steps the table counter, so a
% label placed earlier would make \ref point at the enclosing section.
\label{tab:context_strategy}
\end{table}
