\begin{table*}[t]
\small
\caption{Dice score (\%) stratified by kidney stone size. Size categories are defined by the ground-truth mask area $A$ relative to the image area ($H \times W$): No Stone ($A = 0$), Small ($0 < A \le \frac{1}{8}HW$), Medium ($\frac{1}{8}HW < A < \frac{1}{4}HW$), and Large ($A \ge \frac{1}{4}HW$). $n$ denotes the number of samples in each category. Bold indicates the \textbf{best} result in each column. Lavender rows denote the cross-supervised methods. Endo-SemiS achieves the best overall Dice with 10\% labels.}
\label{stone_size_table}
% Tighten column padding and slightly open up row height for this table.
\setlength{\tabcolsep}{3.5pt}
\renewcommand{\arraystretch}{1.05}
% \centering instead of a center environment: inside a float the environment
% inserts extra vertical space around the tabular.
\centering
% Columns: method, label fraction, then Dice per stone-size stratum and overall.
% No vertical rules: booktabs rules leave gaps in them, and the horizontal
% rules already separate the method groups.
\begin{tabular}{l c c c c c c}
\toprule
Method & Label usage & No Stone & Small & Medium & Large & Overall \\
 &  & ($n=1364$) & ($n=495$) & ($n=418$) & ($n=1682$) &  \\
\midrule
% Fully supervised baselines trained with all labels.
U-Net   & 100\% & 91.3$\pm$29.6 & 63.8$\pm$38.5 & 82.0$\pm$25.9 & 87.6$\pm$22.8 & 85.3$\pm$29.2 \\
nnU-Net & 100\% & 90.2$\pm$29.1 & 64.9$\pm$38.9 & 82.2$\pm$25.3 & 88.6$\pm$21.6 & 85.5$\pm$28.5 \\
\midrule
% The same baselines restricted to 10% of the labels.
U-Net   & 10\%  & 75.4$\pm$42.5 & 65.6$\pm$35.2 & 82.9$\pm$20.5 & 88.4$\pm$16.8 & 80.5$\pm$32.1 \\
nnU-Net & 10\%  & 80.9$\pm$39.4 & 57.6$\pm$40.6 & 77.4$\pm$29.6 & 85.3$\pm$23.7 & 79.5$\pm$33.8 \\
\midrule
UniMatch & 10\% & 89.0$\pm$31.3 & 69.2$\pm$35.7 & 81.4$\pm$25.0 & 88.5$\pm$19.3 & 85.5$\pm$27.6 \\
\midrule
% Cross-supervised methods; the crosssup row colour is the lavender
% highlight referred to in the caption.
\rowcolor{crosssup}
CPS     & 10\%  & 88.3$\pm$32.2 & 65.7$\pm$37.0 & 83.1$\pm$22.0 & 88.9$\pm$18.6 & 85.2$\pm$28.0 \\
% NOTE(review): the sample-weighted mean of the four strata in the next row is
% about 86.3 (every other row reproduces its Overall value this way), but
% Overall reads 85.6 -- confirm these entries against the raw results.
\rowcolor{crosssup}
Cross Teaching & 10\% & \textbf{93.5$\pm$24.6} & 65.9$\pm$38.6 & 83.8$\pm$26.0 & 87.1$\pm$24.3 & 85.6$\pm$28.7 \\
\rowcolor{crosssup}
Endo-SemiS (Ours) & 10\% & 90.5$\pm$29.4 & \textbf{70.4$\pm$36.0} & \textbf{84.4$\pm$23.0} & \textbf{91.1$\pm$17.9} & \textbf{87.6$\pm$26.4} \\
\bottomrule
\end{tabular}
\end{table*}