\begin{figure}[!htbp]
    \centering
    \includegraphics[width=\linewidth]{figures/sam2_better.pdf}
    % Qualitative comparison figure: cases where SAM 2 beats SAM 3.
    %
    % The optional short caption is what gets written to the .aux/.lof files.
    % It keeps soul's \sethlcolor (an unprotected assignment) and \hl out of
    % the moving argument, so a \listoffigures cannot choke on the colour
    % legend; the long caption below is only ever typeset in place.
    \caption[Qualitative examples where SAM~2 outperforms SAM~3]{%
        \textbf{Qualitative examples where SAM~2 outperforms SAM~3}.
        In all three examples, SAM~3 shows stronger initial localization but exhibits propagation failures, including hallucinated residual masks in later slices (Examples~1--2) and erosion/collapse of structure boundaries under challenging appearance or motion (Example~3).
        % Ties (~) keep "slice >= N" on one line.
        In the red-boxed columns of Examples~1--2 (Example~1: slice~$\ge$~91; Example~2: slice~$\ge$~188), the GT mask is empty (target absent); SAM~2 correctly predicts no mask, whereas SAM~3 produces residual masks, indicating over-propagation.
        % Colour legend: the highlight colours gtMask, samTwoBase and
        % samThreeBase must be defined elsewhere (not visible in this chunk).
        [Colors: \sethlcolor{gtMask}\hl{\texttt{GT}}, \sethlcolor{samTwoBase}\hl{\texttt{SAM 2}}, \sethlcolor{samThreeBase}\hl{\texttt{SAM 3}}]%
    }
    \label{fig:sam2better}
\end{figure}
