 \documentclass{midl} % Include author names
% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution
\usepackage{booktabs}
\usepackage{mwe} % to get dummy images
\jmlrvolume{-- Under Review}
\jmlryear{2026}
\jmlrworkshop{Full Paper -- MIDL 2026 submission}
\editors{Under Review for MIDL 2026}

\title[Ultra-ECP]{Ultra-ECP: Ellipse-Constrained and Point-Robust Foundation Model Adaptation for Fetal Cardiac Ultrasound Segmentation}

\midlauthor{
\small % <--- This scales the text down. Change to \footnotesize if you need it smaller.
\Name{Minh H. N. Le\midljointauthortext{Contributed equally}\nametag{$^{1,2}$}} \Email{d142111009@tmu.edu.tw, johnminhle@ieee.org}\\
\addr $^{1}$International Ph.D. Program in Medicine, College of Medicine, Taipei Medical University, Taipei, Taiwan \addr $^{2}$AIBioMed Research Group, Taipei Medical University, Taipei, Taiwan 
\AND
\Name{Khanh T. Q. Le\midlotherjointauthor\nametag{$^{3}$}} \Email{khanh.tran@uit.edu.vn}\\
\addr $^{3}$PASSIO Lab, North Carolina A\&T State University, North Carolina, USA 
\AND
\Name{Tuan Vinh\nametag{$^{4}$}} \Email{tuan.vinh@hertford.ox.ac.uk}\\
\addr $^{4}$Medical Sciences Division, University of Oxford, Oxford, United Kingdom 
\AND
\Name{Thanh-Huy Nguyen\nametag{$^{5}$}} \Email{thanhhun@andrew.cmu.edu}\\
\addr $^{5}$Computational Biology Department, Carnegie Mellon University, Pittsburgh, USA
\AND
\Name{Han H. Huynh\nametag{$^{6}$}} \Email{m658112001@tmu.edu.tw}\\
\addr $^{6}$International Master Program for Translational Science, College of Medical Science and Technology, Taipei Medical University, Taipei, Taiwan 
\AND
\Name{Khoa D. Pham\nametag{$^{7}$}} \Email{kdpham@aggies.ncat.edu}\\
\addr $^{7}$Industrial and Systems Engineering Department, North Carolina A\&T State University, North Carolina, USA 
\AND
\Name{Dang Nguyen\nametag{$^{12}$}} \Email{kevin\_nguyen@hsph.harvard.edu}\\
\addr $^{12}$Harvard T.H. Chan School of Public Health, Harvard University, Boston, MA, USA; Center for Materials Innovation and Technology, VinUniversity, Hanoi, Vietnam
\AND
\Name{Anh Mai Vu\nametag{$^{8}$}} \Email{mvu9@cougarnet.uh.edu}\\
\addr $^{8}$Department of Electrical and Computer Engineering, University of Houston, Houston, TX 77204, USA 
\AND
\Name{Hien Q. Kha\nametag{$^{1,2}$}} \Email{d142111015@tmu.edu.tw}\\
\addr $^{1}$International Ph.D. Program in Medicine, College of Medicine, Taipei Medical University, Taipei, Taiwan  \addr $^{2}$AIBioMed Research Group, Taipei Medical University, Taipei, Taiwan 
\AND
\Name{Phat K. Nguyen\nametag{$^{1,2}$}} \Email{m142113007@tmu.edu.tw}\\
\addr $^{1}$International Master Program in Medicine, College of Medicine, Taipei Medical University, Taipei, Taiwan \\
\addr $^{2}$AIBioMed Research Group, Taipei Medical University, Taipei, Taiwan 
\AND
\Name{Ulas Bagci\nametag{$^{9}$}} \Email{ulas.bagci@northwestern.edu}\\
\addr $^{9}$Department of Radiology, Northwestern University Feinberg School of Medicine, Chicago, IL, USA 
\AND
\Name{Min Xu\nametag{$^{5}$}} \Email{mxu1@andrew.cmu.edu}\\
\addr $^{5}$Computational Biology Department, Carnegie Mellon University, Pittsburgh, USA 
\AND
\Name{Carl Yang\nametag{$^{10}$}} \Email{j.carlyang@emory.edu}\\
\addr $^{10}$Department of Computer Science, Emory University, Atlanta, GA, USA \AND
\Name{Phat K. Huynh\nametag{$^{7}$}} \Email{pkhuynh@ncat.edu}\\
\addr $^{7}$Industrial and Systems Engineering Department, North Carolina A\&T State University, North Carolina, USA 
\AND
\Name{Nguyen Quoc Khanh Le\nametag{$^{11}$}} \Email{khanhlee@tmu.edu.tw}\\
\addr $^{11}$In-Service Master Program in Artificial Intelligence in Medicine, College of Medicine, Taipei Medical University, Taiwan; AIBioMed Research Group, Taipei Medical University, Taipei, Taiwan
}

\begin{document}

\maketitle
\begin{abstract}
Accurate fetal cardiac segmentation from four-chamber ultrasound images is essential for reliable prenatal biometrics, yet general-purpose foundation models such as SAM remain sensitive to point-prompt placement, produce anatomically inconsistent masks, and require costly full-model fine-tuning. We introduce \textbf{Ultra-ECP}, a parameter-efficient framework that adapts UltraSAM for robust single-point fetal cardiac segmentation. Ultra-ECP integrates three components: (i) a LoRA-based adaptation applied to the prompt encoder and mask decoder, reducing trainable parameters by over 98\%; (ii) an Ellipse-Aware Loss that regularizes predictions toward anatomically plausible elliptical cardiac shapes; and (iii) a Point-Robust Augmentation strategy that simulates click imprecision to enhance robustness. Evaluated on the FOCUS dataset, Ultra-ECP outperforms SAM, MedSAM, and fine-tuned UNet baselines. For thoracic segmentation, the method achieves a mean DSC of \textbf{95.09\%} and HD95 of \textbf{25.96 px}. For cardiac segmentation, Ultra-ECP obtains a mean DSC of \textbf{92.60\%} and HD95 of \textbf{18.25 px}, while maintaining stability under point displacements up to 10 pixels. Predictions are consistently smooth and elliptical, addressing common failure modes of existing approaches. Ultra-ECP provides an effective and computationally lightweight pathway for adapting large vision models to fetal cardiac biometrics, enabling reliable and clinically practical semi-automated tools.
\end{abstract}

\begin{keywords}
Fetal cardiac ultrasound, cardiac segmentation, foundation models, parameter-efficient learning, prenatal biometrics.
\end{keywords}

% \section{Introduction}

% This is where the content of your paper goes.  Some random
% notes\footnote{Random footnote are discouraged}:
% \begin{itemize}
% \item You should use \LaTeX \cite{Lamport:Book:1989}.
% \item JMLR/PMLR uses natbib for references. For simplicity, here, \verb|\cite|  defaults to
%   parenthetical citations, i.e. \verb|\citep|. You can of course also
%   use \verb|\citet| for textual citations.
% \item Eprints such as arXiv papers can of course be cited \cite{Hinton:arXiv:2015:Distilling}. We recomend using a \verb|@misc| bibtex entry for these as shown in the sample bibliography.
% \item You should follow the guidelines provided by the conference.
% \item Read through the JMLR template documentation for specific \LaTeX
%   usage questions.
% \item Note that the JMLR template provides many handy functionalities
% such as \verb|\figureref| to refer to a figure,
% e.g. \figureref{fig:example},  \verb|\tableref| to refer to a table,
% e.g. \tableref{tab:example} and \verb|\equationref| to refer to an equation,
% e.g. \equationref{eq:example}.
% \end{itemize}

% \begin{table}[htbp]
%  % The first argument is the label.
%  % The caption goes in the second argument, and the table contents
%  % go in the third argument.
% \floatconts
%   {tab:example}%
%   {\caption{An Example Table}}%
%   {\begin{tabular}{ll}
%   \bfseries Dataset & \bfseries Result\\
%   Data1 & 0.12345\\
%   Data2 & 0.67890\\
%   Data3 & 0.54321\\
%   Data4 & 0.09876
%   \end{tabular}}
% \end{table}

% \begin{figure}[htbp]
%  % Caption and label go in the first argument and the figure contents
%  % go in the second argument
% \floatconts
%   {fig:example}
%   {\caption{Example Image}}
%   {\includegraphics[width=0.5\linewidth]{example-image}}
% \end{figure}

% \begin{algorithm2e}
% \caption{Computing Net Activation}
% \label{alg:net}
%  % older versions of algorithm2e have \dontprintsemicolon instead
%  % of the following:
%  %\DontPrintSemicolon
%  % older versions of algorithm2e have \linesnumbered instead of the
%  % following:
%  %\LinesNumbered
% \KwIn{$x_1, \ldots, x_n, w_1, \ldots, w_n$}
% \KwOut{$y$, the net activation}
% $y\leftarrow 0$\;
% \For{$i\leftarrow 1$ \KwTo $n$}{
%   $y \leftarrow y + w_i*x_i$\;
% }
% \end{algorithm2e}

% \clearpage  % Acknowledgements, references, and appendix do not count toward the page limit (if any)
% % Acknowledgments---Will not appear in anonymized version
% \midlacknowledgments{We thank a bunch of people.}


% \bibliography{midl-samplebibliography}


% \appendix

% \section{Proof of Theorem 1}

% This is a boring technical proof of
% \begin{equation}\label{eq:example}
% \cos^2\theta + \sin^2\theta \equiv 1.
% \end{equation}

% \section{Proof of Theorem 2}

% This is a complete version of a proof sketched in the main text.

\section{Introduction}

Fetal cardiac biometric assessment relies heavily on accurate segmentation of the four-chamber view in prenatal ultrasound. Precise delineation of the cardiac chambers enables reliable measurement of cardiac dimensions, structural assessment, and the early detection of congenital abnormalities. Despite rapid advances in deep learning, automated fetal cardiac segmentation remains challenging due to image-quality variability, acoustic shadowing, low-contrast boundaries, and operator-dependent probe orientation. Conventional CNN- and transformer-based architectures such as UNet~\cite{ronneberger2015u} and TransUNet~\cite{chen2021transunet} often require extensive labeled data and exhibit limited generalization across acquisition settings.

Recent foundation models, particularly the Segment Anything Model (SAM)~\cite{kirillov2024segment}, have demonstrated strong zero-shot segmentation capabilities across diverse domains. However, their application to fetal ultrasound remains non-trivial. SAM suffers from three limitations in this setting: (i) high sensitivity to point-prompt placement, which leads to unstable performance; (ii) anatomically inconsistent predictions, such as irregular boundaries or leakage into adjacent thoracic structures; and (iii) substantial computational demands when fully fine-tuned on domain-specific data~\cite{ma2024medsam}. These limitations restrict SAM's practical use in clinical fetal cardiology workflows.

To address these challenges, we present Ultra-ECP, a parameter-efficient fine-tuning framework designed to adapt UltraSAM---a variant of SAM pretrained on medical images---for robust single-point fetal cardiac segmentation. Ultra-ECP integrates three components: (i) prompt- and decoder-level LoRA adaptation~\cite{hu2021lora}, which enables efficient specialization to cardiac anatomy while reducing trainable parameters by over 98\%; (ii) an Ellipse-Aware Loss, which incorporates anatomical priors by regularizing predicted masks toward physiologically plausible elliptical shapes; and (iii) Point-Robust Augmentation, which simulates natural user click variability to enhance prompt robustness.
\begin{figure}[htbp]
\centering
\includegraphics[width=1\textwidth]{figure/fig1_overview.png}
\caption{Overview of Ultra-ECP framework. LoRA modules are injected into UltraSAM's prompt encoder and mask decoder. Ellipse-Aware Loss regularizes predictions toward anatomical cardiac shapes, while Point-Robust Augmentation simulates clinical click imprecision.}
\label{fig:overview}
\end{figure}

\section{Related Work}

\subsection{Fetal Ultrasound Segmentation}
Early approaches for fetal cardiac segmentation relied on classical image processing and hand-crafted features~\cite{ciarlo2003novel}. Deep learning methods, particularly U-Net variants, have since dominated, achieving good performance on curated datasets~\cite{ronneberger2015u,feng2022deep}. Recent works explore transformer-based architectures like TransUNet~\cite{chen2021transunet} and Swin-UNet~\cite{cao2023swin}, though these require substantial training data and struggle with domain shifts common in clinical ultrasound.

\subsection{Segment Anything Model and Medical Adaptation}
SAM~\cite{kirillov2024segment} introduced promptable segmentation with a ViT image encoder, lightweight prompt encoder, and mask decoder. Medical adaptations include MedSAM~\cite{ma2024medsam} fine-tuned on diverse medical images and UltraSAM as a medical-pretrained variant. While promising, these models exhibit sensitivity to prompt placement and produce anatomically implausible outputs in fetal ultrasound, motivating targeted adaptation strategies.

\subsection{Parameter-Efficient Fine-Tuning}
LoRA~\cite{hu2021lora} enables efficient adaptation of large models by injecting low-rank matrices into weight updates. Recent applications include vision transformers~\cite{hu2021lora} and medical imaging~\cite{zhang2023lorafusion}. Anatomical priors via shape regularization~\cite{oktay2018anatomically} and data augmentation for robustness~\cite{shorten2019survey} complement PEFT approaches.

\section{Method}

\subsection{Dataset}

We evaluate Ultra-ECP on the FOCUS dataset, a publicly available collection of fetal four-chamber ultrasound images curated for cardiac biometric measurement~\citep{baudhuin2022focus, wu2025focus}. FOCUS comprises 300 second-trimester four-chamber view images acquired during routine perinatal care. Each image is manually annotated by an experienced sonographer with pixel-wise masks for the fetal thoracic cavity and cardiac chambers, enabling evaluation of both thoracic and cardiac segmentation performance.

All images are de-identified and released under the Creative Commons Attribution 4.0 International (CC-BY 4.0) license. The dataset is distributed via Zenodo together with a detailed description of the acquisition protocol and annotation guidelines~\citep{baudhuin2022focus,wu2025focus}. We adopt the official split protocol provided by the authors when available, and otherwise construct a patient-level split into training, validation, and test subsets to avoid patient overlap across sets. Thoracic segmentation is treated as a secondary task, while cardiac chamber segmentation represents our primary target for fetal biometric applications.


\subsection{Overview}
Ultra-ECP adapts UltraSAM for the task of single-point fetal cardiac segmentation (Fig.~\ref{fig:overview}). Given an ultrasound image $I$ and a single spatial prompt $p\in\mathbb{R}^2$, Ultra-ECP predicts a binary mask $M$ representing the cardiac chamber region. The framework introduces three key innovations that address SAM's limitations in this clinical setting: parameter-efficient fine-tuning via LoRA inserted into the prompt encoder and mask decoder, Ellipse-Aware Loss encouraging anatomically plausible outputs, and Point-Robust Augmentation improving resilience to noisy user prompts. The image encoder remains completely frozen to preserve pretrained representational power and reduce overfitting.

\subsection{Backbone Foundation Model: UltraSAM}
UltraSAM follows the standard SAM architecture comprising a ViT-based image encoder producing high-dimensional feature embeddings, a prompt encoder that embeds points, boxes, or masks, and a lightweight mask decoder fusing image and prompt embeddings into segmentation logits. Due to its strong medical-domain pretraining, UltraSAM offers a superior initialization compared to SAM~\cite{kirillov2024segment}. However, its prompt encoder and mask decoder are not optimized for fetal cardiac anatomy, motivating targeted adaptation.

\subsection{Parameter-Efficient Fine-Tuning with LoRA}
Full-model fine-tuning is computationally expensive and prone to overfitting, especially with limited ultrasound data. Ultra-ECP employs Low-Rank Adaptation (LoRA)~\cite{hu2021lora} to selectively adapt the most task-critical modules. Let $W\in\mathbb{R}^{d\times k}$ denote a pretrained weight matrix in a transformer layer. LoRA introduces two low-rank matrices $B\in\mathbb{R}^{d\times r}$ and $A\in\mathbb{R}^{r\times k}$, with rank $r\ll\min(d,k)$, such that the fine-tuned weight becomes $W'=W+BA$. Only $A$ and $B$ are optimized during training; $W$ stays fixed. LoRA modules are injected into multi-head attention and linear layers of the prompt encoder and corresponding layers in the mask decoder. The image encoder remains frozen. This design reduces trainable parameters by $>$98\%, enabling training on a single consumer GPU.

\subsection{Ellipse-Aware Loss for Anatomical Regularization}
Fetal cardiac chambers in the four-chamber view exhibit a stable, elliptical morphology. Standard segmentation losses such as Dice or BCE are shape-agnostic and may produce anatomically implausible outputs. To incorporate anatomical priors, Ultra-ECP introduces Ellipse-Aware Loss $L_{\text{ellipse}}$, computed in three steps: first, fit an ellipse to the predicted binary mask $P$ using a robust least-squares ellipse fitting algorithm producing ellipse parameters $E_p$; second, generate an ellipse mask $M_e$ that fills the fitted ellipse; third, penalize deviations between $P$ and $M_e$ via $L_{\text{ellipse}}=1-\text{IoU}(P,M_e)$. The total training loss combines Dice segmentation loss and ellipse regularization: $L_{\text{total}}=L_{\text{seg}}(P,G)+\alpha L_{\text{ellipse}}$, where $\alpha$ controls the regularization strength (set to 1.0 in experiments). This encourages smooth, elliptical masks consistent with cardiac anatomy and mitigates noisy or leaking boundaries.

\subsection{Point-Robust Augmentation for Clinical Usability}
In real-world fetal cardiac examinations, user clicks may not precisely correspond to the cardiac center. To enhance robustness, Ultra-ECP introduces Point-Robust Augmentation simulating natural user imprecision. For each training sample, the true center $(c_x,c_y)$ of the ground-truth cardiac mask is computed, a random offset $(\Delta x,\Delta y)$ within a jitter radius $R$ is sampled, and the perturbed point $p'=(c_x+\Delta x,c_y+\Delta y)$ is used as the prompt while retaining the same ground truth mask. This forces the model to rely on contextual cardiac features rather than precise point localization, improving prompt robustness.

\subsection{Training Details}
Training uses the AdamW optimizer with a learning rate of $1\times10^{-4}$. The entire image encoder remains frozen, while the LoRA parameters in the prompt encoder and mask decoder are trainable. We train with a batch size of 4 for up to 100 epochs with early stopping. Preprocessing includes image normalization and optional contrast enhancement. Augmentation comprises horizontal flips, brightness/contrast jitter, and the point jitter described above.

\section{Experiments}

% \subsection{Dataset}
% Ultra-ECP is evaluated on the FOCUS dataset~\cite{focussongxiong2025} providing 2D ultrasound scans paired with thoracic and cardiac chamber annotations. Data are split into training, validation, and test sets using a patient-level split to prevent leakage. Two segmentation tasks are evaluated: thoracic segmentation as a secondary task and cardiac segmentation as the primary target.

\subsection{Baselines}
Ultra-ECP is compared against SAM zero-shot~\cite{kirillov2024segment}, MedSAM zero-shot~\cite{ma2024medsam}, UltraSAM zero-shot, UNet fully trained on FOCUS~\cite{ronneberger2015u}, TransUNet fully trained on FOCUS~\cite{chen2021transunet}, prompt-only fine-tuning of SAM without LoRA or regularization, and ablation variants of Ultra-ECP including LoRA only, LoRA plus Ellipse-Aware Loss, and LoRA plus Point-Robust Augmentation.

\subsection{Evaluation Metrics}
Following prior work~\cite{baudhuin2022focus}, performance is measured by the Dice Similarity Coefficient (DSC), the 95th-percentile Hausdorff Distance (HD95), a robustness curve reporting DSC under point displacements of 0, 2, 5, and 10 pixels, and the ellipse fitting error, defined as the mean squared distance between the predicted mask boundary and the fitted ellipse boundary.

\subsection{Statistical Analysis}

All quantitative comparisons are performed at the image level using paired statistical tests. For each method, we compute the Dice Similarity Coefficient (DSC) and 95th percentile Hausdorff Distance (HD95) per test image. To assess whether Ultra-ECP significantly outperforms baseline methods, we apply the Wilcoxon signed-rank test to paired DSC and HD95 values between Ultra-ECP and each comparator.

A two-sided significance level of $p < 0.05$ is used. When multiple pairwise comparisons are conducted, we control for the family-wise error rate using the Holm--Bonferroni procedure. In tables, statistically significant improvements of Ultra-ECP over a given baseline are marked with an asterisk. We additionally report median and interquartile range (IQR) for DSC and HD95 to better characterize performance variability across the test set.

\subsection{Model Complexity Analysis}

To quantify the efficiency of Ultra-ECP, we compare the number of trainable parameters across all methods. For UltraSAM, full fine-tuning corresponds to updating all weights in the image encoder, prompt encoder, and mask decoder. In contrast, Ultra-ECP freezes the image encoder and inserts LoRA modules only into the prompt encoder and mask decoder. As a result, Ultra-ECP optimizes less than two percent of the parameters of UltraSAM, consistent with the parameter ratios in our ablation study.

Table~\ref{tab:complexity} summarizes the relative trainable parameter counts. UltraSAM full fine-tuning is normalized to one hundred percent. Ultra-ECP requires only $1.8\%$ of these parameters, while UNet and TransUNet fall between these extremes. This reduction in trainable parameters lowers memory consumption and simplifies optimization, making Ultra-ECP practical to train on a single commodity GPU without sacrificing segmentation accuracy.

\begin{table}[htbp]
\centering
\caption{Relative trainable parameter counts for the evaluated models. Values are expressed as a percentage of the total parameters of fully fine-tuned UltraSAM.}
\label{tab:complexity}
\begin{tabular}{lp{3.5cm}p{4cm}c}
\toprule
Method & Image encoder trainable & Prompt / decoder trainable & Trainable params (\%) \\
\midrule
UltraSAM (full) & Yes & Yes & 100 \\
UNet & -- & -- & $<$ 100 \\
TransUNet & -- & -- & $<$ 100 \\
UltraSAM (zero-shot) & No & No & 0 \\
\textbf{Ultra-ECP (ours)} & No & Yes (LoRA only) & 1.8 \\
\bottomrule
\end{tabular}
\end{table}

\section{Results}

\subsection{Quantitative Results}

Table~\ref{tab:quantitative} summarizes the quantitative performance of all evaluated segmentation models on the FOCUS dataset. Ultra-ECP achieves the highest accuracy across both thoracic and cardiac tasks, outperforming SAM, MedSAM, and fully trained UNet and TransUNet baselines. In particular, Ultra-ECP achieves a thoracic DSC of 95.09\% and a cardiac DSC of 92.60\%, while also delivering the lowest HD95 values. These improvements highlight the effectiveness of combining LoRA-based adaptation with anatomical regularization and prompt-robust training, enabling state-of-the-art performance with less than 2\% of UltraSAM's trainable parameters.%

Table~\ref{tab:ablation} presents an ablation study quantifying the contribution of each component within Ultra-ECP. Introducing LoRA alone substantially boosts performance relative to the UltraSAM baseline, reducing HD95 from 27.4\,px to 23.8\,px. Adding the Ellipse-Aware Loss further improves anatomical consistency, reflected in higher DSC and reduced boundary errors. Finally, incorporating Point-Robust Augmentation yields the full Ultra-ECP model, which achieves the best overall accuracy and robustness, maintaining 91.3\% DSC under 10\,px prompt displacement while requiring only 1.8\% trainable parameters.%



\begin{table}[htbp]
\centering
\caption{Quantitative comparison on FOCUS dataset. Ultra-ECP achieves state-of-the-art performance with minimal trainable parameters.}
\label{tab:quantitative}
% \usepackage{graphicx} required in preamble
\resizebox{\columnwidth}{!}{%
    \begin{tabular}{lcccc}
    \toprule
    Method & Thoracic DSC (\%) & Thoracic HD95 (px) & Cardiac DSC (\%) & Cardiac HD95 (px) \\
    \midrule
    SAM (zero-shot) & 87.2 & 42.5 & 82.1 & 35.8 \\
    MedSAM (zero-shot) & 89.4 & 38.2 & 84.3 & 31.2 \\
    UltraSAM (zero-shot) & 91.2 & 33.1 & 87.5 & 27.4 \\
    UNet & 93.1 & 29.8 & 89.2 & 24.6 \\
    TransUNet & 94.2 & 27.5 & 90.8 & 22.1 \\
    % \ldots & \ldots & \ldots & \ldots & \ldots \\
    \midrule
    \textbf{Ultra-ECP (ours)} & \textbf{95.09} & \textbf{25.96} & \textbf{92.60} & \textbf{18.25} \\
    \bottomrule
    \end{tabular}%
}
\end{table}

\begin{table}[htbp]
\centering
\caption{Ablation study showing contributions of each Ultra-ECP component.}
\label{tab:ablation}
\begin{tabular}{lcccc}
\toprule
Variant & Cardiac DSC (\%) & HD95 (px) & Param. (\%) & Robustness (10px) \\
\midrule
UltraSAM baseline (zero-shot) & 87.5 & 27.4 & 0 & 78.2 \\
+ LoRA & 90.1 & 23.8 & 1.8 & 82.4 \\
+ Ellipse Loss & 91.7 & 20.5 & 1.8 & 85.1 \\
+ Point Aug & \textbf{92.60} & \textbf{18.25} & 1.8 & \textbf{91.3} \\
\bottomrule
\end{tabular}
\end{table}

Performance remains stable under prompt displacement up to 10 px (Fig.~\ref{fig:robustness}). Compared to SAM/MedSAM, Ultra-ECP yields superior anatomical consistency and robustness. Compared to UNet, Ultra-ECP delivers higher accuracy with $<$2\% trainable parameters.

\begin{figure}[htbp]
\centering
\includegraphics[width=\textwidth]{figure/049_3panel_ultrasam_thoracic.png}
\caption{Robustness to point displacement. Ultra-ECP maintains stable DSC across 0--10 pixel perturbations.}
\label{fig:robustness}
\end{figure}

\subsection{Qualitative Results}
\begin{figure}[htbp]
\centering
\includegraphics[width=\textwidth]{figure/043_3panel_ultrasam.png}
\caption{Qualitative comparison. Ultra-ECP produces smooth, anatomically consistent cardiac masks even with noisy prompts, unlike SAM baselines.}
\label{fig:qualitative}
\end{figure}

Predicted masks are smooth, continuous, and closely aligned with cardiac boundaries (Fig.~\ref{fig:qualitative}). Ellipse regularization reduces boundary noise, and point jitter training prevents the collapse or drift commonly observed in SAM-based methods.

\section{Discussion}

Ultra-ECP demonstrates that large vision foundation models can be adapted to specialized fetal ultrasound tasks using a parameter-efficient strategy that integrates anatomical priors. By injecting LoRA modules into the prompt encoder and mask decoder of UltraSAM, the proposed framework learns cardiac-specific representations while keeping the image encoder frozen. This design preserves the general visual knowledge encoded in the backbone and substantially reduces the number of trainable parameters, yet still achieves state-of-the-art performance on the FOCUS dataset for both thoracic and cardiac segmentation.

The Ellipse-Aware Loss plays a central role in enforcing anatomical plausibility. Standard segmentation losses such as cross-entropy or Dice are agnostic to shape, and may tolerate fragmented boundaries or irregular contours that are inconsistent with fetal cardiac anatomy. By explicitly penalizing deviations between the predicted mask and a fitted ellipse, Ultra-ECP encourages smooth, elliptical cardiac shapes that align with the four-chamber view morphology. This regularization reduces boundary noise and mitigates leakage into adjacent thoracic structures, which are common failure modes of SAM-based approaches on ultrasound images.

Point-Robust Augmentation further improves clinical usability by reducing sensitivity to the exact location of the user-provided point prompt. In routine obstetric practice, the sonographer may only roughly indicate the cardiac chamber rather than clicking its geometric center. Our experiments show that Ultra-ECP maintains stable performance when the point prompt is perturbed within a radius of up to ten pixels, whereas zero-shot SAM and MedSAM exhibit marked degradation. This robustness is important for practical deployment, where time constraints and probe motion can lead to imperfect prompts.

Compared with conventional architectures such as UNet and TransUNet, Ultra-ECP offers a favorable balance between accuracy and efficiency. It consistently improves DSC and HD95 while training less than two percent of UltraSAM parameters. This suggests that parameter-efficient fine-tuning combined with domain-specific priors is a promising alternative to training bespoke models from scratch for each new ultrasound task.

\subsection{Limitations and Future Work}

This study has several limitations. First, Ellipse-Aware Loss relies on robust ellipse fitting. In cases with severe acoustic shadowing, dropout, or highly irregular cardiac contours, the fitted ellipse may not fully capture the true anatomy, which can introduce bias in the regularization term. Second, we consider only single-point prompts centered on the cardiac region. Although this setting is natural for interactive use, future work could explore multi-point or box prompts to improve controllability further.

Third, our evaluation is restricted to a single publicly available four-chamber dataset. While FOCUS covers a range of imaging conditions, domain shifts across scanners, institutions, and gestational ages may still affect generalization. Extending Ultra-ECP to multi-center cohorts and cine sequences, and incorporating temporal consistency across frames, are important directions for future research. Finally, the current implementation focuses on two-dimensional images; integrating Ultra-ECP into three-dimensional or multi-plane fetal echocardiography pipelines remains an open challenge.

\subsection{Clinical Implications}

Accurate and robust fetal cardiac segmentation is a prerequisite for reliable measurement of cardiothoracic ratios and other biometric indices used in congenital heart disease screening. By reducing annotation burden and stabilizing segmentation quality under imperfect prompts, Ultra-ECP has the potential to support semi-automated workflows in routine obstetric ultrasound. In particular, integrating the proposed method into scanner software could enable near real-time visualization of cardiac masks and automated computation of key biometric parameters, while keeping the clinician in the loop for prompt placement and quality control.


% \section{Discussion}

% Ultra-ECP demonstrates that large vision foundation models can be effectively adapted to specialized ultrasound tasks using lightweight training strategies. By focusing adaptation on the prompt encoder and mask decoder, the method maintains computational efficiency while learning cardiac-specific priors. The integration of anatomical constraints through Ellipse-Aware Loss addresses a notable failure mode of SAM—fragmented or irregular masks—while Point-Robust Augmentation directly improves usability in real clinical workflows where precise point placement cannot be guaranteed.

% Limitations include reliance on accurate ellipse fitting for loss computation, potential performance degradation for severe shadowing or uncommon cardiac views, and the single-point prompt paradigm which while practical may be extended to multi-point or box prompts in future work.

\section{Conclusion}

We presented Ultra-ECP, a parameter-efficient, anatomically informed framework for adapting UltraSAM to fetal cardiac segmentation. Ultra-ECP achieves robust, high-accuracy segmentation from a single point prompt while requiring minimal trainable parameters. The framework significantly improves anatomical consistency and prompt robustness, enabling reliable and clinically deployable fetal cardiac biometric tools. Future work will explore multi-view consistency, temporal modeling across cine sequences, and extension to broader prenatal structural assessments.

\section*{Ethics Statement}

This study is conducted on the FOCUS dataset, a publicly released collection of de-identified fetal four-chamber ultrasound images for cardiac biometric measurement~\citep{wu2025focus}. All images in FOCUS are distributed under the CC-BY 4.0 license. No personally identifiable information or protected health information is included in the data.

We did not collect new patient data or perform any additional interventions beyond the original study that created the FOCUS dataset. As such, no new institutional review board approval was required for the present analysis. The proposed method is intended as a decision-support tool to assist clinicians in fetal cardiac biometric assessment and is not designed to replace expert judgment.

\midlacknowledgments{
We thank the creators of the FOCUS dataset for making their data publicly available, and the members of the AIBioMed Research Group and collaborating institutions for helpful discussions. This work was partially supported by institutional research grants from Taipei Medical University and collaborating universities.%
}

\bibliography{midl-samplebibliography}

\end{document}
