\documentclass{midl} % Include author names

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution

\usepackage{multirow}
\usepackage{mwe} % to get dummy images

\jmlryear{2026}
\jmlrworkshop{Full Paper -- MIDL 2026}
\jmlrvolume{-- 315}
% \jmlrvolume{-- nnn}
\editors{Accepted for publication at MIDL 2026}

% % % % % % % % % % % % % % % % % % % % % %  TITLE
\title[SuD-CoTAN: Sulcal Depth-guided Fetal Cortical Surface Reconstruction]{SuD-CoTAN: Sulcal Depth-guided Anatomically Consistent Fetal Cortical Surface Reconstruction}

% % % % % % % % % % % % % % % % % % % % % %  AUTHORS
\midlauthor{\Name{Irina Grigorescu\nametag{$^{1,2}$}} \orcid{0000-0002-9756-3787} \Email{irina.grigorescu@kcl.ac.uk}\\
\Name{Jiaxin Xiao\nametag{$^{2}$}} \Email{jiaxin.1.xiao@kcl.ac.uk}\\
\Name{Yourong Guo\nametag{$^{2}$}} \Email{yourong.guo@kcl.ac.uk}\\
\Name{Vanessa Kyriakopoulou\nametag{$^{1}$}} \Email{vanessa.kyriakopoulou@kcl.ac.uk}\\
\Name{Alena Uus\nametag{$^{1,2}$}} \Email{alena.uus@kcl.ac.uk}\\
\Name{Vyacheslav Karolis\nametag{$^{1,2}$}} \Email{slava.karolis@kcl.ac.uk}\\
\Name{Kaili Liang\nametag{$^{2}$}} \Email{kaili.liang@kcl.ac.uk}\\
\Name{Mohamed A. Suliman\nametag{$^{2}$}} \Email{mohamed.suliman@kcl.ac.uk}\\
\Name{Qiang Ma\nametag{$^{3}$}} \Email{q.ma20@imperial.ac.uk}\\
\Name{Daniel Rueckert\nametag{$^{3}$}} \Email{d.rueckert@imperial.ac.uk}\\
\Name{Bernhard Kainz\nametag{$^{3}$}} \Email{b.kainz@imperial.ac.uk}\\
\Name{A. David Edwards\nametag{$^{1}$}} \Email{ad.edwards@kcl.ac.uk}\\
\Name{Joseph V. Hajnal\nametag{$^{1}$}} \Email{jo.hajnal@kcl.ac.uk}\\
\Name{Mary Rutherford\nametag{$^{1}$}} \Email{mary.rutherford@kcl.ac.uk}\\
\Name{Maria Deprez\midljointauthortext{Contributed equally}\nametag{$^{1,2}$}} \Email{maria.deprez@kcl.ac.uk}\\
\Name{Emma C. Robinson\midlotherjointauthor\nametag{$^{1,2}$}} \Email{emma.robinson@kcl.ac.uk}\\
\addr $^{1}$ Department of Early Life Imaging, School of Biomedical Engineering \& Imaging Sciences, King’s College London, London, United Kingdom \\
\addr $^{2}$ Department of Biomedical Computing, School of Biomedical 
Engineering \& Imaging Sciences, King’s College London, London, United Kingdom \\
\addr $^{3}$ BioMedIA, Department of Computing, Imperial College London, London, UK}

% % % % % % % % % % % % % % % % % % % % % %  DOCUMENT BEGINS
\begin{document}

% % % % % % % % % % % % % % % % % % % % % %  MAKETITLE
\maketitle

% % % % % % % % % % % % % % % % % % % % % %  ABSTRACT
\begin{abstract}
Accurate and anatomically consistent fetal cortical surface reconstruction is essential for studying early brain development, yet existing methods often lack reliable vertex-wise correspondence and fail to harmonise their outputs across heterogeneous magnetic resonance imaging (MRI) datasets. We introduce Sulcal Depth-guided CoTAN (SuD-CoTAN), a learning-based framework that fits anatomically and topologically consistent cortical meshes directly to T2-weighted MRI and performs alignment to age-matched templates in one single step.
All models are trained exclusively on normative samples from the developing Human Connectome Project (dHCP) and evaluated within-sample and on a  different acquisition protocol.
Results show that SuD-CoTAN generalises to new datasets in ways that harmonise global morphometric properties by better capturing the surface geometry of individual cases; its template fitting is precise, delivering vertex-wise anatomical correspondences that result in sharp weekly averages of sulcal depth and curvature maps in template space. 
This supports direct vertex-wise Gaussian Process regression of neurodevelopmental trends without a need for any additional registration. Collectively, this whole pipeline runs in  $\sim$3 seconds. This suggests that SuD-CoTAN offers promise as a screening tool for cortical malformations during fetal development. 
\end{abstract}

% % % % % % % % % % % % % % % % % % % % % %  KEYWORDS
\begin{keywords}
fetal MRI, cortical surface reconstruction, deep learning
\end{keywords}


% % % % % % % % % % % % % % % % % % % % % %  INTRODUCTION
\section{Introduction}
Characterising fetal cortical development is central to detecting conditions where the folding process deviates from typical trajectories.
In the fetal period, disruptions to cortical folding are associated with a range of neurodevelopmental disorders, making precise characterisation of surface geometry critical for quantifying typical and atypical maturation \cite{garcia2025longitudinal,story2021brain,kyriakopoulou2014cortical}. However, obtaining surfaces that are both anatomically meaningful and harmonised across magnetic resonance imaging (MRI) protocols remains challenging, particularly in fetal MRI.

Current approaches for surface extraction often rely on deformation of the template surface mesh in order to preserve topology of the cortical surfaces \cite{schuh2017deformable,zollei2020infant,ma2022cortexode} or implicit representations \cite{cruz2021deepcsr,wang2023ibeat,gopinath2023cortical}; however, these approaches cannot guarantee vertex-wise anatomical correspondences across the population.
These correspondences need to be estimated post-hoc, using spherical projection and registration \cite{robinson2014msm,robinson2018multimodal,besenczi2024high}, to enable accurate detection of subtle cortical abnormalities through high-resolution (vertex-level) comparisons. Additionally, changes in scanner and reconstruction protocols translate to differences in apparent resolution, contrast, and partial volume, resulting in inconsistent cortical surface measures across different acquisition protocols. Classical neonatal pipelines \cite{makropoulos2018developing} can produce high-quality surfaces for neonatal MRI, yet they fail to generalise to fetal scans, where lower resolution leads to partial-volume effects that blur fine cortical structures (see Figure~\ref{fig:overview}).
 
Current best practice requires lengthy processing, involving several error-prone steps, including intensity-based tissue segmentation \cite{scott2011growth,gholipour2017normative,makropoulos2018developing,uus2021multi}, deformable mesh fitting at the boundaries, inflation (for visualisation), and projection to the sphere followed by surface registration %surface registration %
\cite{robinson2014msm,robinson2018multimodal,besenczi2024high}.
This makes such pipelines impractical for use in the clinic.
Ideally, surface reconstruction should instead be robust: generating anatomically and topologically correct meshes that are harmonised across acquisition protocols and automatically aligned to a normative reference cohort for precise detection of outliers.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% FIG 1
\begin{figure}[hb!]
\centering
\includegraphics[width=\textwidth,angle=0]{figscr/fig1-overview-CR.png}
\caption{Traditional neonatal surface-reconstruction pipelines do not perform well on fetal MRI (see red arrows), while our proposed SuD-CoTAN framework is able to produce anatomically plausible surfaces for fetal scans from both dHCP and iFIND. The CoTAN (baseline) deep learning models (see Section~\ref{sec:ablation}) underperform on the unseen iFIND dataset.}
\label{fig:overview}
\end{figure}

% % % % % % % % % % % %  Contributions
\textbf{Contributions:}
We introduce \textbf{Sulcal Depth-guided CoTAN (SuD-CoTAN)}, a learning-based framework for fetal cortical surface reconstruction. \textbf{SuD-CoTAN} builds on the CoTAN framework \cite{ma2023conditional}, which produces topologically consistent surfaces. Unlike CoTAN, \textbf{SuD-CoTAN} also provides vertex-wise anatomical consistency, where reconstructed surfaces are directly aligned to weekly fetal templates. Additionally, \textbf{SuD-CoTAN} is robust to variations in acquisition protocol, producing consistent surface measures across different fetal datasets.
We evaluate our approach on fetal MRI scans from both normative samples from the developing Human Connectome Project (dHCP) and controls from the Intelligent Fetal Imaging and Diagnosis (iFIND) clinical research study, demonstrating geometric accuracy and cross-dataset harmonisation.
Importantly, the iFIND dataset is entirely unseen during training, and it is acquired on a different scanner, with a different protocol. As such, performance on iFIND directly reflects out-of-distribution generalisation.
Results show the method achieves superior surface precision and enhanced harmonisation relative to baseline methods, with sufficient coherence across acquisitions to support the high-resolution (per-vertex) normative modeling of sulcal depth.

% % % % % % % % % % % % % % % % % % % % % %  METHODS
\section{Methods}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% FIG 2
\begin{figure}[t!]
\centering
\includegraphics[width=.9\textwidth,angle=0]{figscr/fig2-frameworkOverview-CR.png}
\caption{Traditional pipelines (top row) rely on several separate image processing steps: tissue segmentation, followed by surface fitting, followed by spherical alignment to a population-average template for vertex-wise analysis. By contrast, our method (bottom row) integrates template alignment directly into the reconstruction process, producing analysis-ready, template-aligned surfaces directly from raw fetal MRI in a single step.}
\label{fig:frameworkoverview}
\end{figure}

Our proposed single-step surface reconstruction and template alignment framework is illustrated in Figure~\ref{fig:frameworkoverview}.
While traditional pipelines depend on brain tissue segmentations to extract cortical surfaces, and rely on spherical registration to templates to enable vertex-wise comparisons,
our method operates on fetal MRI directly, and integrates template alignment into the reconstruction, to produce analysis-ready, template-aligned surfaces from one processing step. This pipeline is described in Figure~\ref{fig:networkArchitecture}, with an architecture that builds from CoTAN \cite{ma2023conditional} but introduces key methodological and data processing advances that: 1) introduce a novel \textbf{SulcNet} module that estimates sulcal depth during training to guide and constrain template alignment; 2) adapt training with augmentations to achieve harmonisation; and 3) allow CoTAN to be run successfully for fetal data for the first time.   

% % % % % % % % % % % % % generation of pGT and templates -- updates section
\subsection{Generation of pseudo-ground truth surfaces and initial templates}\label{sec:descriptionofpgtandtemplates}
CoTAN \cite{ma2023conditional} relies on the availability of pseudo-ground truth (pGT) surfaces to supervise training, as well as an initial cortical template used to initialise surface reconstruction.
However, pGT surfaces are non-trivial to obtain \cite{ma2024weakly}, due to the lower image quality of fetal MRI stemming from challenges associated with scanning free-moving fetuses within the maternal body, where volumetric data need to be reconstructed from stacks of acquired 2D slices \cite{wright2018lstm,cordero2019automating,kuklisova2012reconstruction,uus2025scanner}.
To address these challenges, we employ a fetal-specific processing workflow consisting of the following steps.
First, $T_2$-weighted ($T_2$w) fetal MRI volumes are segmented using BOUNTI \citep{uus2023bounti}, an automated brain tissue parcellation model for 3D $T_2$w fetal MRI, adapted from our neonatal protocols \cite{makropoulos2014automatic,makropoulos2016regional} through extensive manual correction.
Second, using the BOUNTI-derived tissue labels, approximate pGT white and pial cortical surfaces are generated by adapting our neonatal surface extraction pipeline \cite{schuh2017deformable} to fetal anatomy.

Fetal cortical templates were generated separately for each hemisphere and for each week of gestation (from 25–36 weeks) using Multimodal Surface Matching (MSM) \cite{robinson2014msm,robinson2018multimodal,besenczi2024high,da2025differential}. 
Template construction follows established fetal atlas methodologies \cite{kuklisova2011atlas,serag2012multi,bozek2018construction,williams2023structural,karolis2023developing} and consists of iterative surface alignment and kernel-weighted averaging with temporally adaptive regression, yielding one template per gestational week, which is subsequently aligned to the 36-week space through template-to-template alignment.
Each fetal template comprises inner (white) and outer (pial) cortical surfaces, inflated surfaces, spherical representations, and associated cortical feature maps, including sulcal depth, curvature, and cortical thickness.
For the purpose of this study, however, we utilise the inflated surfaces and their corresponding sulcal depth maps as initial weekly templates, replacing the single initial template mesh used in the original CoTAN framework \cite{ma2023conditional}.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% FIG 3
\begin{figure}[t]
\centering
\includegraphics[width=\textwidth,angle=0]{figscr/fig3-networkArchitecture-CR.png}
\caption{Overview of the \textbf{SuD-CoTAN} framework. 
Left: The model takes fetal brain MRI as input and deforms a gestational-age–specific template surface into a subject-specific white matter (WM) prediction. A pre-trained \textbf{SulcNet} provides sulcal-depth guidance to ensure consistent deformations across subjects.
Right: For pial surface reconstruction, the initial meshes are subject-specific predicted WM surfaces and are subsequently deformed into pial surfaces as proposed in \cite{ma2024weakly}.}
\label{fig:networkArchitecture}
\end{figure}

%%%%%%%%%%%%%%%%%%%%%%%%% sudcotan
\subsection{SuD-CoTAN: Sulcal Depth–guided Cortical Surface Reconstruction}
Our proposed \textbf{SuD-CoTAN} takes as input a fetal $T_2$w MRI scan and the gestational age (GA) of the subject, and predicts a conditional time-varying velocity field (CTVF) that deforms the closest-in-age initial surface into the subject-specific white matter (WM) (inner) surface (see Figure~\ref{fig:networkArchitecture}).
To enable single-step surface fitting while implicitly enforcing surface-template correspondence, %and implicit surface–image registration, 
\textbf{SuD-CoTAN} integrates \textbf{SulcNet} as a feature-based guidance module.
Specifically, by maximising the normalised cross correlation (NCC) between the predicted sulcal depth map and an age-matched template sulcal depth map, \textbf{SulcNet} encourages anatomically consistent deformations.
For pial (outer) surface reconstruction, a second CoTAN-based network is trained following the formulation introduced in \cite{ma2024weakly}. 
In this stage, the initial surface is given by the predicted WM surface from the pre-trained \textbf{SuD-CoTAN} model, which is then further deformed via a learned CTVF to match the pGT pial surfaces.

%%%%%%%%%%%%%%%%%%%%%%%%% sulcnet
\subsection{SulcNet: Sulcal Depth Prediction Network}
\textbf{SulcNet} itself is a spherical U-Net \cite{monti2017geometric,zhao2019spherical}, adapted from \cite{suliman2022deep}, that takes the 3D coordinates of the deformed WM surface as input and outputs average surface convexity (or sulcal depth) at each vertex.
The input to this network corresponds to vertex coordinates from white matter meshes that have been resampled to the resolution and mesh topology of a sixth-order icosphere (with 40,962 vertices), taking advantage of the implicit vertex correspondence of spheres and anatomical (WM) surfaces to define convolutional kernels on the sphere, while learning from coordinates that reflect the native WM geometry. The architecture then follows a six-stage encoder–decoder design, with each resolution defined by progressively coarser/finer icospheres \cite{suliman2025unsupervised}, learning MoNet convolutions \cite{monti2017geometric} with LeakyReLU activations ($\alpha = 0.2$), hexagonal mean-pooling and transpose-convolutions for upsampling \cite{zhao2019spherical}. A compact feedforward layer \cite{suliman2022deep} produces the final sulcal depth prediction per vertex. These generated sulcal depth maps are then used to guide template matching through integration of a sulcal depth loss (see Section~\ref{sec:trainingdetails}).

% % % % % % % % % % % %  Training
\subsection{Optimisation}\label{sec:trainingdetails}
%%%%%%%%%%%%% 
\textbf{SuD-CoTAN} is trained with a combination of four losses \cite{ma2023conditional,ma2024weakly}: a bidirectional Chamfer (bi-Chamfer) distance loss ($\mathcal{L}_{cd}$) \cite{bongratz2022vox2cortex} between the predicted and pGT vertices, and three smoothness penalties: a normal consistency loss $\mathcal{L}_{nc}$, constraining cosine similarity between adjacent face normals, the mesh Laplacian loss  $\mathcal{L}_{lap}$ to promote smoothness, and an edge length loss $\mathcal{L}_{edge}$ to discourage irregular or excessively stretched edges. 
During training, the pre-trained \textbf{SulcNet} outputs a sulcal depth map ($SD_{predicted}$) from each predicted WM surface, which is then compared with that of the closest-in-age template ($SD_{template}$) using NCC.
The total WM loss becomes: 
\begin{equation}\label{eq:wmloss}
    \mathcal{L}^{WM} = \mathcal{L}_{cd} + \lambda_{lap} \mathcal{L}_{lap} + \lambda_{nc} \mathcal{L}_{nc} + \lambda_{edge} \mathcal{L}_{edge} + \lambda_{SD} \mathcal{L}_{NCC}(SD_{predicted}, SD_{template})
\end{equation}

%%%%%%%%%%%%% 
% \textbf{Pial surface fitting model.}
For pial surfaces, we follow the previously proposed method from \cite{ma2024weakly}, where the Chamfer distance is replaced with a single-directional Chamfer loss $\mathcal{L}_{cd-1d}$ between predicted and pGT pial surfaces. 
Two additional penalties are applied: an inflation loss $\mathcal{L}_{infl}$, which constrains surface inflation to follow normal directions \cite{ma2024weakly}, and a pial-outside-WM loss $\mathcal{L}_{pow}$ (Appendix \ref{app:pow}), which penalizes pial vertices whose displacement vectors point inward relative to WM normals. %, ensuring all pial vertices lie outside or on the WM surface.
The total pial loss becomes: 
\begin{equation}
    \mathcal{L}^{pial} = \mathcal{L}_{cd-1d} + \lambda_{lap} \mathcal{L}_{lap} + \lambda_{nc} \mathcal{L}_{nc} + \lambda_{edge} \mathcal{L}_{edge} + \lambda_{infl} \mathcal{L}_{infl} + \lambda_{pow} \mathcal{L}_{pow}
\end{equation}

%%%%%%%%%%%%% Sulcnet
\textbf{SulcNet} uses a Smooth L1 loss and is trained independently on pGT meshes and sulcal depth maps derived from normative samples using our classical surface processing pipeline \cite{uus2023bounti,schuh2017deformable,makropoulos2018developing}.
The trained \textbf{SulcNet} is then frozen during optimisation of the \textbf{SuD-CoTAN} network.
The final model was selected based on the lowest validation mean squared error (MSE) and the highest validation NCC.

%%%%%%%%%%%%% 
\textbf{Training.} WM and pial models were trained for 400 epochs using the Adam optimizer.
Following \cite{ma2023conditional,ma2024weakly}, we first pre-train the models using relatively large loss weights, then fine-tune them with smaller weights (Appendix~\ref{app:weights} and Appendix~\ref{app:lambdaSD} for choice of $\lambda_{SD}$).
During training, we apply MONAI \cite{cardoso2022monai} augmentations designed to mimic realistic variations encountered in fetal MRI, namely, bias-field inhomogeneities, %(degree = 3, coefficients $\sim \mathcal{U}(–0.5,0.5)$ to simulate scanner- and coil-related inhomogeneities, 
Gaussian noise, % injection with $\sigma \sim \mathcal{U}(0.0, 0.02)$ to model variations in image noise across different acquisitions, 
Gaussian smoothing, %with spatial $\sigma \sim \mathcal{U}(0.5, 1.5)$mm to mimic differences in through-plane resolution and motion-induced blurring, 
gamma contrast adjustments, %through random gamma correction $\gamma \sim \mathcal{U}(0.75, 1.25)$, 
and random histogram shifts %using 3–5 control points to reproduce scanner- and protocol-dependent contrast variability 
(see Appendix~\ref{app:augmentations} for more details on how these were parameterised).
%%%%%%%%%%%%% 
\textbf{SulcNet} was trained for 1000 epochs using an Adam optimizer, incorporating geometric augmentations consisting of small random translations and rotations of the meshes (see Appendix~\ref{app:augmentations} for more details).

%%%%%%%%%%%%% 
% \noindent 
\textbf{Evaluation.}
We evaluate our proposed framework in four ways: first, we assess \textbf{SulcNet} for its suitability for anatomical guidance during \textbf{SuD-CoTAN} training (see Section~\ref{sec:sulcnet}); second, we analyse population-level consistency and developmental trajectories across our cohorts (see Section~\ref{sec:populationandGP}); and third, we evaluate cross-dataset harmonisation using global surface-based cortical metrics (see Section~\ref{sec:globalmetrics}).
Finally, we evaluate \textbf{SuD-CoTAN} against three related configurations (see Table~\ref{tab:ablationmodels}) to isolate the effects of data augmentation and sulcal depth guidance (Section~\ref{sec:ablation}).
In summary: (i) the baseline CoTAN model uses inflated weekly templates as initial surfaces and no augmentation; (ii) CoTAN + augmentation (CoTAN+aug) incorporates intensity and contrast augmentations; (iii) the CoTAN + sulcal depth guidance (CoTAN+SD) adds \textbf{SulcNet} guidance without augmentation; and (iv) our proposed \textbf{SuD-CoTAN} combines augmentations with \textbf{SulcNet} guidance.

\begin{table}[t]
\centering
\caption{Ablation study model configurations for WM and pial surface reconstruction.}
\begin{tabular}{l||c|c}
\textbf{Model} & \textbf{Augmentation} & \textbf{Sulcal Depth Guidance} \\
\hline\hline
CoTAN (baseline)   & -- & -- \\
CoTAN+aug          & \checkmark & -- \\
CoTAN+SD           & -- & \checkmark \\
\textbf{SuD-CoTAN (proposed)} & \checkmark & \checkmark \\
\end{tabular}
\label{tab:ablationmodels}
\end{table}

% % % % % % % % % % % % % % % % % % % % % %  RESULTS
\section{Experiments and Results}

% % % % % % % % % % % %  Training Dataset
\subsection{Fetal MRI Data Selection and Preprocessing}
We evaluate our framework on two fetal $T_2$w MRI datasets, using one for training (dHCP) and the other exclusively for testing (iFIND\footnote{data released on NIMH Data Archive (NDA), collection 5690: \href{https://nda.nih.gov/edit\_collection.html?id=5690}{nda.nih.gov/edit\_collection.html?id=5690}}).
All scans were motion corrected and reconstructed using slice-to-volume registration (SVR) to $0.5$mm isotropic resolution \cite{kuklisova2012reconstruction,wright2018lstm,cordero2019automating,uus2025scanner}.
For all experiments, $T_2$w images were affinely aligned to a 36-week fetal brain atlas \cite{uus2021multi}, and in this study we only consider the left hemisphere.

\textbf{Training Dataset.} The training and validation dataset consists of 210 dHCP scans (20.86--38.29 weeks GA) acquired on a 3T Philips Achieva MRI system with a 32-channel cardiac coil ($T_E=250$ms, $1.1 \times 1.1$mm in-plane resolution, and $2.2$mm slice thickness) using a dedicated protocol \cite{price2019developing}.
We use 200 subjects for training and 10 for validation (see Table~\ref{tab:datasetpartition}, ``Training'' rows).
Both our cortical surface reconstruction models and the sulcal depth prediction network were trained exclusively on these dHCP training subjects.

% % % % % % % % % % % %  Testing Dataset
\textbf{Testing Dataset.} An additional 20 dHCP subjects were reserved for testing, together with 173 iFIND subjects (22.29--31.86 weeks GA) scanned on a $1.5$T Philips Ingenia MRI system using a 28-channel torso coil ($T_E=80$ms, $1.25 \times 1.25$mm in-plane resolution, and $2.5$mm slice thickness). %All data was collected at King’s College London. \cite{}
We select 20 iFIND subjects, age-matched to the dHCP test set to avoid confounding generalisation performance with gestational age effects, and generate pseudo-ground truth surfaces using the same fetal-specific pipeline, consisting of BOUNTI-based tissue segmentations, followed by cortical surface reconstruction with a modified neonatal dHCP pipeline (see Section~\ref{sec:descriptionofpgtandtemplates}).
We use these dHCP and iFIND subjects to evaluate the performance of our proposed model (Table \ref{tab:ablation}), as well as for testing \textbf{SulcNet} generalisation (Figure \ref{fig:sulcnetpredictions}). For more details on data partition see  Table~\ref{tab:datasetpartition}, ``Evaluation'' rows.
For subsequent cortical metric analyses, we select a subset of 140 subjects from the entire dHCP dataset to match the GAs of the 173 iFIND subjects (see Table~\ref{tab:datasetpartition}, ``Feature Extraction'' rows), ensuring that age-related effects do not confound cross-dataset comparisons.

% % % % % % % % % % % %  SulcNet
\subsection{SulcNet: Evaluation of Predicted Sulcal Depth Maps}\label{sec:sulcnet}
First, we evaluate \textbf{SulcNet} to quantify its accuracy in sulcal depth prediction and thus suitability for guiding surface reconstruction.
Figure \ref{fig:sulcnetpredictions} presents qualitative (left) and quantitative (right) results.
Visually, the two representative examples (28.14 and 33.86 weeks GA) show strong agreement between ground-truth and predicted sulcal depth maps.
Moreover, \textbf{SulcNet} achieved a low MSE of $0.32 \pm 0.17$mm and a high NCC of $0.99 \pm 0.01$, indicating strong correspondence between predicted and pGT sulcal depth maps.
The high NCC values are particularly important, as our proposed cortical surface reconstruction model relies on predicted sulcal depth maps that remain closely aligned with age-specific template sulcal depth patterns in terms of NCC.
These results suggest that \textbf{SulcNet} provides the consistency and accuracy required for reliable, age-aware guidance during cortical surface reconstruction.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% FIG 4
\begin{figure}[t]
\centering
\includegraphics[width=.99\textwidth,angle=0]{figscr/fig4-SulcNetTestSetPredictions-CR.png}
\caption{Predictions from \textbf{SulcNet}. Left: two examples comparing ground truth (GT) \textit{vs.} predicted (PR) sulcal depth (SD) maps. Right: quantitative evaluation of mean squared error (MSE) and normalised cross correlation (NCC) between GT and PR for dHCP test examples. Blue dots represent the mean.}
\label{fig:sulcnetpredictions}
\end{figure}

% % % % % % % % % % % % Comparison of Population Averages Between Models
\subsection{Population-level Consistency and Developmental Trajectories}\label{sec:populationandGP}
To assess whether \textbf{SuD-CoTAN} produces more developmentally consistent population-level representations, we generated age-stratified average and standard deviation sulcal depth maps from the unseen iFIND test dataset subjects (Table~\ref{tab:datasetpartition}, ``Feature Extraction'' rows) across gestational ages 22–32 weeks (Figure~\ref{fig:averageSD}).
Figure~\ref{fig:averageSD}A presents example average and standard deviation sulcal depth maps projected onto inflated surfaces. 
Visually, mean sulcal depth maps are better defined with lower standard deviation when sulcal depth guidance is used (\textbf{SuD-CoTAN}).
Figure~\ref{fig:averageSD}B plots the distribution of variance (left) and the mean sharpness (right) across the surface, for all GAs. 
These results show that \textbf{SuD-CoTAN} yields higher sharpness and lower variability relative to the CoTAN+aug model.
For similar results on curvature, see Appendix~\ref{app:averageCurv}, while a combined dHCP+iFIND analysis can be found in Appendix~\ref{app:averageSDcombined} and Appendix~\ref{app:averageCurvcombined}.

\begin{figure}[t]
\centering
\includegraphics[width=1.0\textwidth,angle=0]{figscr/fig5-AverageSulcalDepthsIFINDonly-CR.png}
\caption{Sulcal depth maps across gestational ages for the iFIND dataset.
\textbf{A.} Example mean ($\overline{\text{SD}}$) and standard deviation ($\sigma$) sulcal depth (SD) maps for subjects scanned at 26, 29 and 32 weeks for the CoTAN+aug model (first two rows) and our proposed \textbf{SuD-CoTAN} model (last two rows).
\textbf{B.} Sulcal depth map variability (lower is better) and the mean absolute Laplacian of the average sulcal depth map (as a proxy for sharpness, where higher is better) for each gestational age, comparing the CoTAN+aug model (yellow) and our proposed model (cyan).
}
\label{fig:averageSD}
\end{figure}

% % % % % % % % % % % % normative model 
To further quantify developmental coherence, we modeled sulcal depth trajectories across the 313 dHCP and iFIND subjects (Table~\ref{tab:datasetpartition} ``Feature Extraction'') using vertex-wise Gaussian Process (GP) regression, fitted using the GPyTorch variational framework \cite{gardner2018gpytorch}, using a combination of linear and Radial Basis Function (RBF) kernels. We compared sulcal depth outputs from the CoTAN+aug and \textbf{SuD-CoTAN} models, and assessed accuracy using the mean absolute error (MAE) between GP-predicted and the subject-specific sulcal depth values calculated from the reconstructed cortical surfaces.
Figure~\ref{fig:normativeModel} showcases representative trajectories at two anatomical locations (gyral crown in orange and sulcal fundus in blue), alongside whole-brain GP means and MAE maps.
Relative to CoTAN+aug, \textbf{SuD-CoTAN} yields lower MAE and trajectories with clearer separation between gyral and sulcal  developmental trends, representing a deepening of the sulcal fundus and a rising gyral crown with GA.
These patterns indicate that the proposed model better captures anatomically meaningful developmental trajectories at the population level.

\begin{figure}[t]
\centering
\includegraphics[width=.9\textwidth,angle=0]{figscr/fig6-normativeModel-CR.png}
%\caption{Normative Model}
\caption{Gaussian Process (GP)-derived sulcal depth (SD) trajectories. Representative developmental trajectories at a gyral crown (orange) and a sulcal fundus (blue), alongside GP mean maps and mean absolute error (MAE) maps for the CoTAN+aug and \textbf{SuD-CoTAN} models.}
\label{fig:normativeModel}
\end{figure}

% % % % % % % % % % % %  Harmonisation of surface-based metrics
\subsection{Harmonisation of Surface-based Metrics}\label{sec:globalmetrics}
To evaluate cross-dataset harmonisation, we analysed global cortical metrics from the predicted surfaces of the same 313 dHCP and iFIND subjects, including mean cortical thickness, mean absolute sulcal depth, mean absolute curvature, and average surface area.
Figure~\ref{fig:globalAverageMetrics} shows the average metrics as a function of GA corresponding to the proposed \textbf{SuD-CoTAN} model (see Appendix~\ref{app:harmonisation} for the other three models).
Polynomial regression controlling for GA revealed no significant cohort effects in terms of cortical thickness, sulcal depth, and surface area, while mean curvature still shows a minor cohort difference.
As opposed to the CoTAN and CoTAN+SD models, \textbf{SuD-CoTAN} showed the strongest improvement in harmonising cortical thickness, an encouraging result given its reliance on accurate white and pial surface reconstructions.
Notably, despite using sulcal-depth guidance, the CoTAN+SD model did not harmonise mean sulcal depth across datasets, highlighting the importance of augmentation.

\begin{figure}[]
\centering
\includegraphics[width=1\textwidth,angle=0]{figscr/fig7-globalAverageMetrics-CR.png}
\caption{Global average cortical metrics as a function of gestational age. Dashed red boxes indicate metrics showing a statistically significant cohort effect between dHCP (red) and iFIND (blue) predicted surfaces, while black boxes mark metrics where this cohort effect is non-significant.}
\label{fig:globalAverageMetrics}
\end{figure}

% % % % % % % % % % % %  Ablation
\subsection{Ablation Study}\label{sec:ablation}
The results of our ablation study are summarised in Table~\ref{tab:ablation}, where we assess geometric accuracy (average symmetric surface distances (ASSD) and the 90th percentile of Hausdorff distances (HD)), mesh quality (self-intersecting faces (SIF)), and the stability of predicted sulcal depth maps in the template space, quantified as the average standard deviation across mean sulcal depth (SD) maps computed from the predicted WM surfaces ($\overline{\sigma(\text{SD})}$). All results were calculated on the combined dHCP and iFIND test sets.

\begin{table}[]
\centering
\caption{White matter (WM) and pial surface predictions on the dHCP and iFIND test datasets, showing average symmetric surface distances (ASSD) and the 90th
percentile of Hausdorff distances (HD) between the predicted and the pseudo-ground truth (pGT) surfaces, and the ratio of self-intersecting faces (SIFs) in the predicted surfaces. The first row shows the average standard deviation across mean sulcal depth maps ($\overline{\sigma(\text{SD})}$) calculated from the predicted white matter surfaces. Values in bold represent the best-performing models ($p<.05$).}
\begin{tabular}{lr|cccc}
 & \textbf{Metric} & CoTAN & CoTAN+SD & CoTAN+aug & SuD-CoTAN \\
 &  & (baseline) & & & (proposed) \\
\hline\hline
\rule{0pt}{1.1em}\textbf{SD}  & $\overline{\sigma(\text{SD})}$ $\downarrow$  & 1.16 & \textbf{1.13} & 1.20 & \textbf{1.14} \\
\hline
\multirow{3}{*}{\textbf{WM}} 
 & ASSD   $\downarrow$     & 0.16$\pm$0.10 & 0.18$\pm$0.10 & \textbf{0.14$\pm$0.10} & \textbf{0.15$\pm$0.10} \\
 & HD   $\downarrow$      & 0.40$\pm$0.30 & 0.44$\pm$0.30 & \textbf{0.29$\pm$0.10} & \textbf{0.30$\pm$0.10} \\
 & SIF(\%)  $\downarrow$     & \textbf{0.00$\pm$0.00} & 0.01$\pm$0.01 & 0.01$\pm$0.03 & 0.0003$\pm$0.001 \\
\hline
\multirow{3}{*}{\textbf{Pial}} 
 & ASSD  $\downarrow$        & 0.15$\pm$0.10 & 0.17$\pm$0.10 & \textbf{0.14$\pm$0.01} & \textbf{0.14$\pm$0.01} \\
 & HD   $\downarrow$      & 0.36$\pm$0.14 & 0.41$\pm$0.19 & \textbf{0.30$\pm$0.07} & \textbf{0.30$\pm$0.07} \\
 & SIF(\%)  $\downarrow$     & \textbf{0.001$\pm$0.001} & 0.03$\pm$0.09 & 0.03$\pm$0.09 & 0.01$\pm$0.02 
\end{tabular}
\label{tab:ablation}
\end{table}

We observe that sulcal-depth guided models (\textbf{SuD-CoTAN} and CoTAN+SD models) achieved the lowest sulcal-depth variability, indicating that \textbf{SulcNet} guidance improves anatomical consistency without compromising surface integrity. Removing augmentation significantly increases geometric errors due to poor generalisability to the iFIND dataset (see iFIND example in Figure~\ref{fig:overview}), highlighting its importance for cross-dataset generalisation.
%Without augmentation, errors were significantly higher, %on iFIND when compared to dHCP, 
% highlighting the importance of augmentations for cross-dataset generalisation.
Mesh quality remained high across all settings, with SIF at or near zero for the baseline CoTAN model (0.00\% for WM and 0.001\% for pial surfaces), nearly zero for \textbf{SuD-CoTAN}, and only slightly higher in the other models.
%%%%%%%%%% r1c3>
Although CoTAN+aug and \textbf{SuD-CoTAN} exhibit similar geometric accuracy, our primary goal was not to further optimise surface fitting error, but to ensure that introducing sulcal depth guidance as an additional anatomical constraint does not degrade surface reconstruction quality. In fact, the added SulcNet-based supervision preserves surface fidelity while enabling anatomically informed, template-aligned representations suitable for vertex-wise population analysis.
%%%%%%%%%% r1c3<

% % % % % % % % % % % % % % % % % % % % % %  DISCUSSION & CONCLUSION
\section{Discussion and Conclusions}
This study proposed \textbf{SuD-CoTAN}, a sulcal-depth-guided framework for fetal cortical surface reconstruction, delivering anatomically informed surfaces with improved geometric accuracy, sharper population-level maps, and more consistent developmental trajectories.
Harmonisation analysis further showed that the method reduces cohort differences in cortical metrics, producing more comparable measurements across dHCP and iFIND.
Moreover, GP modelling of sulcal depth trajectories reveals clearer developmental patterns, with deepening sulci and rising gyral crowns over gestation, supporting the biological plausibility of the reconstructed surfaces. 
These results indicate that anatomically informed guidance provides stable and biologically meaningful constraints that improve cross-dataset generalisation without compromising surface quality.

To achieve these results, our proposed \textbf{SuD-CoTAN} incorporates three key contributions. First, it combines surface reconstruction with sulcal depth-driven alignment across individuals, ensuring that reconstructed surfaces are template-aligned and vertex-wise comparable. This allows high-resolution screening for cortical alterations in development, enabling each individual's cortex to be compared directly against a normative cohort in a biologically interpretable way. Second, the method employs augmentation-based harmonisation across datasets, producing surfaces that are robust to variations in acquisition protocols, and ensuring consistent cortical metrics across both dHCP and iFIND. Third, \textbf{SuD-CoTAN} provides a unified, single-step framework that integrates reconstruction and alignment, avoiding multi-stage registration pipelines (e.g., MSM) that require additional software, multiple processing steps, and carry risks of cumulative errors. Collectively, these contributions allow clinicians and researchers to efficiently generate harmonised, anatomically aligned cortical surfaces ready for population-level analysis or clinical screening.

Despite these benefits, a key limitation of this work stems from the substantial natural variability in cortical folding. The human cortex exhibits diverse folding patterns, including branching, splitting, and tertiary folds, that cannot be fully captured by registration to a single population-average template~\citep{guo2025motifs}.
Future work will explore strategies that better accommodate individual folding diversity~\citep{guo2025motifs}, as well as more powerful augmentation approaches, such as those proposed in FetalSynthSeg~\citep{zalevskyi2024improving}, to enhance robustness across datasets with significant domain shifts.
Overall, our proposed model represents a promising step toward robust, harmonised fetal cortical surface analysis suitable for multi-site studies and future large-scale developmental neuroimaging applications.

% % % % % % % % % % % % % % % % % % % % % %  ACKNOWLEDGEMENTS
% Acknowledgements, references, and appendix do not count toward the page limit (if any)
\clearpage 
% Acknowledgments---Will not appear in anonymized version
% \midlacknowledgments{We thank an anonymous group of people.}

% % % % % % % % % % % % % % % % % % % % % %  BIBLIOGRAPHY
\bibliography{midl26_315}

\clearpage
% % % % % % % % % % % % % % % % % % % % % % 
% % % % % % % % % % % % % % % % % % % % % % 
% % % % % % % % % % % % % % % % % % % % % %  APPENDIX
% % % % % % % % % % % % % % % % % % % % % % 
% % % % % % % % % % % % % % % % % % % % % % 
\appendix
\counterwithin{figure}{section}
\counterwithin{table}{section}
\counterwithin{equation}{section}

% %%%%%%%%%%%%%%%%%%%%
\section{Dataset Split}\label{app:trainvalidtest}
\begin{table}[h]
\centering
\caption{Number of scans used for training and evaluating our models, as well as for cortical feature extraction.}
\begin{tabular}{l|c|ccc}  
\textbf{Task} & \textbf{Dataset} & \textbf{Data Split} & \textbf{Subjects} & \textbf{GA} (weeks) \\ 
\hline \hline
% ----- WITH GT SURFACES -----
\multirow{2}{*}{\textbf{Training}} & \textbf{dHCP} & \textbf{Train} &  200  & 29.1 ($\pm3.88$) \\ 
& \textbf{dHCP} & \textbf{Valid} & 10 & 28.9 ($\pm6.03$) \\ 
\hline
\multirow{2}{*}{\textbf{Evaluation}} & \textbf{dHCP} & \textbf{Test} & 20 & 28.5 ($\pm3.79$)  \\ 
& \textbf{iFIND} & \textbf{Test} & 20 & 27.9 ($\pm3.14$) \\ 
\hline 
% ----- WITHOUT GT SURFACES -----
\multirow{2}{*}{\textbf{Feature Extraction}} & \textbf{dHCP} & \textbf{Train/Valid/Test} & 140 & 27.3 ($\pm2.49$) \\ 
& \textbf{iFIND} & \textbf{Test} & 173 & 26.7 ($\pm2.74$)  
%\textbf{VM} & \textbf{Age Matched} & 45 & 28.2 ($\pm3.00$)
\end{tabular}
\label{tab:datasetpartition}
\end{table}

%%%%%%%%%%%%%%%%%%%%
\section{Pial-outside-WM Loss}\label{app:pow}
Letting $v_i^{wm}$ and $v_i^{pial}$ denote corresponding WM and pial vertices, and $n_i^{wm}$ the unit WM normal, we define:
\begin{equation}\label{eq:pow}
    \mathcal{L}_{pow} = \frac{1}{N} \sum_{i=0}^{N-1} \text{ReLU} \left( - n_i^{wm} \cdot \widehat{\left( v_i^{pial} - v_i^{wm} \right)} \right) \text{,}
\end{equation}
where $\widehat{(\cdot)}$ denotes vector normalisation. The ReLU ensures that only inward-pointing displacements (\textit{i.e.}, violations) contribute to the loss.

%%%%%%%%%%%%%%%%%%%%
\section{Training loss weights used for SuD-CoTAN}\label{app:weights}
\begin{table}[h]
\centering
\caption{Weights for training SuD-CoTAN.}
\begin{tabular}{c|lcc}
\textbf{Surface Type} & \textbf{Loss Term} & \textbf{Pre-training} & \textbf{Fine-tuning} \\
\hline
\multirow{4}{*}{\textbf{WM}} & $\lambda_{\text{lap}}$  & 0.5          & 0.1 \\
& $\lambda_{\text{nc}}$   & $5 \times 10^{-4}$ & $1 \times 10^{-4}$ \\
& $\lambda_{\text{edge}}$ & $5 \times 10^{-4}$ & $1 \times 10^{-4}$ \\
& $\lambda_{\text{SD}}$   & 10           & 1 \\
\hline
\multirow{5}{*}{\textbf{Pial}} & $\lambda_{\text{lap}}$  & 0.5          & 0.1 \\
& $\lambda_{\text{nc}}$   & $5 \times 10^{-4}$ & $1 \times 10^{-4}$ \\
& $\lambda_{\text{edge}}$ & $5 \times 10^{-4}$ & $1 \times 10^{-4}$ \\
& $\lambda_{\text{infl}}$ & 2.5 & 2.5 \\
& $\lambda_{\text{pow}}$ & 2.5 & 2.5 \\
\end{tabular}
\label{tab:lossweights}
\end{table}

\clearpage
%%%%%%%%%%%%%%%%%%%%
\section{Data augmentations during training}\label{app:augmentations}
\begin{table}[h]
\centering
\caption{Data augmentations applied during model training.}
\begin{tabular}{l|l|l}
\textbf{Model} & \textbf{Augmentation} & \textbf{Parameters} \\
\hline\hline
\multirow{5}{*}{\textbf{SuD-CoTAN}} & Bias-field augmentation & degree = 3; coeff. $\sim \mathcal{U}(-0.5, 0.5)$ \\
& Gaussian noise & $\sigma \sim \mathcal{U}(0.0, 0.02)$ \\
& Gaussian smoothing & $\sigma \sim \mathcal{U}(0.5, 1.5)$\,mm \\
& Gamma correction & $\gamma \sim \mathcal{U}(0.75, 1.25)$ \\
& Histogram shifts & 3--5 random control points \\
\hline
\multirow{2}{*}{\textbf{SulcNet}} & Translation & $t_x, t_y, t_z \sim \mathcal{U}(-3, 3)$\,mm \\
 & Rotation & $r_x, r_y, r_z \sim \mathcal{U}(-15, 15)^\circ$ \\
\end{tabular}
\label{tab:augmentations}
\end{table}

% \clearpage
%%%%%%%%%%%%%%%%%%%%
\section{Average Curvature Maps for the iFIND dataset}\label{app:averageCurv}
\begin{figure}[ht]
\centering
\includegraphics[width=1\textwidth,angle=0]{figscr/figE1-AverageCurvaturesIFINDonly-CR.png}
\caption{Curvature maps across gestational ages for the iFIND dataset.
\textbf{A.} Example mean ($\overline{\text{C}}$) and standard deviation ($\sigma$) curvature (C) maps for subjects scanned at 26, 29 and 32 weeks for the CoTAN+aug model (first two rows) and our proposed \textbf{SuD-CoTAN} model (last two rows).
\textbf{B.} Curvature maps variability (lower is better) 
and the mean absolute Laplacian of the average curvature map (as a proxy for sharpness, where higher is better) for each gestational age, comparing the CoTAN+aug (yellow) and our proposed model (cyan).
}
\label{fig:averageCurv}
\end{figure}

\clearpage
%%%%%%%%%%%%%%%%%%%%
\section{Harmonisation of Surface-based Metrics}\label{app:harmonisation}
\begin{figure}[ht]
\centering
\includegraphics[width=1\textwidth,angle=0]{figscr/figF1-globalAverageMetricsOthers-CR.png}
\caption{Global average cortical metrics as a function of gestational age. Dashed red boxes indicate metrics showing a statistically significant cohort effect between dHCP (red) and iFIND (blue) predicted surfaces.}
\label{fig:globalAverageMetricsOthers}
\end{figure}


%%%%%%%%%%%%%%%%%%%%
\clearpage
\section{Weighting for Sulcal Depth Guidance}\label{app:lambdaSD}

An ablation study was conducted on dHCP data (training/validation/testing) using a reduced training schedule ($N_{\text{epochs}} = 100$) to select an appropriate value for $\lambda_{SD}$ when using \textbf{SulcNet} guidance.
Table~\ref{tab:ablationlambdaSD} reports the mean $\pm$ standard deviation HD and ASSD between predicted and pGT surfaces, as well as mean $\pm$ standard deviation NCC scores between sulcal depth maps corresponding to the predicted surfaces and their closest-in-age template sulcal depth maps.
Four configurations were evaluated: $\lambda_{SD} = 0$ (no guidance), $\lambda_{SD} = 1$ (constant), $\lambda_{SD} = 10$ (constant), and a two-stage schedule: $\lambda_{SD} = 10$ for the first 50 epochs, then $\lambda_{SD} = 1$ for the last 50 epochs. 
Overall, the two-stage strategy ($\lambda_{SD} = 10 \rightarrow 1$) provides an effective compromise by achieving strong anatomical alignment with the template while preserving high geometric fidelity to the pseudo-ground-truth surfaces.

\begin{table}[h]
\centering
\caption{Ablation study for selecting $\lambda_{SD}$ when training with \textbf{SulcNet} guidance. Bold values indicate best performance; italicised values indicate worst performance.}
\begin{tabular}{c|ccc}
$\lambda_{SD}$ & HD ($\downarrow$) & ASSD ($\downarrow$) & NCC ($\uparrow$) \\ 
\hline
0                    & \textbf{0.89 $\pm$ 0.19} & \textbf{0.43 $\pm$ 0.08} & \textit{0.24 $\pm$ 0.05} \\ 
1                    & \textbf{0.92 $\pm$ 0.20} & \textbf{0.45 $\pm$ 0.08} & 0.46 $\pm$ 0.08 \\ 
10                   & \textit{1.15 $\pm$ 0.24} & \textit{0.55 $\pm$ 0.10} & \textbf{0.71 $\pm$ 0.05} \\ 
10 $\rightarrow$ 1   & \textbf{0.86 $\pm$ 0.20} & \textbf{0.42 $\pm$ 0.08} & 0.69 $\pm$ 0.05 \\ 
\end{tabular}
\label{tab:ablationlambdaSD}
\end{table}

\clearpage
%%%%%%%%%%%%%%%%%%%%
\section{Average Sulcal Depth Maps}\label{app:averageSDcombined}
\begin{figure}[ht]
\centering
\includegraphics[width=1.0\textwidth,angle=0]{figscr/figH1-AverageSulcalDepthsDHCPandIFIND-CR.png}
\caption{Sulcal depth maps across gestational ages for subjects pooled from both dHCP and iFIND datasets.
\textbf{A.} Example mean ($\overline{\text{SD}}$) and standard deviation ($\sigma$) sulcal depth (SD) maps for subjects scanned at 26, 29 and 32 weeks for the CoTAN+aug model (first two rows) and our proposed \textbf{SuD-CoTAN} model (last two rows).
\textbf{B.} Sulcal depth maps variability (lower is better) 
and the mean absolute Laplacian of the average sulcal depth map
% and mean SD map gradient 
(as a proxy for sharpness, where higher is better) for each gestational age, comparing the CoTAN+aug (yellow) and our proposed model (cyan) for each cohort individually.}
\label{fig:averageSDcombined}
\end{figure}

\clearpage
%%%%%%%%%%%%%%%%%%%%
\section{Average Curvature Maps}\label{app:averageCurvcombined}
\begin{figure}[ht]
\centering
\includegraphics[width=1.0\textwidth,angle=0]{figscr/figI1-AverageCurvaturesDHCPandIFIND-CR.png}
\caption{Curvature maps across gestational ages for subjects pooled from both dHCP and iFIND datasets.
\textbf{A.} Example mean ($\overline{\text{C}}$) and standard deviation ($\sigma$) curvature (C) maps for subjects scanned at 26, 29 and 32 weeks for the CoTAN+aug model (first two rows) and our proposed \textbf{SuD-CoTAN} model (last two rows).
\textbf{B.} Curvature maps variability (lower is better) 
and the mean absolute Laplacian of the average curvature map
%and mean curvature map gradient 
(as a proxy for sharpness, where higher is better) for each gestational age, comparing the CoTAN+aug (yellow) and our proposed model (cyan) for each cohort individually.
}
\label{fig:averageCurvcombined}
\end{figure}

\end{document}
