\documentclass{midl} % Include author names
%\documentclass[anon]{midl} % Anonymized submission

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution


\usepackage{mathtools}
\usepackage[T1]{fontenc}
\usepackage{booktabs}
\usepackage{multirow}
%\usepackage{tabularx}
\usepackage{pgf}
\usepackage{pgfplots}
\pgfplotsset{compat=1.16}
\usepackage{tikz}
\usepackage[font=small,labelfont=bf,tableposition=top]{caption}
\usepackage{placeins}
\usetikzlibrary{calc}
\usetikzlibrary{shapes.geometric}
\usetikzlibrary{spy}
% Caption label format "andfigure": typesets the regular label (#1~#2) followed by
% "& <figure name> <current figure number>", for floats that combine a table and a figure.
% NOTE(review): per the caption package, #1 is the float name and #2 its number -- confirm.
\DeclareCaptionLabelFormat{andfigure}{#1~#2  \&  \figurename~\thefigure}
% --- Math notation shortcuts --------------------------------------------------
% All macros are wrapped in \ensuremath, so they work in both text and math mode.
% Set of real numbers.
\newcommand{\R}{\ensuremath{\mathbb{R}}}
% Differential: upright operator "d", a negative thin space, then the argument.
\newcommand{\diff}[1]{\ensuremath{\operatorname{d}\!{#1}}}
% Bold x (used for spatial points in the text).
\newcommand{\vx}{\ensuremath{\mathbf{x}}}
% Bold z (base symbol for the latent code, see \latentcode).
\newcommand{\vz}{\ensuremath{\mathbf{z}}}
% Bold s (used for the vector of signed distances).
\newcommand{\sdfvec}{\ensuremath{\mathbf{s}}}
% Short alias for \sdfvec.
\newcommand{\vs}{\sdfvec}
% Latent space R^d.
\newcommand{\latentspace}{\ensuremath{\mathbb{R}^d}}
% Latent code; currently identical to \vz.
\newcommand{\latentcode}{\ensuremath{\vz}}
% Network weights theta.
\newcommand{\nnweights}{\ensuremath{\theta}}
% Scalar-valued network S_theta.
\newcommand{\shapeopsingle}{\ensuremath{S_{\nnweights}}} 
% Same definition as \shapeopsingle (kept as a separate name).
\newcommand{\shapeopsinglenoind}{\ensuremath{S_{\nnweights}}} 
% Vector-valued network, bold S_theta.
\newcommand{\shapeopmulti}{\ensuremath{\mathbf{S}_{\nnweights}}} 

% --- Names of the compared methods ---------------------------------------------
% Each macro ends in \xspace so that a following space in running text is preserved.
% NOTE(review): xspace is not loaded explicitly in this preamble; presumably it is
% pulled in by one of the packages the class loads -- confirm.
\newcommand{\ptos}{Points2Surf\xspace}
\newcommand{\ptom}{Point2Mesh\xspace}
\newcommand{\atlasnet}{AtlasNet\xspace}
\newcommand{\deepsdf}{DeepSDF\xspace}
\newcommand{\ssm}{SSM\xspace}


% Result highlighting: \best typesets its argument in bold purple,
% \second in italic teal. The extra brace group keeps the colour change local.
\newcommand{\best}[1]{{\color{purple}\textbf{#1}}}
\newcommand{\second}[1]{{\color{teal}\textit{#1}}}

\usepackage{array}
% Column specifier "?{<width>}": inserts a vertical rule of the given width between columns.
\newcolumntype{?}[1]{!{\vrule width #1}}

% TikZ key "spy on other=<pgf code>": appends code to the spy library's internal
% collection macro that overwrites the spy box with an hbox containing a pgfpicture
% built from <pgf code>.
% NOTE(review): presumably this lets a spy node magnify content other than the
% picture it is placed in; it relies on TikZ-internal macros
% (\tikz@lib@spy@collection, \tikz@lib@spybox) -- re-check after TikZ upgrades.
\makeatletter
\tikzset{spy on other/.code={%
  \pgfutil@g@addto@macro\tikz@lib@spy@collection{%
    \setbox\tikz@lib@spybox=\hbox{\pgfpicture#1\endpgfpicture}}}}
\makeatother


\jmlryear{2024}
\jmlrworkshop{Full Paper -- MIDL 2024}
\jmlrvolume{-- 023}
\editors{Accepted for publication at MIDL 2024}

\title[Cardiac DeepSDF]{Shape of my heart: Cardiac models through learned signed distance functions}
\midlauthor{\Name{Jan Verh\"ulsdonk\midljointauthortext{Contributed equally}\nametag{$^{1}$}} \Email{verhuelsdonk@iam.uni-bonn.de}\\
\Name{Thomas Grandits$^\ast$\nametag{$^{2,4}$}} \Email{thomas.grandits@uni-graz.at}\\ 
\Name{Francisco Sahli Costabal\nametag{$^{3}$}} \Email{fsc@ing.puc.cl}\\
\Name{Thomas Pinetz\nametag{$^{1}$}} \Email{pinetz@iam.uni-bonn.de}\\
\Name{Rolf Krause\nametag{$^{4,5}$}} \Email{rolf.krause@usi.ch}\\
\Name{Angelo Auricchio\nametag{$^{4,6}$}} \Email{angelo.auricchio@eoc.ch}\\
\Name{Gundolf Haase\nametag{$^{2}$}} \Email{gundolf.haase@uni-graz.at}\\
\Name{Simone Pezzuto\nametag{$^{4,7}$}} \Email{simone.pezzuto@unitn.it}\\
\Name{Alexander Effland\nametag{$^{1}$}} \Email{effland@iam.uni-bonn.de}\\
\addr $^{1}$ Institute for Applied Mathematics, University of Bonn, Germany \\
\addr $^{2}$ Department of Mathematics and Scientific Computing, University of Graz, Austria \\
\addr $^{3}$ Institute for Biological and Medical Engineering, Pontificia Universidad Católica de Chile, Chile \\
\addr $^{4}$ Center for Computational Medicine in Cardiology, Università della Svizzera italiana, Switzerland \\
\addr $^{5}$ FernUni Schweiz, Brig, Switzerland \\
\addr $^{6}$ Instituto Cardiocentro Ticino, EOC, Switzerland \\
\addr $^{7}$ Department of Mathematics, University of Trento, Italy \\
}

\begin{document}

\maketitle

\begin{abstract}
  The efficient construction of anatomical models is one of the major challenges of patient-specific in-silico models of the human heart.
  Current methods frequently rely on linear statistical models, allowing no advanced topological changes, or require medical image segmentation followed by a meshing pipeline, which strongly depends on image resolution, quality, and modality. 
  These approaches are therefore limited in their transferability to other imaging domains.
  In this work, the cardiac shape is reconstructed by means of three-dimensional deep signed distance functions with Lipschitz regularity. 
  For this purpose, the shapes of cardiac MRI reconstructions are learned to model the spatial relation of multiple chambers. 
  We demonstrate that this approach is also capable of reconstructing anatomical models from partial data, such as point clouds from a single ventricle, or from modalities different from the MRI data used for training, such as electroanatomical mapping (EAM).
\end{abstract}

\begin{keywords}
Deep Signed Distance Function, Shape Reconstruction, Cardiac Modeling, Lipschitz Regularized Network
\end{keywords}

\section{Introduction}
Modern personalized precision medicine frequently targets patient-specific therapies with improved therapy outcomes, reduced intervention times, and thus lower costs.
In the case of cardiac personalized treatment, this necessitates complex simulation models from patient data~\cite{corral_acero_digital_2020}.
This future vision of personalized cardiac treatment relies on generated 3D models, which should represent the anatomy of the corresponding patient.

Current methods to generate anatomical models usually require computed tomography (CT) or magnetic resonance (MR) images, which are segmented and subsequently meshed~\cite{strocchi_publicly_plos_2020}.
Yet, while automatic cardiac image segmentation is well-researched and constantly improving thanks to machine learning~\cite{painchaud_cardiac_2020,campello_multi_centre_2021,zhuang_cardiac_2022}, it is limited to a specific image modality and resolution.
Other modalities such as electroanatomical catheter mapping~\cite{bhakta_principles_2008} are difficult to fit within standard frameworks, albeit they are very important for patient-specific modelling~\cite{RuizPINN2022}.

In this work, we propose the use of implicitly learned representations through signed distance functions (SDFs). We start from the \emph{DeepSDF} method~\cite{park_deepsdf_2019}, which encodes SDF-based surfaces through a decoder-only neural network with a small-dimensional input latent code. The resulting neural network is a cardiac shape atlas. Shape inference can be achieved from location measurements such as electro-anatomical mappings and is applicable to arbitrary resolutions.
Additionally, interpolation in the latent space yields shape interpolation in the physical space, a useful feature for creating novel shapes.
Differently from the original \emph{DeepSDF} method, here we take advantage of Lipschitz-regularized neural networks~\cite{liu_learning_2022} so as to avoid overfitting of the network and to enforce smooth interpolations between samples in the latent space.
This method is especially efficient for learning on relatively small training sets as considered here.

We demonstrate that this Lipschitz-regularized \emph{DeepSDF} architecture is suited for constructing cardiac models by learning from a database of $44$ publicly available, post-processed cardiac models.
The resulting \emph{DeepSDF} model is designed to encode bi-ventricular shapes and also infer new shapes from sparse point clouds, even if only partial domains are available (e.g., only endocardial measurements of a single chamber often encountered in electroanatomical mappings).
The proposed method shares some similarities with the very recent work by~\citet{sander2023}; however, our approach exhibits several major advantages compared with this paper:
(1) we apply a \emph{Lipschitz-regularized} version of the \emph{DeepSDF} methodology to the cardiac domain on the example of bi-ventricular models and demonstrate advantageous accuracy in the presence of sparse data in comparison to state-of-the-art methods,
(2) thanks to the multi-chamber approach, topological constraints are implicitly incorporated and adjacent anatomical structures benefit from additional available data, which might also aid uncertainty quantification in future research, and
(3) our approach is more robust against measurement errors and modeling noise due to the iterative noise estimation and the additional prior penalization, which allows for the reconstruction of cardiac shapes from in-vivo measurements of an electroanatomical mapping procedure.


\section{Related work}
Surface reconstruction from point clouds is an intensively researched topic and provides a variety of possible approaches~\cite{ma_surface_2022}.
Classical approaches assume no prior knowledge of the given point clouds, but in turn, require dense point clouds and are often sensitive to noise~\cite{carr_reconstruction_2001,kazhdan_poisson_2006,kazhdan_screened_2013,ummenhofer_global_2015,fuhrmann_floating_2014}.
Modern deep-learning approaches try to overcome these limitations by learning prior information about shapes using learned SDFs~\cite{park_deepsdf_2019,hanocka_2020_point2mesh}, occupancy fields~\cite{mescheder_occupancy_2019}, level sets~\cite{michalkiewicz_deep_2019} or implicit fields~\cite{chen_learning_2019}.

In this context, the \emph{DeepSDF} method is one of the most active fields of research.
Such methods usually try to infer a multitude of shapes from at least partially dense point clouds.
However, this method is prone to overfitting the data, thus often requiring regularization both in the latent space, as well as in the neural network by imposing dropout rules~\cite{park_deepsdf_2019}.
In \citet{liu_learning_2022}, Lipschitz-regularized linear layers are proposed,
which are able to overcome some of these limitations.
Alternative forms of regularization are, for instance, based on the eikonal equation~\cite{gropp_implicit_2020}, other methods rely on local SDFs~\cite{chabra_deepshapes_2020,jiang_local_2020,tretschk_patchnets_2020,erler_2020_points2surf}, neural-pulls~\cite{ma_neuralpull_2021}, or learned unsigned distance functions~\cite{chibane_neural_2020}.
For further methods and comparisons, we refer the reader to~\citet{ma_surface_2022}.

\emph{Statistical Shape Models} (SSMs) are classical methods for (cardiac) shape reconstruction, which are tailored to the whole heart~\cite{ecabert_ssm_2008,hoogendoorn_atlas_2013,lotjonen_ssm_2004,ordas_ssm_2007,unberath_ssm_2015,zhuang_ssm_2010}, the atria~\cite{nagel_atlas_2021}, or the ventricles~\cite{bai_atlas_2015,petersen_reference_2017}.
Only a few works have shown that deep learning methods can be applied to the cardiac domain to reconstruct cardiac shapes from point clouds~\cite{beetz_point2meshnet_2023,kong2023sdf4chd,zhao_convolutional_2022,sander2023,alblas_going_2023,wang_joint_2021}, whereas most of the learning based methods heavily rely on image input~\cite{beetz_interpretable_2022,kong_learning_2021}.



\section{Method}
In this section, we comment on the representation of cardiac shapes by learned SDFs and provide details about training and shape completion.

\subsection{General setting}
We assume that each shape describing the heart is volumetrically represented as an SDF given by $f_S: \R^3 \rightarrow \R$ mapping from spatial points $\vx \in \R^3$ to their respective signed distances $s \in \R$.
These signed distances encode the signed projection distance at a point $\vx$ to the surface (negative values inside, positive values outside).
In such a way, the cardiac surface is represented as the zero-level set of this SDF, i.e., $\lbrace \vx \in \R^3 \colon f_S(\vx)=0\rbrace$. 
A straightforward approach to learning such an SDF for arbitrary shapes involves representing the SDF by a neural network $\shapeopsingle: \R^3 \rightarrow \R$, in which case the SDF is fully defined by the network's architecture and weights $\nnweights$.
However, modeling surfaces solely through their neural network weights would necessitate a separate SDF and subsequent neural network for each individual surface, while also neglecting any similarities between the available training shapes.
Following~\cite{park_deepsdf_2019}, we instead model all shapes using a single neural network and additionally provide this network with a $d$-dimensional latent representation $\latentcode \in \latentspace$ of the shape as an additional input, together with the aforementioned spatial coordinates, i.e.~$\shapeopsingle: \R^{3+d} \rightarrow \R$.
Hence, we encode multiple heart geometries with the same network, but different latent codes.
We propose to model each of the left and right endocardial and epicardial surfaces with a shared latent code and neural network by letting the DeepSDF simultaneously estimate a signed distance for each of the closed bi-ventricular surfaces $\sdfvec = (s_1, s_2, s_3, s_4)^\top \in \R^4$.
In summary, the learned DeepSDF is given by the vector-valued function $\shapeopmulti: \R^{3+d} \rightarrow \R^4$.

\subsection{Training and network architecture}
The anatomical samples $X_i \coloneqq \left(\vx_k, \sdfvec_k \right)_{k=1}^{K_i}$ used for learning are $K_i$ pairs  of spatial coordinates $\vx_k$ with their sampled signed distance vector $\sdfvec_k$ for the $i$-th biventricular shape.
The set of all $N$ available anatomical bi-ventricular samples is denoted as $X \coloneqq \left( X_i \right)_{i=1}^N$.
To each anatomical shape $i$, we associate a coupled latent code representation $\latentcode_i \in \latentspace$ also learned from data.
We denote the set of learned latent codes as $Z\coloneqq \left( \latentcode_i \right)_{i=1}^N$.
Our goal during training is to minimize the mismatch between the sampled signed distances and the ones estimated by the network through a simple quadratic loss term.
Additionally, to restrict the latent codes $Z$, we follow~\cite{park_deepsdf_2019} and assume a zero mean Gaussian prior distribution with covariance $\sigma^2I$ on the latent codes, which gives rise to the loss term
\begin{equation*}
    \mathcal{L}(\nnweights, Z) = \frac{1}{N} \sum_{i=1}^N \left( \frac{1}{4 K_i} \sum_{(\vx_k, \sdfvec_k) \in X_i} \lVert \shapeopmulti(\vx_k, \latentcode_i) - \sdfvec_k\rVert_2^2 + \frac{1}{\sigma^2} \lVert\latentcode_i\rVert^2_2 \right).
\end{equation*}
The parameter $\sigma$ is used to balance between reconstruction accuracy for the training shapes and regularity in the latent space, which is essential for shape completion as described in \sectionref{sec:shape_completion}. See \sectionref{sec:numerical_experiments} for the choice of $\sigma$ in our model. 
In~\citet{liu_learning_2022}, it was shown that learning $\mathcal{L}$ directly may result in overfitting and might provide poor interpolation properties for the latent space.
To overcome these issues, a Lipschitz penalization on the network was proposed in order to better control the smoothness.
For this purpose, the Lipschitz bound $L$ of the network is estimated by $L = \prod_{i=1}^M \Vert W_i \Vert_p$ for an $M$-layer deep network, where $W_i$ are the network weights of the $i$-th layer.
We closely follow the implementation of~\cite{liu_learning_2022} where the linear layers of our network are replaced with Lipschitz-normalized layers.
For each layer an additional weight $c_i$ is introduced such that $\text{softplus}(c_i) = \ln(1+e^{c_i})$ serves as an upper bound $\text{softplus}(c_i) \ge \Vert W_i \Vert_p$ for the Lipschitz constant.
Integrating this Lipschitz-regularization into our previous loss-functional $\mathcal{L}$ leads to our finally used cost-functional 
\begin{equation}
J(\nnweights, Z) = \mathcal{L}(\nnweights, Z) + \alpha \prod_{c_i \in C(\theta)} \text{softplus}(c_i),\label{eq:lipschitz_nn_loss}
\end{equation}
where $C(\theta) = (c_i)_{i=1}^M$ denotes the network parameter dependent per-layer Lipschitz bounds.

\begin{figure}[htb]
\centering
    \scalebox{0.6}{\input{source/architecture.tex}}
    \caption{Left: a schematic representation of the employed DeepSDF. Right: comparison of ground truth meshes (LV in red, RV in blue) with reconstruction on the training dataset. The reconstructed meshes are color-coded with the signed distance to the ground truth mesh in mm.} \label{fig:network}
\end{figure}
As mentioned, for samples $X_i \coloneqq \left(\vx_k, \sdfvec_k \right)_{k=1}^{K_i}$ the network inputs are the latent code $\latentcode_i$ and the spatial coordinates $\vx_k$ of the sample point.
Following~\citet{liu_learning_2022} the spatial coordinates are multiplied by the factor $C_{s} = 100$ to balance the Lipschitz regularity of the latent space and the spatial coordinates.
We use $5$ hidden layers with $256$ neurons and $\tanh$ activation functions.
In the third hidden layer, the latent code and the spatial coordinate are concatenated to the output of the previous layer.
The last layer is a linear layer with four signed distance functions as output, one for each of the left, right, epi- and endocardial surfaces.
The latent code size was chosen as $d = 64$.
A schematic representation of the network is shown in Figure~\ref{fig:network} (left).

For the training of our network, we built a public shape library of watertight 3D shapes for endo-/epicardium of the left/right ventricles~\cite{grandits_public_2024} ($4$ shapes in total) based on \citet{rodero_virtual_2021,strocchi_publicly_zenodo_2020}.
Further details on the construction of the training data are provided in Appendix~\ref{sec:data_main}.

\subsection{Shape completion} \label{sec:shape_completion}
After learning the SDF network, a new anatomical bi-ventricular shape can be inferred from sparse point clouds of any combination of surfaces or given signed distance.
For this, we consider $K$ given samples consisting of triplets~$\tilde{Y}$ of spatial coordinates $\vx_k \in \R^3$, a single signed distance $s_k \in \R$, and an index of the surface $j_k \in \{1, 2, 3, 4\}$.
If a point $\vx_k$ lies on the surface $j_k$, then $s_k = 0$.
Finding the bi-ventricular reconstruction of a point cloud thus reduces to finding its latent code representation $\latentcode$ by minimizing the following problem
\begin{equation}
    \min_{\latentcode} \frac{1}{K} \sum_{(\vx_k, s_k, j_k) \in \tilde{Y}} \left( \left( \shapeopmulti (\vx_k, \latentcode ) \right)_{j_k} - s_k \right)^2 + \frac{\beta}{\sigma^2} \Vert \latentcode \Vert_2^2,
    \label{eq:inference_loss}
\end{equation}
where the subscript refers to the vector component and $\beta$ is an additional weight that is increased depending on the noise of the input point cloud.
For noise-free point clouds, we set $\beta = 1$ to obtain the maximum a posteriori (MAP) estimate~\cite{park_deepsdf_2019}.

For noisy input point clouds, the expected data loss of a perfect reconstruction can be approximated by the variance $\xi^2$ of the noise.
We set $\beta = C_{b} \xi^2$, where $C_{b}=100$ is a chosen scaling factor.
In the case of real-world data, the underlying noise structure is unknown and estimated as follows:
We start with a reconstruction $\shapeopmulti^0$ for an arbitrary initial noise estimation $\bar{\xi}_0$.
We then iteratively estimate the \emph{empirical variance~$\bar{\xi}_{n+1}^2$} based on $\shapeopmulti^n$ via
\begin{equation}\label{eq:noise_iteration}
    \bar{\xi}_{n+1}^2=\frac{1}{K-1}\sum_{(\vx_k, s_k, j_k) \in \tilde{Y}} \left( \left( \shapeopmulti^n (\vx_k, \latentcode ) \right)_{j_k} - s_k \right)^2.
\end{equation}
This estimation exhibits a high experimental convergence rate (see appendix \sectionref{sec:noise_estimation}).

\section{Experiments}
In this section, we state details of the training procedure and the network architecture.
Moreover, we introduce distinct metrics for the evaluation of the surface reconstruction quality, which are exploited in all benchmarks.
Finally, we evaluate the network on in-vivo measured catheter data.

\subsection{Numerical experiments} \label{sec:numerical_experiments}
In this section, we present the numerical results obtained with the proposed method.
During the training process, we optimize both the network parameters $\nnweights$ and the latent code representations $\latentcode_i$ of the training data.
The network is fit to $4$ surfaces, namely the epi- and endocardium of the right and left ventricle, and the associated hyperparameters $\sigma$ and $\alpha$ are optimized for the best performance of the reconstruction in the shape completion process.
For the loss term, we thus obtained $1/\sigma^2\approx 1.8\times 10^{-7}$ and $\alpha=1.9\times10^{-6}$.
The network is trained with the Adam optimizer~\cite{kingma_2014_adam} for $3000$ epochs with a learning rate of $0.005$, which is decreased twice by a factor of $0.2$, after $2700$ and $2900$ epochs.
For the mesh generation, we compute the signed distance function on a $128^3$ grid on the bounding box of all training points and reconstruct the zero-level set using the contour filter of PyVista~\cite{sullivan_pyvista_2019}, based on marching cubes.
The final reconstruction quality of the training models is depicted in \figureref{fig:network} (right).

We test the regularity of our network with respect to the latent space input with inter- and extrapolation between two latent codes of the training dataset.
The shapes of the reconstructed hearts change uniformly between every inter- and extrapolation point and generate meaningful results (see appendix \figureref{fig:interpolation}).
Points that are drawn from the prior distribution with covariance $\sigma^2 I$ produce meaningful heart geometries (see appendix \figureref{fig:sampling}).
With our model, we are able to learn multiple implicit surfaces at the same time and encode them in a joint latent space representation.
Therefore, it is easily possible to calculate the latent code during inference time based on an arbitrary subset of surface points.
We test this by sampling sparse point clouds on the endocardium of the left ventricles from the test set and optimizing the latent code according to \eqref{eq:inference_loss}, still providing us with a full four-surface bi-ventricular shape.
Not only is our method capable of a close reconstruction of the endocardium of the left ventricle, but it can also predict possible shapes of the other three surfaces. We provide visual results for a reconstruction from 50 LV endocardial points in \figureref{fig:short_results} and \figureref{fig:lv_to_rv}.
Note however that the accuracy of the unmeasured right ventricular shape significantly decreases.
Numerical results of the reconstruction quality on the endocardium of the left ventricle can be found in \sectionref{sec:validation}.



\subsection{Validation}\label{sec:validation}
Our method is compared with \ptos~\cite{erler_2020_points2surf}, \ptom~\cite{hanocka_2020_point2mesh}, and a variant of our network without Lipschitz regularization.
In all experiments, only the endocardium of the left ventricle is considered.
All methods are compared on point clouds with different cardinalities $n$, ranging from very sparse point clouds with $n=50$ to relatively dense ones with $n=2000$.
Additionally, the coordinates of the points are perturbed with noise drawn from a Gaussian normal distribution with zero mean and fixed covariance $\xi^2I$.
For each number of points $n$ and level of noise $\xi$ we test the methods on the same 44 point clouds sampled from the meshes of the data set (for \ptos we apply a pre-trained model~\cite{erler_2020_points2surf}).
In the appendix we additionally provide a comparison to a \ssm (see Appendix \ref{sec:ssm}).
To test our method on all 44 meshes of the dataset, we selected 11 disjoint test sets and trained a separate network on each of the corresponding remaining training sets.
For both our unregularized ($\alpha=0$) and regularized \deepsdf we take $\beta=\max(1,C_{b}\xi^2)$ and minimize the objective in \eqref{eq:inference_loss} using the Adam optimizer~\cite{kingma_2014_adam} with a learning rate of $10^{-2}$ for 50\,000 epochs.
In our unregularized network we used standard linear layers without any regularization and changed the activation functions to ReLU.
For the unregularized layers this seems to improve the performance drastically, whereas for the regularized case we found that the choice of activation function did not influence the results too much.
The results for $n=50$ on four meshes computed with our method are depicted in \figureref{fig:noise} in the appendix.
For the comparison, we evaluated the performance in terms of \emph{L2-Chamfer-distance} (CD), which can be seen in \figureref{fig:short_results}. For two point clouds $\mathbf{X}$ and $\mathbf{Y}$ the CD is given as
$d_\text{CD}(\mathbf{X},\mathbf{Y})= \frac{1}{\vert \mathbf{X} \vert} \sum_{x\in \mathbf{X}} \min_{y\in \mathbf{Y}}\Vert x-y\Vert_2 + \frac{1}{\vert \mathbf{Y} \vert} \sum_{y\in \mathbf{Y}} \min_{x\in \mathbf{X}}\Vert x-y\Vert_2$.
In appendix~\ref{app:metrics} and \tableref{tab:results} further comparisons in terms of \emph{Hausdorff-distance} (HD), and \emph{Large deformation diffeomorphic metric mapping} (LDDMM) with their respective definitions are provided.
For every test case, we state the mean and standard deviation across the four meshes of the test set.
Note that \ptos did not converge for sparse point clouds ($n=50$ and $n=200$). We thus omitted the associated results in the table.
Our method performs particularly well on sparse point clouds compared to the other methods.
The obtained meshes are qualitatively similar to the ground truth meshes and geometric features like the curvature can be recovered properly (see appendix \figureref{fig:optical_comparison}).
\begin{table}
    \scalebox{1.0}{\input{source/result_table_short}}
    \begin{minipage}[t]{2.5cm}
        \begin{tikzpicture}[baseline={([yshift={-\ht\strutbox}]current bounding box.west)}]
        \node[inner sep=0pt] (gt) at (0,0) {\includegraphics[width=\linewidth,trim={245 57 270 60},clip]{source/lv_to_rv/lv_endo.png}};
        \node[inner sep=0pt] (gt) at (0,-2.8) {\includegraphics[width=\linewidth,trim={175 57 260 60},clip]{source/lv_to_rv/rv_endo.png}};
        \draw [line width=0.05mm] (-2.1,1.4) -- (-2.1,-4.6);
        \node at (-1.7,0) [rotate=90] (l7) {LV Endo};
        \node at (-1.7,-2.8) [rotate=90] (l7) {RV Endo};
        \node at (-0.24,-4.3) [] (l7) {\includegraphics[width=1.4\linewidth]{source/lv_to_rv/colorbar_hor_mm.png}};
        \end{tikzpicture}
    \end{minipage}
    \vspace{0.1cm}
    \captionof{figure}{Left: mean and standard deviation of the Chamfer distance (CD) for different numbers of input points $n$ and different levels of noise $\xi$ (lowest value in \best{bold magenta}, second lowest value in \second{teal and italic}; short version of \tableref{tab:results} in the appendix). The asterisk denotes the non-Lipschitz-regularized version of our network. Right: reconstruction of both endocardia from points on the LV endocardium (ground-truth mesh in grey, reconstruction color-coded with implicit distance).} \label{fig:short_results}
\end{table}

\subsection{Inference from electroanatomical mapping data}\label{sec:eam}
\begin{figure}
  \centering
  \input{source/noga_short.tex}
  \caption{Reconstruction of the LV endocardium from the EAM data. We compare the \ssm reconstruction (left panel) to our approach for an optimal noise level estimation of $\bar{\xi}=3.368$~mm (right panel). We also report the absolute distance from the geometry provided by the EAM system.} \label{fig:noga_short}
\end{figure}

Electro-anatomical mapping (EAM) is a common routine for patients undergoing catheter ablation.
As the catheter is inserted into the heart, it continuously builds a three-dimensional point cloud that is triangulated into a surface model.
Our EAM data consists of a set of points located on the endocardial surface of the left ventricle. 
The point cloud is usually sparse and unevenly distributed, but commonly allows for a reasonable estimate of endocardial geometry.
The exact data acquisition process is described in detail in appendix~\ref{app:eam}.
The overall LV shape is well-approximated in most regions except for the apex, as can be seen in \figureref{fig:noga_short} and \figureref{fig:noga}, and includes the aortic outflow tract due to the learned modality. 
Compared to the LV surface obtained from the EAM system, our shape is much smoother and is even defined in regions where data is missing.
Since the underlying noise structure is unknown, we estimate the noise variance iteratively as described in \eqref{eq:noise_iteration}. 
For the EAM data, we obtain an estimate of $3.4$~mm, consistent with the size of the electrode's tip ($2$--$4$~mm).
A table containing the iterates of the estimator can be found in \tableref{tab:noise_estimation} in the appendix.

{\ }

\section{Discussion and outlook}
We have presented a novel method to represent cardiac anatomy based on signed-distance functions.
The quantitative comparison with other methods shows that our approach can reconstruct the shape of hearts on a state-of-the-art level, especially for sparse and noisy data.
In contrast to the methods we compared against, our approach does not require knowledge of the normals at each point of the point cloud.
This knowledge however could be incorporated at the shape completion stage by fitting the normalized gradient of the SDF against the given normals.

The requirement of point clouds is a mixed blessing, as it requires the construction of a point cloud from images through a learned method, or by segmenting the image stack and using its surface. On the other hand, it enables the algorithm to operate on different modalities.
Additionally, the algorithm expects the point clouds in a canonical pose (CT-based), which however could be automated using rigid point registration methods~\cite{jian_robust_2010}.
Learning multiple SDFs with one network allows the generation of shapes based on input data from different chambers of the heart or a combination of multiple surfaces.

While the present work only encodes bi-ventricular surfaces, the method itself can be extended to encode additional chambers and shapes, such as the atria or aorta.
This method has important implications in cardiac modeling, digital twinning for precision medicine, and the creation of virtual cohorts of patients. We plan to further extend it to time-dependent shape models and shape uncertainty quantification~\cite{Gander2021UQ}.

\newpage
\section{Acknowledgements}
This work was supported by the Swiss National Science Fund [Cardiotwin, Weave/Lead Agency, Project number 214817], by the Deutsche Forschungsgemeinschaft (DFG, German Research Foundation) [EXC-2047/1-390685813] and [EXC2151-390873048], PRIN-PNRR [project no. P2022N5ZNP], and Swiss National Supercomputing Centre [production grant no. s1074].


\bibliography{midl24_023.bib}


\newpage
\appendix



\section{Dataset}
\label{sec:data_main}
Based on~\citet{rodero_virtual_2021,strocchi_publicly_zenodo_2020}, we built a public shape library of watertight 3D shapes for endo-/epicardium of the left/right ventricles ($4$ shapes in total).
Note that the dataset is composed of $20$ patients that were diagnosed as healthy (no cardiac conditions detected)~\cite{rodero_virtual_2021} and $24$ patients with various forms of heart failure who were recruited for cardiac resynchronization therapy (CRT) upgrade~\cite{strocchi_publicly_zenodo_2020,strocchi_publicly_plos_2020}.
The generated shape data is available as a Zenodo record~\cite{grandits_public_2024}.

\subsection{Surface mesh generation}
As a basis for our meshes, we used the publicly available data described in~\citet{rodero_virtual_2021,strocchi_publicly_zenodo_2020}.
Our dataset was produced by applying the following procedure to each of the meshes:
\noindent
First, after loading the data, we localize the left ventricular apex using the available universal ventricular coordinates (UVCs, \cite{bayer_universal_2018}) and use it as the Cartesian origin for each of the meshes.
Next, we extract the left/right ventricular (LV/RV) endo-/epicardia by using the available surface tags.
The provided meshes encode the walls and valves into separate tags, from which we extracted the surfaces.
We then identify all points of the wall that touch the associated valve (e.g.~LV wall with mitral and aortic valves).
To identify all reachable points, we compute an eikonal solution using~\cite{grandits_fast_2021} by placing an initial point on the inside of the wall (closest to the blood pool) with minimal velocity across points that touch any of the valves.
The wall is then separated into epi- and endocardium by applying a thresholding filter on the solution.
Finally, we remove non-manifold parts, recalculate the inside/outside orientation and close the surface to obtain a watertight, proper manifold surface for the endo- and epicardia separately.
Note that all steps exploit \texttt{VTK}~\cite{vtkBook} through \texttt{PyVista}~\cite{sullivan_pyvista_2019}.

\subsection{Surface sampling}
The training data was then generated by sampling $3\,000$ surface points (surface samples) and an additional $1\,000$ points, displaced in normal direction randomly (uniform) by up to $30$\,mm (band samples) per surface, i.e.~$16\,000$ points per patient.
This point cloud was sampled with a curvature-based weighting to create more samples around features of interest (e.g.~apex, valves).
Specifically, we generated the dataset samples using face-weights $w$ following~\citet[Eq.~2.6]{gao_gaussian_2019} that are defined as follows: 
consider a 2-dimensional compact surface $M$, isometrically embedded in $\R^3$, with its Gaussian curvature~$\kappa$ and mean curvature~$\eta$.
The weighting function is then defined as 
\begin{equation}
    w_{\lambda, \rho}(\vx) = \frac{\lambda |\kappa(\vx) |^\rho}{\int_M |\kappa(\xi)|^\rho \diff{\text{vol}_M}(\xi)} 
                    + \frac{(1 - \lambda) | \eta (\vx) |^\rho}{\int_M |\eta(\xi)|^\rho \diff{\text{vol}_M}(\xi)}.
\end{equation}
In our experiments, we used $w_{0.1, 0.75}$ as the curvature weight, purely by visual inspection.
We used the library \texttt{trimesh}~\cite{trimesh} for sampling and computing $\kappa$ and $\eta$.


\begin{figure}[htb]
    \centering
    \begin{tikzpicture}
    
        \pgfmathsetmacro{\offsetx}{1.65};
    
        \foreach \i in {0,1,2,3,4,5,6,7,8}{
            \node[inner sep=0pt] (im\i) at (\offsetx*\i,0) {\includegraphics[width=0.11\textwidth,trim={62 0 62 0},clip]{source/interpolation/interpolation_\i.png}};
            }
        \node[inner sep=0pt] (text1) at (0,-1) {$t=-0.5$};
        \node[inner sep=0pt] (text1) at (\offsetx*2,-1) {$t=0$};
        \node[inner sep=0pt] (text1) at (\offsetx*4,-1) {$t=0.5$};
        \node[inner sep=0pt] (text2) at (\offsetx*6,-1) {$t=1$};
        \node[inner sep=0pt] (text1) at (\offsetx*8,-1) {$t=1.5$};
    \end{tikzpicture}
    \caption{The figure shows the generation of heart models via interpolation and extrapolation of latent code vectors. In detail, we take two latent code representations $\latentcode_1,\latentcode_2$ from the training dataset and decode the linear combinations $t \latentcode_1 +(1-t)\latentcode_2$. We present the reconstructed endocardia.} \label{fig:interpolation}
\end{figure}

\begin{figure}[htb]
    \centering
    \input{source/sampling.tex}
    \caption{We reconstruct points in the latent space that are sampled from a zero-mean Gaussian distribution with covariance $\sigma^2I$.} \label{fig:sampling}
  \end{figure}


\begin{figure}[h]
  \centering
  \scalebox{0.7}{
  \begin{tikzpicture}
      \pgfmathsetmacro{\offsetx}{4.6};
      \pgfmathsetmacro{\offsety}{-3.8};
      \node[inner sep=0pt] (im1) at (0,0) {\includegraphics[width=4.0cm,trim={200 0 200 0},clip]{source/lv_to_rv/lv_endo.png}}; 
      \node[anchor=south](im1_label) at (im1.north) {\large LV endo};
      \node[inner sep=0pt] (im2) at (\offsetx,0) {\includegraphics[width=4.0cm,trim={200 0 200 0},clip]{source/lv_to_rv/lv_epi.png}}; 
      \node[anchor=south](im2_label) at (im2.north) {\large LV epi};
      \node[inner sep=0pt] (im3) at (2*\offsetx,0*\offsety) {\includegraphics[width=4.0cm,trim={150 0 250 0},clip]{source/lv_to_rv/rv_endo.png}}; 
      \node[anchor=south](im3_label) at (im3.north) {\large RV endo};
      \node[inner sep=0pt] (im4) at (3*\offsetx,0*\offsety) {\includegraphics[width=4.0cm,trim={150 0 250 0},clip]{source/lv_to_rv/rv_epi.png}}; 
      \node[anchor=south](im4_label) at (im4.north) {\large RV epi};
      \node[inner sep=0pt] (im4) at (3.6*\offsetx+0.7,-0.21*\offsety) {\includegraphics[width=1.7cm]{source/lv_to_rv/colorbarmm.png}}; 
  \end{tikzpicture}
  }
      \caption{We reconstruct all four surfaces from 50 points on the endocardium of the left ventricle. 
      The reconstructed meshes are color-coded with the mesh distance, i.e.~the distance to the nearest point on the ground truth mesh in mm.} \label{fig:lv_to_rv}
  \end{figure}

\begin{figure}[h]
  \centering
  \input{source/reconstruction_test.tex}
  \caption{Reconstruction quality of the left ventricle from $50$ points with different levels of noise. We color-coded the implicit distance to the ground truth mesh.} \label{fig:noise}
\end{figure}


\FloatBarrier
\section{Metrics}\label{app:metrics}
To quantify the quality of reconstructed meshes we use the \emph{L2-Chamfer-distance} (CD), the \emph{Hausdorff-distance} (HD), and a \emph{Large deformation diffeomorphic metric mapping} (LDDMM) loss term between the reconstructed mesh and the ground truth mesh.
We calculate the L2-Chamfer distance and the Hausdorff distance by sampling 50\,000 points on each mesh. For two point clouds $\mathbf{X}$ and $\mathbf{Y}$ the CD is given as
\[
    d_\text{CD}(\mathbf{X},\mathbf{Y})= \frac{1}{\vert \mathbf{X} \vert} \sum_{x\in \mathbf{X}} \min_{y\in \mathbf{Y}}\Vert x-y\Vert_2 + \frac{1}{\vert \mathbf{Y} \vert} \sum_{y\in \mathbf{Y}} \min_{x\in \mathbf{X}}\Vert x-y\Vert_2.
\]
The Hausdorff distance (HD) is defined as
\[
    d_\text{HD}(\mathbf{X},\mathbf{Y})= \max \lbrace \max_{x\in \mathbf{X}} \min_{y\in \mathbf{Y}}\Vert x-y\Vert_2,  \max_{y\in \mathbf{Y}} \min_{x\in \mathbf{X}}\Vert x-y\Vert_2 \rbrace.
\]
With the LDDMM loss, we measure how well the reconstructed mesh $\mathcal{M}_r$ can be registered to the original ground truth mesh $\mathcal{M}_{gt}$.
To obtain numerically comparable results across the different methods we first remesh the results to obtain meshes with the same resolution (for this task, we use the Blender software package~\cite{blender} with a voxel resolution of 0.9~mm).
To obtain the LDDMM loss, we calculate the center points $c_F$, the normals $n_F$ and the area $A_F$ of every face $F$ from the sets of faces $\mathcal{F}_r$ and $\mathcal{F}_{gt}$, respectively. 
For $\gamma=1$ let
\[
    C(\mathcal{F}_1, \mathcal{F}_2) = \sum_{F_1\in \mathcal{F}_1}\sum_{F_2 \in \mathcal{F}_2} e^{-\gamma \Vert c_{F_1}-c_{F_2}\Vert_2^2} \langle n_{F_1},n_{F_2} \rangle A_{F_1}A_{F_2}.
\]
Then, the LDDMM loss is defined as
\[
    d_{L} (\mathcal{M}_{gt},\mathcal{M}_{r}) = C(\mathcal{F}_{gt},\mathcal{F}_{gt})+ C(\mathcal{F}_{r},\mathcal{F}_{r}) - 2C(\mathcal{F}_{gt},\mathcal{F}_{r}).
\]

\section{Fitting the Statistical Shape Model}\label{sec:ssm}
The statistical shape model describes a heart shape as a variation of a mean shape $\mu$ in different directions (modes). 
We used the publicly available SSM from the cardiac atlas project that is based on $630$ healthy Biobank reference patients, further described in~\citet{petersen_reference_2017}.
A mean point cloud $\mathbf{X}\in\R^{N\times3}$ of $N$ points is provided together with 200 eigenmodes $\mathbf{V}_i\in\R^{N\times3}$ for $i=1,\ldots,200$ and corresponding eigenvalues $\lambda\in \R^{200}$; the point cloud and the eigenmodes are restricted to the subset of points corresponding to the endocardium of the left ventricle.
For a given sample with weights $\alpha\in \R^{200}$ and a spatial offset $\mathbf{b}\in\R^3$, we obtain a representation of the resulting point cloud $\mathbf{C}$ as
$\mathbf{C} = \mathbf{X} + \mathbf{1}_N \mathbf{b}^\top + \sum_{i=1}^{200}\alpha_i \lambda_i \mathbf{V}_i$.
For a given target point cloud $\mathbf{Y}$ we use this model to optimize the asymmetric Chamfer distance
$d_\text{aCD}(\mathbf{Y},\mathbf{C}) = \frac{1}{\vert \mathbf{Y} \vert} \sum_{y\in \mathbf{Y}} \min_{x\in \mathbf{C}}\Vert x-y\Vert_2$
w.r.t.\ the weights $\alpha$ and the spatial offset $\mathbf{b}$. Additionally, we include an $\ell_2$ regularization term on the weights $\alpha$, scaled by $\beta$, to obtain the final objective function
\[
    \mathcal{J}_\mathbf{Y}(\alpha,\mathbf{b}) = d_\text{aCD}\left(\mathbf{Y},\mathbf{X} + \mathbf{1}_N \mathbf{b}^\top + \sum_{i=1}^{200}\alpha_i \lambda_i \mathbf{V}_i\right)+\beta\Vert \alpha\Vert_2.
\]
This loss function is optimized for 5\,000 epochs with the Adam optimizer~\cite{kingma_2014_adam}, with a learning rate of $0.005$.

\newpage

\begin{table}[h!]
    \centering
    \scalebox{0.9}{\input{source/result_table}}
    \caption{Mean and standard deviation of the Chamfer distance (CD), the Hausdorff distance (HD), and the LDDMM loss for different numbers of input points $n$ and different levels of noise $\xi$. For every test case, the lowest value is printed in bold and the second lowest value is printed in italic. The asterisk denotes the version of our network without Lipschitz regularization.} \label{tab:results}
\end{table}

\begin{figure}[h]
    \centering
    \input{source/optical_comparison.tex}
    \caption{Mesh quality for different methods on mesh 43. We depict the result for 200 points. We color-coded the absolute distance to the ground truth mesh on the remeshed results.} \label{fig:optical_comparison}
\end{figure}
\FloatBarrier



\section{Multichamber Experiments}
In this section, we investigate the advantages of jointly encoding the left and right endocardial shapes in comparison to independently reconstructing the two shapes.
To this end, we compare the average Chamfer distances across both endocardia for the case of a separate reconstruction and a joint reconstruction for four test meshes.
Additionally, we perform the same experiment on partially observable regions, where the points on the left ventricle are only sampled from its top half (ventricular base) and the points of the right ventricle are sampled from its lower half (ventricular apex).
The results of both experiments can be found in Table~\ref{tab:lv_rv}.
For the case of full data availability, the joint reconstruction seems to be advantageous only for the case of very sparse data ($n=10$), but for the cases where only partial data is available, the joint reconstruction improves the resulting reconstruction in all but two cases.

\begin{table}[h!]
    \centering
  \begin{tabular}{c|c|c|c?{1.5pt}c|c}
        & & \multicolumn{2}{c?{1.5pt}}{full data}& \multicolumn{2}{c}{partial data}   \\
    \midrule
       $\xi$ &$n$ & $\mathrm{CD}_{\text{separate}}$ & $\mathrm{CD}_{\text{joint}}$ & $\mathrm{CD}_{\text{separate}}$ & $\mathrm{CD}_{\text{joint}}$  \\
       \midrule
       \parbox[t]{1mm}{\multirow{4}{*}{$0$}}  
        & 10 &  2.71 & \best{2.47} & 3.31 & \best{2.50} \\
        & 20 &  \best{1.57} & 1.86 & 2.76 & \best{2.38} \\
        & 30 &  1.43 & \best{1.42} & \best{2.34} & 2.77\\
        & 40 &  \best{1.17} & 1.31 & \best{2.41} & 2.46\\
        \midrule
        \parbox[t]{1mm}{\multirow{4}{*}{$2$}}  
         & 10 & 3.13 & \best{3.06} & 4.16 & \best{2.81} \\
         & 20 &  2.43 & \best{2.39} & 3.24 & \best{2.72}\\
         & 30 &  2.01 & 2.01 & 3.35 & \best{2.50}\\
         & 40 &  \best{1.83} & 2.12 & 3.24 & \best{2.82}\\
         \midrule
         \parbox[t]{1mm}{\multirow{4}{*}{$5$}}  
          & 10 &  3.63 & \best{3.49} & 3.95 & \best{3.92} \\
          & 20 &  \best{3.42} & 3.47 & 4.51 & \best{4.12}\\
          & 30 &  \best{3.04} & 3.25 & 3.76 & \best{3.70}\\
          & 40 &  \best{2.86} & 2.87 & 3.89 & \best{3.71} \\
  \end{tabular}
  \caption{Average reconstruction quality per surface for inference based on $n$ points on the LV endocardium only.}
  \label{tab:lv_rv}
\end{table}




\section{Obtaining the EAM data}\label{app:eam}
The EAM data has been acquired with the NOGA-XP system (Biologic Delivery Systems, Biosense Webster) equipped with a conventional 7-Fr deflectable-tip mapping catheter (NAVI-STAR, Biosense Webster). 
Spatial positions of the tip of the catheter were acquired at 100 Hz, and aligned in time using the automatically detected R-peak of the 12-lead ECG. 
Points were accepted by the system according to a set of criteria for catheter stability and signal quality. 
The institutional review board approved the study protocol, and all patients gave written and oral informed consent for the investigation (the study is compliant with the Declaration of Helsinki). 
Further information on the study has been previously reported by~\citet{Maffessanti2020scar,Pezzuto2021ECG}.
We pre-processed the data by applying a $-90^\circ$ rotation about the $X$-axis (NOGA to LPS-MRI coordinate system), followed by a translation to align the LV epicardial apex with the origin.
Note that only an approximate alignment is possible since the epicardium is not present in the EAM data.

\begin{figure}
  \centering
  \input{source/noga.tex}
  \caption{Reconstruction of the LV endocardium from the EAM data. We compare the \ssm reconstruction (left panel) to our approach for an optimal noise level estimation of $\bar{\xi}=3.368$\,mm (right panel). We also report the absolute distance from the geometry provided by the EAM system.} \label{fig:noga}
\end{figure}
\section{Estimation of noise level}\label{sec:noise_estimation}
In \tableref{tab:noise_estimation} we present the numerical convergence of the noise estimator $\bar{\xi}_{i}$ for the first six steps on the EAM data (cf.~Section~\ref{sec:eam}).
Additionally, the performance of this method is tested for a synthetic point cloud (Heart 43, $n=500$, $\xi=5$), where the correct noise level is obtained.
\begin{table}[h!]
    \centering
  \begin{tabular}{c|c|c|c|c}
       step & \multicolumn{2}{c|}{synthetic}& \multicolumn{2}{c}{EAM data}   \\
       \midrule
       $\bar{\xi}_{0}$&  0     & 15    & 0     & 15    \\
       $\bar{\xi}_{1}$&  3.159 & 4.882 & 1.499 & 4.672 \\
       $\bar{\xi}_{2}$&  4.878 & 5.063 & 3.184 & 3.686 \\
       $\bar{\xi}_{3}$&  5.063 & 5.060 & 3.344 & 3.398 \\
       $\bar{\xi}_{4}$&  5.060 & 5.060 & 3.797 & 3.367 \\
       $\bar{\xi}_{5}$&  5.060 & 5.060 & 3.410 & 3.364 \\
       $\bar{\xi}_{6}$&  5.060 & 5.060 & 3.368 & 3.364 \\
  \end{tabular}
  \caption{Iteration of the variance estimation for the synthetic case (Heart 43, $n=500$, $\xi=5$) and the EAM data.}
  \label{tab:noise_estimation}
\end{table}



\end{document}

