\documentclass{midl} % Include author names
%\documentclass[anon]{midl} % Anonymized submission

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution

\usepackage{mwe}
\usepackage{svg,multicol}
\usepackage{graphicx}
\usepackage[english]{babel}
\usepackage{comment}
\usepackage{graphicx}
\usepackage{float}
\usepackage{caption}

\jmlryear{2024}
\jmlrworkshop{Full Paper -- MIDL 2024}
\jmlrvolume{-- nnn}
\editors{Accepted for publication at MIDL 2024}

\newcommand{\psrc}{\ensuremath{p_{\text{source}}}}
\newcommand{\psink}{\ensuremath{p_{\text{sink}}}}
\newcommand{\betasrc}{\ensuremath{\beta_{\text{source}}}}
\newcommand{\betasink}{\ensuremath{\beta_{\text{sink}}}}


\title[FVINN for Myocardial Perfusion Simulation]{Finite Volume Informed Graph Neural Network for Myocardial Perfusion Simulation}

\midlauthor{\Name{Raoul {Sallé de Chou}\nametag{$^{1,2}$}} \Email{raoul.salle-de-chou@inria.fr}\\
\Name{Matthew Sinclair\nametag{$^{3}$}}\Email{msinclair@heartflow.com}\\
\Name{Sabrina Lynch\nametag{$^{3}$}}\Email{slynch@heartflow.com}\\
\Name{Nan Xiao\nametag{$^{3}$}}\Email{nxiao@heartflow.com}\\
\Name{Laurent Najman\nametag{$^{4}$}}\Email{laurent.najman@esiee.fr}\\
\Name{Irene E. Vignon-Clementel\midljointauthortext{Contributed equally}\nametag{$^{1}$}}\Email{irene.vignon-clementel@inria.fr}\\
\Name{Hugues Talbot\midlotherjointauthor\nametag{$^{2}$}}\Email{hugues.talbot@centralesupelec.fr}\\
\addr $^{1}$ Inria, Palaiseau, France \\
\addr $^{2}$ CentraleSupelec, Inria, Université Paris-Saclay, France\\
\addr $^{3}$ HeartFlow Inc., Mountain View, USA\\
\addr $^{4}$ Univ Gustave Eiffel, CNRS, LIGM, France
}

\begin{document}
\maketitle

\begin{abstract}
Medical imaging and numerical simulation of partial differential equations (PDEs) representing biophysical processes have been combined in the past few decades to provide noninvasive diagnostic and treatment prediction tools for various diseases. Most approaches involve solving computationally expensive PDEs, which can hinder their effective deployment in clinical settings. To overcome this limitation, deep learning has emerged as a promising method to accelerate numerical solvers. One challenge persists, however, in the generalization abilities of these models, given the wide variety of patient morphologies. This study addresses this challenge by introducing a physics-informed graph neural network designed to solve Darcy equations for the simulation of myocardial perfusion. Leveraging a finite volume discretization of the equations as a ``physics-informed'' loss, our model was successfully trained and tested on a 3D synthetic dataset, namely meshes representing simplified myocardium shapes. Subsequent evaluation on genuine myocardium meshes, extracted from patients' computed tomography images, demonstrated promising results and generalization capabilities. Such a fast solver, within a differentiable learning framework, will make it possible to tackle inverse problems based on $\text{H}_2\text{O}$-PET perfusion imaging data.
\end{abstract}

\begin{keywords}
Graph Neural Network, Partial Differential Equations, Physics-informed Neural Network, Finite Volume method, Digital Twins, Perfusion simulation
\end{keywords}

\section{Introduction}
\label{SEC:Introduction}
Integrating medical imaging techniques and numerical simulation methods has been shown to be promising as a noninvasive diagnostic and treatment predictive tool for various diseases \cite{vignon2006outflow,vignon2010primer,vignon2023proof,pant2022multiscale,schwarz2023beyond}. The underlying models usually entail solving complex partial differential equations (PDEs): the associated computational complexity can cause difficulties in real clinical applications. Hence, the computational biomedical community has been seeking ways to derive reduced-order methods \cite{guibert2014group,caiazzo2016reduced}. Deep Learning (DL) methods for PDE resolution, which can be interpreted as pre-computing solutions over a large database of patients, have gained significant attention \cite{chen2021graph, pegolotti2024learning}. However, training data-driven models for PDE resolution has its own set of problems, particularly in achieving generalization across diverse geometric configurations such as patient-specific organ morphometries.
Physics-informed neural networks, introduced by \cite{raissi2019physics}, incorporate prior physical knowledge by integrating PDE equations into the loss function via automatic differentiation. This constrains the solution search space, facilitating training. However, these models usually require retraining when modifying the geometry of the domain. Other physics-informed approaches aim to improve learning generalization abilities by combining traditional solver discretization schemes with neural networks \cite{wandel2021teaching,zhao2023physics,li2023finite,nastorg2023implicit}. The most promising methods leverage the Finite Element Method (FEM) or the Finite Volume Method (FVM), combined with Graph Neural Networks (GNN). GNNs, designed for processing graphs, are applicable to unstructured meshes of varying shapes, unlike Convolutional Neural Networks, which are limited to Euclidean grids.
In \cite{li2023finite}, a GNN with a finite volume constrained loss combined with ground truth data, successfully predicted the 2D Navier-Stokes solutions over a diverse range of shapes, starting from the solution of one time step and predicting the next one. \cite{donon2020deep} developed a FEM-informed loss function that effectively addressed the Poisson equation on various 2D shapes, employing a GNN without reliance on ground truth data. These neural networks demonstrated in some cases superior generalization abilities compared to data-driven-only models, but were only tested in 2D cases, with a small number of unknowns and with simple shapes.

Our study introduces a Finite-Volume Informed Graph Neural Network for solving the 3D Darcy equations to model myocardial perfusion. It is trained on a synthetic dataset with diverse non-convex shapes resembling patient left ventricular geometries. Our model incorporates a physics-informed loss, thus eliminating the need for ground truth data. The PDE is discretized with Finite Volumes. Once the training is complete, the model can be applied in inference mode to new geometries to predict a solution, without retraining. We show preliminary results of our model and its generalization ability on a patient-specific left-ventricle myocardium dataset from coronary computed tomography angiography (CCTA) images. To our knowledge, this is the first physics-informed model tested on a real-world 3D dataset with a clinical purpose, and the first fully finite volume-informed neural network (FVINN), i.e., one trained without the need for ground-truth data.

\section{Datasets generation}
\label{SEC:Datasets generation}
\subsection{Myocardial perfusion simulation}
\label{SSEC:Myocardial perfusion simulation}
In prior investigations,~\cite{papamanolis2021myocardial} formulated a patient-specific simulation pipeline for diagnosing coronary artery diseases based on CT-scans. The model is initiated with the segmentation of the aorta, epicardial coronary arteries, and left ventricle myocardium from CCTA image data. To account for small arteries that fill the myocardium volume and are too small to segment on CCTA images, synthetic coronary trees are generated from the epicardial segmented roots \cite{jaquet2018generation}. Reduced order blood flow  equations are then solved in the defined vascular network, with both segmented and synthetic components, to simulate the blood flow supply to the left ventricle myocardium.

Subsequently, to simulate perfusion in the myocardium and to account for the microcirculation (blood flow in the arterioles, capillaries, $\ldots$), a porous-media model governed by Darcy's law is utilized \cite{chapelle2010poroelastic}. The model comprises a source term driven by coronary flow and a homogeneous sink term (to simulate an ideal venous system). In order to obtain the source term, $\psrc$, maps are generated by associating a perfusion territory in the myocardium with each outlet of the synthetic coronary tree. These territories are estimated through a Voronoi tessellation. Finally, a $\psrc$ value is assigned to each perfusion region. This value is the pressure at the vasculature outlet, determined by solving the flow equations in the network.

We will focus in this study on predicting the FVM solution of the Darcy equations on different meshes and $\psrc$ maps, by a FVINN. We describe in the following subsection the different datasets for training, validation, and test.

\subsection{Synthetic dataset}
\label{SSEC:Synthetic dataset}
To facilitate model training, we generated a synthetic dataset comprising half-ellipsoidal shells aimed at simulating simplified myocardium morphologies. The dataset was created using gmsh~\cite{geuzaine2009gmsh}. As illustrated in Figure \ref{synthetic shapes}, the geometries were generated by defining 5 points connected by elliptical arcs. The resulting 2D shape was then extruded around the z-axis to form a volume, within which a volume mesh was generated. For the training dataset, 2000 distinct geometries were generated, with an additional 400 for the validation dataset. Various lengths $L1, L2, L3, L4,$ and $L5$ were sampled to diversify the geometries. Furthermore, different $\psrc$ maps were assigned to each geometry: within each volume, 1000 seed points were sampled. A Voronoi tessellation was grown from these points to emulate the perfusion regions associated with network outlets. Each Voronoi region was assigned a random $\psrc$ value sampled from a normal distribution, in line with the observed $\psrc$ distribution from patient-specific simulations \cite{papamanolis2021myocardial}.
\begin{figure}[!t]
\begin{multicols}{3}
    \includesvg[width=0.7\linewidth]{Figures/synthetic2d.svg}\par
    \includegraphics[width=0.45\linewidth]{Figures/synthetic_mesh.jpg}\par
    \includegraphics[width=0.6\linewidth]{Figures/psource_patient.png}\par
\end{multicols}
\caption{Left: schematic of the synthetic shape generation. Centre: example of one final synthetic 3D mesh. Right: example of one patient myocardium with its $\psrc$ map.}\label{circular}\label{synthetic shapes}
\end{figure}

\subsection{Patient myocardium data}
\label{SSEC:Patient myocardium data}
As a test dataset, the left ventricle myocardium of 400 patients was segmented from CCTA image data, as part of the human-in-the-loop process for the HeartFlow FFR$_{\text{CT}}$ Analysis (HeartFlow Inc., US). CCTA images were received from multiple sites around the world by HeartFlow, Inc. \cite{taylor2013computational}. 3D tetrahedral meshes of the myocardium volumes were generated with gmsh. Synthetic $\psrc$ maps were also attributed to each myocardium mesh according to the method described in Section~\ref{SSEC:Synthetic dataset}. Each mesh was composed of approximately 60k nodes in both datasets.

\section{Methodology}
\label{SEC:Methodology}
\subsection{Governing equations}
\label{SSEC:Governing equations}
In this work we focus on solving the 3-dimensional (3D) single compartment Darcy model for an incompressible flow:
\begin{equation}\label{darcy1}
\textbf{w} + \textbf{K}\nabla p=0 \text{ in } \Omega
\end{equation}
\begin{equation}\label{darcy2}
\nabla \cdot \textbf{w}= \betasrc(\psrc-p)-\betasink(p-\psink) \text{ in } \Omega
\end{equation}
\begin{equation}\label{darcy_neum}
\textbf{w} \cdot n=0\text{ on } \partial\Omega
\end{equation}
with $\textbf{K}$ the permeability tensor, $\textbf{w}$ the Darcy velocity, $p$ the capillary bed pressure, $\psrc$ and $\psink$ the source and sink pressure terms respectively, $\betasrc$ and $\betasink$ parameters describing the conductance of flow entering and exiting the myocardium respectively. These two parameters are patient specific; their computation is described in \cite{papamanolis2021myocardial}. By applying the divergence operator to \equationref{darcy1}, substituting the result into \equationref{darcy2}, and assuming a homogeneous and isotropic permeability $\textbf{K}=K\textbf{I}$, we obtain the following Poisson equation, which can be solved for $p$ with no-flux boundary conditions (\equationref{darcy_neum}):
\begin{equation}\label{darcy_final}
K\Delta p=-\betasrc(\psrc-p)+\betasink(p-\psink) \text{ in } \Omega
\end{equation}

\subsection{Finite volume method}
\label{SSEC:Finite volume method}
FVM is a discretization method for the approximation of PDEs. It is based on a volume integral formulation of the PDE over a finite number of control volumes discretizing the domain on which the equations are solved.
The methodology unfolds by integrating the equations over each control volume. Introducing the vector $n$, normal to the control volume surface, and applying the divergence theorem on the diffusion term allows the replacement of volume integrals with surface integrals:
\begin{equation}\label{darcy_surface}
\int_S K\nabla p \cdot n \, dS = \int_{\Omega} \left(-\betasrc(\psrc-p)+\betasink(p-\psink)\right)\;d\Omega
\end{equation}
In this study, we consider the vertex-centered FVM where the control volumes are delimited by the dual mesh of a 3D tetrahedral mesh (see \figureref{dualmesh}). Using the mean value approach, the fluxes (left-hand side of \equationref{darcy_surface}) are approximated by their values at the centre of each face, while the source terms (right-hand side) are approximated by their values at the centre of the control volume ($p$ in the following is the discrete pressure):
\begin{equation}\label{darcy_mean}
K\sum_{f\in \text{faces}}\nabla p_f \cdot n_f \, S_f= (-\betasrc(\psrc-p)+\betasink(p-\psink))\;\Omega
\end{equation}
The values on each face are linearly interpolated between the nodes connected to the edge crossing the face $f$. First order approximations are used for the pressure gradient.
\begin{figure}[htbp]
\floatconts
  {fig:example}
  {\caption{Example of a 2D vertex-based control volume where the element centroids are connected to the edge centroids. In this study, such control volumes are generalized to 3D tetrahedral meshes by replacing edge centroids with face centroids. $i$ is the index of the node associated with the control volume shown in red, $f$ denotes one face of the control
volume and $j$ the index of one neighbour of $i$.}}
  {\includesvg[width=0.20\linewidth]{Figures/mesh_element.svg}}\label{dualmesh}
\end{figure}
\subsection{Graph Unet Attention Network}
\label{SSEC:Graph Unet Attention Network}
\subsubsection{Graph Neural Network for solving PDEs}
\label{SSSEC:Graph Neural Network for solving PDEs}
GNNs represent a specific class of neural networks designed for processing graph structures. Specifically, meshes utilized for solving discretized PDEs can be viewed as graphs denoted as $G=(N, V, E)$, where $N$ is the number of nodes, $V$ is a matrix of node features, and $E$ is the adjacency matrix indicating connections between nodes. GNNs typically consist of multiple layers that update the node feature matrix by exchanging information between nodes along graph edges.

The solution to the Darcy equations introduced in Section \ref{SSEC:Governing equations} necessitates solving a linear system of equations. Iterative methods, such as the Jacobi method, are very efficient to solve these large systems of equations. The Jacobi method starts with an initial guess of the solution and iteratively updates it based on the previous state:
\begin{equation}\label{jacobi}
p^{k+1}_{i}= \frac{1}{a_{ii}}(b_i - \sum_{j\in N_i}a_{ij}p^{k}_{j})
\end{equation}
with $p^{k}_{i}$ the Darcy pressure of node $i$ at iteration $k$, $N_i$ the indices of the nodes in the neighbourhood of node $i$, and $b_i$ and $a_{ij}$ coefficients computed from the different constants in \equationref{darcy_mean}. The solution is obtained once the difference between two successive states is small enough. As mentioned by \cite{liu2021multi}, this sequence of operations is similar to the way GNNs perform.
\subsubsection{Graph Attention layer}
\label{SSSEC:Graph Attention layer}
The graph attention network was first introduced by \cite{velickovic2017graph}. Given a graph $G=(N, V, E)$, for every node $i \in \{1,\dots,N\}$, attention coefficients are computed only between neighbours: $\alpha_{ij} = \text{softmax}_j(\text{LeakyReLU}(a^{\top} [W h_i \,\|\, W h_j]))$, with $a$ the weight vector of a single-layer feedforward neural network, $W$ a weight matrix and $h_i$ the vector of node features of node $i$.
The attention mechanism finally updates the feature vector $h_i$, similarly to the Jacobi method (\equationref{jacobi}), through a nonlinear function $\sigma$:
\begin{equation}\label{attention_final}
h_i = \sigma(\sum_{j\in N_i}\alpha_{ij} W h_j)
\end{equation}
\subsubsection{Graph pooling and unpooling}
\label{SSSEC:Graph pooling and unpooling}
Similarly to \cite{zhao2021gau}, we employ a Graph Attention U-Net (GATU-net, \figureref{model architecture}), with a slight deviation in the pooling and unpooling operations. The GATU-net architecture comprises three levels. During each pooling operation, a random sub-sample of the preceding upper-level mesh nodes serves as input. The node features of these sub-samples are computed by averaging the node features of the k-nearest neighbours from the upper mesh's level. Notably, the unpooling function is the one proposed in PointNet++ \cite{qi2017pointnet++}, where features are computed through distance-weighted interpolation from the k-nearest neighbours of the previous lower-level stage's mesh during the decoding phase.

A node encoder is employed to encode node features into a latent space. For each node, the encoder takes as input the node coordinates, along with values for $\psrc$, $\betasrc$, and $\betasink$. Then, a node decoder processes the output of the GATU-net and predicts a pressure value for every node.

\begin{figure}[htbp]
\floatconts
  {fig:example0}
  {\caption{GATU-net architecture. Each grid corresponds to a mesh with a different node feature matrix computed thanks to the graph attention layers.}}
  {\includesvg[width=0.65\linewidth]{Figures/gatunet.svg}}\label{model architecture}
\end{figure}

\subsection{Finite volume informed loss function}
\label{SSEC:Finite volume loss}
The loss function used to train the model is derived from \equationref{jacobi}; it is the mean over all nodes $i$ of the absolute value of the residual:
\begin{equation}\label{darcy}
|p_{\theta}(i)-(\frac{1}{a_{ii}}(b_i - \sum_{j\in N_i}a_{ij}p_{\theta}(j)))|
\end{equation}
With $p_{\theta}(i)$ the pressure output of the node $i$ of the decoder. It should be noted that no reference pressure solution is used to train the model. In addition, the same GATU-net architecture is trained with a classical L1 loss function between the network output and a ground truth from FVM. The latter model will be denoted as the supervised model (SM) in the next sections.
The FVINN can be trained on a dataset comprising varying geometries and $\psrc$ maps. The goal of the proposed method is to find a set of convolutions which minimize \equationref{darcy} for every node of the meshes. Once trained, the model can be applied to a new geometry and $\psrc$ map to predict the solution in inference mode.

\section{Results}

In this section, we present the results of FVINN and SM compared to the FVM solution. For both models, we report the actual, relative and normalized mean absolute error (MAE) and mean max absolute error per shape (MMAE). The normalized error is the actual error divided by the standard deviation of the FVM solution. The computational time to solve the linear system of equations given in \equationref{darcy_mean} using the GPU-enabled PyTorch function \texttt{linalg.solve} is approximately 4.28 seconds. The forward pass of our network to compute the solution takes 0.56 seconds.

The model was successfully trained on the synthetic dataset. SM obtained a slightly better MAE than FVINN on both the training and validation datasets. Specifically, on the validation dataset, the SM achieved an MAE of 3.74 Pa compared to 4.04 Pa for FVINN (\tableref{mae}). The relative MAE on the validation dataset was approximately 0.20\% for both models, which corresponds to about 0.15 standard deviations of the FVM solution, as shown by the normalized MAE.

On the test dataset, the MAE was also smaller for the SM compared to FVINN, with relative MAE values of 0.33\% and 0.40\%, respectively (\tableref{mae}). However, FVINN exhibited better MMAE on all three datasets. Notably, the MMAE on the test dataset was 99.23 Pa and 35.10 Pa for SM and FVINN, respectively (\tableref{mmae}). The high MMAE, combined with a relatively low MAE, indicates localized high errors for the SM, as displayed in \figureref{results_fig}. No particular locations of these errors were found.

\begin{figure}[htbp]
\floatconts
  {fig:example1}
  {\caption{Comparison of the FVINN and SM results with the FVM ground truth on one case from the test dataset.}}
  {\includegraphics[width=0.65\linewidth]{MIDL 2024 full paper/Figures/midl_revised.PNG}}\label{results_fig}
\end{figure}

\begin{table}[htbp]
\scriptsize
\floatconts
   {tab:example0}%
  {\caption{Mean absolute error (MAE) and standard deviation of the absolute error on training, validation and test datasets.}}
  {\begin{tabular}{lllll}
  \bfseries Dataset & \bfseries Model & \bfseries MAE (Pa)& \bfseries Relative MAE ($\%$)& \bfseries Normalised MAE\\ \hline
  Training & FVINN & 4.02 $\pm$ 1.19 & 0.20 $\pm$ 0.05 & 0.16 $\pm$ 0.04\\
  & SM & \textbf{3.75} $\pm$ 0.89 & \textbf{0.18} $\pm$ 0.04 & \textbf{0.15} $\pm$ 0.03\\ \hline
  Validation & FVINN & 4.04 $\pm$ 1.15 & 0.20 $\pm$ 0.05& 0.16 $\pm$ 0.03\\
  & SM & \textbf{3.74} $\pm$ 0.86 & \textbf{0.18} $\pm$ 0.04 & \textbf{0.15} $\pm$ 0.07\\ \hline
  Myo & FVINN & 8.06 $\pm$ 2.28 & 0.40 $\pm$ 0.11 & 0.31 $\pm$ 0.08\\
  & SM & \textbf{6.79} $\pm$ 4.44 & \textbf{0.33} $\pm$ 0.18& \textbf{0.26} $\pm$ 0.17\\
  \end{tabular}}\label{mae}
\end{table}

\begin{table}[htbp]
\scriptsize
\floatconts
   {tab:example1}%
  {\caption{Mean max absolute error (MMAE) per shape and its standard deviation on training, validation and test datasets.}}%
  {\begin{tabular}{lllll}
  \bfseries Dataset & \bfseries Model & \bfseries MMAE (Pa)& \bfseries Relative MMAE ($\%$)& \bfseries Normalised MMAE\\ \hline
  Training & FVINN & \textbf{18.50} $\pm$ 6.94 & \textbf{0.92} $\pm$ 0.35 & \textbf{0.75} $\pm$ 0.20\\
  & SM & 18.56 $\pm$ 5.04 & 0.93 $\pm$ 0.30 & 0.76 $\pm$ 0.19\\ \hline
  Validation & FVINN & \textbf{18.19} $\pm$ 5.36 & \textbf{0.90} $\pm$ 0.26& \textbf{0.76} $\pm$ 0.18\\
  & SM & 18.31 $\pm$ 1.68 & 0.92 $\pm$ 0.22 & 0.78 $\pm$ 0.21\\ \hline
  Myo & FVINN & \textbf{35.10} $\pm$ 11.70 & \textbf{1.75} $\pm$ 0.58 & \textbf{1.37} $\pm$ 0.41\\
  & SM & 99.23 $\pm$ 381.79 & 3.92 $\pm$ 5.75& 3.90 $\pm$ 10.88\\
  \end{tabular}}\label{mmae}
\end{table}

\section{Discussion}
In this study, we presented a graph neural network for 3D myocardial perfusion prediction. The model incorporates a finite volume-informed loss function and requires no ground truth data for training. Successfully trained on a fully synthetic dataset, the neural network was then tested on patient-specific myocardium meshes. To our knowledge, this is the first application of such a model on a dataset of patient-specific data and large size meshes. Despite our model not being trained against ground truth from simulations, it achieves comparable accuracies on synthetic datasets, validating this approach. However, while SM obtained better MAE, the MMAE of our FVINN was better on the three datasets and almost three times smaller than SM on the patient-specific dataset. Our model demonstrates more robust generalization results on unseen geometries, which may differ significantly from the original distribution of shapes in the training dataset. Nevertheless, the synthetic datasets used in our study represent highly simplified myocardium shapes, which limits our ability to draw conclusions about the efficacy of FVINN or SM for predicting Darcy solutions on patient left-ventricle myocardium meshes. Future work should involve training these models on patient-specific or more complex synthetic data to further investigate their performance.
In conclusion, our study shows promising results for developing deep learning-based models as PDE solvers for real-world applications. Beyond reducing computational complexity, models such as the FVINN offer the potential to solve inverse problems from $\text{H}_2\text{O}$-PET perfusion imaging data. Moreover, the integration of physics-based loss functions with an unsupervised framework holds potential for predicting more robust results while reducing the reliance on data, thereby supporting their application in medical context.

% Acknowledgments---Will not appear in anonymized version
\midlacknowledgments{Research funding for this project was provided by an industrial grant from Heartflow, Inc.}


\bibliography{midl24_325}
\appendix
\section{Dataset Generation}
The L1--L5 ranges were found empirically to represent the patient-specific LV volume ranges. The ranges in millimetres were [10, 20], [55, 85], [10, 20], [10, 20], [10, 20] for L1, L2, L3, L4 and L5, respectively. Uniform distributions were used within each range to sample new geometries. All the meshes were centred and aligned along the z-axis.
\begin{figure}[htbp]
\floatconts
  {fig:example2}
  {\caption{Example of meshes of the synthetic dataset.}}
  {\includegraphics[width=0.50\linewidth]{MIDL 2024 full paper/Figures/example_synth_midl.PNG}}\label{app_synth}
\end{figure}

\section{Supplementary materials for model architecture and training}
The node encoder and decoder were simple feed-forward neural networks consisting of two hidden layers with 64 neurons each. LeakyReLU activation functions, along with batch normalization layers, were applied after every GAT convolution and layer of the node encoder and decoder.

For every pooling block of the GATU-net, the coarse mesh connectivity was computed as follows: clusters of nodes were generated by assigning each node of the upper level to the closest node of the sub-sample. Then, edges were established between two sub-sample nodes if their clusters were connected by an edge in the upper level.

We employed the Adam optimizer with an initial learning rate of 0.001 and a 0.99 exponential decay. Batches of 8 meshes were fed to the network during training for 400 epochs. The training lasted two days using two GPUs.

\section{Supplementary visual results}
\begin{figure}[htbp]
\floatconts
  {fig:example3}
  {\caption{Comparison of the FVINN and SM results with the FVM ground truth on one case from the validation dataset.}}
  {\includegraphics[width=.65\linewidth]{MIDL 2024 full paper/Figures/midl_revised_synth.PNG}}\label{results_fig2}
\end{figure}

\begin{figure}[htbp]
\floatconts
  {fig:example4}
  {\caption{Comparison of the FVINN and SM results with the FVM ground truth on one case from the test dataset.}}
  {\includegraphics[width=0.65\linewidth]{MIDL 2024 full paper/Figures/midl_revised_patient2.PNG}}\label{results_fig3}
\end{figure}
\end{document}