\documentclass{midl} % Include author names
%\documentclass[anon]{midl} % Anonymized submission

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution

\usepackage{graphicx}
\usepackage{multirow}
\usepackage{siunitx}
\usepackage[]{placeins}
\usepackage{amsmath}
\usepackage{chngcntr}

\jmlryear{2021}
\jmlrworkshop{Full Paper -- MIDL 2021}

\title[Residual learning for 3D motion corrected quantitative MRI]{Residual learning for 3D motion corrected quantitative MRI: Robust clinical T1, T2 and proton density mapping}


\midlauthor{\Name{Carolin M. Pirkl\nametag{$^{1,2}$}}\Email{carolin.pirkl@tum.de}\\
\Name{Matteo Cencini\nametag{$^{3,4}$}} \Email{matteo.cencini@gmail.com}\\
\Name{Jan W. Kurzawski\nametag{$^{5}$}} \Email{jan.kurzawski@gmail.com}\\
\Name{Diana Waldmannstetter\nametag{$^{1}$}} \Email{diana.waldmannstetter@tum.de}\\
\Name{Hongwei Li\nametag{$^{1,6}$}} \Email{hongwei.li@tum.de}\\
\Name{Anjany Sekuboyina\nametag{$^{1,7}$}} \Email{anjany.sekuboyina@tum.de}\\
\Name{Sebastian Endt\nametag{$^{1,2}$}} \Email{sebastian.endt@tum.de}\\
\Name{Luca Peretti\nametag{$^{8,3,4}$}} \Email{lucaperetti.lp@gmail.com}\\
\Name{Graziella Donatelli\nametag{$^{9,4}$}} \Email{graziella$\_$donatelli@hotmail.com}\\
\Name{Rosa Pasquariello\nametag{$^{3}$}} \Email{rosa.pasquariello@fsm.unipi.it}\\
\Name{Mauro Costagli\nametag{$^{3,10}$}} \Email{mauro.costagli@fsm.unipi.it}\\
\Name{Guido Buonincontri\nametag{$^{3,4}$}} \Email{guido.buonincontri@gmail.com}\\
\Name{Michela Tosetti\nametag{$^{3,4}$}} \Email{michela.tosetti@fsm.unipi.it}\\
\Name{Marion I. Menzel\midljointauthortext{Contributed equally}\nametag{$^{2,11}$}} \Email{menzel@ge.com}\\
\Name{Bjoern H. Menze\midlotherjointauthor\nametag{$^{1,6}$}} \Email{bjoern.menze@uzh.ch}\\
    \addr $^{1}$Department of Computer Science, Technical University of Munich, Garching, Germany\\ 
	\addr $^{2}$GE Healthcare, Munich, Germany\\ 
	\addr $^{3}$IRCCS Fondazione Stella Maris, Pisa, Italy\\ 
	\addr $^{4}$Fondazione Imago7, Pisa, Italy\\ 
	\addr $^{5}$Pisa Division, National Institute for Nuclear Physics (INFN), Pisa, Italy\\ 
	\addr $^{6}$Department of Quantitative Biomedicine, University of Zurich, Zurich, Switzerland\\
	\addr $^{7}$Department of Neuroradiology, Klinikum rechts der Isar, Munich, Germany\\ 
	\addr $^{8}$Department of Computer Science, University of Pisa, Pisa, Italy\\ 
	\addr $^{9}$Azienda Ospedaliero-Universitaria Pisana, Pisa, Italy\\ 
	\addr $^{10}$Department of Neuroscience, Rehabilitation, Ophthalmology, Genetics, Maternal and Child Sciences (DINOGMI), University of Genova, Genova, Italy\\ 
	\addr $^{11}$Department of Physics, Technical University of Munich, Garching, Germany
}

\begin{document}

\maketitle

\begin{abstract}
Subject motion is one of the major challenges in clinical routine MR imaging. Despite ongoing research, motion correction has remained a complex problem without a universal solution. In advanced quantitative MR techniques, such as MR Fingerprinting, motion does not only affect a single image, like in single-contrast MRI, but disrupts the entire temporal evolution of the magnetization and causes parameter quantification errors due to a mismatch between the acquired and simulated signals. In this work, we present a deep learning-empowered retrospective motion correction for rapid 3D whole-brain multiparametric MRI based on Quantitative Transient-state Imaging (QTI). We propose a patch-based 3D multiscale convolutional neural network (CNN) that learns the residual error, i.e. the error remaining after initial navigator-based correction, between motion-affected quantitative T1, T2 and proton density maps and their motion-free counterparts. For efficient model training despite limited data availability, we propose a physics-informed simulation to apply continuous motion patterns to motion-free data. We evaluate the performance of the residual CNN on 1.5T and 3T MRI data of ten healthy volunteers. We analyze the generalizability of the model when applied to real clinical cases, including pediatric and adult patients with large brain lesions. Our study demonstrates that image quality can be significantly improved after correcting for subject motion. This has important implications in clinical setups where large amounts of motion-affected data must be discarded.
\end{abstract}

\begin{keywords}
3D multiparametric MRI, motion correction, deep learning, residual learning, multiscale CNN
\end{keywords}

\section{Introduction}
Motion robustness is a key feature for routine imaging in general. It is especially crucial for pediatric or elderly patients and for patients affected by diseases that prevent them from maintaining a still position throughout the acquisition. It is therefore a clinical priority to develop techniques that effectively resolve motion artifacts. As their appearance highly depends on the individual acquisition, e.g. the used readout schemes, the targeted clinical question, the condition of the patient and the body region to be imaged, there is no universal solution. Consequently, a number of conceptually different correction methods have been presented, ranging from prospective to retrospective, image-based methods \cite{zaitsev_motion_2015, godenschweger_motion_2016}. 

Fast 3D multiparametric MRI techniques based on transient-state MRI \cite{ma_fast_2018, gomez_rapid_2020} are excellent candidates for clinical practice, as they offer high quantification accuracy together with high repeatability and reproducibility \cite{buonincontri_three_2021}. Their reduced scan times enable improved motion robustness compared to conventional quantitative MRI with lengthy scanning protocols. While motion artifacts are generally reduced in these fast acquisition schemes, they are not entirely immune to motion. In fact, subject movements do not only affect a single time point of the acquisition, but corrupt the entire temporal magnetization evolution, captured by the acquired k-t space, and therewith subsequent parameter estimation. While previous work on motion correction for transient-state imaging has mainly concentrated on 2D acquisition schemes \cite{mehta_image_2018, cruz_rigid_2019, xu_rigid_2019}, there is only little work on motion correction for 3D multiparametric MRI. 

\citet{kurzawski_retrospective_2020} presented a navigator-based retrospective rigid motion correction for a 3D Quantitative Transient-state Imaging (QTI) technique based on a segmented readout scheme to acquire the k-t-space. Their proposed motion correction strategy relied upon self-navigators embedded within each acquisition segment, which enabled the recovery of a critical amount of the underlying parameter information degraded due to subject motion occurring between consecutive segments. Despite significant improvement of the image quality, resulting quantitative T1, T2 and proton density (PD) maps showed remaining artifacts originating from subject movements on a time-scale below the temporal resolution of the self-navigators of \SI{7}{s}. 

Here, we propose a deep learning (DL) method to resolve artifacts due to continuous motion that are not captured by the navigator-based approach. Our work is motivated by recent advances of DL at the interface between MR physics and medical computer vision that have been demonstrated to make MR imaging more robust to subject motion \cite{usman_retrospective_2020, oksuz_brain_2021, gong_deep_2021, pawar_motion_2018, miao_cnn_2016, hou_3-d_2018, hou_computing_2018}, e.g. by directly removing motion-induced artifacts or by estimating the underlying motion parameters for subsequent realignment. We adopt the concept of residual learning \cite{zhang_beyond_2017, jin_deep_2017, liu_motion_2020} and propose a 3D multiscale residual convolutional neural network (CNN) to improve on the previously presented navigator-based motion correction, presenting the following contributions: (1) We propose a 3D multiscale residual CNN to learn the non-linear relationship between the motion-corrupted T1, T2 and PD maps and the residual error maps, i.e. the deviation from the motion-free counterpart that remained after navigator-based correction \cite{kurzawski_retrospective_2020}. (2) We rely on a 3D CNN architecture that captures the intrinsically 3D nature of the subject movements together with the 3D MR acquisition scheme to efficiently resolve motion artifacts and infer high-quality T1, T2 and PD maps. (3) We present a physics-informed simulation framework to retrospectively apply realistic continuous motion patterns to motion-free datasets, enabling a supervised training setup without the necessity for large amounts of paired acquisitions or fully sampled data. (4) We evaluate the performance of the proposed method on 1.5T and 3T MRI data of ten healthy volunteers who underwent QTI imaging twice: the first time they kept their head as still as possible, and the second time they voluntarily moved their heads during acquisition. 
We also apply our method to clinical cases, including pediatric and adult patients with large brain lesions, to demonstrate its generalizability and capability to improve motion-affected datasets in cases with pathological findings.
%\FloatBarrier
\section{Material and methods}
\subsection{Residual learning for retrospective 3D motion correction}
We propose a residual learning technique to resolve artifacts that could not be corrected by the navigator-based method of \citet{kurzawski_retrospective_2020}, which is recapped below to present a more complete picture. We demonstrate our method with its key components, the residual CNN model and the physics-informed motion simulation, on data acquired with 3D QTI.
\paragraph{Navigator-based rigid motion correction}
The navigator-based correction identifies motion-induced misalignment in the acquired image-time series. To do so, the full k-t-space data is subdivided into subsequently acquired segments, from which we reconstruct equal-contrast navigator images. These navigators are then aligned to the first baseline navigator to estimate the spatial mismatch and to subsequently correct the k-t-space data accordingly. The corrected k-t-space data is then fed into the reconstruction pipeline as described in \ref{recon_pipeline} to yield the motion-corrected parametric maps. The massive spatial undersampling of the fast 3D acquisition scheme limits the resolvable motion time-scale to \SI{7}{s} as the lower SNR in temporally higher resolved self-navigators hampers a correct realignment.  
\paragraph{Residual learning CNN architecture and training}{\label{CNN_training}}
We propose a 3D patch-based multiscale residual CNN to learn the deviation of the motion-corrupted parameter maps from the high-quality, motion-free reference. Learning a residual mapping has been shown to be more effective than a direct mapping \cite{zhang_beyond_2017, tamada_motion_2019, ulas_direct_2018} as the residual maps capture a sparser representation of the artifacts.

The proposed CNN architecture\footnote{Code available at \url{https://github.com/CarolinMA/MRP_MoCo}} (\figureref{fig:CNN}) receives a 3D input patch ($24\times24\times24$ voxels) of the quantitative maps degraded by motion artifacts that remained after navigator-based correction. The quantitative T1, T2 and PD parameters are reflected by three input channels. The model then spans out in a local and a global pathway. This dual pathway structure was shown to efficiently account for spatial image information on multiple scales \cite{kamnitsas_efficient_2017, kim_improving_2017, ulas_direct_2018}: The local path (with 3D convolutions and ReLU activations) processes more localized, spatially adjacent features. The dilated 3D convolutional layers in the global path make it possible to gather more global, contextual information due to an increased receptive field (\tableref{tab:ablation}). Local and global features are concatenated and fed into a block of fully-connected layers, efficiently processing the decoded spatial relationships. To maintain the spatial dimensions throughout the network, the fully-connected layers are implemented as convolutional layers with $1\times1\times1$ kernels, to eventually output the residual maps, i.e. the difference of the navigator-corrected and the motion-free maps. 
We trained the residual CNN based on in-vivo 3D QTI data from ten healthy volunteers. For each subject, two datasets were acquired with the instruction to hold still for the first scan and to rapidly move the head during the second session as detailed by \citet{kurzawski_retrospective_2020}. All subjects were scanned on a 1.5T and a 3T scanner (HDxt and MR750 scanners, GE Healthcare, Milwaukee, WI) with the sequence parameters described in \ref{recon_pipeline}. For a supervised training setup, we only considered the motion-free data and created a database of artificially motion-corrupted 3D QTI data as described below. The in-vivo data with real motion was only used for testing. The DL-model was then trained to learn the residual maps between the parametric maps with simulated motion artifacts and the motion-free counterpart. The retrospectively corrupted data of seven subjects were used for model training and two subjects' data for validation, with $10{,}000/3{,}000$ randomly sampled 3D patches, respectively. The remaining subject data was held back for testing. We trained the residual CNN for a maximum of $100$ epochs with a batch size of $20$, using Adam optimization to minimize the L1 loss function with a learning rate of $10^{-4}$, keeping the model state with the best validation loss. 
\begin{figure} 
\floatconts
  {fig:CNN}
  {\caption{The multiscale CNN receives the parametric maps after navigator-based correction as input and outputs the residual maps.}}
  {\includegraphics[width=\linewidth]{figures/CNN_rebuttal.pdf}}
    
\end{figure}

\paragraph{Physics-informed simulation of motion-corrupted data}
To allow the proposed DL-model to learn how diverse motion patterns propagate to the inferred multiparametric maps, we simulated motion-corrupted data from the motion-free 3D QTI acquisitions. To do so, we applied continuous rigid motion patterns, i.e. translation and rotation, to the individual time frames of the acquired k-t-space. To imitate continuous head movements, we continuously varied the misalignment of consecutive k-t-space time points. We achieved a realistic artifact appearance as we applied ranges of the artificial translation and rotation patterns as experimentally observed by \citet{kurzawski_retrospective_2020}, i.e. translations $\SI{-20}{\mm}\leq\Delta x,\Delta y,\Delta z \leq \SI{20}{\mm}$ and rotations $\SI{-20}{\degree}\leq\Delta\mathrm{roll},\Delta\mathrm{pitch},\Delta\mathrm{yaw} \leq \SI{20}{\degree}$. We then performed a navigator-based correction to mitigate artifacts due to inter-segment movements in the first place. The thereby obtained parametric maps with remaining artifacts due to intra-segment movements, illustrated in \figureref{fig:3T_simulation}, were the input to the CNN. 

\subsection{Data acquisition and processing}
\paragraph{In-vivo data}
All in-vivo data presented in this study were acquired in accordance with the 1964 Helsinki declaration and its later amendments or comparable ethical standards. Approval was granted by the local ethics boards. 
\paragraph{MR acquisition and reconstruction}\label{recon_pipeline}
In-vivo data from ten healthy volunteers, a pediatric and an adult patient were acquired with an inversion-prepared 3D SSFP QTI implementation \cite{gomez_rapid_2020} with variable flip angle ramps, TI=\SI{18}{\ms}, TE=\SI{0.5}{\ms} and TR=\SI{8.5}{\ms}. The acquisition of transient state image series relies on in-plane spirals with spherical rotations to sample the k-t-space (=3D+time, i.e. $225\times225\times225$\SI{}{\mm^3} field of view with $1.125\times1.125\times1.125$\SI{}{\mm^3} isotropic voxel size and 880 time points). By design, the acquisition is built by consecutive segments (n=56) of the same excitation scheme, each with a duration of \SI{7}{s}, and iteratively fills the k-t-space by randomly permuting in-plane and spherical rotation angles of the readouts. The k-t-space data are then reconstructed using zero-filling, followed by projection onto a low rank subspace, gridding onto a Cartesian grid, 3D inverse fast Fourier transform and subsequent coil sensitivity estimation and combination. Quantitative maps of T1, T2 and PD are estimated by matching the reconstructed subspace images to a pre-computed dictionary with granularity and parameter ranges as specified in \citet{kurzawski_retrospective_2020}.

\subsection{Experimental setup}
\paragraph{Cross-validation experiment on healthy volunteer data}
We evaluated the performance of the residual CNN, trained solely on simulated motion, in a ten-fold cross-validation experiment by repeating the training setup, as described in \ref{CNN_training}, ten times. Following this leave-one-out scheme, the data of the held-back volunteer with real motion after initial navigator-based correction was used for model testing in each instance. At test time, we divided the parametric maps into 3D patches of $24\times24\times24$ voxels, shifted along all three dimensions with a step size of 4 voxels, for patch-wise CNN processing. Predicted residual error patches are added to the motion-corrupted input and averaged to eventually yield the full 3D motion-corrected T1, T2 and PD maps. We ran the cross-validation experiment for 1.5T and 3T data individually. The obtained quantitative maps were compared to the co-registered motion-free reference using the voxel-wise concordance correlation coefficient (CCC) and coefficient of determination ($\mathrm{R^2}$) as performance metrics. 

\paragraph{Generalization analysis on clinical quantitative MRI} 
For further performance analysis, we applied the best-performing model in the cross-validation experiment to clinical 3D QTI scans of a pediatric (8-year-old) patient with subtotal agenesis of the corpus callosum, scanned at 1.5T, and an adult patient with glioblastoma, scanned at 3T. 

\section{Results and discussion}
\paragraph{Cross-validation experiment on healthy volunteer data}
The proposed 3D residual CNN, trained on purely artificially corrupted data, provided T1, T2 and PD maps with an image quality comparable to the motion-free reference maps. This is observed when visually comparing the quantitative maps of a representative test case of the cross-validation experiment for both the 1.5T (\figureref{fig:1_5T_axial_results}, \figureref{fig:1_5T_results_2}) and 3T data (\figureref{fig:3T_axial_results}, \figureref{fig:3T_results_2}). 

Quantitative evaluation of the cross-validation experiment by means of the CCC and $\mathrm{R^2}$  (\tableref{tab:quant_results}) substantiates the qualitative finding and reflects the improvement achieved by the navigator-based realignment and the subsequent residual CNN. For both 1.5T and 3T data, quantitative measures indicate that the residual CNN further improved the outcome of the navigator-based correction for all parametric maps. As already visually observed, mean CCC and $\mathrm{R^2}$ values reflect the higher impact of the DL-model on T2 and PD than on T1 maps. Furthermore, \tableref{tab:quant_results} shows that after CNN-based motion-correction, we achieved better correspondence with the motion-free reference for the 3T data than for the 1.5T scans. However, from \figureref{fig:1_5T_axial_results} and \figureref{fig:1_5T_results_2} we observe that the residual CNN does not only remove motion-induced artifacts, but additionally suppresses noise-like aliasing. This effect is more pronounced for the 1.5T data with intrinsically lower SNR and image quality than for a 3T field-strength with higher SNR. The additional denoising results in parametric maps with image qualities that go beyond the motion-free reference, which in turn explains the lower overall agreement observed with the motion-free reference acquisitions.  

The cross-validation experiment shows that the combination of the residual CNN with the navigator-based correction efficiently resolves head movements on two time-scales: 1) The self-navigator-based estimation and subsequent realignment of motion-induced displacements in the k-t-space has proven to recover a significant amount of the parameter information corrupted by abrupt inter-segment movements. 2) With the 3D residual multiscale CNN, we reliably resolve residual artifacts and phase inconsistencies due to continuous intra-segment movements that are unresolved by the limited temporal resolution of the self-navigators, providing high-quality and artifact-free parameter maps. 

The proposed physics-informed motion simulation allows us to retrospectively apply continuous motion directly to the k-t-space and propagate it through the reconstruction pipeline. We make implicit use of the forward encoding operator from k-space to parameter-space to generate self-contained, paired training data for supervised model training. Thus, we present an efficient training scheme that does not require large amounts of motion and motion-free data pairs to be acquired. Also, in contrast to other physics-guided methods, we do not rely on fully sampled data to be used as reference for supervised network training. This is of particular practical relevance as the acquisition of fully-sampled 3D+time QTI data is infeasible due to prohibitively long scan times \cite{yaman_self-supervised_2020}.  

The 3D patch-based CNN implementation allowed us to fully capture the spatial correlations that inevitably arise from 1) the subject movements in the 3D space, which cause spatially correlated image artifacts, and 2) the 3D design of the MR acquisition with spatial undersampling and multicoil imaging that provoke a mixing of signal components. With the adaption of the residual learning concept, we transferred the non-linear disentangling of the primary parameter information and the secondary image artifacts into the sparse representation of the residual maps. 
%3T+1.5T axial views
\begin{figure} 
\floatconts
  {fig:1_5T_3T_results}
  {\caption{Proposed motion correction for representative volunteer scans at 1.5T (a) and 3T (b) (axial views). T1, T2 and PD maps show pronounced motion-induced artifacts (\textit{Motion}) compared to the \textit{No motion reference}. Remaining artifacts after \textit{Navigator}-based correction are resolved by the residual CNN (\textit{Navigator + residual CNN}), providing high-quality parameter maps.}}
  {\subfigure[1.5T][t]{\label{fig:1_5T_axial_results}%
      \includegraphics[width=0.43\linewidth]{figures/FSM_1_5/009_meas_AXIAL_test_009_epoch_best_6iter.pdf}}
    \hspace{7mm}%\hfill%
    \subfigure[3T][t]{\label{fig:3T_axial_results}%
      \includegraphics[width=0.43\linewidth]{figures/AUOP_3_0/008_meas_AXIAL_test_008_epoch_best_6iter.pdf}}%
  }
    
\end{figure}
\paragraph{Generalization analysis on clinical quantitative MRI} 
For the clinical test cases at 1.5T and 3T, \figureref{fig:clinical_data},  \figureref{fig:pediatric_data_2} and \figureref{fig:adult_data_2} indicate that the residual CNN yields high-quality, artifact-free parametric maps. In both cases, the navigator-based approach did not improve image quality of the parametric maps as much as seen for the volunteer data (\figureref{fig:1_5T_3T_results}, \figureref{fig:1_5T_results_2}, \figureref{fig:3T_results_2}). This is attributed to the fact that there were no pronounced abrupt movements but the patients moved their heads continuously, i.e. on a faster time-scale than can be resolved by the self-navigators. The patient datasets also showcase the generalization capabilities of the residual CNN. We observed reliable motion-correction results in the presence of pathological findings in both adult and pediatric patients whose brain anatomy differs from that of healthy adults in the training data.
%clinical data axial views
\begin{figure} 
\floatconts
  {fig:clinical_data}
  {\caption{Proposed motion correction for representative clinical test cases (axial views). (a) Pediatric patient with subtotal agenesis of the corpus callosum and inter-hemispheric cyst, scanned at 1.5T. (b) Adult patient with glioblastoma in the temporo-parietal region with cystic-necrotic and hemorrhagic components, and marked perilesional edema, scanned at 3T. For both patients, the residual CNN improved image quality of all parametric maps (\textit{Navigator + residual CNN}), mitigating image artifacts attributed to head movements during scan sessions.}} %\textit{Motion} T2 and PD maps indicate higher sensitivity to motion-induced phase errors than T1 maps.}}
  {%
    \subfigure[Pediatric patient (1.5T)][t]{\label{fig:pediatric_axial_results}%
      \includegraphics[clip, trim=0cm 0.2cm 0cm 0.7cm, width=0.37\linewidth]{figures/FSM_1_5/15Tax.pdf}}
    \hspace{7mm}
    \subfigure[Adult patient (3T)][t]{\label{fig:adult_axial_results}%
      \includegraphics[clip, trim=0cm 0.2cm 0cm 0.7cm, width=0.37\linewidth]{figures/AUOP_3_0/3Tax.pdf}}%
  }
  
\end{figure}

\paragraph{Limitations and outlook} Although the proposed multiscale CNN has shown convincing efficiency and functionality in this proof of concept, more advanced DL architectures might have the potential to improve on our baseline. We also plan to further investigate the intrinsic denoising capabilities of our method as revealed by the 1.5T experiments. As suggested by the clinical evaluation, patient data seemed to be affected by continuous head movements without any abrupt position changes. It is hence subject to our current and follow-up work to investigate what motion scales can be resolved by the residual CNN when applied as a stand-alone tool. We also plan to explore potential application scenarios of the presented DL-empowered motion correction in other body regions and motion patterns.  
\section{Conclusion}
In this work, we propose a 3D multiscale residual CNN for retrospective motion correction in fast 3D whole-brain multiparametric MRI. We present a physics-informed motion simulation, allowing for efficient model training without the requirement of large amounts of paired data. The 3D CNN architecture captures the intrinsically 3D relationships of the motion-induced corruptions to reliably recover high-quality T1, T2 and PD maps. Taking advantage of the sparsity in the residual maps, we can substantially improve the quality of quantitative maps suffering from subject movement---in case of healthy volunteers but also for pediatric and adult patients with pathological findings. This is particularly important in clinical setups where scans frequently have to be repeated, possibly under sedation, because of motion artifacts. With fast scanning time and higher motion-immunity, quantitative MRI may become a standard for clinical practice. 
% Acknowledgments---Will not appear in anonymized version
\midlacknowledgments{This project receives financial support from the Italian Ministry of Health and the Tuscany Region under the project Ricerca Finalizzata, grant No. GR-2016-02361693, Deutsche Forschungsgemeinschaft (DFG) through Research Training Group GRK 2274, TUM International Graduate School of Science and Engineering (IGSSE), GSC 81, and the European Union’s Horizon 2020 research and innovation programme, grant agreement No. 952172.}

\FloatBarrier
\bibliography{pirkl21.bib}

\pagebreak

\appendix
\counterwithin{figure}{section}
\counterwithin{table}{section}
\FloatBarrier
\section{Supplementary figures and tables}
\subsection{Physics-informed simulation of motion-corrupted data}
%3T simulation
\begin{figure}[ht] 
\floatconts
    {fig:3T_simulation}
    {\caption{Physics-informed motion simulation illustrated for a representative volunteer dataset acquired at 3T. Continuous rigid, i.e. translation and rotation, motion patterns are applied to the individual time frames of the acquired motion-free k-t-space data (\textit{No motion reference}), imitating continuous head movements (\textit{Artificial motion}). Navigator-based motion correction is then applied to mitigate artifacts due to inter-segment movements in the first place (\textit{Navigator}). The obtained parametric maps with remaining artifacts due to continuous intra-segment movements are the input to the residual CNN.}}
    {%
    \subfigure[Axial views][t]{\includegraphics[width=0.4\textwidth]{figures/AUOP_3_0/Simulated_motion/001_simulation_AXIAL_test_001_epoch_best_6iter.pdf}}%
    \hspace{10mm}
    \subfigure[Coronal views][t]{\includegraphics[width=0.4\textwidth]{figures/AUOP_3_0/Simulated_motion/001_simulation_CORONAL_test_001_epoch_best_6iter.pdf}}
    \hfill
    \subfigure[Sagittal views][t]{\includegraphics[width=0.4\textwidth]{figures/AUOP_3_0/Simulated_motion/001_simulation_SAGITAL_test_001_epoch_best_6iter.pdf}}%
  }
\end{figure}

\clearpage
\FloatBarrier
\subsection{Cross-validation experiment on healthy volunteer data}
% 1.5T Results
\begin{figure}[ht]
\floatconts
    {fig:1_5T_results_2}
    {\caption{Proposed motion correction for a representative volunteer test dataset acquired at 1.5T (coronal and sagittal views). }}
    {%
    \subfigure[Coronal views][t]{\includegraphics[width=0.49\textwidth]{figures/FSM_1_5/009_meas_CORONAL_test_009_epoch_best_6iter.pdf}}
    \hfill
    \subfigure[Sagittal views][t]{\includegraphics[width=0.49\textwidth]{figures/FSM_1_5/009_meas_SAGITAL_test_009_epoch_best_6iter.pdf}}%
  }
\end{figure}

% 3T Results
\begin{figure}[ht]
\floatconts
    {fig:3T_results_2}
    {\caption{Proposed motion correction for a representative volunteer test dataset acquired at 3T (coronal and sagittal views). }}
    {%
    \subfigure[Coronal views][t]{\includegraphics[width=0.49\textwidth]{figures/AUOP_3_0/008_meas_CORONAL_test_008_epoch_best_6iter.pdf}}
    \hfill
    \subfigure[Sagittal views][t]{\includegraphics[width=0.49\textwidth]{figures/AUOP_3_0/008_meas_SAGITAL_test_008_epoch_best_6iter.pdf}}%
  }
\end{figure}

%3T+1.5T quantitative analysis table
\begin{table}
    \centering
    \small
    \floatconts
    {tab:quant_results}
    {\caption{Quantitative evaluation of the cross-validation experiment for motion-corrupted, measured volunteer data summarized by concordance correlation coefficient (CCC) and coefficient of determination ($\mathrm{R^2}$) metrics between the result of the respective correction method, i.e. only navigator-based correction (\textit{Navigator}) and navigator-based correction with subsequent residual CNN-based correction (\textit{Navigator + residual CNN}), and the motion-free parameter maps as reference.}}
    {}
    {%
    \begin{tabular}{l|l|ccc|ccc}
    \textit{\textbf{}}         &                  & \multicolumn{3}{c|}{\textbf{1.5T}} & \multicolumn{3}{c}{\textbf{3T}} \\ \hline
    \textbf{Correction}        & \textbf{Metrics} & T1         & T2        & PD        & T1        & T2        & PD       \\ \hline
    No correction              &                  & 0.48       & 0.38      & 0.48      & 0.68      & 0.55      & 0.44     \\
    Navigator & CCC              & 0.72       & 0.61      & 0.61      & 0.82      & 0.75      & 0.60     \\
    Navigator + residual CNN     &                  & \textbf{0.78}       & \textbf{0.71}      & \textbf{0.71}      & \textbf{0.87}      & \textbf{0.83}      & \textbf{0.83}     \\ \hline
    No correction              &                  & 0.51       & 0.38      & 0.5       & 0.68      & 0.56      & 0.78     \\
    Navigator & $\mathrm{R^2}$   & 0.72       & 0.61      & 0.63      & 0.81      & 0.76      & 0.87     \\
    Navigator + residual CNN     &                  & \textbf{0.79}       & \textbf{0.72}      & \textbf{0.76}      & \textbf{0.87}      & \textbf{0.84}      & \textbf{0.91}    
    \end{tabular}%
    }
\end{table}

\FloatBarrier
\subsection{Generalization analysis on clinical quantitative MRI} 
% pediatric results
\begin{figure}[ht]
\floatconts
    {fig:pediatric_data_2}
    {\caption{Proposed motion correction for the pediatric case acquired at 1.5T (coronal and sagittal views). }}
    {%
    \subfigure[Coronal views][t]{\includegraphics[width=0.37\textwidth]{figures/FSM_1_5/15Tcor.pdf}}
    \hspace{10mm}
    \subfigure[Sagittal views][t]{\includegraphics[width=0.37\textwidth]{figures/FSM_1_5/15Tsag.pdf}}%
  }
\end{figure}

% adult results
\begin{figure}[ht]
\floatconts
    {fig:adult_data_2}
    {\caption{Proposed motion correction for the adult patient's dataset acquired at 3T (coronal and sagittal views). }}
    {%
    \subfigure[Coronal views][t]{\includegraphics[width=0.37\textwidth]{figures/AUOP_3_0/3Tcor.pdf}}
    \hspace{10mm}
    \subfigure[Sagittal views][t]{\includegraphics[width=0.37\textwidth]{figures/AUOP_3_0/3Tsag.pdf}}%
  }
\end{figure}

\FloatBarrier
\subsection{Ablation study} 

\begin{table}[ht] 
    \centering
    \small
    \floatconts
    {tab:ablation}
    {\caption{Cross-validation experiment for quantitative comparison of the proposed \textit{multiscale CNN} with global and local paths and a \textit{singlescale CNN} comprising only two local paths, both applied after initial \textit{Navigator}-based correction. Motion-correction performance is again summarized by concordance correlation coefficient (CCC) and coefficient of determination ($\mathrm{R^2}$) between the motion-corrected and the motion-free parameter maps.}}
    {}
    {%
    \begin{tabular}{l|l|ccc|ccc}
    \multicolumn{1}{l|}{\textbf{}}                                                                 & \textbf{}            & \textbf{}     & \textbf{1.5T} & \textbf{}     & \textbf{}     & \textbf{3T}   & \textbf{}     \\ \hline
    \multicolumn{1}{l|}{\textbf{CNN implementation}}                                               & \textbf{Metrics}     & \textbf{T1}   & \textbf{T2}   & \textbf{PD}   & \textbf{T1}   & \textbf{T2}   & \textbf{PD}   \\ \hline
    \begin{tabular}[l]{@{}l@{}}Navigator + multiscale CNN \\ (global + local path)\end{tabular}    & \multirow{3}{*}{CCC} & \textbf{0.78} & \textbf{0.71} & \textbf{0.71} & \textbf{0.87} & \textbf{0.83} & \textbf{0.83} \\ \cline{1-1} \cline{3-8} 
    \begin{tabular}[l]{@{}l@{}}Navigator + singlescale CNN \\ (2 local paths)\end{tabular}         &                      & 0.75          & 0.65          & 0.63          & 0.85          & 0.78          & 0.77          \\ \hline
    \begin{tabular}[l]{@{}l@{}}Navigator + multiscale CNN \\ (global + local path)\end{tabular}    & \multirow{3}{*}{$\mathrm{R^2}$}  & \textbf{0.79} & \textbf{0.72} & \textbf{0.76} & \textbf{0.87} & \textbf{0.84} & \textbf{0.91} \\ \cline{1-1} \cline{3-8} 
    \begin{tabular}[l]{@{}l@{}}Navigator + singlescale CNN \\ (2 local paths)\end{tabular}         &                      & 0.75          & 0.66          & 0.68          & 0.86          & 0.8           & 0.87         
    \end{tabular}%
    }
\end{table}

\end{document}