\documentclass{midl} % Include author names

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution
\input{extras}
\usepackage{mwe} % to get dummy images
\jmlrvolume{-- 64}
\jmlryear{2025}
\jmlrworkshop{Full Paper -- MIDL 2025}
\editors{Accepted for publication at MIDL 2025}

\title[Joint Supervised and Self-supervised Learning for MRI Reconstruction]{Joint Supervised and Self-supervised Learning for MRI Reconstruction}


\midlauthor{\Name{George Yiasemis \nametag{$^{1,2}$}} \orcid{0000-0002-1348-8987} \Email{g.yiasemis@nki.nl} \\
\Name{Nikita Moriakov \nametag{$^{1,2}$}} \Email{n.moriakov@nki.nl}\\
\Name{Clara I. S\'anchez \nametag{$^{2}$}} \Email{c.i.sanchezgutierrez@uva.nl}\\
\Name{Jan-Jakob Sonke \nametag{$^{1,2}$}} \Email{j.sonke@nki.nl}
\\
\Name{Jonas Teuwen \nametag{$^{1,2,3}$}} \Email{j.teuwen@nki.nl}\\
\addr $^{1}$ Netherlands Cancer Institute
\addr $^{2}$ University of Amsterdam
\addr $^{3}$ Radboud University Medical Center
}

\begin{document}

\maketitle

\vspace{-20pt}
\begin{abstract}
    Magnetic Resonance Imaging (MRI) is a crucial imaging modality, but its inherently slow acquisition process limits the ability to obtain fully-sampled $k$-space data in motion-prone anatomical regions. The absence of such fully-sampled datasets, which serve as ground truths, hinders the supervised training of deep learning (DL) models—currently the state-of-the-art approach for MRI reconstruction. While self-supervised learning (SSL) methods attempt to overcome this limitation by training solely on subsampled $k$-space data, their performance remains inferior to supervised learning (SL). We propose Joint Supervised and Self-supervised Learning (JSSL), a novel training approach designed to enhance MRI reconstruction quality in cases where fully-sampled $k$-space data is unavailable for the \emph{target} anatomy. JSSL jointly trains a model in an SSL setting using subsampled data from the target anatomy and in an SL manner using fully-sampled data from a \emph{proxy} dataset, where full sampling is feasible. We evaluate JSSL on two distinct combinations of target and proxy datasets, demonstrating substantial improvements over conventional SSL methods through both quantitative and qualitative results. Additionally, we provide practical ``rule-of-thumb'' guidelines for selecting training strategies in MRI reconstruction. Our code is available at \url{https://github.com/NKI-AI/direct}.
    % Magnetic Resonance Imaging (MRI) is a crucial modality, but its inherently slow acquisition process poses challenges in obtaining fully-sampled $k$-space data under motion. The lack of fully-sampled acquisitions, serving as ground truths, complicates the training of deep learning (DL) algorithms in a supervised manner.  To address this limitation, self-supervised learning (SSL) methods have emerged as a viable alternative, leveraging available subsampled $k$-space data to train neural networks for MRI reconstruction. Nevertheless, these approaches often fall short when compared to supervised learning (SL). We propose Joint Supervised and Self-supervised Learning (JSSL), a novel training approach for DL-based MRI reconstruction algorithms aimed at enhancing reconstruction quality in cases where target datasets containing fully-sampled $k$-space measurements are unavailable. JSSL operates by simultaneously training a model in a SSL setting, using subsampled data from the target dataset(s), and in a SL manner, utilizing proxy datasets with fully-sampled $k$-space data. We demonstrate JSSL's efficacy using two distinct combinations of target and proxy data. Quantitative and qualitative results showcase substantial improvements over conventional SSL methods. Furthermore, we provide ``rule-of-thumb" guidelines for training MRI reconstruction models. Our code is available at \url{https://github.com/NKI-AI/direct}.
\end{abstract}

\begin{keywords}
MRI Reconstruction, Inverse Problems, Deep Learning, SSL
% Self-supervised Learning
\end{keywords}

\vspace{-3pt}
%%------------------------------------------------SECTION 1 ------------------------------------------------%%
\section{Introduction}
\label{sec:sec1}

\vspace{-2pt}
Magnetic Resonance Imaging (MRI) is widely used in clinical practice for its ability to non-invasively visualize detailed anatomical and physiological information. However, MRI data acquisition, known as $k$-space sampling, is inherently slow, increasing costs and limiting its feasibility in real-time applications such as MRI-guided radiotherapy. To accelerate scans, $k$-space subsampling techniques are employed, but they lead to lower-quality reconstructed images \cite{zbontar2019fastmri}.

Deep learning (DL)-based MRI reconstruction techniques \cite{fessler2019optimization, Pal2022-ce} have emerged as state-of-the-art solutions, outperforming conventional methods like Parallel Imaging \cite{Pruessmann1999} and Compressed Sensing \cite{1580791}. These models are typically trained using supervised learning (SL), where retrospectively subsampled $k$-space data serve as inputs, and fully-sampled data act as ground truth. 

Nevertheless, acquiring fully-sampled datasets, essential for SL training, is often infeasible or prohibitively expensive in certain anatomical regions, such as abdomen, cardiac cine, chest, or prostate imaging, where motion complicates adherence to the Nyquist-Shannon sampling theorem \cite{1697831}. Consequently, recent approaches have adopted self-supervised learning (SSL) strategies, which train DL-based algorithms using only subsampled $k$-space data, circumventing the need for fully-sampled ground truth.

A key SSL-based approach, Self-Supervised Learning via Data Undersampling (SSDU) \cite{Yaman2020}, proposed training reconstruction models by partitioning available subsampled $k$-space into input and target subsets, predicting one from the other. Inspired by Noise2Self \cite{batson2019noise2self}, SSDU has since become a benchmark for SSL-based MRI reconstruction. Extensions include parallel networks \cite{Hu2021}, Noisier2Noise \cite{millard2023} applying double subsampling, and architectures like Siamese networks with dual-domain loss functions \cite{yan2023dc} and coil sensitivity estimation \cite{hu2024spicerselfsupervisedlearningmri, millard2023}. These methods primarily rely on partitioning subsampled data, making SSDU a representative approach.

Other works combine supervised and self-supervised strategies. Noise2Recon \cite{desai2021noise2recon} leverages both fully-sampled and subsampled data from the \emph{same} domain for reconstruction and denoising, while \cite{zhou2022dsformer} employs paired fully-sampled and subsampled data from \emph{different} modalities, both still requiring fully-sampled data. Test-time training \cite{darestani2022testtimetrainingclosenatural} adapts pre-trained models to data shifts at inference via SSL-driven data-consistency loss, effective for domain shifts but impractical for real-time tasks due to re-training overhead. (A detailed review is provided in Appendix \ref{sec:ap1}.)

In this work, we propose Joint Supervised and Self-supervised Learning (JSSL), a novel method for training DL-based MRI reconstruction models when ground-truth, fully-sampled $k$-space data are not available for SL in the target domain. JSSL leverages fully-sampled data from \textit{proxy} dataset(s) and subsampled data from \textit{target} dataset(s) to jointly train models using both supervised and self-supervised paradigms. This reflects a realistic setting in which, in addition to the subsampled data for the clinical scenario of interest, some fully-sampled data (e.g.,\ from public reconstruction challenges) are available. Our contributions include:
\begin{itemize}[leftmargin=*,topsep=0pt]
    \item JSSL presents the first approach to combine SL and SSL-based training in proxy and target organ domains within a unified pipeline for accelerated MRI reconstruction.
    \item We experimentally demonstrate that JSSL outperforms SSL-based MRI reconstruction approaches, with a specific focus on subsampled prostate and cardiac datasets.
    \item We offer practical ``rule-of-thumb'' guidelines for selecting appropriate training frameworks for accelerated MRI reconstruction models.
\end{itemize}


\section{Materials and Methods}
\label{sec:sec2}
\vspace{-5pt}
\subsection{Introduction to MRI Acquisition and Reconstruction}
\label{sec:sec2.1}
% 
A reconstructed image from subsampled multi-coil data \(\Tilde{\vec{y}}_{\mat{M}}\) can be obtained using the root-sum-of-squares (RSS) method: $\text{RSS} \circ \mathcal{F}^{-1}(\Tilde{\vec{y}}_{\mat{M}}) := \left(\sum_{k=1}^{n_c}|\mathcal{F}^{-1}(\Tilde{\vec{y}}_{\mat{M}}^k)|^2\right)^{1/2}$. 
However, to recover a higher-quality image, a reconstruction is formulated as an optimization problem:
% 
\vspace{-3pt}
\small
\begin{equation}
    \vec{x}^{*} =  \arg\min_{\vec{x}^{'}}\frac{1}{2}\left|\left| \mathcal{A}_{\vec{M}, \mat{S}}(\vec{x}^{'}) - \Tilde{\vec{y}}_{\mat{M}}\right|\right|_2^2 + \mathcal{G}(\vec{x}^{'}),
    \vspace{-3pt}
\label{eq:var_problem}
\end{equation}
\normalsize
% 
where \(\mathcal{G}\) represents an arbitrary regularization functional, and \(\mathcal{A}_{\vec{M}, \mat{S}}: \mathbb{C}^{n} \rightarrow \mathbb{C}^{n \times n_c}\) denotes the forward operator, which sequentially applies the coil expansion operator \(\mathcal{E}_\mat{S}\), the Fourier transform \(\mathcal{F}\), and a subsampling mask operator \(\mat{U}_{\mat{M}}\):
\vspace{-3pt}
% 
\small
\begin{equation}
        \mathcal{A}_{\vec{M}, \mat{S}} (\vec{x}) = \mat{U}_{\mat{M}} \circ \mathcal{F} \circ \mathcal{E}_\mat{S}(\vec{x}) = \Big\{\mat{U}_{\mat{M}} \circ \mathcal{F} (\mat{S}^k \vec{x}) \Big \}_{k=1}^{n_c}, \quad     \mat{U}_\mat{M}(\vec{y})_j = \vec{y}_j \cdot \mathbb{1}_\mat{M}(j).
        \vspace{-3pt}
\end{equation}
\normalsize
% 
Equation \eqref{eq:var_problem} is typically solved using unrolled optimization methods \cite{Yiasemis_2022_CVPR}, which leverage both the forward operator \(\mathcal{A}_{\vec{M}, \mat{S}}\) and its adjoint \(\mathcal{A}^{*}_{\vec{M}, \mat{S}}: \mathbb{C}^{n \times n_c} \rightarrow \mathbb{C}^{n}\). The adjoint operator reconstructs an image by applying subsampling via \(\mat{U}_{\mat{M}}\), inverse Fourier transform \(\mathcal{F}^{-1}\), and coil combination through the reduction operator \(\mathcal{R}_\mat{S}\):
% 
\vspace{-3pt}
\small
\begin{equation}
     \mathcal{A}^{*}_{\vec{M}, \mat{S}}(\vec{y}) = \mathcal{R}_\mat{S}\circ \mathcal{F}^{-1} \circ \mat{U}_{\mat{M}} (\vec{y}) =
     \sum_{k=1}^{n_c} {\mat{S}^k}^{*} \mathcal{F}^{-1} \left(\mat{U}_{\mat{M}} (\vec{y}^k) \right).
     \vspace{-3pt}
\end{equation}
\normalsize
% 
Deep learning (DL)-based approaches eliminate the need for explicit regularization terms, instead learning the reconstruction directly from data \cite{bioengineering10091012}.


\subsection{MRI Reconstruction with Supervised Learning}
\label{sec:subsec2.2}
In supervised learning settings, fully-sampled $k$-space datasets are assumed to be available. Let $\mathcal{D}^{\text{SL}}= \{\vec{y}^{(i)}\}_{i=1}^{N}$ represent such a dataset, which is retrospectively subsampled during training: $\Tilde{\vec{y}}_{\mat{M}_{i}}^{(i)} = \mat{U}_{\mat{M}_{i}} (\vec{y}^{(i)})$,  and let $f_{\boldsymbol{\psi}}$ denote a DL-based reconstruction network with parameters $\boldsymbol{\psi}$.  Note that the architecture of $f_{\boldsymbol{\psi}}$ can be configured to output image reconstructions, $k$-space data, or both, but here we assume that both input and output lie in the image domain. The objective in SL-based MRI reconstruction is to minimize the discrepancy between the fully-sampled and predicted $k$-spaces:
 % 
\footnotesize
\begin{equation}
\begin{gathered}
    \boldsymbol{\psi}^{*} = \arg\min_{\boldsymbol{\psi}} \frac{1}{N}\sum_{i=1}^{N} \mathcal{L}_{K}\left(\vec{y}^{(i)},\hat{\vec{y}}^{(i)}\right), \,\,
    \hat{\vec{y}}^{(i)} = \text{DC}_{\mat{M}_{i}} \left(\Tilde{\vec{y}}_{\mat{M}_{i}}^{(i)},  \mathcal{F} \circ \mathcal{E}_{\mat{S}}  \circ f_{\boldsymbol{\psi}}  \left( \Tilde{\Vec{x}}_{\mat{M}_{i}}^{(i)} \right) \right),\,\, \Tilde{\Vec{x}}_{\mat{M}_{i}}^{(i)} = \mathcal{A}^{*}_{\vec{M}_{i}, \mat{S}} \big(\vec{y}^{(i)}\big).
\end{gathered}
\end{equation}
\normalsize
% 
Here, DC$_\mat{M}$ denotes the data consistency operator which ensures that the reconstructed data remain consistent with the available measurements: $\text{DC}_{\mat{M}}(\vec{w}_1, \, \vec{w}_2) = \mat{U}_{\mat{M}}(\vec{w}_1) + \mat{U}_{\mat{M}^{c}}(\vec{w}_2)$.
% 
Alternatively, the loss can be defined to minimize the discrepancy between the fully-sampled and predicted images:
% 
\small
\begin{equation}
\begin{gathered}
    \boldsymbol{\psi}^{*} = \arg\min_{\boldsymbol{\psi}} \frac{1}{N}\sum_{i=1}^{N} \mathcal{L}_{\text{I}} \left( \vec{x}^{(i)}, \hat{\vec{x}}^{(i)} \right), \quad
    \vec{x}^{(i)} =   \text{RSS} \circ \mathcal{F}^{-1} \left( \vec{y}^{(i)}  \right),\quad \hat{\vec{x}}^{(i)} =  \left| f_{\boldsymbol{\psi}} \big( \Tilde{\Vec{x}}_{\mat{M}_{i}}^{(i)} \big) \right |,
    \vspace{-3pt}
\end{gathered}
\end{equation}
\normalsize
% 
where $\mathcal{L}_{K}$ and $\mathcal{L}_{\text{I}}$ denote arbitrary frequency- and image-domain loss functions, respectively.  Although effective, SL methods depend on fully-sampled data, which may not always be available.


\subsection{MRI Reconstruction with Self-supervised Learning}
\label{sec:subsec2.3}

When fully-sampled $k$-space data are unavailable, DL models can be trained using SSL. Let $\mathcal{D}^{\text{SSL}} = \big\{\Tilde{\vec{y}}_{\mat{M}_{i}}^{(i)}\big\}_{i=1}^{N}$ represent a dataset of subsampled acquisitions, where each instance $\Tilde{\vec{y}}_{\mat{M}_{i}}^{(i)}$ is sampled from a set $\mat{M}_{i}$.  In SSL, training involves partitioning the acquired subsampled measurements \cite{Yaman2020}. For each sample $\Tilde{\vec{y}}_{\mat{M}_{i}}^{(i)}$, the sampling pattern $\mat{M}_{i}$ is divided into two disjoint subsets, $\mat{\Theta}_i$ and $\mat{\Lambda}_i$, followed by projecting $\Tilde{\vec{y}}_{\mat{M}_{i}}^{(i)}$ onto both:
\small
\begin{equation}
\begin{gathered}
    \mat{\Theta}_i \cup \mat{\Lambda}_i = \mat{M}_i, \quad \mat{\Theta}_i \cap \mat{\Lambda}_i = \emptyset, \quad
    \Tilde{\vec{y}}_{\mat{\Theta}_{i}}^{(i)} = \mat{U}_{\mat{\Theta}_i}(\Tilde{\vec{y}}_{\mat{M}_{i}}^{(i)}),\quad \Tilde{\vec{y}}_{\mat{\Lambda}_{i}}^{(i)} = \mat{U}_{\mat{\Lambda}_i}(\Tilde{\vec{y}}_{\mat{M}_{i}}^{(i)}).
    \label{eq:ssl_partitioning}
\end{gathered}
\end{equation}
\normalsize
\noindent
Subsequently, one partition ($\Tilde{\vec{y}}_{\mat{\Lambda}_{i}}^{(i)}$) is used as input to the reconstruction network, while the other ($\Tilde{\vec{y}}_{\mat{\Theta}_{i}}^{(i)}$) serves as the target. The loss function is formulated in the $k$-space domain, minimizing the discrepancy between the target $k$-space partition and the predicted $k$-space restricted to the target partition ($\hat{\vec{y}}_{\mat{\Theta}_{i} \mat{\Lambda}_{i} }^{(i)}$):
\vspace{-5pt}
\small
\begin{equation}
\begin{gathered}
    \boldsymbol{\psi}^{*} = \arg\min_{\boldsymbol{\psi}} \frac{1}{N}\sum_{i=1}^{N} \mathcal{L}_{\textit{K}}(\Tilde{\vec{y}}_{\mat{\Theta}_{i}}^{(i)},  \hat{\vec{y}}_{\mat{\Theta}_{i} \mat{\Lambda}_{i} }^{(i)}
      ), \\ \hat{\vec{y}}_{\mat{\Theta}_{i} \mat{\Lambda}_{i} }^{(i)} = 
     \mat{U}_{\mat{\Theta}_i} \left(\text{DC}_{\boldsymbol{\Lambda}_{i}}\Big(\Tilde{\vec{y}}_{\mat{\Lambda}_{i}}^{(i)},   \hat{\Vec{y}}_{\mat{\Lambda}_{i}}^{(i)} \Big) \right), \quad \hat{\Vec{y}}_{\mat{\Lambda}_{i}}^{(i)} = \mathcal{F} \circ \mathcal{E}_\mat{S} \circ  f_{\boldsymbol{\psi}}  ( \Tilde{\Vec{x}}_{\mat{\Lambda}_{i}}^{(i)} ), \quad \Tilde{\Vec{x}}_{\mat{\Lambda}_{i}}^{(i)} = \mathcal{A}^{*}_{\mat{\Lambda}_{i}, \mat{S}} \big(\Tilde{\vec{y}}_{\mat{M}_{i}}^{(i)}\big).
\end{gathered}
\end{equation}
\normalsize
% 
While most SSL-based MRI reconstruction methods rely on loss calculations in the frequency domain \cite{Yaman2020,millard2023,Hu2021,hu2024spicerselfsupervisedlearningmri}, some studies have explored dual-domain losses \cite{ZHOU2022102538,yan2023dc}. The loss can equivalently be computed in the image domain as follows:
% 
\vspace{-5pt}
\footnotesize
\begin{equation}
\begin{gathered}
    \boldsymbol{\psi}^{*} = \arg\min_{\boldsymbol{\psi}} \frac{1}{N}\sum_{i=1}^{N} \mathcal{L}_{\text{I}} \left( \Tilde{\Vec{x}}_{\mat{\Theta}_{i}}^{(i)} , \hat{\vec{x}}^{(i)} \right), \quad
    \Tilde{\Vec{x}}_{\mat{\Theta}_{i}}^{(i)} = \text{RSS} \circ \mathcal{F}^{-1} \left( \Tilde{\vec{y}}_{\mat{\Theta}_{i}}^{(i)}  \right), \quad 
    \hat{\vec{x}}^{(i)} = \left|\mathcal{R}_{\mat{S}} \circ \mathcal{F}^{-1} \left( \hat{\vec{y}}_{\mat{\Theta}_{i} \mat{\Lambda}_{i} }^{(i)} \right)\right|.
\end{gathered}
\end{equation}
\normalsize
SSL-based MRI reconstruction thus learns by partitioning acquired measurements, using one subset as input and the other as the training target. Although SSL reduces reliance on fully-sampled data, it often underperforms compared to SL.


\subsection{Joint Supervised and Self-supervised Learning}
\label{sec:subsec2.4}

To address the limitations of SSL in scenarios where fully-sampled data are unavailable in the target domain, we propose Joint Supervised and Self-supervised Learning. JSSL integrates SSL using subsampled measurements from target domain(s) with SL using fully-sampled acquisitions from proxy datasets in other organ domains. By leveraging knowledge from proxy datasets, JSSL aims to surpass the performance of conventional SSL methods that rely solely on subsampled target data. \Figure{full_overview} illustrates the end-to-end JSSL pipeline. A theoretical rationale for JSSL is provided in Appendix \ref{sec:ap3}, where we argue that training on a proxy task could reduce 
estimator error by reducing estimator variance while introducing a negligible bias.


%%------------------------------------------------FIGURE ------------------------------------------------%%

\begin{figure}[!hbt]
    \centering
    \includegraphics[width=0.95\textwidth]{diagram.jpg}
    \vspace{-15pt}
    \caption{ JSSL \textbf{(a)} training and \textbf{(b)} inference phases.}
    \label{fig:full_overview}
    \vspace{-13pt}
\end{figure}
%%------------------------------------------------FIGURE ------------------------------------------------%%

\paragraph{JSSL Training Framework}
\label{sec:subsec2.4.1}
To implement JSSL, we construct the overall loss function with two components: one for supervised learning and another for self-supervised learning. For simplicity we assume a single target and a single proxy dataset in our definitions.


\paragraph{Supervised Learning Loss}
\label{sec:subsec2.4.2}
The SL loss is calculated on the proxy dataset, which contains fully-sampled $k$-space data. It is formulated as follows:
% 
\vspace{-6pt}
\small
\begin{equation}
\begin{gathered}
    {\mathcal{L}}_{\boldsymbol{\psi}}^{\text{SL}}  :=  {\mathcal{L}_{\text{I}}}_{\boldsymbol{\psi}}^{\text{SL}} +  {\mathcal{L}_{K}}_{\boldsymbol{\psi}}^{\text{SL}} =    \frac{1}{N_{\text{prx}}} \sum_{i=1}^{N_{\text{prx}}} 
    \left[ \mathcal{L}_{\text{I}} \left( \vec{x}^{\text{prx}, (i)}, \hat{\vec{x}}^{\text{prx}, (i)} \right) \,
    + \, \mathcal{L}_{K}\left(\vec{y}^{\text{prx}, (i)}, \hat{\vec{y}}^{\text{prx}, (i)}\right) \right].
    \vspace{-6pt}
\label{eq:jssl_sl_loss}
\end{gathered}
\end{equation}
% 
\normalsize
Here, $\vec{x}^{\text{prx}, (i)}$, $\hat{\vec{x}}^{\text{prx}, (i)}$ represent the ground truth and predicted images, respectively, for the $i$-th sample in the proxy dataset, while $\vec{y}^{\text{prx}, (i)}$, $\hat{\vec{y}}^{\text{prx}, (i)}$ represent the fully-sampled and predicted $k$-spaces, respectively, as defined in \Sec{subsec2.2}.

\paragraph{Self-supervised Learning Loss}
\label{sec:subsec2.4.3}
The SSL loss is calculated using the target dataset, consisting of subsampled $k$-space data without ground truth.  Motivated by SSL-based methods \cite{zhou2022dsformer,ZHOU2022102538} which established improved performance when using dual-domain loss, we calculate the SSL loss in both the image and $k$-space domains as follows:
% 
\vspace{-5pt}
\small
\begin{equation}
\begin{gathered}
    {\mathcal{L}}_{\boldsymbol{\psi}}^{\text{SSL}}  :=  {\mathcal{L}_{\text{I}}}_{\boldsymbol{\psi}}^{\text{SSL}} +  {\mathcal{L}_{K}}_{\boldsymbol{\psi}}^{\text{SSL}} =  \frac{1}{N_{\text{tar}}} 
    \sum_{i=1}^{N_{\text{tar}}} \Big[\mathcal{L}_{K}\left(\Tilde{\vec{y}}_{\mat{\Theta}_{i}}^{\text{tar}, (i)}, \hat{\vec{y}}_{\mat{\Theta}_{i} \mat{\Lambda}_{i}}^{\text{tar}, (i)}\right) + \mathcal{L}_{\text{I}} \left( \Tilde{\Vec{x}}_{\mat{\Theta}_{i}}^{\text{tar}, (i)}, \hat{\vec{x}}^{\text{tar}, (i)} \right) \Big],\\
    \vspace{-4pt}
     \Tilde{\Vec{x}}_{\mat{\Theta}_{i}}^{\text{tar}, (i)} = \text{RSS} \circ \mathcal{F}^{-1} ( \Tilde{\vec{y}}_{\mat{\Theta}_{i}}^{\text{tar}, (i)}  ), \quad
     \hat{\vec{x}}^{\text{tar}, (i)} = \left| f_{\boldsymbol{\psi}} ( \Tilde{\Vec{x}}_{\mat{\Lambda}_{i}}^{\text{tar}, (i)} ) \right |,
     % \vspace{-3pt}
\label{eq:jssl_ssl_loss}
\end{gathered}
\end{equation}
% 
\normalsize
where $\Tilde{\Vec{x}}_{\mat{\Lambda}_{i}}^{\text{tar}, (i)}$, $\Tilde{\vec{y}}_{\mat{\Theta}_{i}}^{\text{tar}, (i)}$, $\hat{\vec{y}}_{\mat{\Theta}_{i} \mat{\Lambda}_{i}}^{\text{tar}, (i)}$ are as defined in \Sec{subsec2.3}. 

\paragraph{JSSL Loss}
\label{sec:subsec2.4.4}
The JSSL loss is a fundamental component of our approach, defined as the combination of the SL and SSL losses: $\mathcal{L}_{\boldsymbol{\psi}}^{\text{JSSL}} := {\mathcal{L}}_{\boldsymbol{\psi}}^{\text{SL}} + {\mathcal{L}}_{\boldsymbol{\psi}}^{\text{SSL}}$ and the model's parameters are updated during training such that $\boldsymbol{\psi}^{*} = \arg\min_{\boldsymbol{\psi}} \mathcal{L}_{\boldsymbol{\psi}}^{\text{JSSL}}$.

\vspace{-5pt}

\paragraph{JSSL at Inference}
\label{sec:subsec2.4.5} 
During the inference phase, given subsampled $k$-space data $\Tilde{\vec{y}}^{\text{inf}}_{\mat{M}}$, the trained JSSL reconstruction model $f_{\boldsymbol{\psi}^{*}}$ estimates the underlying image as follows: $\hat{\vec{x}} =  \left | f_{\boldsymbol{\psi}^{*}} \big( \Tilde{\vec{x}}^{\text{inf}}_{\mat{M}} \big)\right |,$ where $ \Tilde{\vec{x}}^{\text{inf}}_{\mat{M}}  =  \mathcal{R}_{\mat{S}} \circ \mathcal{F}^{-1} \left( \Tilde{\vec{y}}^{\text{inf}}_{\mat{M}} \right).$

\vspace{-10pt}
\subsection{Coil Sensitivity Prediction}
\label{sec:subsec2.5}
\vspace{-6pt}
An initial approximation of coil sensitivity maps ($\mat{S}$) is derived from the autocalibration signal (ACS) \cite{https://doi.org/10.1002/mrm.10087}. While SSL-based approaches use this approximation \cite{Yaman2020,desai2021noise2recon,ZHOU2022102538}, or employ expensive algorithms like ESPIRiT \cite{Uecker2013}, our JSSL approach takes this initial estimation and feeds it as input to a Sensitivity Map Estimator (SME), similarly to \cite{millard2023,pmlr-v227-zhang24a,hu2024spicerselfsupervisedlearningmri}. The SME refines the sensitivity maps; it is trained end-to-end with the reconstruction model and is integrated in all training setups.



\vspace{-2pt}

%%------------------------------------------------SECTION 4 ------------------------------------------------%%

\section{Experiments}
\label{sec:sec3}

\subsection{Datasets}
\label{sec:subsec3.1}
We utilized fully-sampled multi-coil $k$-space data from the fastMRI brain, fastMRI knee \cite{zbontar2019fastmri}, fastMRI prostate T2 \cite{tibrewala2023fastmri}, and CMRxRecon 2023 cardiac cine MRI \cite{cmrxrecondataset} datasets. Their characteristics and data-splitting parameters are summarized in \Tab{S1}. To evaluate JSSL, we paired target and proxy datasets in two experimental sets: 
\noindent
\textbf{(A)} \textbf{Target:} Prostate, \textbf{Proxy:} Brain + Knee; \textbf{(B)} \textbf{Target:} Cardiac, \textbf{Proxy:} Brain + Knee + Prostate. During training, target datasets were retrospectively subsampled, with fully-sampled target data reserved only for inference. Proxy datasets were also retrospectively subsampled for training, with their fully-sampled measurements used to compute the SL loss component of $\mathcal{L}_{\boldsymbol{\psi}}^{\text{JSSL}}$. 

\subsection{Subsampling Schemes}
\label{sec:subsec3.2}
For our experiments, we applied random uniform subsampling to brain data and used equispaced subsampling for the knee, prostate, and cardiac data, commonly employed in the literature \cite{zbontar2019fastmri,cmrxrecondataset}. During training, we randomly selected acceleration factors of $R=4, 8, 12$ (only for \textbf{B}), and 16 (only for \textbf{A}). We retained 8\%, 4\%, 3\%, and 2\% of the fully-sampled ACS lines for $R=4, 8, 12,$ and 16, respectively. During inference, all these acceleration factors were tested.

\paragraph{SSL Subsampling Partitioning}
\label{sec:para3.2.1}
During the training of any SSL-based method, including JSSL, we split the subsampled data into two distinct sets, as explained in \Sec{subsec2.3}. Specifically, $\mat{\Theta}_{i}$ was obtained from ${\mat{M}_{i}}$ using a 2D Gaussian sampling approach with a standard deviation of 3.5 pixels, as it has been shown to outperform uniform partitioning \cite{Yaman2020}. Consequently, we set ${\mat{\Lambda}_{i}} = {\mat{M}_{i} \setminus \mat{\Theta}_{i}}$. Furthermore, the ratio $q_i = \frac{|\mat{\Theta}_{i}|}{|\mat{M}_{i}|}$ was chosen randomly between 0.3 and 0.8. Note that each $\mat{\Lambda}_{i}$ included a $w \times w = 4 \times 4$ window in the ACS region to enhance SME module training.



\subsection{Implementation \& Optimization}
\label{sec:subsec3.3}
\textbf{Model Architecture}
% \label{sec:para3.3.1}
We employed vSHARP, a physics-guided deep learning approach unrolled over $T=12$ iterations, previously used for accelerated MRI reconstruction \cite{yiasemis2023deep, yiasemis2023vsharp, cmrxrecon}. Each iteration's network $\left\{\mathcal{H}_{\boldsymbol{\theta}_{t}}\right\}_{t=0}^{T-1}$ was a U-Net \cite{Ronneberger2015} with four scales and 32 filters at the first scale. For the data consistency step we set $T_{\vec{x}}=10$. The SME module used a U-Net with 16 filters at the first scale. JSSL is model-agnostic, and we explore additional architectures in Appendix \ref{sec:ap5.1}.  

\noindent
\textbf{Optimization} We employed the Adam optimizer with $\epsilon=10^{-8}$, $(\beta_1, \beta_2) = (0.99, 0.999)$, and an initial learning rate (lr) of 0.003. A scheduler decayed the lr by a factor of 0.8 every 150,000 iterations. Training was conducted on two NVIDIA RTX A6000 GPUs with a batch size of two slices per GPU using the DIRECT toolkit \cite{DIRECTTOOLKIT}. All models were trained to convergence. The loss combined image- and frequency-domain components, motivated by prior work \cite{yiasemis2023vsharp}:
% 
\begin{equation*}
\begin{gathered}
    {\mathcal{L}_{\text{I}}}^{\text{SL}}, {\mathcal{L}_{\text{I}}}^{\text{SSL}} := 2 \left(\mathcal{L}_\text{SSIM}  + \mathcal{L}_{1}\right) + \text{HFEN}_1 + \text{HFEN}_2, \,\,
    {\mathcal{L}_{K}}^{\text{SL}}, {\mathcal{L}_{K}}^{\text{SSL}} := 2 \left( \text{NMSE} + \text{NMAE} \right).
\end{gathered}
\end{equation*}



\subsection{Training Setups Comparison}
\label{sec:subsec3.4}

We conducted the following experiments: \textbf{(1)} SSL in the target domain, \textbf{(2)} SSL in both target and proxy domains (SSL ALL), \textbf{(3)} SL in the target domain, \textbf{(4)} SL in both proxy and target domains (SL ALL), \textbf{(5)} SL in proxy domains only (SL PROXY), and \textbf{(6)} JSSL. Our primary goal was to assess JSSL against SSL approaches in scenarios where fully-sampled target data are unavailable. To verify that JSSL's performance does not simply result from using a larger dataset, we included SSL ALL, which incorporates all available data (target + proxy) under an SSL strategy. SL methods served as a reference, though their results are naturally expected to be superior when fully-sampled target data are accessible. 
% For a deeper analysis of JSSL and its performance across various configurations, we provide additional experiments in Appendix \ref{sec:ap5.2}.



\subsection{Evaluation}
\label{sec:subsec3.5}

The performance of our experiments was evaluated on the target test sets using three metrics: SSIM, PSNR, NMSE \cite{yiasemis2023retrospective}. Model checkpoints were selected based on validation set performance. Statistical tests assessed whether the top method in each category (SL, SSL including JSSL) significantly outperformed others. We first computed performance differences between the best and other methods within each category. The Shapiro-Wilk test checked normality; if satisfied ($p > \alpha=0.05$), a paired t-test was used; otherwise, the Wilcoxon signed-rank test was applied. Results where the best method (\textbf{bold}) was not statistically superior ($p > \alpha = 0.05$) are marked with an asterisk ($*$).




%%------------------------------------------------SECTION 5 ------------------------------------------------%%
\subsection{Results}
\label{sec:subsec3.6}


\vspace{-10pt}
%%------------------------------------------------TABLE ------------------------------------------------%%

\begin{table*}[!htb]
\centering
\caption{Results for fastMRI prostate (target) using brain and knee (proxy) datasets.}
\vspace{-8pt}
\setlength{\tabcolsep}{0.8pt}
{\renewcommand{\arraystretch}{1}
\resizebox{1\textwidth}{!}{%
\begin{tabular}{ccccccccccccc}
\hline
\multirow{2}{*}{\textbf{Setup}} &
  \multicolumn{3}{c}{\textbf{2x}} &
  \multicolumn{3}{c}{\textbf{4x}} &
  \multicolumn{3}{c}{\textbf{8x}} &
  \multicolumn{3}{c}{\textbf{16x}} \\ \cline{2-13} 
 &
  SSIM &
  pSNR &
  NMSE &
  SSIM &
  pSNR &
  NMSE &
  SSIM &
  pSNR &
  NMSE &
  SSIM &
  pSNR &
  NMSE \\ \hline
SL &
  \textbf{0.974${\scriptscriptstyle\pm 0.010}$} &
  \textbf{41.8${\scriptscriptstyle\pm 2.3}$} &
  \textbf{0.002${\scriptscriptstyle\pm 0.001}$} &
  \textbf{0.930${\scriptscriptstyle\pm 0.022}$} &
  \textbf{37.5${\scriptscriptstyle\pm 1.8}$} &
  \textbf{0.005${\scriptscriptstyle\pm 0.002}$} &
  \textbf{0.868${\scriptscriptstyle\pm 0.033}$} &
  \textbf{33.9${\scriptscriptstyle\pm 1.6}$} &
  \textbf{0.011${\scriptscriptstyle\pm 0.003}$} &
  \textbf{0.799${\scriptscriptstyle\pm 0.045}$} &
  \textbf{31.0${\scriptscriptstyle\pm 1.6}$} &
  \textbf{0.021${\scriptscriptstyle\pm 0.005}$} \\
SL ALL &
  0.969${\scriptscriptstyle\pm 0.012}$ &
  41.1${\scriptscriptstyle\pm 2.3}$ &
  0.002${\scriptscriptstyle\pm 0.001}$ &
  0.922${\scriptscriptstyle\pm 0.024}$ &
  36.9${\scriptscriptstyle\pm 1.8}$ &
  0.005${\scriptscriptstyle\pm 0.002}$ &
  0.854${\scriptscriptstyle\pm 0.035}$ &
  33.2${\scriptscriptstyle\pm 1.5}$ &
  0.013${\scriptscriptstyle\pm 0.003}$ &
  0.771${\scriptscriptstyle\pm 0.049}$ &
  30.0${\scriptscriptstyle\pm 1.6}$ &
  0.026${\scriptscriptstyle\pm 0.006}$ \\
SL PROXY &
  0.961${\scriptscriptstyle\pm 0.016}$ &
  39.8${\scriptscriptstyle\pm 2.4}$ &
  0.003${\scriptscriptstyle\pm 0.002}$ &
  0.914${\scriptscriptstyle\pm 0.026}$ &
  36.4${\scriptscriptstyle\pm 1.8}$ &
  0.006${\scriptscriptstyle\pm 0.002}$ &
  0.839${\scriptscriptstyle\pm 0.041}$ &
  32.5${\scriptscriptstyle\pm 1.7}$ &
  0.015${\scriptscriptstyle\pm 0.004}$ &
  0.733${\scriptscriptstyle\pm 0.051}$ &
  28.6${\scriptscriptstyle\pm 1.5}$ &
  0.035${\scriptscriptstyle\pm 0.008}$ \\ \hline
SSL &
  0.956${\scriptscriptstyle\pm 0.015}$ &
  38.8${\scriptscriptstyle\pm 2.6}$ &
  0.004${\scriptscriptstyle\pm 0.002}$ &
  0.891${\scriptscriptstyle\pm 0.030}$ &
  34.7${\scriptscriptstyle\pm 2.0}$ &
  0.009${\scriptscriptstyle\pm 0.003}$ &
  0.801${\scriptscriptstyle\pm 0.038}$ &
  31.1${\scriptscriptstyle\pm 1.5}$ &
  0.020${\scriptscriptstyle\pm 0.005}$ &
  0.707${\scriptscriptstyle\pm 0.050}$ &
  28.0${\scriptscriptstyle\pm 1.6}$ &
  0.041${\scriptscriptstyle\pm 0.008}$ \\
SSL ALL &
  0.953${\scriptscriptstyle\pm 0.016}$ &
  38.6${\scriptscriptstyle\pm 2.5}$ &
  0.004${\scriptscriptstyle\pm 0.002}$ &
  0.892${\scriptscriptstyle\pm 0.031}$ &
  34.8${\scriptscriptstyle\pm 2.0}$ &
  0.009${\scriptscriptstyle\pm 0.004}$ &
  0.801${\scriptscriptstyle\pm 0.041}$ &
  31.1${\scriptscriptstyle\pm 1.6}$ &
  0.020${\scriptscriptstyle\pm 0.006}$ &
  0.699${\scriptscriptstyle\pm 0.052}$ &
  27.8${\scriptscriptstyle\pm 1.6}$ &
  0.043${\scriptscriptstyle\pm 0.010}$ \\
JSSL &
  \textbf{0.965${\scriptscriptstyle\pm 0.015}$} &
  \textbf{39.5${\scriptscriptstyle\pm 2.8}$} &
  \textbf{0.003${\scriptscriptstyle\pm 0.002}$} &
  \textbf{0.918${\scriptscriptstyle\pm 0.026}$} &
  \textbf{36.4${\scriptscriptstyle\pm 1.9}$} &
  \textbf{0.006${\scriptscriptstyle\pm 0.002}$} &
  \textbf{0.842${\scriptscriptstyle\pm 0.038}$} &
  \textbf{32.5${\scriptscriptstyle\pm 1.6}$} &
  \textbf{0.015${\scriptscriptstyle\pm 0.004}$} &
  \textbf{0.752${\scriptscriptstyle\pm 0.053}$} &
  \textbf{29.3${\scriptscriptstyle\pm 1.6}$} &
  \textbf{0.030${\scriptscriptstyle\pm 0.007}$} \\ \hline
\end{tabular}%
}}
\label{tab:metrics}
\end{table*}
%%------------------------------------------------TABLE ------------------------------------------------%%
\vspace{-20pt}
%%------------------------------------------------TABLE ------------------------------------------------%%

\begin{table*}[!htb]
\centering
\caption{
Results for cardiac (target) using brain, knee and prostate (proxy) datasets.
}
\vspace{-8pt}
\resizebox{\textwidth}{!}{%
\setlength{\tabcolsep}{0.8pt}
{\renewcommand{\arraystretch}{1.}
\begin{tabular}{ccccccccccccc}
\hline
\multirow{2}{*}{\textbf{Setup}} &
  \multicolumn{3}{c}{\textbf{2x}} &
  \multicolumn{3}{c}{\textbf{4x}} &
  \multicolumn{3}{c}{\textbf{8x}} &
  \multicolumn{3}{c}{\textbf{12x}} \\ \cline{2-13} 
 &
  SSIM &
  pSNR &
  NMSE &
  SSIM &
  pSNR &
  NMSE &
  SSIM &
  pSNR &
  NMSE &
  SSIM &
  pSNR &
  NMSE \\ \hline
SL &
  \textbf{0.991${\scriptscriptstyle\pm 0.003}$} &
  \textbf{48.1${\scriptscriptstyle\pm 2.5}$} &
  \textbf{0.004${\scriptscriptstyle\pm 0.003}$} &
  \textbf{0.984${\scriptscriptstyle\pm 0.005}$} &
  \textbf{45.7${\scriptscriptstyle\pm 2.0}$} &
  \textbf{0.006${\scriptscriptstyle\pm 0.002}$} &
  \textbf{0.965${\scriptscriptstyle\pm 0.011}$} &
  \textbf{40.6${\scriptscriptstyle\pm 2.2}$} &
  \textbf{0.018${\scriptscriptstyle\pm 0.007}$} &
  \textbf{0.946${\scriptscriptstyle\pm 0.018}$} &
  \textbf{37.8${\scriptscriptstyle\pm 2.3}$} &
  \textbf{0.035${\scriptscriptstyle\pm 0.015}$} \\
SL ALL &
  0.987${\scriptscriptstyle\pm 0.004}$ &
  46.5${\scriptscriptstyle\pm 2.6}$ &
  0.005${\scriptscriptstyle\pm 0.004}$ &
  0.979${\scriptscriptstyle\pm 0.006}$ &
  44.5${\scriptscriptstyle\pm 1.9}$ &
  0.007${\scriptscriptstyle\pm 0.003}$ &
  0.956${\scriptscriptstyle\pm 0.012}$ &
  39.4${\scriptscriptstyle\pm 1.9}$ &
  0.024${\scriptscriptstyle\pm 0.008}$ &
  0.932${\scriptscriptstyle\pm 0.019}$ &
  36.5${\scriptscriptstyle\pm 2.0}$ &
  0.047${\scriptscriptstyle\pm 0.016}$ \\
SL PROXY &
  0.875${\scriptscriptstyle\pm 0.037}$ &
  39.8${\scriptscriptstyle\pm 2.0}$ &
  0.022${\scriptscriptstyle\pm 0.009}$ &
  0.880${\scriptscriptstyle\pm 0.035}$ &
  37.6${\scriptscriptstyle\pm 2.0}$ &
  0.036${\scriptscriptstyle\pm 0.012}$ &
  0.848${\scriptscriptstyle\pm 0.034}$ &
  33.1${\scriptscriptstyle\pm 1.7}$ &
  0.099${\scriptscriptstyle\pm 0.027}$ &
  0.810${\scriptscriptstyle\pm 0.041}$ &
  30.0${\scriptscriptstyle\pm 2.2}$ &
  0.211${\scriptscriptstyle\pm 0.079}$ \\ \hline
SSL &
  0.944${\scriptscriptstyle\pm 0.017}$ &
  41.2${\scriptscriptstyle\pm 2.1}$ &
  0.016${\scriptscriptstyle\pm 0.007}$ &
  0.902${\scriptscriptstyle\pm 0.020}$ &
  36.2${\scriptscriptstyle\pm 2.0}$ &
  0.049${\scriptscriptstyle\pm 0.014}$ &
  0.854${\scriptscriptstyle\pm 0.025}$ &
  33.2${\scriptscriptstyle\pm 1.7}$ &
  0.097${\scriptscriptstyle\pm 0.020}$ &
  0.817${\scriptscriptstyle\pm 0.032}$ &
  \textbf{31.2${\scriptscriptstyle\pm 1.9}$} &
  \textbf{0.153${\scriptscriptstyle\pm 0.038}$} \\
SSL ALL &
  0.974${\scriptscriptstyle\pm 0.006}$ &
  44.0${\scriptscriptstyle\pm 1.9}$ &
  0.009${\scriptscriptstyle\pm 0.005}$ &
  0.929${\scriptscriptstyle\pm 0.016}$ &
  37.9${\scriptscriptstyle\pm 1.9}$ &
  0.033${\scriptscriptstyle\pm 0.011}$ &
  0.862${\scriptscriptstyle\pm 0.026}$ &
  33.0${\scriptscriptstyle\pm 1.7}$ &
  0.102${\scriptscriptstyle\pm 0.026}$ &
  0.814${\scriptscriptstyle\pm 0.034}$ &
  30.3${\scriptscriptstyle\pm 2.0}$ &
  0.191${\scriptscriptstyle\pm 0.059}$ \\
JSSL &
  \textbf{0.975${\scriptscriptstyle\pm 0.007}$} &
  \textbf{45.5${\scriptscriptstyle\pm 2.0}$} &
  \textbf{0.006${\scriptscriptstyle\pm 0.004}$} &
  \textbf{0.944${\scriptscriptstyle\pm 0.013}$} &
  \textbf{39.2${\scriptscriptstyle\pm 2.0}$} &
  \textbf{0.025${\scriptscriptstyle\pm 0.010}$} &
  \textbf{0.893${\scriptscriptstyle\pm 0.022}$} &
  \textbf{34.3${\scriptscriptstyle\pm 1.8}$} &
  \textbf{0.077${\scriptscriptstyle\pm 0.023}$} &
  \textbf{0.848${\scriptscriptstyle\pm 0.032}$} &
  31.1${\scriptscriptstyle\pm 2.1}$$^{*}$ &
  0.161${\scriptscriptstyle\pm 0.059}$$^{*}$ \\ \hline
\end{tabular}%
}
}
\label{tab:metrics-cine}
\vspace{-13pt}
\end{table*}
%%------------------------------------------------TABLE ------------------------------------------------%%


%%------------------------------------------------FIGURE ------------------------------------------------%%
\begin{figure}[!hbt]
    \centering
    \includegraphics[width=1\textwidth]{example_figure_comparative_full_with_sl_proxy_with_zoomed.png}
    \vspace{-23pt}
    \caption{Example reconstructions from experiment set \textbf{A} (prostate MRI) across different training setups. The zoomed-in region highlights a clinically significant PIRADS 4 lesion (indicated by an arrow and bounding box). JSSL preserves lesion visibility even at high accelerations, whereas SSL reconstructions exhibit oversmoothing and blurring artifacts.}
    \vspace{-17pt}
    \label{fig:example_figure}
\end{figure}
%%------------------------------------------------FIGURE ------------------------------------------------%%


% \paragraph{Training Setups Comparison}
% \label{sec:para3.6.1}
The quantitative results of our comparative studies are summarized in Tables \ref{tab:metrics} and \ref{tab:metrics-cine}, which detail metric averages and statistical significance. As expected, supervised methods consistently produced the best reconstruction results across both experimental setups.

From \Tab{metrics}, it is evident that in experiment set \textbf{A} (prostate as target), JSSL demonstrated superior reconstruction performance across all acceleration factors and metrics compared to both SSL and SSL utilizing all data (SSL ALL). Notably, JSSL approached the performance of supervised methods (SL, SL ALL), particularly at $R=2,4,8$. The use of proxy datasets in SSL settings (SSL ALL) did not enhance performance over SSL alone. Similarly, supervised training on all data (SL ALL) offered no significant advantage over SL alone. In SL PROXY, where training relied solely on proxy datasets, out-of-distribution inference on the prostate dataset resulted in better reconstruction quality than SSL. However, JSSL outperformed SL PROXY in SSIM across all acceleration factors and matched or exceeded pSNR and NMSE, except at $R=2$, where SL PROXY showed a slight edge.

In experiment set \textbf{B} (cardiac as target), similar patterns were observed. JSSL consistently outperformed other SSL methods, except at $R=12$, where SSL achieved slightly better (but non-significant) pSNR and NMSE, as shown in \Tab{metrics-cine}. Unlike in \textbf{A}, SSL ALL showed performance improvements over SSL for cardiac data. SL PROXY, however, performed worse than all other methods.

For qualitative analysis, Figures \ref{fig:example_figure}, \ref{fig:example_figure_cardiac}, \ref{fig:S4}, and \ref{fig:S5} display sample reconstructions. At lower accelerations ($R=2,4$), all methods accurately reconstructed prostate data. At higher accelerations, supervised, SL PROXY, and JSSL setups exhibited fewer artifacts compared to SSL and SSL ALL. A similar trend was observed for cardiac data, where SSL-based reconstructions were visually weaker, particularly at high accelerations ($R=8,12$), yielding highly aliased images. Consistent with the quantitative results, out-of-distribution inference (SL PROXY) reconstructions exhibited noticeable artifacts.


%%------------------------------------------------FIGURE ------------------------------------------------%%
\begin{figure}[!hbt]
    \centering
    \includegraphics[width=1\textwidth]{example_figure_cardiac_with_sl_proxy_with_zoomed.png}
    \vspace{-23pt}
    \caption{Example reconstructions from experiment set \textbf{B} (cardiac MRI) across different training setups. The zoomed-in region focuses on the heart, showing that JSSL maintains sharper anatomical boundaries and clearer phase transitions, while SSL reconstructions suffer from blurring and structural loss.}
    \vspace{-17pt}
    \label{fig:example_figure_cardiac}
\end{figure}
%%------------------------------------------------FIGURE ------------------------------------------------%%

\section{Discussion and Conclusion}
\label{sec:sec4}
This study introduces Joint Supervised and Self-supervised Learning, a novel training framework aimed at improving MRI reconstruction quality when fully-sampled $k$-space data are unavailable for the target domain. By integrating SL on fully-sampled proxy datasets with SSL on subsampled target datasets, JSSL offers a practical alternative to  SSL methods, achieving superior reconstruction quality when acquiring fully-sampled data is infeasible.

Our results demonstrate that JSSL consistently yields higher reconstruction quality across various accelerations, even when proxy datasets differ anatomically from the target dataset. Beyond quantitative improvements, JSSL also enhances the clinical interpretability of reconstructed images. As observed in Figures \ref{fig:example_figure} and \ref{fig:example_figure_cardiac}, JSSL better preserves key anatomical structures across different accelerations compared to SSL. In the prostate samples, lesion visibility is maintained even at high acceleration rates, crucial for detecting clinically significant cancer. Similarly, in cardiac MRI, JSSL reconstructions exhibit clearer heart boundaries and cardiac phase transitions, reducing artifacts that could impair clinical assessment. 
% 
% Further experiments in Appendix \ref{sec:ap5.2} confirm that JSSL maintains superior performance under different configurations as well. 
Additionally, JSSL achieves consistent improvements across different model architectures, showcasing its robustness and independence from specific architectural choices.

%While JSSL shows clear advantages over conventional SSL methods, it also has certain limitations. 
The effectiveness of JSSL is influenced by the choice and similarity of proxy datasets. For instance, in our experiments SL PROXY struggled when proxies were highly dissimilar from the target domain. Moreover, incorporating proxy datasets may introduce biases that could impact model performance. Additionally, the partitioning strategy for self-supervised learning, the choice of loss functions, and their weighting in JSSL training may further affect results. Lastly, our comparisons are limited to SSDU as a representative SSL method, given that most self-supervised approaches are derivatives of SSDU. An extended discussion of limitations is provided in Appendix~\ref{sec:ap7}. Ultimately, JSSL aims to enhance SSL performance and \emph{not} to compete with SL in cases where fully-sampled ground truth data for the target domain are available, as SL remains the optimal choice under such conditions. Based on our findings, we propose the following ``rule-of-thumb'' training guidelines:
\begin{enumerate}[label=(\arabic*),leftmargin=*]
    \item Use SL when fully-sampled ground truth data are available for the target dataset.
    \item When only subsampled target data are present, and ground truth data are accessible from proxy datasets (e.g., fastMRI or CMRxRecon), adopt the JSSL approach.
    \item If only proxy ground truth data exist, supervised training in proxy domains can be effective, particularly when proxies are anatomically similar to the target domain. 
    % Avoid this approach if proxies are dissimilar.
    \item In scenarios with only subsampled target data, proceed with SSL.
\end{enumerate}

% Ultimately, JSSL does not aim to replace SL in cases where fully-sampled ground truth data for the target domain are available, as SL remains the optimal choice under such conditions.  However, the goal of this work is not to compete with SL but to show that JSSL improves SSL performance.

%%------------------------------------------------SECTION 5 ------------------------------------------------%%


\section*{Acknowledgments}
This work was supported by institutional grants of the Dutch Cancer Society and of the Dutch Ministry of Health, Welfare and Sport. The authors would like to acknowledge the Research High Performance Computing (RHPC) facility of the Netherlands Cancer Institute (NKI).

\bibliography{bibliography}


\clearpage

% \Large
% \noindent
% \textbf{Joint Supervised and Self-Supervised Learning for MRI Reconstruction - }

% \normalsize
\appendix

\setcounter{figure}{0} 
\setcounter{table}{0} 
\renewcommand{\thefigure}{S\arabic{figure}}
\renewcommand{\thetable}{S\arabic{table}}


\input{appendix1}
\input{appendix3}
\input{appendix4}
\clearpage
\input{appendix5}
\input{appendix6}
\break
\clearpage
\input{appendix7}
\end{document}
