\documentclass{midl}
\usepackage{mwe}
\usepackage{float}

% \newcommand{\rmnum}[1]{\romannumeral #1}
% \newcommand{\Rmnum}[1]{\expandafter\@slowromancap\romannumeral #1@}

\jmlryear{2020}
\jmlrworkshop{Full Paper -- MIDL 2020}

\title[(CLP-NET)]{Laplacian pyramid-based complex neural network learning for fast MR imaging}

\midlauthor{\Name{Haoyun Liang\midljointauthortext{Contributed equally}\nametag{$^{1}$}}\\
\Name{Yu Gong\midlotherjointauthor\nametag{$^{1}$}}\\
\Name{Hoel Kervadec\nametag{$^{2}$}}\\
\Name{Cheng Li\nametag{$^{1}$}}\\
\Name{Jing Yuan\nametag{$^{3}$}}\\
\Name{Xin Liu\nametag{$^{1}$}}\\
\Name{Hairong Zheng\nametag{$^{1}$}}\\
\Name{Shanshan Wang\midljointauthortext{Corresponding author}\nametag{$^{1}$}} \Email{ss.wang@siat.ac.cn; sophiasswang@hotmail.com}\\
\addr $^{1}$ Paul C. Lauterbur Research Center for Biomedical Imaging, Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China \\
\addr $^{2}$ ÉTS Montréal, QC, Canada \\
\addr $^{3}$ School of Mathematics and Statistics, Xidian University, Xi'an, Shaanxi, China
}

\begin{document}

\maketitle

\begin{abstract}
A Laplacian pyramid-based complex neural network, CLP-Net, is proposed to reconstruct high-quality magnetic resonance images from undersampled k-space data. Specifically, three major contributions have been made: 1) A new framework has been proposed to explore the encouraging multi-scale properties of Laplacian pyramid decomposition; 2) A cascaded multi-scale network architecture with complex convolutions has been designed under the proposed framework; 3) Experimental validations on an open source dataset fastMRI demonstrate the encouraging properties of the proposed method in preserving  image edges and fine textures.  
\end{abstract}

\begin{keywords}
Deep learning, complex convolution, Laplacian pyramid decomposition
\end{keywords}

\section{INTRODUCTION}

Fast magnetic resonance imaging (MRI) is vital for clinical applications. Accordingly, different MR acceleration approaches have been proposed, such as designing physics-based fast imaging sequences \cite{oppelt1986fisp}, improving hardware-based parallel imaging techniques \cite{lustig2010spirit}, and developing signal processing-based strategies for image reconstruction from undersampled k-space data \cite{lustig2008compressed,Wang2017Learning}. Among them, k-space undersampling is one of the most popular methods, as it can achieve near-perfect high-resolution images under very high acceleration factors with elaborately designed reconstruction algorithms.

Recently, deep learning has found wide employment in the field of medical imaging. It has been involved in different aspects of medical image analysis, including image reconstruction \cite{wang2019dimension, chen2019model, wang2020deepcomplexmri}, segmentation \cite{yang2019clci, qi2019x}, and classification \cite{zhou2018radiomics}. Deep learning was first introduced to MR image reconstruction by Wang et al., where a Convolutional Neural Network (CNN) was used to learn the nonlinear mapping between images reconstructed from the undersampled and fully sampled k-space \cite{wang2016accelerating}. Subsequently, many deep learning-based reconstruction algorithms have been developed. Schlemper et al. proposed a deep cascaded CNN (DCCN) for dynamic MR image reconstruction \cite{schlemper2017deep}. Generative Adversarial Network (GAN)-based reconstruction methods were utilized to alleviate the blurry issue identified in reconstructed MR images \cite{shende2019brief,quan2018compressed}. Hammernik et al. developed a Variational Network (VN)-based method to improve the image reconstruction quality \cite{hammernik2018learning}. Sun et al. designed an ADMM-Net based on the classic Alternating Direction Method of Multipliers (ADMM) algorithms to obtain accurate reconstruction results from undersampled k-space data \cite{sun2016deep}. A Model-based Deep Learned priors framework (MoDL) was proposed to combine the power of data-driven deep learning with model-based reconstruction schemes \cite{aggarwal2018modl}.

Despite all the successes achieved, reconstruction results of existing methods are still plagued by the notorious blurry issue of tissue and the missing details, especially when high acceleration factors are utilized. To this end, we propose a novel framework with Laplacian pyramid-based complex neural network (CLP-Net) to further utilize the prior information of the available big datasets. This framework explores the multi-scale properties of Laplacian pyramid decomposition for MR reconstruction from undersampled k-space data. A cascaded multi-scale network architecture with complex convolutions is embedded in the proposed framework, as well as a data consistency layer. Experimental results on the fastMRI dataset show that our method obtains better reconstruction results than the state-of-the-art. 

\section{METHODS}
\subsection{Problem Formulation}
In MR reconstruction, $y \in \mathcal{Y}$ is the noisy measurement and $x \in \mathcal{X}$ denotes the image to be recovered. That is:
\begin{equation}
\label{eq:Eq.1}
y = \mathcal{T}x + n
\end{equation}
where $\mathcal{T} = PF$ is an undersampled Fourier encoding matrix, and $n$ is the measurement noise modelled as additive white Gaussian noise. The corresponding fully sampled k-space data can be represented as $Fx$, where $F$ is the Fourier encoding matrix. $P$ denotes the undersampling mask selecting lines in k-space to be sampled. The Fourier encoding matrix is normalized such that $F^{H} F=I$, where the superscript $H$ denotes the Hermitian transpose.

The task of this paper is to reconstruct high-quality MR images from undersampled k-space data, whose objective function can be written as:
\begin{equation}
\label{eq:Eq.2}
\underset{\Theta}{\operatorname{argmin}} \left\|x-f_{\mathrm{CNN}}\left(x_{u} ; \Theta\right)\right\|^{2}
\end{equation}
where $f_{\mathrm{CNN}}$ denotes the proposed reconstruction network with parameters $\Theta$, which takes the zero-filled image $x_{u}$ as input and outputs the corresponding high-quality image.

\subsection{Complex Laplacian Pyramid Decomposition}
The Laplacian pyramid is an image encoding method \cite{burt1983laplacian}, which treats operators at different scales as its basis functions. It has a low level of computational complexity and is easy to implement. In this paper, we modify the conventional Laplacian pyramid method to make it suitable for complex-valued images. Specifically, $g_{0}$ is the image to be decomposed with a shape of $h \times w \times 2$ ($2$ means concatenating the real part and the imaginary part). $g_{1}$ is obtained by passing $g_{0}$ through a Gaussian low-pass filter. Subtracting $g_{1}$ from $g_{0}$, we can get $L_{0}$, which indicates the difference between $g_{0}$ and $g_{1}$:
\begin{equation}
\label{eq:Eq.3}
L_{0} = g_{0} - g_{1}
\end{equation}
Afterward, $g_{1}$ is downsampled. By repeating the above steps with the downsampled images, a complex Laplacian pyramid $\{L_{0}, L_{1}, L_{2}, \ldots, L_{n}\}$ can be finally obtained. $g_{0}$ can be reconstructed from $g_{1}$ and $L_{0}$, which means we only need to handle the reconstruction of $g_{1}$ instead of $g_{0}$. As a result, the computation can be reduced as $g_{1}$ has a smaller image matrix than $g_{0}$.

\subsection{Complex Convolution}
MR k-space data are complex-valued. Accordingly, the reconstructed images are also complex-valued. Although the amplitude image is frequently utilized as a simplification of the complex-valued MR images, the phase image can also provide valuable information. Therefore, building a complex convolution operation-based neural network is a more effective way for MR image reconstruction.

Complex convolutions were proposed in \cite{wang2020deepcomplexmri}. The complex-valued image $x$ can be denoted by $x_\text {real}$ and $x_\text {imag}$ with $x=x_\text {real}+i x_\text {imag}$. We can use a complex convolution filter $\Omega=\Omega_{\text {real}}+i \Omega_{\text {imag}}$ to convolve it. The components of $\Omega$, $\Omega_{\text {real}}$ and $\Omega_{\text {imag}}$, are real-valued. Since the convolution operation is distributive, the complex convolution can be computed as shown in \equationref{eq:Eq.4}. $\Omega$ can be written in the polar and rectangular forms as $\Omega=|\Omega| e^{i \theta}=\Omega_{\text {real}}+i \Omega_{\text {imag}}=|\Omega| \cos \theta+i|\Omega| \sin \theta$, where $\theta$ and $|\Omega|$ are the phase and magnitude of $\Omega$, respectively. In order to reduce the risk of gradient vanishing, we use the Rayleigh distribution to initialize the magnitude of $\Omega$ and the uniform distribution between $-\pi$ and $\pi$ to initialize the phase of $\Omega$.
\begin{equation}
\label{eq:Eq.4}
\Omega * x =\left(\Omega_{\text {real}} * x_{\text {real}}-\Omega_{\text {imag}} * x_{\text {imag}}\right)+i\left(\Omega_{\text {real}} * x_{\text {imag}}+\Omega_{\text {imag}} * x_{\text {real}}\right)
\end{equation}

\subsection{Network Structure}
Directly training a deep learning network with image pairs obtained from undersampled and fully-sampled k-space data is computationally intensive. To solve this problem, we improve the conventional Laplacian pyramid to make it suitable for processing complex-valued images and build CLP-Net based on it. The architecture of the Laplacian pyramid-based block in CLP-Net is shown in \figureref{fig:Fig.1}.
\begin{figure}[hbtp]
\floatconts
  {fig:Fig.1}
  {\caption{Laplacian pyramid-based block in CLP-Net.}}
  {\includegraphics[width=1\linewidth]{figure1.pdf}}
\end{figure}

Predictions (zero-filled image for the first layer) from the previous layers are fed into two network branches, one of which performs the complex Laplacian pyramid decomposition, and the other implements shuffle downsampling. Complex Laplacian pyramid decomposition produces two Laplacian error maps in different scales and one Gaussian map. The shuffle downsampling module is illustrated in \figureref{fig:Fig.2}. After downsampling, the resultant feature maps will be sent to the Conv Block-1 module shown in \figureref{fig:Fig.2} to extract shallow features. The Conv Block-1 module consists of sixteen residual blocks, each consisting of two complex convolution layers and one ReLU activation layer. The extracted shallow features are then sent to the next three branches. Each branch has a Conv Block-2 module and a shuffle upsampling module except for the first branch, as illustrated in \figureref{fig:Fig.2}. The Conv Block-2 module consists of three complex convolution layers and three ReLU activation layers. Then we reshape the last dimension of the resultant deep features from the Conv Block-2 module to $k \times k$ and call the reshaped features the ``Kernel''. The kernel size $k$ is set manually. We then perform the following operation:
\begin{equation}
\label{eq:Eq.5}
F(m, n) = I_{\mathrm{neighbor}}(m, n) \otimes K(m, n)
\end{equation}
where $K(m, n)$ denotes the value of the ``Kernel'' at position $(m, n)$, and $I_{\mathrm{neighbor}}(m, n)$ is the neighborhood of the pixel of the Laplacian error maps or the Gaussian maps at position $(m, n)$. In this paper, we investigate the effect of different values of $k$ on the reconstruction results and choose the optimal $k$. $F(m, n)$ is the pixel of the acquired feature maps $F$ at position $(m, n)$, and $\otimes$ denotes the inner product operation.

\begin{figure}[hbtp]
\floatconts
    {fig:Fig.2}
    {\caption{Illustration of the shuffle downsampling modules.}}
    {\includegraphics[width=1\linewidth]{figure2.pdf}}
\end{figure} 

After the above operations, we can obtain the resultant image by performing Laplacian reconstruction and upsampling on the acquired feature maps $F$. The resultant image is then passed through a data consistency (DC) layer, which was first proposed in \cite{schlemper2017deep}. The whole process above is repeated several times, resulting in a cascaded structure.

\section{EXPERIMENTS}
\subsection{Experimental Data and configuration}
In this paper, we evaluated our method on the public dataset fastMRI \cite{zbontar2018fastmri}. The training, validation and testing subsets of the single-coil k-space data include 973, 199 and 118 scans, respectively. Each scan was collected with one of the three clinical 3T systems (Siemens Magnetom Skyra, Prisma and Biograph mMR) or one clinical 1.5T system (Siemens Magnetom Aera). A conventional Cartesian 2D Turbo Spin-Echo (TSE) protocol was used to acquire the dataset. Sequence parameters are, as per standard clinical protocol, matched as closely as possible between the systems, with a repetition time (TR) ranging between 2200 and 3000 milliseconds, and echo time (TE) between 27 and 34 milliseconds. Images scanned by the different systems share the same matrix size of $320 \times 320$, in-plane resolution of $0.5\,\text{mm}\times0.5\,\text{mm}$, and slice thickness of 3\,mm, and there is no gap between adjacent slices. We used a 1D random sampling pattern with an acceleration factor of 4 and 8 auto-calibration signal (ACS) lines for all the experiments.

\subsection{Implementation Details}
The model was implemented with PyTorch 1.4 and trained on an NVIDIA GeForce RTX 2080Ti GPU. We used the Adam algorithm \cite{kingma2014adam} to optimize the proposed network. The hyperparameters of the Adam algorithm were set as $\beta_1=0.9$ and $\beta_2=0.999$. The loss function of the proposed network is the L1 loss. The number of epochs is 200. The initial learning rate was set to $\alpha=1.0\times 10^{-4}$, and the learning rate was decayed exponentially by a factor of 0.95 per epoch.

\subsection{Quantitative Evaluation Index}
Peak Signal-to-Noise Ratio (PSNR) and Structural Similarity Index Measure (SSIM) are chosen to evaluate the quality of the reconstructed images. Theoretically, images with higher PSNR and higher SSIM have better quality.

\subsection{Ablation Study}
\subsubsection{Ablation Study for the Kernel Size}

\begin{table}[htbp]
\floatconts
    {tab:Table.1}
    {\caption{The average PSNR and SSIM values of the reconstructed images and the number of giga floating-point operations (GFLOPs) per epoch of CLP-net with different kernel sizes.}}
    {\begin{tabular}{llll}
    & \bfseries SSIM & \bfseries PSNR & \bfseries GFLOPs\\
    k=5 & 0.633 & 28.19 & 196\\
    k=9 & 0.634 & 28.21 & 283\\
    k=11 & 0.635 & 28.26 & 345\\
    k=13 & 0.637 & 28.29 & 419\\
    \end{tabular}}
\end{table}

\begin{table}[htbp]
\floatconts
    {tab:Table.2}
    {\caption{The average PSNR and SSIM values of the reconstructed images and the number of giga floating-point operations (GFLOPs) per epoch of CLP-net with different numbers of cascaded modules.}}
    {\begin{tabular}{llll}
    \bfseries cascade & \bfseries SSIM & \bfseries PSNR & \bfseries GFLOPs\\
    3 & 0.624 & 28.02 & 117\\
    4 & 0.628 & 28.13 & 156\\
    5 & 0.633 & 28.19 & 196\\
    6 & 0.635 & 28.22 & 235\\
    \end{tabular}}
\end{table}

We utilized different-sized convolution kernels to test the influence on reconstruction results. As shown in \tableref{tab:Table.1}, although the network performance improves with the increase of kernel size, the improvement is marginal. However, an increase in the size of the convolution kernel leads to a sharp increase in the number of network parameters. Considering the computational efficiency and network performance, we selected a convolution kernel with the size of 5 in our proposed CLP-net.

\subsubsection{Ablation Study for the Cascaded Structure}
We implemented the network with different numbers of cascading modules to test the influence on network capacity. As shown in \tableref{tab:Table.2}, the reconstruction performance improves as the number of cascading modules increases. This indicates that the cascaded structure is well suited to MR image reconstruction. To balance the network performance with the computational complexity, we utilized 5 cascading modules in our proposed CLP-net.

\subsubsection{Ablation Study for the Laplacian Pyramid Decomposition}
In order to verify the contribution of the Laplacian pyramid decomposition to MR image reconstruction, we compared the reconstruction results of CLP-net with and without the Laplacian pyramid decomposition. The mean values of PSNR and SSIM are shown in \tableref{tab:Table.3}. It can be observed that the Laplacian pyramid decomposition improves the reconstruction performance by capturing multi-scale image information.
\begin{figure}[htbp]
\floatconts
    {fig:Fig.3}
    {\caption{Reconstruction results on the fastMRI dataset, including the reconstructed images and the corresponding error maps of different networks. From left to right: reference; zero-filled; U-net; KIKI-net; Cascade-net; k-space learning and CLP-net.}}
    {\includegraphics[width=1\linewidth]{figure3.pdf}}
\end{figure}

\subsubsection{Ablation Study for the Complex convolution}
The average values of PSNR and SSIM of the reconstruction results of CLP-net with complex-valued convolution and CLP-net with real-valued convolution are shown in \tableref{tab:Table.4}. As we can see, complex-valued convolution improves the reconstruction performance of CLP-net. Since the MR image itself is a complex-valued image, complex convolution is more suitable for MRI reconstruction than real-valued convolution.

\begin{table}[htbp]
\floatconts
  {tab:Table.3}
  {\caption{The average value of PSNR and SSIM of the reconstructed images processed by CLP-net with and without Laplacian pyramid decomposition.}}
  {\begin{tabular}{lll}
  & \bfseries SSIM & \bfseries PSNR\\
  LP decompose & 0.633 & 28.19\\
  no decompose & 0.613 & 27.33\\
  \end{tabular}}
\end{table}
\begin{table}[htbp]
\floatconts
  {tab:Table.4}
  {\caption{The average value of PSNR and SSIM of the reconstructed images processed by CLP-net with complex convolution and CLP-net with common convolution.}}
  {\begin{tabular}{lll}
  & \bfseries SSIM & \bfseries PSNR\\
  Complex conv & 0.633 & 28.19\\
  Common conv & 0.624 & 27.96\\
  \end{tabular}}
\end{table}

\subsection{Comparing with Other Methods}
\begin{table}[htbp]
\floatconts
  {tab:Table.5}
  {\caption{The average value of PSNR and SSIM of the reconstruction images processed by different networks on fastMRI dataset.}}
  {\begin{tabular}{lllllll}
  & \bfseries Zero-filled & \bfseries U-net & \bfseries KIKI-net & \bfseries Cascade-net & \bfseries kspace learning & \bfseries CLP-net\\
  \bfseries PSNR & 23.23 & 25.28 & 25.95 & 27.74 & 27.78 & \bfseries28.19\\
  \bfseries SSIM & 0.518 & 0.555 & 0.583 & 0.618 & 0.622 & \bfseries0.633
  \end{tabular}}
\end{table}

To evaluate the reconstruction performance of the proposed network, we also trained U-net \cite{ronneberger2015u}, KIKI-net \cite{eo2018kiki}, Cascade-net \cite{schlemper2017deep} and k-space learning \cite{han2019k} for the comparison purposes. The reconstruction results are shown in \figureref{fig:Fig.3}.

As we can see in \figureref{fig:Fig.3}, all the methods are able to generate high-quality images. The suppression of artifacts improves the visual quality of the reconstruction images greatly. Nevertheless, there are differences between the reconstruction results of different networks. The reconstruction effect of U-net is relatively poor compared to other networks (both the inputs and outputs of U-net are the respective amplitude MR images). There are slight artifacts in the reconstructed images of U-net shown in \figureref{fig:Fig.3}. In the zoomed regions-of-interest (ROI) marked by the red rectangle, the over-smoothing phenomenon can be observed in the reconstruction results of U-net. It leads to the loss of many texture details. Besides, there are more reconstruction residuals in the results of U-net than that of the other networks as shown in the error-map (\figureref{fig:Fig.3}). Although noise suppression ability of KIKI-net is better than U-net, the error-map shows that high-intensity noise still exists in some areas. As we can see in the zoomed ROI in \figureref{fig:Fig.3}, KIKI-net successfully suppressed the artifacts but lost many texture details. Cascade-net not only has better noise suppression capability than U-net and KIKI-net but also has excellent artifact suppression capability. As shown in \figureref{fig:Fig.3}, the suppression of artifacts improves the visual quality of the reconstructed image of Cascade-net significantly. The over-smoothing phenomenon in the reconstruction results of Cascade-net is relatively light. However, the texture detail loss is still a serious problem shown in the zoomed ROI (\figureref{fig:Fig.3}). Based on the Cascade-net reconstruction results, k-space learning further suppresses the reconstruction noise. The reconstructed images of k-space learning shown in \figureref{fig:Fig.3} are closer to the reference images visually than that of U-net, KIKI-net, and Cascade-net. 
More texture information is preserved in the reconstructed images of k-space learning. However, blurry effects can still be observed. The reconstruction performance of CLP-net is the best among the different methods. As we can see in \figureref{fig:Fig.3}, the reconstructed images of CLP-net possess more texture details and less image noise, which indicates that CLP-net has an excellent reconstruction ability. Compared with the reconstruction results of the other networks, the tissue in the reconstructed images of CLP-net is clearer, as shown in the zoomed ROI in \figureref{fig:Fig.3}. This also indicates that there is no excessive noise reduction in the reconstruction process of CLP-net.

We calculated the average PSNR and SSIM of the whole test dataset. The results are shown in \tableref{tab:Table.5}. The proposed CLP-net achieves the best scores for both PSNR and SSIM.

\section{CONCLUSIONS}

In this paper, we propose a novel Laplacian pyramid-based complex neural network for fast MR imaging. Complex Laplacian pyramid decomposition provides encouraging multi-scale properties for MR reconstruction from undersampled k-space data. The proposed framework contains a cascaded multi-scale network architecture with complex convolutions and data consistency layers. Experimental results demonstrate that our method achieves comparable or even superior reconstruction results compared with recently published state-of-the-art methods, both quantitatively and qualitatively.

\midlacknowledgments{This work was supported by the National Natural Science Foundation of China (61871371, 81830056, 61671441), Science and Technology Planning Project of Guangdong Province (2017B020227012, 2018B010109009), the Basic Research Program of Shenzhen (JCYJ20180507182400762) and Youth Innovation Promotion Association Program of Chinese Academy of Sciences (2019351).}

\bibliography{Liang20}

\end{document}