\documentclass{midl} % Include author names
%\documentclass[anon]{midl} % Anonymized submission

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution

\usepackage{mwe} % to get dummy images
\usepackage{graphicx}
\usepackage{enumitem}
\usepackage{esvect}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{booktabs}

% \jmlrvolume{-- Under Review}
\jmlryear{2021}
\jmlrworkshop{Full Paper -- MIDL 2021 submission}
% \editors{Accepted for MIDL 2021}

\title[FB-GACN]{Feedback Graph Attention Convolutional Network for MR Images Enhancement by Exploring Self-Similarity Features}


%%%[FB-GACN: Exploring self-similarity for MRI image enhancement]
%%Exploring self-similarity for 
%%Exploring the self-similarity of MRI images via feedback graph attention Convolutional network

 % Use \Name{Author Name} to specify the name.
 % If the surname contains spaces, enclose the surname
 % in braces, e.g. \Name{John {Smith Jones}} similarly
 % if the name has a "von" part, e.g \Name{Jane {de Winter}}.
 % If the first letter in the forenames is a diacritic
 % enclose the diacritic in braces, e.g. \Name{{\'E}louise Smith}

 % Two authors with the same address
 % \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\and
 %  \Name{Author Name2} \Email{xyz@sample.edu}\\
 %  \addr Address}

 % Three or more authors with the same address:
 % \midlauthor{\Name{Author Name1} \Email{an1@sample.edu}\\
 %  \Name{Author Name2} \Email{an2@sample.edu}\\
 %  \Name{Author Name3} \Email{an3@sample.edu}\\
 %  \addr Address}


% Authors with different addresses:
% \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\\
% \addr Address 1
% \AND
% \Name{Author Name2} \Email{xyz@sample.edu}\\
% \addr Address 2
% }

% \footnotetext[2]{Contributed equally}

% More complicate cases, e.g. with dual affiliations and joint authorship
\midlauthor{\Name{Xiaobin Hu\midljointauthortext{Contributed equally}\nametag{$^{1}$}} \Email{xiaobin.hu@tum.de}\\ \Name{Yanyang Yan\midlotherjointauthor\nametag{$^{2}$}} \Email{yanyanyang@iie.ac.cn}\\
\Name{Wenqi Ren\nametag{$^{\dag 2}$}} \Email{rwq.renwenqi@gmail.com}\\
\Name{Hongwei Li\nametag{$^{1}$}} \Email{hongwei.li@tum.de}\\
\Name{Amirhossein Bayat\nametag{$^{1}$}} \Email{amir.bayat@tum.de}\\
\Name{Yu Zhao\midljointauthortext{indicates the corresponding authors}\nametag{$^{1}$}} \Email{yuzhao90@outlook.com}\\
\Name{Bjoern Menze\nametag{$^{1}$}} \Email{bjoern.menze@tum.de}\\
\addr $^{1}$ Department of Computer Science, Technische Universit\"at M\"unchen, Munich \\
\addr $^{2}$ Institute of Information Engineering, Chinese Academy of Sciences, Beijing, China
}

\begin{document}

\maketitle

\begin{abstract}
Artifacts, blur, and noise are the common distortions degrading MRI images during the acquisition process, and deep neural networks have been demonstrated to help in improving image quality. To well exploit global structural information and self-similarity details, we propose a novel MR image enhancement network, named Feedback Graph Attention Convolutional Network (FB-GACN).
As a key innovation, we consider the global structure of an image by building a graph network from image sub-regions that we consider to be node features, linking them non-locally according to their similarity. The proposed model consists of three main parts:
1) The parallel graph similarity branch and content branch, where the graph similarity branch aims at exploiting the similarity and symmetry across different image sub-regions in low-resolution feature space and provides additional priors for the content branch to enhance texture details.
2) A feedback mechanism with a recurrent structure to refine low-level representations with high-level information and generate powerful high-level texture details by handling the feedback connections. 
3) A reconstruction to remove the artifacts and recover super-resolution images by using the estimated sub-region self-similarity priors obtained from the graph similarity branch. We evaluate our method on two image enhancement tasks: i) cross-protocol super resolution of diffusion MRI; ii) artifact removal of FLAIR MR images. Experimental results demonstrate that the proposed algorithm outperforms the state-of-the-art methods.
\end{abstract}

\begin{keywords}
Magnetic resonance imaging, image enhancement, self-similarity,  graph similarity branch, feedback mechanism.
\end{keywords}

\section{Introduction}
For Magnetic Resonance Imaging (MRI) sequences, it is an inevitable dilemma to achieve a balance between image resolution, signal-to-noise ratio, and acquisition time \cite{1_mri}. Higher resolution imaging grasps more structural details and provides more diagnostic information, but requires longer acquisition time \cite{6_sr}. Since the signal-to-noise ratio is proportional to the slice thickness and the square root of scanning time, the longer acquisition time leads to the performance drop of the signal-to-noise ratio and tends to generate artifacts caused by physiologic motion such as respiratory motion and physical movement of subjects. \textcolor{black}{Considering the limited and costly MRI resource, some thick slices and low scan time MRI images are usually utilized to get a desired signal-to-noise ratio} \cite{lee2020deep, wu2019self,5_sr}. Consequently, the use of image enhancement techniques is an established field of research in medical image computing and imaging physics \cite{2_mri}, for example, to prevent blurring and information loss when co-aligning different image volumes in a multi-parametric sequence.

Recently, Convolutional Neural Network (CNN) based approaches have shown dramatic improvements over traditional super-resolution (SR) methods and exhibited state-of-the-art performance in natural and medical images. A super-resolution convolutional neural network (SRCNN) \cite{3_srcnn} was proposed  to learn a nonlinear mapping between the \textcolor{black}{low-resolution (LR)} and \textcolor{black}{high-resolution (HR)} images. Wide residual networks with fixed skip connections \cite{4_sr} was presented for MR images super-resolution. 
% A patch-based SR algorithm of ASL magnetic resonance images \cite{5_sr} computed nonlinear weights from non-local image patches. 
A new CNN-based model \cite{7_sr} was proposed for a diffusion tensor imaging SR task. Besides, Graph Neural Networks (GNN) have also shown their powerful ability to exploit structural information when dealing with data of graph structure. The notion of GNN was first introduced \cite{8_sr}, and then further elaborated as a generalization of recursive neural networks, which is widely used to explore the structural characters in various applications including chemistry, recommender systems, and social network study to deal with challenging tasks, e.g., finding the chemical compounds that are most similar to a query compound, tackling the graph similarity computation for query systems \cite{18_sr}.
Nowadays, it is an interesting trend to combine GNN and CNN to develop their corresponding advantages \cite{9_sr}. \textcolor{black}{GNNs help with reducing the data dimensionality from image features extracted by CNN to high-level and compact features in graph nodes. Fully convolutional networks (FCNs) are limited in the receptive field. Adding a GNN could increase the receptive field of networks when dealing with large images. The combination of CNN and GNN is a convolutional graph neural network that generalizes the operation of convolution from grid data to graph data. It plays a central role in building up many complex GNN models \cite{wu2020comprehensive}.}
% For most conventional SR algorithms, high-resolution patches are directly used to restore their LR patches in image space. It easily generates inconsistent HR results after replacing the LR patches with the HR patches without considering the continuous relationship and self-similarity among patches.

 \textcolor{black}{To avoid generating inconsistent HR results after replacing the LR patches,}
% However, 
in our method, the similar patch pairs are matched in feature space and the graph attention mechanism is used to update features representation of each patch (node) with the adaptive weight combination of those similar patches' features.
As far as we know, it is the first work to explore the self-similarity and continuous relationship of MRI and fully exploit the feedback mechanism to increase the reconstruction accuracy for MR images. More specifically, in this paper, we propose a novel biomedical image enhancement network based on the feedback mechanism and graph attention convolutional network, where graph networks are employed as a self-similarity strategy which assigns larger weights to the more important and similar nodes or features. 

The main contributions of this paper are:
\begin{itemize}
\vspace{-0.2cm}
    \item [1)] We propose a Feedback Graph Attention Convolutional Network (FB-GACN) for MR image enhancement. To the best of our knowledge, it is the first work to construct a graph-based network into the image enhancement by exploring globally structural similarity among similar paired sub-regions.
\vspace{-0.2cm}    
    \item [2)] We propose a self-similarity learning strategy to update the features of each node in a graph. Learning the symmetry and similarity relationship of each pair, the content with same texture (e.g., edges, corners, and lesions) gets sharper and can be used to remove some artifacts. It recovers more texture details by employing the feedback mechanism (consecutive iterations) to facilitate LR images to reconstruct SR images. 
    %High-level information is provided in top-down feedback flows through feedback connections.
\vspace{-0.5cm} 
    \item [3)] We demonstrate the performance in two crucial tasks: i) cross-protocol super resolution of diffusion MRI and ii) MRI artifacts removal. The proposed network achieves better high-resolution criteria and superior visual quality compared to state-of-the-art methods. 
    % The results show that the main structures and details can be effectively recovered while some artifacts can be successfully removed.
    % 
\end{itemize}

\section{Method}
%\vspace{-2mm}
The whole pipeline consists of the following three steps. Firstly, a stack of convolution layers extracts the low-resolution features of the distorted input images. Afterward, the content branch and graph similarity branch work in parallel to exploit the texture and self-similarity information. Finally, the upsampling block reconstructs the final super-resolution results using the estimated patch correlation and texture priors.

\noindent\textbf{Specialized design for MR images}: \textcolor{black}{Our method aims to learn the symmetry and self-similarity relationship of patch-based features in multi-modal brain MR images where the structure of the brain is normally symmetric, as shown in Fig.~\ref{fig:fig2}~(a). To meet this requirement, we designed a specialized graph-based structure to merge the high-similarity information of sub-regions by assigning larger weights to the more important and similar nodes or features in a graph attention fashion.}

% The results show that the proposed method can successfully remove noise and artifacts, and get sharp and clear results.
%\vspace{-5mm}
% \subsection{Specialized Design }


\subsection{Architecture of FB-GACN}
% \vspace{-0.5cm}
\begin{figure*}
\centering
\includegraphics[width=0.8\textwidth]{network2_2.pdf}
\caption{Architecture of the proposed FB-GACN model. Our FB-GACN contains three parts: 1) The content block to generate the high-level texture details. 2) The graph attention branch to exploit the similarity and symmetric knowledge across MRI patches. 3) A reconstruction to remove the artifact and reconstruct super-resolution MRI by using the estimated patch correlation priors. The feedback mechanism is the recurrent structure to refine $x$ features with high-level $x^{T}$ by the feedback connections. }\label{fig:fig1}
\vspace{-0.7cm}
\end{figure*}
The structure of the proposed FB-GACN is illustrated in Fig.~\ref{fig:fig1}. A long skip connection is added to pass the upsampled LR image to the output result as we only want to learn the residual modifications. After feature extraction, the outputs are low-resolution features with the dimension of $h\times w \times d$, where $h$ and $w$ denote the spatial dimensions of the LR input and $d$ is the number of feature channels. Then the LR features are fed into both the content branch and the graph similarity branch. The upsampling block $U$ is made up of deconvolution layers to upscale the HR features, and convolutional layers to recover a residual image. The final reconstructed SR image is the pixel-wise sum of the upsampled LR input and the residual image. The mathematical formulation is elaborated as:
%\vspace{-0.1cm}
\begin{equation}
%\vspace{-0.1cm}
I^{SR}=f_{U}\left[f_G\left(f_E\left(I^{LR}\right)\right)+f_F\left(f_E\left(I^{LR}\right)\right)\right]+I^{LR}_{up},
\end{equation}
where $f_{E}(\cdot)$, $f_G(\cdot)$, $f_F(\cdot)$, and $f_U(\cdot)$ represent the operations of the feature extraction $E$, graph similarity branch $G$, content branch $F$ and upsampling $U$ blocks, respectively, and $I^{LR}_{up}$ denotes the upsampled LR input passed through the long skip connection. The objective function is an $L_1$ norm-based loss function. The network is trained by minimizing the objective function as follows:
%\vspace{-0.1cm}
\begin{equation}
%\vspace{-0.1cm}
\ell_{(\theta)}=\frac{1}{n}\sum_{i=1}^{n}\left \| I_{i}^{SR}-I_{i}^{HR} \right\|_{1},
\end{equation}
where $\theta$ and $n$ are the parameters of the network and the number of image pairs, respectively. $I_{i}^{SR}$ is the reconstructed super-resolution MRI, and $I_{i}^{HR}$ is the corresponding ground truth.
% \vspace{-0.5cm}
% \subsection{Feedback Content Block}
% \begin{figure*}
% \vspace{-0.6cm}
% \centering
% \includegraphics[width=0.8\textwidth]{feedback.pdf}
% \caption{The schematic diagram of feedback mechanism. Blue arrows represent the feedback connections.}\label{fig:fig4}
% \vspace{-0.5cm}
% \end{figure*}
% The content block \cite{10_sr} is proposed to recover texture details. It consists of 3 deconvolutional and 3 convolutional layers with the feedback connections ($T=3$) to generate a high-level representations.
% The feedback content block can be unfolded to $T$ iterations, in which each iteration $T$ is temporally ordered from 1 to T. The $T_{th}$ iteration receives the feedback knowledge $(T-1)_{th}$ to correct low-level representations, and then passes more powerful high-level features to the next iteration.
%\subsection{Feedback Content Block}
% \subsection{Graph Attention Convolutional Block}
\begin{figure*}
\vspace{-0.5cm}
\centering
\includegraphics[width=0.8\textwidth]{graph_network_GAT.png}
% \vspace{-0.8cm}
\caption{\textbf{(a)} Exploring the self-similarity features to remove artifacts: Swapping the artifacts features in Patch 2 with clear features of Patch 1. \textbf{(b)} The employed attention mechanism. A shared linear transformation $W$ is applied to every node. Afterwards, a self-attention mechanism  $a$ is calculated on features to learn the correlation among nodes. \textbf{(c)} An illustration of multi-head attention mechanism by node $1$ on its neighbors. }\label{fig:fig2}
\vspace{-0.7cm}
\end{figure*}
\vspace{-4mm}
\subsection{Graph Similarity Branch}
\vspace{-1mm}
Graph similarity branch employs graph attention network layers (GAT) \cite{9_sr} to make use of the contextual information among image patches to help recover structure and remove artifacts. \textcolor{black}{After feeding the extracted LR feature maps to a convolutional layer with stride of $s$ and kernel size of $p$, we form a graph using the $n\times d$ matrix where we assume there exist $n$ nodes with $d$-dimensional features. Each node is connected with five neighboring nodes and the attention coefficient of each node is updated.}
% Finally, we reshape the features back to $h/s \times w/s \times d$ and process with deconvolution layer.
The single graph attention layer is shown in Fig. \ref{fig:fig2}.
% we reshape the output features with size of $h/s \times w/s \times d$  to a $n \times d$ matrix, where $n=h/s \times w/s$. 
% The single graph attention layer is shown in Fig. \ref{fig:fig2}. 
The input of the single attention layer is a set of node features, $\boldsymbol{h}=\{ \overrightarrow{h}_1,\overrightarrow{h}_2,\dots,\overrightarrow{h}_N \}$, $\overrightarrow{h}_i \in \mathbb{R}^{F}$, where $N$ is the number of nodes, and $F$ is the number of features in each node. The GAT layer produces a new set of node features, $\boldsymbol{h}^{'}=\{ \overrightarrow{h}_1^{'},\overrightarrow{h}_2^{'},\dots,\overrightarrow{h}_N^{'} \}$, $\overrightarrow{h}_i^{'} \in \mathbb{R}^{F'}$. A shared learnable linear transformation, parametrized by a weight matrix $\boldsymbol{W}\in\mathbb{R}^{F'\times F}$, is first applied to every node, and self-attention is then performed on the nodes (a shared attention mechanism $a\colon \mathbb{R}^{F'}\times \mathbb{R}^{F'}\rightarrow \mathbb{R}$ computes the attention coefficients):
%\vspace{-0.2cm}
\begin{equation}
%\vspace{-0.2cm}
e_{ij}=a(\boldsymbol{W}\overrightarrow{h}_{i},\boldsymbol{W}\overrightarrow{h}_{j}),
\end{equation}
which represents the importance of node $j$ to node $i$. Afterwards, the attention coefficients are normalized by the softmax function:
%\vspace{-0.1cm}
\begin{equation}
%\vspace{-0.1cm}
\alpha_{ij}=\text{softmax}_j(e_{ij})=\frac{\exp(e_{ij})}{\sum_{k\in N_i}\exp(e_{ik})}.
\end{equation}
Following \cite{9_sr}, the attention mechanism $a$ is a single-layer feedforward neural network, parametrized by a weight vector $\overrightarrow{\boldsymbol{a}}\in \mathbb{R}^{2F'}$. After applying the LeakyReLU nonlinearity, the coefficients are expressed as:
%\vspace{-0.1cm}
\begin{equation}
%\vspace{-0.1cm}
\alpha_{ij}=\frac{\text{exp}(\text{LeakyReLU}(\overrightarrow{a}^{T}[\boldsymbol{W}\overrightarrow{h}_i\|\boldsymbol{W}\overrightarrow{h}_j]))}{\sum_{k\in N_i}\text{exp}(\text{LeakyReLU}(\overrightarrow{a}^{T}[\boldsymbol{W}\overrightarrow{h}_i\|\boldsymbol{W}\overrightarrow{h}_k]))},
\end{equation}
where $(\cdot)^{T}$ represents the transposition operations and $\|$ means the concatenation. Then the final output of each node is updated on the strength of the similar neighborhood LR feature nodes $\overrightarrow{h}_j$:
%\vspace{-0.1cm}
\begin{equation}
%\vspace{-0.1cm}
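\overrightarrow{h}_i^{'}=\sigma \left( \sum_{j\in{N_i}} \alpha_{ij}\boldsymbol{W}\overrightarrow{h}_j \right),
\end{equation}
where $\sigma(\cdot)$ is a nonlinear activation function and $N_i$ denotes the set of neighboring nodes of node $i$.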

We also employ the content branch to recover texture details, as shown in Fig.~\ref{fig:fig1}, which is a stack of 3 deconvolutional and 3 convolutional layers.
\vspace{-4mm}
\subsection{Feedback Mechanism}
\vspace{-1mm}
The feedback mechanism is a loop iteration to allow the network to correct previous states and regenerate high-level representations. Such iterative cause-and-effect process helps to achieve the principle of the feedback scheme for image SR: high-level information can guide an LR image to recover a better SR image \cite{10_sr}. In our network, we utilize the feedback mechanism to transfer the feature summation with high-level information obtained from the two branches to the low-level information of an input $x$. The judgment of the feedback connection controller (shown in Fig.~\ref{fig:fig1}) determines the number ($T$) of feedback iterations, also named feedback connections.
% The high-level $x^{T}$ obtained by $T$ $th$ feedback iteration are combined with initial input $x$.
\textcolor{black}{The feedback mechanism is the recurrent CNN structure to refine $x$ features with high-level $x^T$ by the feedback connections ($T$-th iteration). It can be unfolded to $T$ iterations, in which each iteration $t$ is temporally ordered from $1$ to $T$. The hidden state of each iteration is tied with the loss function and the weight parameters of each iteration are shared. The input of the $t$-th iteration receives the feedback information of the $(t-1)$-th iteration to correct the original low-level inputs.}

\vspace{-4mm}
\section{Experimental Results}
\vspace{-2mm}
\subsection{Datasets}
\vspace{-2mm}
Two experiments were conducted to evaluate the performance of the feedback graph attention convolutional network. The first experiment is solving a cross-protocol super-resolution problem on diffusion MRI data (MUSHAC) \cite{19_dataset}. The HR images were obtained by state-of-the-art diffusion MRI acquisition by Prisma scanner with voxel size ($1.5 \times 1.5 \times 1.5$~mm$^{3}$), and the corresponding LR images were scanned by the standard acquisition of Prisma with a larger voxel size ($2.4 \times 2.4 \times 2.4$~mm$^{3}$). Nine subjects are used as the training set and one subject for testing.
% We generated the LR images with the same method used in \cite{2_mri,4_sr} while the original MRIs were used as the HR ground truth images. 200 patients, 25 patients and 25 patients are used as the training, validation, and test data, respectively.  For each patient, we picked 25 most informative slices. 
For the second experiment, we utilize the proposed network to remove the MRI artifacts and regenerate HR images by the scale $\times 2$. We randomly divided the public WMH dataset \cite{14_sr} into training (2225 images from 48 patients), validation (278 images from 6 patients) and test parts (278 images from 6 patients). Afterward, the simulated artifacts of FLAIR modality \cite{14_sr} were generated by the physical model of MRI motion artifacts. 
%\vspace{-0.3cm}
\begin{table*}[t]
\vspace{-0.1cm}
\caption{\textcolor{black}{Quantitative results of cross-protocol super-resolution and artifacts removal tasks. The best results are highlighted in bold.}}\label{tab:tab1}
\small 
%\small  \footnotesize  \normalsize
\centering
\begin{tabular}{ccccc}
% booktabs rules only; the +/- values are set in math mode so \pm is valid.
\toprule
Methods & \multicolumn{2}{c}{Super-Resolution} & \multicolumn{2}{c}{Artifacts Removal} \\
\cmidrule(lr){2-3} \cmidrule(lr){4-5}
 & PSNR & SSIM & PSNR & SSIM \\
\midrule
Bicubic & $27.34\pm 1.32$ & $0.8882\pm 0.0232$ & $22.58\pm 3.59$ & $0.6855\pm 0.1345$ \\
SRCNN \cite{3_srcnn} & $29.46\pm 1.68$ & $0.9042\pm 0.0796$ & $24.68\pm 3.38$ & $0.7294\pm 0.1216$ \\
VDSR \cite{15_sr} & $29.66\pm 1.18$ & $0.9026\pm 0.0731$ & $25.39\pm 2.72$ & $0.7588\pm 0.0921$ \\
EDSR \cite{16_sr} & $30.23\pm 1.56$ & $0.9145\pm 0.0229$ & $25.68\pm 3.61$ & $0.7824\pm 0.0952$ \\
DDBPN \cite{17_sr} & $30.34\pm 1.56$ & $0.9171\pm 0.0208$ & $25.58\pm 3.56$ & $0.7821\pm 0.0952$ \\
FB-GACN (Ours) & $\mathbf{30.48}\pm 1.63$ & $\mathbf{0.9185}\pm 0.0194$ & $\mathbf{25.78}\pm 3.71$ & $\mathbf{0.7839}\pm 0.1003$ \\
\bottomrule
\end{tabular}
\vspace{-2mm}
\end{table*}

\begin{figure}[htbp]
\centering
\includegraphics[width=1\textwidth]{exp1.pdf}	
  \vspace{-13mm}
	\begin{center}
		\tabcolsep 1pt 
		%p{1.8cm}p{1.8cm}p{1.8cm}p{1.8cm}p{1.8cm}p{1.8cm}
        \begin{tabular}{p{4cm}p{3cm}p{3cm}p{3cm}p{3cm}}
      % 1 & 2& 3&4&5 \\
     Ground truth &  Proposed  &  VDSR &  SRCNN &  Bicubic    \\
        \end{tabular}
	\end{center}	
\vspace{-5mm}	
\caption{Comparison with state-of-the-art methods of cross-protocol super-resolution on the diffusion MRI data (MUSHAC). Best viewed by zooming in on the screen.}\label{fig:fig3}
\vspace{-8mm}
\end{figure}


% \begin{figure*}[h]
% \centering
% \includegraphics[width=1\textwidth]{exp1_blur.pdf}
% \caption{Comparison of state-of-the-art methods: magnification factors $\times$2 with noise and blur and the input resolution 120$\times$120. Best viewed by zooming in the screen.}\label{fig:fig3}
% \vspace{-0.5cm}
% \end{figure*}



% \begin{figure*}[h]
% % \vspace{-2.0cm}
% \centering
% \includegraphics[width=1\textwidth]{artifact.pdf}
% \caption{Comparison of state-of-the-art methods: magnification factors $\times$2 with artifacts and the input resolution 100$\times$100. Best viewed by zooming in the screen.}\label{fig:fig4}
% \vspace{-0.6cm}
% \end{figure*}


%\vspace{-4mm}

\vspace{-4mm}
\subsection{Implementation Details}
In each training batch, nine LR patches
%with the size of $40 \times 40$ 
are randomly extracted as inputs. We train our model for 300 epochs with the Adam optimizer; the learning rate is initially set to $10^{-4}$ and is divided by 2 every 80 epochs. We implement the experiments with PyTorch using an NVIDIA TITAN X GPU.
\vspace{-4mm}
\subsection{Comparisons with State-of-the-Art Methods}
In order to evaluate the performance of our algorithm, we compare it with the state-of-the-art methods qualitatively and quantitatively. The four most recent state-of-the-art super-resolution methods are listed as follows: the Very Deep Super Resolution Network (VDSR) from \cite{15_sr}, the Super-Resolution Convolutional Neural Network (SRCNN) from \cite{3_srcnn}, the Enhanced Deep Residual Networks (EDSR) from \cite{16_sr}, and the Deep Back-Projection Networks For Super-Resolution (DBPN) from \cite{17_sr}. We use open-source implementations from the authors and train all the networks on the same dataset for a fair comparison.
%\vspace{-3mm}
\begin{figure}[htb]
\centering
\includegraphics[width=1\textwidth]{artifact_v1.pdf}
        \vspace{-13mm}
	\begin{center}
		\tabcolsep 1pt 
		%p{1.8cm}p{1.8cm}p{1.8cm}p{1.8cm}p{1.8cm}p{1.8cm}
        \begin{tabular}{p{4cm}p{3cm}p{3cm}p{3cm}p{3cm}}
     Ground truth &  Proposed  &  VDSR &  SRCNN &  Bicubic    \\
        \end{tabular}
	\end{center}
\vspace{-5mm}	
\caption{Comparison with state-of-the-art methods of artifacts removal with magnification factors $\times$2 and the input size 100$\times$100. Best viewed by zooming in on the screen.}\label{fig:fig4}
\vspace{-8mm}
\end{figure}
\vspace{-4mm}
\subsection{Quantitative Results}
% \vspace{-1mm}
The quantitative evaluation of the network using the peak signal-to-noise ratio (PSNR) and the structural similarity (SSIM) scores are listed in Table \ref{tab:tab1}. 
% Please add the following required packages to your document preamble:
% \usepackage{multirow}

\noindent\textbf{Cross-Protocol Super-Resolution}: This task is to evaluate the performance of our method on the cross-protocol diffusion MRI quality enhancement. Our method achieves better results in comparison with other state-of-the-art methods, especially a 3.14 dB higher PSNR than the traditional bicubic interpolation method.

\noindent\textbf{Artifacts Removal}: To verify the effectiveness of our proposed network towards removing MRI artifacts and super-resolution scale $\times 2$, the PSNR and SSIM results of MRI artifacts are listed in Table \ref{tab:tab1}. Our method outperforms all the state-of-the-art algorithms with the best PSNR 25.78 dB and SSIM 0.7839.
% \vspace{-0.5cm}
\vspace{-3mm}
\subsection{Qualitative Evaluation}
\vspace{-1mm}
% Qualitative evaluations of two experiments are shown in Figures \ref{fig:fig3} and \ref{fig:fig4}, respectively.
{\noindent\textbf{Cross-Protocol Super-Resolution}}: The qualitative results of our methods on the diffusion MRI data (MUSHAC) by the standard and the state-of-the-art acquisition of Prisma are shown in Figure~\ref{fig:fig3}. It can be observed that our proposed method obtains higher visual quality and recovers clearer structures with finer contrast.

{\noindent\textbf{Artifacts Removal}}: The qualitative results of our methods at magnifications $\times$2 with artifacts are shown in Figure \ref{fig:fig4}. It can be observed that our proposed method can remove artifacts and obtain the super-resolution results from the LR images. It recovers clearer structures with finer contrast, edges and lesion information.
\vspace{-0.2cm}
\subsection{Ablation study}
\vspace{-0.5cm}
\begin{table}[htbp]
\caption{Ablation study results (PSNR/SSIM): Comparison of our proposed model with the configuration without (w/o) the graph similarity knowledge.}\label{tab:tab2}
\vspace{-0.5cm}
\centering
\begin{tabular}{p{4cm}p{4cm}p{4cm}}
\hline
Ablation configuration & Super-Resolution & Artifacts Removal \\ \hline
w/o graph similarity & 30.35/0.9177 & 25.65/0.7735 \\ \hline
ours & 30.48/0.9185 & 25.77/0.7835 \\ \hline
\end{tabular}
\end{table}
\vspace{-0.2cm}
\noindent\textbf{Graph similarity knowledge}:
 We conduct an ablation study to demonstrate the effectiveness of the graph similarity branch. We compare the proposed network with and without patch-based similarity knowledge in terms of PSNR and SSIM on the test data, shown in Table \ref{tab:tab2}. The graph similarity branch boosts the performance both in the super-resolution and artifacts removal tasks. 

\noindent\textbf{Feedback Mechanism}:
We explore the effect of the iterative number of feedback connections. It can be observed from Table \ref{tab:tab3} that the reconstruction performance is improved when the iterative number increases from $T=1$ to $T=4$. Considering the balance between the computational time and the performance, $T=4$ is chosen as the iterative number in our paper. 

\begin{table}[htbp]
\vspace{-0.6cm}
\caption{The impact of the iterative number $T$ of feedback connection.}\label{tab:tab3}
\centering
\begin{tabular}{p{4cm}p{2cm}p{2cm}p{2cm}p{2cm}}
\hline
Feedback Connection & T=1 & T=2 & T=3 & T=4 \\ \hline
Super-Resolution & 30.22/0.9172 & 30.28/0.9173 & 30.34/0.9177 & 30.48/0.9185 \\ \hline
Artifacts Removal & 25.26/0.7632 & 25.41/0.7647 & 25.49/0.7682  & 25.77/0.7835 \\ \hline
\end{tabular}
\vspace{-0.8 cm}
\end{table}
\section{Conclusion}
%\vspace{-0.1 cm}
In this paper, we proposed a novel feedback graph attention convolutional network to enhance the visual quality and remove the common distortions (e.g., artifacts) of MR images, considering the self-similarity and correlations across MRI sub-regions. We regard each sub-region as a node and construct a graph to capture the global structure. We employ the feedback mechanism to recover texture details by refining low-level representations with high-level information in a time-series way. Comprehensive qualitative and quantitative experiments show that our algorithm can remove artifacts and further generate high-resolution MRI with finer structure, contrast and lesion information. 

% The proposed network achieves better SR criteria and superior visual quality compared to state-of-the-art methods.



% --------------------------------------------
% This is where the content of your paper goes.  Some random
% notes\footnote{Random footnote are discouraged}:
% \begin{itemize}
% \item You should use \LaTeX \cite{Lamport:Book:1989}.
% \item JMLR/PMLR uses natbib for references. For simplicity, here, \verb|\cite|  defaults to
%   parenthetical citations, i.e. \verb|\citep|. You can of course also
%   use \verb|\citet| for textual citations.
% \item You should follow the guidelines provided by the conference.
% \item Read through the JMLR template documentation for specific \LaTeX
%   usage questions.
% \item Note that the JMLR template provides many handy functionalities
% such as \verb|\figureref| to refer to a figure,
% e.g. \figureref{fig:example},  \verb|\tableref| to refer to a table,
% e.g. \tableref{tab:example} and \verb|\equationref| to refer to an equation,
% e.g. \equationref{eq:example}.
% \end{itemize}

% \begin{table}[htbp]
%  % The first argument is the label.
%  % The caption goes in the second argument, and the table contents
%  % go in the third argument.
% \floatconts
%   {tab:example}%
%   {\caption{An Example Table}}%
%   {\begin{tabular}{ll}
%   \bfseries Dataset & \bfseries Result\\
%   Data1 & 0.12345\\
%   Data2 & 0.67890\\
%   Data3 & 0.54321\\
%   Data4 & 0.09876
%   \end{tabular}}
% \end{table}

% \begin{figure}[htbp]
%  % Caption and label go in the first argument and the figure contents
%  % go in the second argument
% \floatconts
%   {fig:example}
%   {\caption{Example Image}}
%   {\includegraphics[width=0.5\linewidth]{example-image}}
% \end{figure}

% \begin{algorithm2e}
% \caption{Computing Net Activation}
% \label{alg:net}
%  % older versions of algorithm2e have \dontprintsemicolon instead
%  % of the following:
%  %\DontPrintSemicolon
%  % older versions of algorithm2e have \linesnumbered instead of the
%  % following:
%  %\LinesNumbered
 
 
 
% \KwIn{$x_1, \ldots, x_n, w_1, \ldots, w_n$}
% \KwOut{$y$, the net activation}
% $y\leftarrow 0$\;
% \For{$i\leftarrow 1$ \KwTo $n$}{
%   $y \leftarrow y + w_i*x_i$\;
% }
% \end{algorithm2e}

% Acknowledgments---Will not appear in anonymized version
% \midlacknowledgments{We thank a bunch of people.}


\bibliography{midl-samplebibliography}


% \newpage

\appendix

\section{Brats experiments}
\textcolor{black}{Considering that the number of patients in the MUSHAC dataset is limited, we also verified our model on the BRATS 2018 public dataset with noise and blur for the super-resolution problem. We generated the LR images with the same method used in \cite{2_mri,4_sr} while the original MRIs were used as the HR ground truth images. 200 patients, 25 patients, and 25 patients are used as the training, validation, and test data, respectively. For each patient, we picked the 25 most informative slices. Our model also achieves better performance (34.486/0.956) than the other baselines (e.g., EDSR (33.989/0.953), DDBPN (34.264/0.954), VDSR (32.742/0.945), SRCNN (27.195/0.918), Bicubic (22.818/0.882)).}

\section{Training details}
\textcolor{black}{In each training stage, nine LR patches with the size of $50\times 50$ are randomly extracted as inputs. The number of neighboring nodes is set as 5. We train our model for 300 epochs with the Adam optimizer; the learning rate is initially set to $10^{-4}$ and is divided by 2 every 80 epochs. We implement the experiments with PyTorch using an NVIDIA TITAN X GPU. The total training process takes almost 18 hours, and the trained model can process a $256\times 256$ image within 0.2\,s.}

\section{Qualitative Evaluation}
\begin{figure}[htb]
\centering
\includegraphics[width=1\textwidth]{artifact_appendix.pdf}
% 	\end{center}
% \vspace{-5mm}	
\caption{\textcolor{black}{Comparison with state-of-the-art methods of artifacts removal with magnification factors $\times$2 and the input size 100$\times$100. Best viewed by zooming in on the screen.}}\label{fig:fig4_appendix}
\vspace{-8mm}
\end{figure}


\begin{figure}[htbp]
\centering
\includegraphics[width=1\textwidth]{exp1_appendix.pdf}	
%   \vspace{-13mm}
% 	\begin{center}
% 		\tabcolsep 1pt 
% 		%p{1.8cm}p{1.8cm}p{1.8cm}p{1.8cm}p{1.8cm}p{1.8cm}
%         \begin{tabular}{p{4cm}p{3cm}p{3cm}p{3cm}p{3cm}}
%       % 1 & 2& 3&4&5 \\
%      Ground truth &  Proposed  &  VDSR &  SRCNN &  Bicubic    \\
%         \end{tabular}
% 	\end{center}	
% \vspace{-5mm}	
\caption{\textcolor{black}{Comparison with state-of-the-art methods of cross-protocol super-resolution on the diffusion MRI data (MUSHAC). Best viewed by zooming in on the screen.}}\label{fig:fig3_appendix}
\vspace{-8mm}
\end{figure}

% This is a boring technical proof of
% \begin{equation}\label{eq:example}
% \cos^2\theta + \sin^2\theta \equiv 1.
% \end{equation}

% \section{Proof of Theorem 2}

% This is a complete version of a proof sketched in the main text.

\end{document}
