\documentclass{midl} % Include author names
%\documentclass[anon]{midl} % Anonymized submission

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution
\usepackage{xcolor}

\usepackage{mwe} % to get dummy images
\jmlrvolume{-- Accepted}
\jmlryear{2020}
\jmlrworkshop{Full Paper -- MIDL 2020 submission}
%\editors{Under Review for MIDL 2020}

\title[Understanding AD's structural connectivity through explainable AI]
{Understanding Alzheimer disease's structural connectivity through explainable AI}
\usepackage{hyperref}
\usepackage{natbib}
\usepackage{placeins}


\midlauthor{
\Name{Achraf Essemlali\midljointauthortext{Contributed equally}} \Email{Achraf.Essemlali@USherbrooke.ca}\\
\Name{Etienne St-Onge\midlotherjointauthor} \Email{Etienne.St-Onge@USherbrooke.ca}\\
\Name{Maxime Descoteaux\midljointauthortext{Contributed equally}} \Email{Maxime.Descoteaux@USherbrooke.ca}\\
\Name{Pierre-Marc Jodoin\midlotherjointauthor} \Email{Pierre-Marc.Jodoin@USherbrooke.ca}\\
\addr Computer science department, University of Sherbrooke, Sherbrooke, Canada }


\begin{document}

\maketitle

\begin{abstract}
In the following work, we use a modified version of deep BrainNet convolutional neural network (CNN) trained on the diffusion weighted MRI (DW-MRI) tractography connectomes of patients with Alzheimer's Disease (AD) and Mild Cognitive Impairment (MCI) to better understand the structural connectomics of that disease.  We show that with a relatively simple connectomic BrainNetCNN used to classify brain images and explainable AI techniques, one can underline brain regions and their connectivity involved in AD.  Results reveal that the connected regions with high structural differences between groups are those also reported in previous AD literature. Our findings support that deep learning over structural connectomes is a powerful tool to leverage the complex structure within connectomes derived from diffusion MRI tractography. To our knowledge, our contribution is the first explainable AI work applied to structural analysis of a degenerative disease. %Another contribution of our work is to provide new insights into the structural connectivity that drives the AD pathology. 
\end{abstract}

\begin{keywords}
Structural connectome, diffusion weighted MRI, deep learning, saliency maps, Alzheimer's Disease
\end{keywords}

\section{Introduction}
Early detection of neurodegenerative diseases like Alzheimer's Disease (AD) along with proper treatments can delay its progression \citep{livingston2017dementia, weller2018current}. Several approaches have been explored to better predict, detect and understand the disease. These approaches include biological markers \citep{hampel2008core, kapaki2007cerebrospinal, irizarry2004biomarkers, blennow2015amyloid, patel2011potential, zetterberg2008biomarkers, mattsson2009csf, gomar2011utility, gomez2019challenge}, blood-based bio-markers \citep{henriksen2014future, thambisetty2010blood, mayeux2011blood, doecke2012blood}, neuro-psychological tests \citep{mckhann1984clinical, tierney2005neuropsychological, jacobs1995neuropsychological, weintraub2012neuropsychological}, and artificial intelligence algorithms on medical images \citep{liu2014early, moradi2015machine, li2019deep, jo2019deep, lee2019predicting, litjens2017survey, liu2018applications}. Besides, magnetic resonance imaging (MRI) has been a modality of choice for AD diagnostics and has demonstrated its significance \citep{vemuri2010role}. Most MRI-based techniques for studying AD can be grouped under two main categories: i) MRI anatomical images analysis and ii) structural and functional connectomes (connectivity matrices) \citep{contreras2015structural}. Standard techniques employing MRI can be distinguished between: clinical analysis \citep{engelborghs2013clinical,cummings2019rights}, segmentation techniques \citep{biju2017alzheimer} and machine/deep learning algorithms \citep{jo2019deep}. In this work, we focus on deep learning classification using the structural connectomes derived from diffusion-weighted MRI.

%\subparagraph{}
In structural and functional connectivity analysis, the human brain complexity is represented as an interconnected network. This connectome is a graph whose nodes are brain anatomical regions and edges are connectivity ``strength''. Several studies explored brain networks using functional imaging modalities \citep{prescott2014alzheimer, filippi2018changes}. The knowledge and characterization of this connectome, and underlying changes in brain structure and activity, is essential to study cognitive and behavioral impairments. 

%However, previous AD prediction rely on manually chosen characteristics and none include DW-MRI tractography structural connectivity estimation.  

%\subparagraph{}
Both fMRI and dMRI connectivity matrices have been used widely for studying AD due to the rich information they hold. \citet{prescott2014alzheimer} studied the differences in the structural connectomes among patients with normal cognition (NC), mild cognitive impairment (MCI), and AD while discovering associations between the structural connectome and cortical amyloid deposition. Changes in weighted structural connectome metrics were observed between NC, MCI and AD, with decreases from the NC group to the MCI and AD groups. %, shown for streamline-count in the bilateral frontal, right parietal, and bilateral temporal regions, weighted local efficiency in the left temporal region, and weighted clustering coefficient in the bilateral frontal and left temporal regions.
\citet{filippi2018changes} investigated the structural and functional brain connectomes in patients with AD and MCI. Severe graph analysis abnormalities were distinguished for both the functional and structural connectomes in AD patients compared to NC, where all brain lobes are involved except the basal ganglia and parietal lobes. 
%amnestic MCI; associated with memory loss and AD, and with stable MCI or converting MCI while comparing the functional connectivity in healthy brain network and the topography of brain atrophy in patients, where the study shows the progressive MCI to AD is associated with the loss and breakdown of anatomical brain connections, which affirms the initial hypothesis that the connectome can modulate the spread of AD in the brain. Severe graph analysis abnormalities were distinguished for both the functional and structural connectomes in AD patients compared to NC, where all brain lobes are involved except the basal ganglia and parietal lobes. The measures used are the nodal strength, path length, local efficiency and clustering coefficient. In AD and c-aMCI the study showed that there are severe altered graph analysis properties of the brain connectomes. Also, the regions that are strongly connected to the hippocampus are the most atrophic in AD and c-aMCI patients.
\citet{ye2019connectome} observed connectome abnormalities between different phases of AD. Results underline 13 brain regions involved in the disease.

%\citep{ye2019connectome}, study connectome-wide network analysis of white matter connectivity in AD, where pattern changes in brain structural connectivity caused by AD  were identified by detecting network abnormalities between different phases of disease severity.  The multi-variate data-driven analytical framework employed in this study included no a priori knowledge for seed nodes, while demonstrated capability to efficiently detect the local connection that mostly contributing to the abnormal topology of the brain network. The study reveals the association between brain structural connectivity and AD progression, where extensive disruption of structural connectivity occurs in both prodromal and clinical stage of AD and several key structural disrupted connections demonstrate a promising distinguishing capability for predicting MCI individual converting to AD. The results show 13 brain regions, including frontal, temporal lobes, limbic areas and basal ganglia structures, with significantly different connectivity patterns between the CN and cMCI groups. While 33 regions were found distributed in frontal, temporal, occipital, and limbic areas, with significant connectivity differences between NC and AD subjects. Nine brain regions, including the left orbital part of superior and middle frontal gyrus, the bilateral supplementary motor area, the bilateral insula, the left hippocampus, the left putamen, and the left thalamus demonstrated extremely significant structural pattern changes along with the progression of AD.

%\subparagraph{}
In this paper, we intend to explore to what extent a deep convolutional neural network trained on the connectome of a large number of ADNI subjects can help underline the characteristics of the AD structure (\url{http://adni.loni.usc.edu}). In that perspective, we trained a modified version of the BrainNetCNN~\citep{kawahara2017brainnetcnn} on the connectivity matrices of a heterogeneous set of patients to predict three groups of subjects: normal control (NC), mild cognitive impairment (MCI) and AD. Then, with the help of visualization techniques and a thorough node ablation analysis, we get to visualize brain regions as well as their connectivity that are involved in the prediction of AD.
%\subparagraph{}
%Our objective is thus to use deep learning as a tool to leverage the complex structure within DW-MRI tractography connectomes and give new insights to connectomics approach in AD. To our knowledge, this is the first study of this kind. 

Our work is a contribution to more explainable AI in advanced medical imaging, using deep learning to better understand the specific connectivity of AD through connectivity ablation analysis and saliency map extraction, for understanding how the brain connectivity differs and changes based on the brain's different alterations with dementia. % To our knowledge, our work is the first contribution in the literature using state-of-the-art diffusion MRI tractography. 

%Our findings from the the ablation procedure support the idea that no single region is responsible for AD, but the combined effect of several cortical regions. Also, the entorhinal is the most intense difference between AD and NC along with hippocampus for MCI and NC. These regions are reported in AD research from voxel-based morphometry, cortical thickness or functional connectomics (\citep{jhoo2010discrimination}; \citep{choo2010posterior}; \citep{pennanen2004hippocampus}; \citep{hojjati2017predicting}). These findings show that deep convolution networks can be used to gain insights into the specifics of a neurodegenerative disease such as AD.


\section{Methods}
Since structural connectomes from DW-MRI tractography contain edge weights between pairs of regions, they can easily be represented by a 2D matrix of all connections \citep{jeurissen2019diffusion}. This matrix is an adjacency matrix $A$ of size $N \times N$, where $N$ is the number of regions and $A_{i,j}$ is the weight between regions $i$ and $j$. While this connectivity matrix can be pictured as a 2D image (cf.\ Figure~\ref{fig:img1} for an example of a connectivity matrix), it cannot be fed directly into a regular convolutional neural network (CNN), as the local neighborhood around each element $(i,j)$ is not isotropic. This is due to the very nature of a connectivity matrix, where neighbors of a node are distributed along rows and columns, i.e.\ connections to different brain regions.

%\subparagraph{}
As such, we implemented a modified version of BrainNetCNN \citep{kawahara2017brainnetcnn} originally used to predict cognitive and motor activities in premature infants. This network implements two convolution filters adapted to the context of a connectivity matrix: a so-called \emph{edge-to-edge} (E2E) filter and an \emph{edge-to-node} (E2N) filter.

\begin{figure}[tp]
    \centering
    \includegraphics[width=0.88\textwidth]{fig/AD_BrainNetCNN_Diagram.pdf}
    \caption{The BrainNetCNN-like architecture of our method.}
    \label{fig:img1}
\end{figure}

\subsection*{Edge-to-Edge (E2E) and Edge-to-Node (E2N) layers}
By definition, each region $i$ of a symmetric adjacency matrix $A$ is connected to all elements in the $i^\text{th}$ row ($A_{i\cdot}$) and $i^\text{th}$ column ($A_{\cdot i}$).  As such, it is not related to its local neighbors like a pixel of a regular image would be.  Thus, if such an adjacency matrix is to be processed by a CNN, the usual 2D convolution operation needs to be redefined.  In that perspective, the E2E filter is made of two 1D convolution filters: one spanning along the rows of $A$ and one spanning along the columns of $A$. These filters all process the connectivity matrix $A$ and produce $M$ feature maps as shown in Figure~\ref{fig:img1}.  This simple but effective operation filters the topological locality of brain networks, combining weights of connected edges.  %The output of this convolutional layer is a set of $M$ feature maps.

%\subparagraph{}
%The E2E operation involves two 1D conv filters: one applied along the rows, and applied along the columns. Each E2E filter for an input $A$=$(A^n)_{n \in [\![1, M]\!]}$ where $A^n$=$(A^n _{i, j}) _ { (i, j) \in [\![1, N]\!]^2}$, either an adjacency matrix with $M=1$ for the first E2E layer or a set of feature maps with $M$ is the number of generated feature maps,is mathematically expressed by $B$=$(B _{i, j}) _ { (i, j) \in [\![1, N]\!]^2}$. \\
%Where $B_{i, j} = \sum_{n=1}^{M} \sum_{k=1}^{N} A^{n} _{i, k} * r_{k}  + A^{n} _{k, j}*c_{k}$, with $r=(r_{k})_{k \in [\![1, N]\!]}$ and $c=(c_{k})_{k \in [\![1, N]\!]}$ are the learned weights for the first and second 1D convolution layer respectively.

%\subparagraph{}
After the E2E layer comes the E2N layer. The E2N filters extract features from all the weights in each row in the preceding feature maps and convert them into a single scalar. The E2N layer captures the second-order connectivity (indirect connections) of the brain.
As shown in Figure~\ref{fig:img1}, the E2N layer is formed of $L$ filters and thus returns $L$ vectors of dimensions $N \times 1$. We implement this operation with a 1D convolution filter.

\subsection*{The proposed architecture}
Our proposed network architecture is formed of an E2E layer followed by an E2N layer and two fully-connected (FC) layers. The E2E and E2N layers are hierarchical brain networks feature extraction functions. Note that for better convergence during training, a batch-norm operation was added at the end of each layer. These layers are then followed by two FC layers and then the output layer. To obtain a binary classification (e.g. NC-AD), a \emph{sigmoid} function is added to the output layer. Otherwise, for a three-class prediction (NC-MCI-AD), a \emph{softmax} function is used at the output.

%\subparagraph{}
The \citet{refHyper} algorithm was used to fine-tune the model's hyperparameters, including the number of E2E and E2N filters and the activation function (\emph{ReLu} or \emph{LeakyReLu}). This algorithm relies on the \citet{falkner2018bohb} algorithm, which combines both \emph{HyperBand} \citep{li2017hyperband} and Bayesian optimization.

%\subparagraph{}
The final best architecture is formed of an E2E layer with $23$ filters and a \emph{LeakyReLu} activation function with a negative slope of $0.015976$ followed by an E2N layer with $59$ filters with a \emph{ReLu} activation function. Meanwhile, the FC layers each contain $19$ units, the learning rate is $0.007812$ and the batch size is $25$. Subsequent good configurations among the other possible ones are reported in the appendix.


\subsection*{Training}
The proposed architecture was implemented using \emph{PyTorch}, with \emph{HpBandSter} used to optimize all hyperparameters. In order to ensure that configurations with the best performance were retained, 10-fold cross-validation was used. The loss function is the well-known cross-entropy, which we minimize with the Adam optimizer. For the learning procedure, along with the 10-fold cross-validation, the dataset was split into three sets: training (70\%), validation (15\%) and test (15\%).


\section{Dataset}
We used the ADNI (Alzheimer's Disease Neuroimaging Initiative) dataset \citep{iwatsubo2011alzheimer}, which is a well-known longitudinal dataset with images acquired at different time points: at baseline and after 6 months, 12 months, 24 months and 48 months, where baseline images are MRI images acquired for the first time (day 1). Different releases of ADNI exist. Here, ADNI2 and ADNI-Go were used with clinical-like DW-MRI acquisitions with 41 directions, 2\,mm isotropic resolution and a b-value of 1000\,s/mm$^2$. In order to compute the tractography from DW-MRI, the \citet{theaud2020tractoflow} pipeline with default parameters was employed, ensuring reproducibility and fast processing. Connectivity matrices were estimated from tractography using the ``Lausanne 2008'' brain parcellation, an atlas of anatomical regions \citep{dale1999cortical, hagmann2008mapping}, and streamline count between every pair of regions (see Figure~\ref{fig:img1}).

%\subparagraph{}
After quality control sanity checks, the final dataset is formed of 480 connectivity matrices distributed as follows: 152 NC, 181 MCI and 147 AD. Meanwhile, the baseline sample is formed of 57 NC, 95 MCI and 34 AD. 
The connectivity matrices were obtained with the Freesurfer Desikan-Killiany parcellation tool \citep{desikan2006automated}. This resulted in 83 regions: 68 cortical regions, 14 subcortical (nuclei) regions and 1 brainstem region. These connectivity matrices represent an undirected (symmetric) complete weighted graph of dimensions $83 \times 83$. It is worth mentioning that connectivity matrices were normalized to sum to $1$, in such a way that each element of the matrix represents a probability of a tractography connection occurring between region $i$ and region $j$ in the brain. Figure~\ref{fig:img1} (bottom-left) presents a connectivity matrix from DW-MRI tractography between cortical regions.

%\subparagraph{}
Furthermore, it is known that streamline count between regions is heavily dependent on the size or surface area of cortical regions \citep{girard2014towards}.
Hence, the local volume of each cortical region was added in the previously generated connectivity matrices. In order to obtain the connectivity matrices with the cortical volume, the diagonal of the initial connectivity matrices has been filled with cortical volume of each region. This diagonal was also normalized, summing to $1$, resulting in reconstructed connectivity matrices with a total sum equal to $2$. As a result, our experiments were tested for both types of matrices, with and without cortical region volume in the diagonal.

\section{Experiments}
As mentioned before, we hypothesize that the use of a trained CNN can help better understand the specifics of the AD connectomics. We do this through two experiments: 1) an ablation analysis to measure to what extent a region and/or an edge can affect the prediction of the neural network and 2) a visualization experiment to recover which areas of the brain drive most of the output of the neural network.

\subsection*{Regions and connections ablation analysis}
The main idea here is to change the connectivity between regions of the brain in order to evaluate the impact of that change on the overall performance of the model. For this, three approaches were implemented: a \emph{node ablation}, a \emph{node randomization} and an \emph{edge ablation}. The \emph{node ablation} forces to zero the connections between a region $i$ and every other region. The \emph{node randomization} ``randomizes'' values of connectivity between a region $i$ and the other regions while keeping the same average instead of forcing them to zero. As for \emph{edge ablation}, we set to zero the connection between regions $i$ and $j$. This last approach is also tested with a combination of edges, to a maximum of 4 connections simultaneously. The ablation analysis is done in turn for each region and each edge.

\subsection*{Saliency map extraction}
The goal of the ablation analysis is to identify if a node or an edge is responsible on its own for certain predictions of the network. In this section, we want to identify if a group of nodes or edges is responsible for certain outputs of the neural net. Hence, a legitimate step to understand the regions driving the model prediction is by retropropagating the gradient from a maximally activated output neuron all the way to the input connectivity map $A$. We did so after training the network to discriminate between NC, MCI and AD. The magnitude of the gradient shall thus give us a clue on which combination of regions and edges is most important for predicting these classes.

%\subparagraph{}
For interpretability and explainability of the results, advice from an expert neuroanatomist is included in the discussion. Hence, the interpretation will depend on the resulting features visualized over averaged inputs for each class and prior knowledge from the AD literature.

\section{Results}
Results given in this section are from the learned model applied to the test set, after training and validation, including a 10-fold cross-validation. The accuracies of the one-to-one predictions are as follows: $78\%$ for NC-MCI ($45$ test samples), $91\%$ for NC-AD ($45$ test samples) and $81\%$ for MCI-AD ($49$ test samples). Table~\ref{tab:tabler} summarizes the one-to-one predictions with the following metrics: prediction precision, recall, F1-score, accuracy of training, validation and test, for both datasets: with and without cortical volume for each region in the diagonal. For the one-to-all prediction, NC-MCI-AD ($72$ test samples), when incorporating cortical region volume in the diagonal of the matrices, the score improves from $76\%$ to $78\%$.

\begin{table}[h!]
    \centering
    \begin{tabular}{|l|c||c|c|c|c|c|}
    \hline
          Prediction & 
          \begin{tabular}{c} Cortical \\ regions \\ volume \end{tabular} & 
          precision & 
          recall & F1-score & 
          \begin{tabular}{c}valid.\\acc.\end{tabular} & 
          \begin{tabular}{c}test\\acc.\end{tabular} \\
         \hline
         NC - MCI &   & 86\% & 70\% & 77\% &  79\% & 78\% \\ %\hline
         NC - AD & no  & 95\% & 86\% & 90\% & 85\% & 91\%  \\ %\hline
         MCI - AD &   & 78\% & 81\% & 80\% &  71\% & 81\% \\ \hline
         NC - MCI &  & 74\% & 74\% & 74\% &  77\% & 72\% \\ %\hline
         NC - AD~ & yes & 91\% & 91\% & 91\% &  95\% & 91\% \\ %\hline
         MCI - AD &  & 80\% & 90\% & 85\% &  75\% & 86\% \\ \hline
    \end{tabular}
    \caption{Reported metrics for the experiments with and without regions volume.}
    \label{tab:tabler}
\end{table}

\subsection*{Regions and connections ablation analysis}
To our surprise, shutting down nodes and edges did not decrease in any significant way the predicted scores. In addition, the \emph{node randomization} decreased our prediction accuracy to 50\%, which emphasizes the importance of the structure within connectomes and how regions are connected between each other.

\subsection*{Saliency map visualization}
As mentioned before, we retropropagated the gradient from the maximally activated output neuron associated with AD, MCI and NC. Regions with higher values in the AD and MCI saliency maps are: the hippocampus, amygdala, parahippocampal, entorhinal and fusiform regions.

%\subparagraph{}
To further illustrate the difference between the activated regions of AD, MCI and those of NC, we subtracted the NC saliency map from the AD and the MCI saliency maps.  We did so with the purpose of underlining the specifics of the AD and MCI connectomes.  The subtracted saliency maps are illustrated in Figure~\ref{fig:saliency2}. This revealed that the entorhinal was the most intense difference between AD and NC along with hippocampus for MCI and NC.
These regions are reported in AD research from voxel-based morphometry, cortical thickness or functional connectomics \citep{jhoo2010discrimination,choo2010posterior,pennanen2004hippocampus,hojjati2017predicting}.

\begin{figure}[h!]
    \centering
    \includegraphics[width=0.99\textwidth]{fig/salienty.pdf}
    \caption{Saliency map features visualization resulting from the difference of two saliency maps generated with the one-to-all model: A) MCI minus NC, B) AD minus NC. Diagonal values are presented with node color, and other values are represented with edge color. The red and green circles are drawn around the hippocampus and entorhinal nodes, two regions known for their implication in AD.}
    \label{fig:saliency2}
\end{figure}


\section{Discussion}
We have shown that CNNs adapted to the structure of DW-MRI tractography connectomes can be used to classify MCI and AD afflicted brains. Moreover, we showed that our trained network could be used to gain insights into the structural connections that drive the AD pathology. 

\subsection*{AD prediction}
Previously reported results for MCI and AD prediction are on the order of 80\%, e.g.\ 60\%--70\% from MRI morphological methods \citep{lisowska2019joint}, and 80\%--90\% with functional MRI approaches \citep{hojjati2017predicting}. On the other hand, \citet{abrol2020deep} proposed a deep residual neural network for predicting the progression of AD, achieving a median accuracy of 91\% for AD vs NC, and 86\% for both MCI vs NC and MCI vs AD. The method published by \citet{li2015robust} consists of a deep learning neural network to identify the progression of AD based on MRI and PET modalities; it uses advanced techniques for improving the model prediction, such as dropout, stability selection, an adaptive learning factor and a multitask learning strategy, and reported an accuracy of 91.4\% for AD vs NC, 77.4\% for MCI vs NC and 70.1\% for MCI vs AD.\\
Our work shows competitive prediction percentages and also emphasizes that the key challenge in AD prediction is the prediction between MCI and AD, and between NC and MCI. These are the hardest classification tasks, where disease prediction is not clear-cut, and most likely requires more information (multi-modality, genetics, amongst others).
%TODO, a word on volume YES vs NO. Why sometimes it improves, why others not? Idea? Future work to explore? Say something.

\subsection*{Regions and connections ablation analysis}
The results of the ablation procedure support the idea that \textbf{\emph{no}} single region and its connections are responsible for AD prediction, but rather the combined effect of several cortical regions that are directly or indirectly connected via long-range fiber tracts. By indirect connectivity, we mean that a second-order connectivity exists between these regions. DW-MRI tractography is the only non-invasive modality that can provide this structural connectivity brain architecture, which is essential and should be considered in future AD studies.


\subsection*{Saliency map visualization}
The amplitude of the retropropagated gradient underlines which regions strongly correlate with the neural net prediction. However, this correlation could be explained by a lower or higher structural connectivity estimated from the DW-MRI; thus, the analysis of the saliency map should be interpreted with care.

\subsection*{Limitations and future directions}
One of the current limitations of our work is the absence of anatomical priors for the structural connectome reconstruction. As such, more insights from the disease along with anatomical constraints could improve results. Since incorporating cortical region volumes can improve the prediction, adding more information from relevant brain features could, furthermore, increase the model power. For example, one could include more information from diffusion, such as fractional anisotropy (FA) and mean diffusivity (MD), as well as more information from other MRI contrasts (e.g.\ cortical thickness, myelin, functional connectivity). As a result, a future direction in predicting AD, and its progression with MCI, lies in the application of advanced geometric or graph CNNs over the connectome \citep{bronstein2017geometric}. Furthermore, along with continuous progress and efforts in creating larger datasets, a regression problem for AD progression prediction could be formulated, so that the disease progression can be assessed as a continuum in time.


\section{Conclusion}

In this paper, we conducted an explainable AI experiment to better understand the connectomic structure of AD.  From a CNN trained on the brain connectomes of ADNI patients, we showed from an ablation procedure that no single region is responsible for AD, but the combined effect of several cortical regions.  We also showed that the entorhinal is the most intense difference between AD and NC along with hippocampus for MCI and NC. These regions are reported in AD research from voxel-based morphometry, cortical thickness or functional connectomics \citep{jhoo2010discrimination,choo2010posterior,pennanen2004hippocampus,hojjati2017predicting}.  Our findings thus show that deep convolution networks can be used to gain insights into the specifics of a neurodegenerative disease such as AD.  This could have important implications in neurodegenerative diseases analysis.

%Structural connectomes from DW-MRI tractography contain rich information about underlying anatomical architecture of the brain. The proposed neural network architecture is useful to extract features within brain connectomes that predict the AD pathology. Our methodology confirms that a network of direct and indirect long-range structural connections may drive disease progression and need to be further studied with complimentary information in the future. This could have important implications in neurodegenerative diseases analysis.

\bibliography{biblio}


%\appendix

%\FloatBarrier
%\section{Hyperparameters}
%\FloatBarrier
%\begin{table}[h!] 
% \centering
% \setlength{\tabcolsep}{2pt}
% \def\arraystretch{1}% 
%    \small
%    \begin{tabular}{|c|c|c|c|c|c|c|c|c|c|c|}\hline
%           \begin{tabular}{c}E2E \\ \#layer\end{tabular}& 
%           \begin{tabular}{c}E2E \\ \#filters\end{tabular}& 
%           \begin{tabular}{c}E2E \#2\\ \#filters\end{tabular}& 
%           \begin{tabular}{c}E2E \\  \emph{ReLu}\end{tabular}& 
%           \begin{tabular}{c}E2N \\ \#filters\end{tabular}& 
%           \begin{tabular}{c}E2N\\  \emph{ReLu}\end{tabular}&  
%           \begin{tabular}{c}FC \\ nodes\#1 \end{tabular}&     
%           \begin{tabular}{c}Nb \\ epochs \end{tabular}&  
%           \begin{tabular}{c}batch \\ size \end{tabular}& 
%           \begin{tabular}{c}\emph{LeakyReLu} \\ slope \end{tabular}& 
%           \begin{tabular}{c}Learning \\ rate \end{tabular}\\ \hline
           
%          1 &	52	& *** &	None & 39 & ReLu & 13 & 190 & 34 & None & 0.0002638 \\ \hline
%          1 & 91 & *** & LeakyReLu & 83 & ReLu & 8 & 72 & 40 & 0.0634020 & 0.0005544 \\ \hline
%          2 & 59 & 32 & LeakyReLu & 51 & LeakyReLu & 15 & 86 & 31 & 0.0637440 & 0.0006991 \\ \hline
%          1 & 83 & *** & ReLu & 56 & None & 24 & 122 & 27 & None & 0.0003708 \\ \hline
%          2 & 17 & 92 & LeakyReLu & 67 & LeakyReLu & 24 & 124 & 38 & 0.0960874 & 0.0033806 \\ \hline
%          1 & 61 & *** & None & 5 & LeakyReLu & 14 & 78 & 19 & 0.0443389 & 0.0026034 \\ \hline
%          1 & 82 & *** & None & 49 & None & 4 &	185 & 30 & None & 0.0022742 \\ \hline
%          1 & 3 & *** & LeakyReLu & 4 & None & 8 & 96 & 24 & 0.0365603 & 0.0082176 \\ \hline
%          1 & 60 &	*** & LeakyReLu & 31 & ReLu & 18 & 68 & 39 & 0.0306985 & 0.0004283 \\ \hline
%          1 & 80 & *** & None & 10 & None & 9 & 59 & 15 & None & 0.0066648 \\ \hline

%    \end{tabular}
%    \caption{Subsequent 10 best configurations of hyperparameters: Single or double E2E stack, number of E2E filters ($M$), number of filter in the secound E2E ($M_2$), \emph{ReLu} activation for E2E, number of E2N filters ($M$), \emph{ReLu} activation for E2N, number of nodes fully-connected layers, number of epochs, batch size, slope for \emph{LeakyReLu}, and learning rate.}
%    \label{tab:tab1}
%\end{table}{}
%\FloatBarrier

%\section{Saliency Circle Graphs}
%\begin{figure}[h!]
%    \centering
%    \includegraphics[width=0.99\textwidth]{fig/ADAVG_c.png}
%    \caption{Saliency circle graph resulting from the gradient of the AD prediction.}
%    \label{fig:saliency_circ}
%\end{figure}


\end{document}
