\documentclass{midl} % Include author names
%\documentclass[anon]{midl} % Anonymized submission

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution

\usepackage{mwe} % to get dummy images
\usepackage{hhline}
\usepackage{placeins}
\usepackage{pdflscape}
\usepackage{rotating}
\usepackage{amsmath,amsfonts}
\usepackage{mathrsfs}
% \usepackage{colortbl}
% \usepackage{tabularx}
\usepackage{stfloats}
\usepackage{multirow}

% Header for extended abstracts
\jmlrproceedings{MIDL}{Medical Imaging with Deep Learning}
\jmlrpages{}
\jmlryear{2021}

% to be uncommented for submissions under review
% \jmlrworkshop{Short Paper -- MIDL 2021 submission}
% \jmlrvolume{-- Under Review}
% \editors{Under Review for MIDL 2021}

\title[Deep Bone Aging]{Virtual Bone Shape Aging}

\midlauthor{\Name{Francesco Caliv\'a}        \Email{francesco.caliva@ucsf.edu}\\
	\Name{Alejandro Morales Martinez} \Email{alejandro.moralesmartinez@ucsf.edu}\\
		\Name{Sharmila Majumdar} \Email{sharmila.majumdar@ucsf.edu}\\
	\Name{Valentina Pedoia} \Email{valentina.pedoia@ucsf.edu} \\
	\addr Center for Intelligent Imaging, University of California, San Francisco, United States of America}


\begin{document}

\maketitle

\begin{abstract}
 We use deep learning to age knee bone surfaces by four years. We propose to encode an MRI-based bone surface in a spherical coordinate format, and use these spherical maps to predict shape changes over a 48-month time frame, in subjects with and without osteoarthritis. The experiments show that a 2D V-Net can predict bone surface shape with a mean absolute error of about 1~mm. Our code is available \href{https://github.com/fcaliva/Bone_Shape_Virtual_Aging}{\underline{here}}.
 \end{abstract}

\begin{keywords}
Bone Shape, Osteoarthritis, Spherical Encoding.
\end{keywords}

\section{Introduction}
Knee osteoarthritis (OA) is a complex joint disease with a global prevalence approaching 5\%~\citep{cross_global_2014}. While OA development involves all tissues of the knee joint, it is considered to be mechanically driven by load-dependent changes in the subchondral bone~\citep{neogi_clinical_2012}. As such, bone shape changes are an appealing outcome target for clinical trials, and patient-specific prediction of bone shape change trajectories could be of great interest in understanding the impact of specific intervention strategies. In this study, we aim to explore, for the first time, the use of deep learning to predict bone shape changes over a 48-month time frame in subjects with and without OA.
\begin{figure}[htbp]
 % Caption and label go in the first argument and the figure contents
 % go in the second argument
\floatconts
  {fig:fig1}
  {\vspace{-0.9cm}\caption{Overview of the proposed approach. All 2D V-Nets consisted of 5 levels with 3, 4, 3, 4, and 2 convolutions per level, respectively.
  %Bone spherical maps from three time points (baseline,  12 months,  and 24 months) were used as inputs to a 2D V-Net model with the goal to predict the longitudinal bone surface changes at the 72 months timepoint.
  }}
  {\includegraphics[width=0.8\linewidth]{figures/pipeline_final_midl}}
\end{figure}

\section{Methods and Experiments}
This study uses data from the Osteoarthritis Initiative study, in which 4,796 subjects were scanned at 7 different time points spanning 8 years. In our previous work, we used 3D Sagittal Double Echo Steady-State (3D-DESS) Magnetic Resonance Imaging (MRI) data to segment the knee bone using a V-Net model~\citep{milletari2016v}; we then encoded bone shape features in a 2D spherical map that was used to diagnose and predict future OA~\citep{morales2020learning}. For this study, we used the femur bone spherical maps from three time points (baseline, 12 months, and 24 months) as inputs to a model tasked with predicting the longitudinal bone surface changes at the 72-month time point (Fig.~\ref{fig:fig1}). We used 4,133 subjects split into 2855/598/680 train/val/test sets. BMI, age, and sex were controlled by testing for statistical independence across the splits.
The first of our three implemented approaches was a modified 2D V-Net, used as a baseline model (Fig.~\ref{fig:fig1}). This model was given the concatenation of the spherical bone encodings of the three initial time points as input, and it produced the bone shape at a subsequent time point (72 months).
For the second approach, we corrupted the input spherical maps with white noise ($\mu=0$, $\sigma^2=0.03$), and utilized the 2D V-Net as a denoising autoencoder. This solution was chosen because we observed that the naive encoder-decoder excessively exploited the input information and merely replicated the latest input time point, which would prevent the network from learning meaningful representations of the data. The final approach was to give the network more explicit supervision for the bone shape changes. We engineered the network target as the difference between the bone spherical maps at the 24- and 72-month time points.
For all the networks, two sets of experiments were conducted. In the first, parameters were tuned by minimizing a mean-squared error (MSE) loss; in the second, by minimizing a combination of mean-absolute error (MAE) and structural similarity index metric (SSIM) losses, which in our previous work~\citep{caliva2020breaking} proved instrumental for the restoration of fine details when reconstructing images. %A factor 6.7 was used to empirically rescale SSIM and MAE.
% In all the models, batch size was set to 8. To prevent overfitting, a 0.05 dropout rate was implemented, and training was interrupted using the early-stopping practice when no improvements in the validation set (in terms of MAE) were observed for 15 consecutive validation iterations. Adam optimizer~\cite{kingma2014adam} with learning rate $1\times 10-5$ was adopted.
In all the models, the Adam optimizer with a learning rate of $1\times 10^{-5}$, a batch size of 8 samples, a dropout rate of 0.05, and early stopping with a patience of 15 validation iterations without improvement were used.

\begin{table}[htbp]
\floatconts
  {tab:results}
  {\vspace{-0.5cm}\caption{Bone shape prediction quality, measured in terms of mean-absolute error (MAE) in mm and structural similarity index metric (SSIM).% Results are stratified per group (All/OA/Not-OA) and per objective function used during model training. Among the models, `Enc-Dec' is the baseline V-Net model; `Denoiser' is the model which processed noisy inputs, `$\Delta$\textit{Shape}' is the model which predicted the difference between the bone surface encoding at the next time point and the last time point in the input data. With respect to the last, the residual is summed-up to the last input time point to formulate the predicted next-step bone shape.
  }}%
   {\vspace{-0.5cm}\resizebox{0.95\columnwidth}{!}{\begin{tabular}{c|c|c c c|c c c|c }
\hhline{---~--~--}
\multicolumn{3}{c}{All (N=680)} & & \multicolumn{2}{c}{OA (N=39)} & & \multicolumn{2}{c}{Not-OA (N=641)} \\
\hhline{---~--~--}
Model & MAE  & SSIM & & MAE & SSIM & & MAE & SSIM \\
\hhline{---~--~--}
Enc-Dec & 1.409 (0.653) &  0.922 (0.021) &  & 1.496 (0.458) &  0.920 (0.019) & & 1.404 (0.662) &  0.922 (0.021)\\  
Denoiser & 1.653 (0.648) &  0.926 (0.022) &  & 1.768 (0.517) &  0.924 (0.019) & & 1.646 (0.655) &  0.926 (0.022)\\
$\Delta$ Shape & 1.076 (0.630) &  0.912 (0.024) &  & 1.080 (0.444) &  0.911 (0.021) & & 1.075 (0.639) &  0.912 (0.024)\\
\hhline{---~--~--}
\multicolumn{1}{c}{} \\[-1.2ex]
\multicolumn{9}{c}{A) Trained using $\mathscr{L} = \mathrm{SSIM} + 6.7 \cdot \mathrm{MAE}$}\\
\multicolumn{1}{c}{} \\[-2.2ex]
\multicolumn{1}{c}{} \\[-2.2ex]
\hhline{---~--~--}
\multicolumn{3}{c}{All (N=680)} & & \multicolumn{2}{c}{OA (N=39)} & & \multicolumn{2}{c}{Not-OA (N=641)} \\
\hhline{---~--~--}
Model & MAE & SSIM & & MAE & SSIM & & MAE & SSIM \\
\hhline{---~--~--}
Enc-Dec & 1.283 (0.641) &  0.925 (0.021) & & 1.332 (0.452)&  0.922 (0.020) & &  1.280 (0.651)&  0.925 (0.021) \\
Denoiser & 1.853 (0.590) &  0.868 (0.018) & & 1.934 (0.445)&  0.866 (0.017) & &  1.849 (0.598)&  0.868 (0.018) \\
$\Delta$ Shape & 1.272 (0.721) &  0.911 (0.025) & & 1.206 (0.459)&  0.910 (0.021) & &  1.276 (0.734)&  0.911 (0.025) \\
\hhline{---~--~--}
\multicolumn{1}{c}{} \\[-1.2ex]
\multicolumn{9}{c}{B) Trained using $\mathscr{L} = \mathrm{MSE}$}
\end{tabular}}}
\end{table}


% \section{Discussion, Conclusions and Future Directions}
\section{Results and Conclusions}
We predicted femoral bone remodeling 48 months ahead of time in a cohort of patients with osteoarthritis and controls. Table~\ref{tab:results} shows model comparisons on the whole test set and stratified by OA status. In general, models trained with the combined loss outperformed the others. The best results in terms of MAE were obtained by the $\Delta$\textit{Shape} model. A visual example comparing actual and predicted bone shape changes is reported in Fig.~\ref{fig:fig2}.
\begin{figure}[h]
\floatconts
  {fig:fig2}
  {\vspace{-1cm}\caption{Views of a distal femur, showing the ground-truth bone surface change over 48 months (left) and the bone surface change predicted by our model (right).}}
  {\includegraphics[width=1\linewidth]{figures/deltashape}}
\end{figure}

To the best of our knowledge, we are the first to attempt to solve this challenging but significant task. Bone remodeling is a precursor of osteoarthritis, and being able to characterize remodeling before the manifestation of the disease can greatly impact preventative care as well as diagnosis and patient management.
%This seminal work poses the basis for several new directions. %Immediate next steps will extend the prediction to all the bones of the knee joint as well as other joints. 
In this proof-of-concept work, optimization of the deep learning framework was beyond the scope; we aim to investigate alternative architectures and training paradigms %which could include adversarial and contrastive learning 
in an extended version of this work.
% In the extension of this paper, we will perform a systematic analysis of which metrics better assess the quality of shape prediction.
% Future step
% Registration and Recurrent network
% Acknowledgments---Will not appear in anonymized version
% \midlacknowledgments{We thank a bunch of people.}
\vspace{-0.3cm}
\bibliography{midl-samplebibliography}
\vspace{-0.5cm}
\end{document}
