\documentclass{midl} % Include author names

\jmlryear{2021}
\jmlrworkshop{Full Paper -- MIDL 2021}


\usepackage{float}

\definecolor{gt}{RGB}{214,39,40}
\definecolor{prop}{RGB}{236,236,42}
\definecolor{nnUnet}{RGB}{0,255,255}
\definecolor{nnUnet_wI}{RGB}{255,0,255}

\jmlrvolume{-- Under Review}
\jmlryear{2021}
\jmlrworkshop{Full Paper -- MIDL 2021 submission}
\editors{Under Review for MIDL 2021}

\title[Whole-Body Soft-Tissue Lesion Tracking and Segmentation]{Whole-Body Soft-Tissue Lesion Tracking and Segmentation in Longitudinal CT Imaging Studies}

\usepackage[bottom]{footmisc}

\midlauthor{\Name{Alessa Hering\midljointauthortext{Contributed equally}\nametag{$^{1,2}$}} \Email{alessa.hering@mevis.fraunhofer.de}\\
\addr $^{1}$ Fraunhofer MEVIS, Bremen, Germany \\
\addr $^{2}$ Diagnostic Image Analysis Group, Radboudumc, Nijmegen, Netherlands \AND
\Name{Felix Peisen\midlotherjointauthor \nametag{$^{3}$}} \Email{felix.peisen@med.uni-tuebingen.de} \\
\addr $^{3}$Department of Diagnostic and Interventional Radiology, University Hospital Tübingen, Germany \AND
\Name{Teresa Amaral\nametag{$^{4}$}} \Email{Teresa.Amaral@med.uni-tuebingen.de} \\
\addr $^{4}$Department of Dermatology, University Hospital Tübingen, Germany \AND
\Name{Sergios Gatidis\nametag{$^{3}$}} \Email{Sergios.Gatidis@med.uni-tuebingen.de} \AND
\Name{Thomas Eigentler\nametag{$^{4}$}} \Email{thomas.eigentler@med.uni-tuebingen.de} \AND
\Name{Ahmed Othman\nametag{$^{3}$}} \Email{Ahmed.Othman@med.uni-tuebingen.de} \AND
\Name{Jan Moltz\nametag{$^{1}$}} \Email{jan.moltz@mevis.fraunhofer.de}%
}

\begin{document}


\maketitle

\begin{abstract}
In follow-up CT examinations of cancer patients, therapy success is evaluated by estimating the change in tumor size. This process is time-consuming and error-prone. We present a pipeline that automates the segmentation and measurement of matching lesions, given a point annotation in the baseline lesion. First, a region around the point annotation is extracted, in which a deep-learning-based segmentation of the lesion is performed. Afterward, a registration algorithm finds the corresponding image region in the follow-up scan and the convolutional neural network segments lesions inside this region. In the final step, the corresponding lesion is selected. We evaluate our pipeline on clinical follow-up data comprising 125 soft-tissue lesions from 43 patients with metastatic melanoma. Our pipeline succeeded for $96\%$ of the baseline and $80\%$ of the follow-up lesions, showing that we have laid the foundation for an efficient quantitative follow-up assessment in clinical routine.
\end{abstract}

\begin{keywords}
Soft-tissue lesion, follow-up, CT, Lesion Tracking, Lesion Segmentation, Image Registration
\end{keywords}

\section{Introduction}
Measurement of metastatic tumors on longitudinal computed tomography (CT) scans is essential to evaluate the efficacy of cancer treatment. The current guideline of metastatic tumor evaluation on CT scans is called response evaluation criteria in solid tumors (RECIST) \cite{Eisenhauer2009}. Manual measurement of the tumors for the RECIST criteria is often time-consuming and error-prone. However, the diameter-based RECIST criteria also undergo continuous changes. Automated approaches might significantly speed up response evaluation and help to handle the ever-growing mass of image-based staging and follow-up evaluations \cite{moawad2020feasibility}. 

Furthermore, radiomics is currently one of the most important topics in radiology. High-throughput extraction of quantitative features resulting in the conversion of medical images into minable data and the subsequent analysis promise new insights into therapy response and hold the potential to revolutionize medical image-based evaluation techniques \cite{gillies2016radiomics}. Both fields have a huge clinical impact due to rising demand for fast and reliable therapy response evaluations. They, however, share a common bottleneck: automated lesion segmentation. Only if this obstacle is overcome will clinicians use the mentioned techniques on a daily basis. 

Metastatic malignant melanoma is the perfect entity to implement a pipeline for full-body lesion segmentation. Besides lung and liver, metastatic lesions of melanoma can be found in almost every organ or tissue, such as lymph nodes, adrenal glands, cerebrum, bone, spleen, and soft tissue \cite{schadendorf2018melanoma}. Whole-body cross-sectional imaging is part of the standard diagnostic work-up for staging, response assessment, and follow-up in patients with advanced melanoma according to current international guidelines. Malignant melanoma has been increasing fast in the last decades and represents a public health matter in several countries due to its high mortality rates \cite{ward2017cutaneous}. 

Among melanoma metastases, soft-tissue lesions provide a particular hurdle. They can arise in a variety of locations (cutaneous, subcutaneous, muscular, retroperitoneal) and shapes (round, multilobular, well defined, invasive), are often primarily small and, if not surrounded by fatty tissue, extremely hard to distinguish. A sufficient segmentation pipeline for soft-tissue metastases in malignant melanoma patients would therefore provide a valuable foundation for further steps towards a full-body lesion segmentation pipeline that could be transferred to other entities.  

To the best of our knowledge, no work has been presented until now that tackles the problem of soft-tissue lesion segmentation in longitudinal CT image series. Lesion segmentation in other anatomical regions, however, has been studied extensively. For example, promising results have been accomplished for liver \cite{bilic2019liver} and kidney lesions \cite{heller2021kits} in challenges. Currently, the most general and successful available approach is the nnU-Net framework of \cite{isensee2020nnunet}, which has shown impressive results for several organ segmentation tasks such as liver, spleen, kidney, pancreas, heart, or aorta segmentation and also outperforms most methods segmenting different lesion types such as pancreas, liver, lung, kidney, or MS lesions. 
nnU-Net \cite{isensee2020nnunet}, initially based on U-Net \cite{ronneberger2015unet}, automatically configures itself, including pre-processing, network architecture, training and post-processing---making it an ideal baseline to build a lesion tracking pipeline.

However, as the lesion segmentation experiments in \cite{isensee2020nnunet} focus only on segmenting lesions in one organ in one scan, it cannot be used ``as is'' and requires some modifications. Only a few works have been presented on lesion tracking \cite{cai2020deepTracking} and on lesion tracking and segmentation in longitudinal image scans (e.g., \cite{xu2011lymphomaTracking,moltz2012workflow,folio2013automated}). In this work, we tackle the problem of longitudinal tracking and segmentation of soft-tissue lesions in whole-body CT scans. 


\section{Method}
In our proposed pipeline, soft-tissue lesions are first identified by a radiologist with one click inside the lesion in the baseline CT scan. This step is introduced to avoid annotation of false positive lesions. We then apply our algorithm to automatically segment and measure the diameter in the baseline and follow-up image. This is done by (1) extracting the region of interest (ROI) around the point annotation of the radiologist and applying our CNN to segment the lesion; (2) registering the baseline to the follow-up image; (3) propagating the region of interest to the follow-up image to constrain the search region and applying the CNN on the propagated region of interest in the follow-up image; and (4) selecting the corresponding lesion in the output of the CNN. Figure \ref{fig:overview} shows an overview of our proposed algorithm. In the following, we describe each step in more detail.
\begin{figure}[]
\centering
  \includegraphics[width=0.95\textwidth]{imgs/midl_overview.pdf}
\caption{Schematic representation of the proposed pipeline for lesion tracking and segmentation.} \label{fig:overview}
\end{figure}

\subsection{Lesion Segmentation}
To generate the training data, we select for each lesion a bounding box around the point annotation of the radiologist with a size of $100$ mm, which is clamped by the image region. Then, we use the nnU-Net framework of \cite{isensee2020nnunet} to train a 3d full resolution model which consists of a U-Net-like \cite{ronneberger2015unet} architecture. The main settings are shown in Table \ref{tab:nnUNet_settings} in the appendix. The trained network is applied to segment the lesion in the baseline and follow-up image on the test dataset. 


\subsection{Registration}
Propagation of lesion segmentations into follow-up images of the same patient allows for a higher degree of automation because the location and approximate appearance of the lesions are already known. In this scenario, registration algorithms can be employed to find the corresponding image region \cite{moltz2012workflow}. For metastatic melanoma, typically full-body or thorax-abdomen CT scans are acquired, which can easily exceed image sizes of $512\times 512\times 1000$, which can be a challenge in terms of memory usage and runtime. The registration has to align the global structures but at the same time be locally accurate enough so that the lesion propagation is precise enough. Therefore, we adopted a three-step approach to automatically register the baseline to the follow-up image, which consists of the following steps: (1) a translational alignment; (2) a rigid registration; and (3) a deformable registration. Hereby, the registration pipeline starts with robust methods with fewer degrees of freedom and moves on to more precise, but less robust methods, which require better starting points due to their higher degrees of freedom.

\paragraph{(1) Translational Alignment}
The translational prealignment is based on a brute force grid search method named FASTA (Fast Translation Alignment), which evaluates a difference measure (here Sum-of-Squared-Differences (SSD), the squared $\ell_2$ norm of the difference image) on a grid of possible translations. Finer grids allow for more precise translation estimation at the expense of increased computational cost. For faster processing, the moving image is resampled to a maximal image size of $128\times128\times 128$. The fixed image is resampled to the same image resolution as the moving image. For the grid generation, we choose a sampling rate of 3, 3, and 51 in x, y, and z-direction respectively. Since the CT scans are centered around the body center, only the z-translation is used for prealignment. 

\paragraph{(2) Rigid Registration}
The translational prealignment in z-direction is used as a starting point for a rigid multi-level registration using the SSD distance measure. The method uses a Gauss-Newton optimization scheme to solve the optimization problem.

\paragraph{(3) Deformable Registration}
The final step is the matrix-free deformable registration of \cite{koenig2018matrix}. The deformation is defined as a minimizer of the cost function
\begin{equation}
    \min_y \mathcal{D}^{\text{NGF}}(\mathcal{F},\mathcal{M}(y))+ \alpha \mathcal{R}^{\text{curv}}(y),
\end{equation}
with the normalized gradient field distance measure $\mathcal{D}^{\text{NGF}}$ \cite{HaberModersitzki2006NGF} that focuses on the alignment of image gradients of the fixed image $\mathcal{F}$ and the deformed moving image $\mathcal{M}(y)$. The second-order curvature regularizer $\mathcal{R}^{\text{curv}}$ \cite{fischer2003curvature} enforces smooth deformation by penalizing spatial derivatives. The parameter $\alpha$ is a weighting factor. The method uses the limited-memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) optimization scheme to solve the optimization problem and is embedded in a multi-level scheme.

\subsection{Lesion Tracking}
We use the registration to propagate the baseline contour to the follow-up scan. While this propagated contour may not be accurate enough due to size changes under therapy, it provides a good initial correspondence. To compensate for registration errors, we enlarge the search region by $50$~mm in every direction to ensure that the corresponding lesion is inside this selected region and to include enough information for the CNN. %The same network as for the baseline segmentation is used to segment the lesion in the follow-up scan. 

\subsection{Lesion Selection}
The CNN is not constrained to segment only one lesion inside the selected region in the follow-up scan. Therefore, we select the lesion whose center is closest to the center of the propagated lesion. To avoid annotation of wrong lesions close by in the case of vanishing lesions under therapy, we only accept a lesion annotated by the network if the Euclidean distance of its center is smaller than $25$~mm to the propagated lesion center.

\section{Experiments and Results}
\subsection{Dataset}
The dataset consists of 206 baseline and follow-up CT scan pairs of patients with metastatic melanoma (Stage IV, AJCC) treated at the Center for Dermato-Oncology at the University Hospital Tübingen, Germany. All patients received either mono (Nivolumab or Pembrolizumab) or combined (Nivolumab+Ipilimumab) immunotherapy or targeted therapy (Vemurafenib+Cobimetinib or Dabrafenib+Trametinib) before the follow-up scan. The patients were split into 163 training and validation cases and 43 test cases with overall 2408 and 125 manually annotated soft-tissue lesions in the baseline images. Training was performed exclusively on baseline images, whereas testing was done on both baseline and follow-up scans. Therefore, we selected patients with lower lesion counts for the test set in order to obtain a diverse set of lesions while keeping the annotation effort feasible. For the test cases, 25 of the 125 lesions are gone in the follow-up image.

\subsection{Baseline Segmentation}
To show the advantage of training the network only on a small region of interest around the lesions, we compare our approach to a network trained on the whole images. However, for the evaluation, we use the closest lesion to the point annotation for both approaches, and therefore, false-positive annotations are not taken into account.

Since the network is not forced to segment anything in the region of interest, we evaluate the percentage of correctly annotated lesions. A lesion counts as correctly annotated if there is an overlap with the segmentation mask. To evaluate the performance of our segmentation network, we use Dice coefficient, average surface distance (ASD), and Hausdorff distance (HD) if the network segmented the correct lesion. Moreover, we evaluate the Surface Dice \cite{nikolov2018surfaceDice} with a threshold of 1 mm, which is a good approximation for the correction effort given an imperfect segmentation mask of a relatively small structure.

When the nnU-Net is trained only on the small region of interest around the point annotation, the network segments the correct lesion in $96\%$, whereas with training on the whole image, only $37.6\%$ of the lesions are annotated. On the correctly annotated lesions, the network trained on the ROI achieves on average a better Dice Score ($0.79$ vs.\ $0.60$), Surface Dice ($0.88$ vs.\ $0.68$), and average surface distance ($1.40$ mm vs.\ $1.77$ mm) but a slightly worse Hausdorff distance ($5.09$ mm vs.\ $4.59$ mm). Note that the number of included lesions for the calculation differs depending on the training mode. Taking all lesions into account the advantage increases to $0.76$ vs.\ $0.23$ for the Dice Score and $0.85$ vs.\ $0.26$ for the Surface Dice. Figure \ref{fig:boxplots} summarizes the quantitative results and Fig. \ref{fig:VisResults} shows several visual examples of the results produced by our network. 

\begin{figure*}[h]
\centering
\setlength{\tabcolsep}{0.001\textwidth}
\begin{tabular}{c}
\includegraphics[width=0.95\textwidth]{imgs/MIDL_segmentation_baseline.eps} \\
\includegraphics[width=0.95\textwidth]{imgs/MIDL_segmentation_followUp.eps}

\end{tabular}
\caption{Comparison of the evaluation metrics for all baseline lesions (upper row) and follow-up lesions (lower row) in which the correct lesion was annotated with the underlying training mode. Therefore, the number of included lesions for the calculations varies depending on the training mode. For the follow-up lesions, the results of the lesions propagated by the registration are shown in yellow. The boxplots show the median line and the mean as a white circle.\label{fig:boxplots}} 
\end{figure*}



\subsection{Registration Accuracy}
We measure the registration accuracy using the center point matching (CPM) accuracy as in \cite{cai2020deepTracking}, which represents the percentage of correctly matched lesions. A match counts as correct when the Euclidean distance between the center of the propagated baseline lesion and the center of the manually annotated follow-up lesion is smaller than a threshold. Since in this application whole-body CT scans are registered and large volume changes of the lesion happen due to therapy, we set the threshold to $25$ mm. For this evaluation, only the lesions which are visible in the follow-up image are taken into account and therefore the number of lesions reduces to 100.


In 95 of the 100 cases, the Euclidean distance was less than the threshold with a mean Euclidean distance of $7.66$ mm. The average absolute offset between the center of the propagated baseline lesion and the center of the manually annotated follow-up lesion is $3.79$~mm, $3.16$~mm and $4.49$~mm in x-, y- and z-direction, respectively. A histogram of the offset is shown in Fig. \ref{fig:regAcc} in the appendix.


\subsection{Follow-Up Segmentation}
We evaluate the follow-up segmentation in the same way as the baseline segmentation. However, the successful segmentation of the follow-up lesion depends not only on the segmentation accuracy itself but on the whole pipeline. For the cases in which the lesion was not propagated accurately enough, segmentation by the nnU-Net was not possible. To evaluate the whole pipeline, those lesions are counted as not correctly annotated lesions. Furthermore, in the 25 cases in which the lesion was fully regressive in the follow-up image, we expect the nnU-Net not to annotate anything. 

In $80\%$ of the lesions, our pipeline successfully annotates the lesion in the follow-up scan with an average Dice Score of $0.80$ and an average Surface Dice of $0.89$. The lesion propagated by the registration has an overlap with the manual annotation in $77.5\%$ with an average Dice score of $0.51$ and a Surface Dice of $0.56$. All quantitative results are summarized in Fig. \ref{fig:boxplots}. All failure cases are visualized in the appendix. In 17 of the 25 cases in which the lesion has disappeared in the follow-up image, the nnU-Net correctly does not segment anything.

\begin{figure*}[h]
\centering
\setlength{\tabcolsep}{0.001\textwidth}
\begin{tabular}{cccccc}
  \includegraphics[width=0.15\textwidth]{imgs/AD-1950-14662__b78432ed2ec14ca29d6a947790346c5d__coG_ba.png}
& \includegraphics[width=0.15\textwidth]{imgs/AD-1950-14662__b78432ed2ec14ca29d6a947790346c5d__coG_fu.png}
& \includegraphics[width=0.15\textwidth]{imgs/AL-1938-15742__07f2ce8d4b3d46b18c00665b7308c375__coG_ba.png}
&  \includegraphics[width=0.15\textwidth]{imgs/AL-1938-15742__07f2ce8d4b3d46b18c00665b7308c375__coG_fu.png}
& \includegraphics[width=0.15\textwidth]{imgs/KK-1938-12562__491f5945f1f645d88344b1cf090af72f__coG_ba.png}
&  \includegraphics[width=0.15\textwidth]{imgs/KK-1938-12562__491f5945f1f645d88344b1cf090af72f__coG_fu.png}
\\
  \includegraphics[width=0.15\textwidth]{imgs/VK-1945-14018__53f24c7feee2480297e9d5ab4d5a7050__coG_ba.png}
& \includegraphics[width=0.15\textwidth]{imgs/VK-1945-14018__53f24c7feee2480297e9d5ab4d5a7050__coG_fu.png}
& \includegraphics[width=0.15\textwidth]{imgs/ER-1940-13111__2adad19c7d834078a110ec377b9a643d__coG_ba.png}
&  \includegraphics[width=0.15\textwidth]{imgs/ER-1940-13111__2adad19c7d834078a110ec377b9a643d__coG_fu.png}
& \includegraphics[width=0.15\textwidth]{imgs/SR-1952-12234__66d8599d8fc64ca1b3ba5cec615ed2d5__coG_ba.png}
&  \includegraphics[width=0.15\textwidth]{imgs/SR-1952-12234__66d8599d8fc64ca1b3ba5cec615ed2d5__coG_fu.png}
\\
  \includegraphics[width=0.15\textwidth]{imgs/JW-1962-14544__9e41b5be37c54e88b458748ffb5b2114__coG_ba.png}
& \includegraphics[width=0.15\textwidth]{imgs/JW-1962-14544__9e41b5be37c54e88b458748ffb5b2114__coG_fu.png}
& \includegraphics[width=0.15\textwidth]{imgs/MB-1938-14928__228e87a893e1460cbed874acf2ac78b6__coG_ba.png}
&  \includegraphics[width=0.15\textwidth]{imgs/MB-1938-14928__228e87a893e1460cbed874acf2ac78b6__coG_fu.png}
& \includegraphics[width=0.15\textwidth]{imgs/EH-1940-14637__beada091fbb044c7b6cc636b8d220dfa__coG_ba.png}
&  \includegraphics[width=0.15\textwidth]{imgs/EH-1940-14637__beada091fbb044c7b6cc636b8d220dfa__coG_fu.png}
\\
  \includegraphics[width=0.15\textwidth]{imgs/KK-1938-12562__4005b3393ee742438f8af8549b32dd51__coG_ba.png}
& \includegraphics[width=0.15\textwidth]{imgs/KK-1938-12562__4005b3393ee742438f8af8549b32dd51__coG_fu.png}
& \includegraphics[width=0.15\textwidth]{imgs/PT-1940-13617__3d84bf043ccc473c8a3830c814152381__coG_ba.png}
&  \includegraphics[width=0.15\textwidth]{imgs/PT-1940-13617__3d84bf043ccc473c8a3830c814152381__coG_fu.png}
& \includegraphics[width=0.15\textwidth]{imgs/LP-1958-13685__e3afbbb81a6a42559c8ca845cc5ec3ec__coG_ba.png}
&  \includegraphics[width=0.15\textwidth]{imgs/LP-1958-13685__e3afbbb81a6a42559c8ca845cc5ec3ec__coG_fu.png}
\\
  {Baseline }
& {Follow-Up}
& {Baseline}
& {Follow-Up}
& {Baseline}
& {Follow-Up}
\end{tabular}
\caption{Visual examples of results produced by our method. Each example includes the baseline and follow-up lesion and therefore consists of two images (left baseline, right follow-up). On the baseline image, the manual annotation (red curve \textcolor{gt}{\rule{.2cm}{.2cm}}) and the nnU-Net annotation trained on the ROI (blue curve \textcolor{nnUnet}{\rule{.2cm}{.2cm}}) and trained on the whole image (pink curve \textcolor{nnUnet_wI}{\rule{.2cm}{.2cm}}) are shown. On the follow-up image, the manual annotation (red curve \textcolor{gt}{\rule{.2cm}{.2cm}}), the propagated lesion (yellow curve \textcolor{prop}{\rule{.2cm}{.2cm}}) and the results of the presented pipeline (blue curve \textcolor{nnUnet}{\rule{.2cm}{.2cm}}) are visualized.   } \label{fig:VisResults}
\end{figure*}



\section{Discussion and Conclusion}
This paper presents a pipeline that automates the segmentation of matching lesions in follow-up CT examinations of cancer patients, given a one-click point annotation in the baseline lesion. We have validated our pipeline on the challenging task of whole-body soft-tissue lesion tracking and segmentation. Our pipeline succeeded for $96\%$ of the baseline lesions and for $80\%$ of the follow-up lesions with an average Dice Score of 0.79 and 0.80, respectively. Furthermore, our pipeline achieves an average Surface Dice of 0.88, which shows that the required correction effort is very low. 

All failure cases in the follow-up image are visualized in Fig. \ref{fig:failureCases} in the appendix showing that the pipeline fails due to different reasons. For some cases, the registration was not accurate enough and therefore a wrong or no lesion was selected even though the correct one was segmented. Other lesions are hard to distinguish from surrounding tissue or they have an untypical shape that might cause problems. In some cases, the lesion split into two smaller lesions in the follow-up scan after the patient received therapy and the nnU-Net segmented both, but just one lesion was selected. In some of these cases, it is also difficult for a radiologist to identify and segment the lesion correctly. Our pipeline still has some limitations which have to be addressed before it could be used in the clinic. Lesions can split or merge over time; however, our pipeline assumes that every lesion in the baseline has zero or one corresponding lesion in the follow-up image. This does not always have to be true. Moreover, lesions that are very close to each other could be wrongly assigned in the follow-up scan. These problems will be solved in future work by integrating consistency rules. Besides, our pipeline is not yet capable of detecting new lesions in the follow-up scan. Furthermore, the current pipeline does not take the appearance of the baseline lesion into account. There are different approaches to integrate this information into the model. The transformed baseline image and the corresponding lesion mask could be used as an additional input for the follow-up model. However, this would mean that two models have to be trained; one for segmenting the baseline image and one for the follow-up images. To train the follow-up network, a sufficient number of lesion annotations has to be available. Unfortunately, we only have the annotations that we used for the evaluations and therefore this approach is not suitable. 
Another approach is a joint-segmentation-registration algorithm as in \cite{li2019hybrid}. We will explore this approach in future work. 


We have trained and evaluated our method on soft-tissue lesions, which are particularly challenging due to their diverse appearance and location. Our promising results suggest that we will be able to extend our approach to other lesion types as well. Additionally, for use in clinical routine, it is sufficient to extract the largest diameter from the segmentation, so that detailed corrections will not be necessary. With our work, we have laid the foundation for an efficient automated follow-up assessment according to the RECIST standard and implementation of automated segmentation for Radiomics analysis in clinical routine.


\midlacknowledgments{We thank Fabian Isensee, Paul Jäger, Simon Kohl, Jens Petersen, and Klaus Maier-Hein for providing the nnU-Net framework. The research was funded by the Deutsche Forschungsgemeinschaft (DFG, German Research Foundation) – 428216905 / SPP 2177.}


\bibliography{hering21}
\newpage

\appendix

\section{Dataset}
\begin{figure}[H]
\centering
\setlength{\tabcolsep}{0.001\textwidth}
\begin{tabular}{ccc}
  \includegraphics[width=0.3\textwidth]{imgs/resolution_x.eps} &
  \includegraphics[width=0.3\textwidth]{imgs/resolution_y.eps} &
 \includegraphics[width=0.3\textwidth]{imgs/resolution_z.eps} \\
 x-direction & y-direction & z-direction
 
\end{tabular}
\caption{Histogram of the image resolution in x-, y-, and z-direction. \label{fig:app_resolution}} 
\end{figure}

\section{Visualization of Deformation Fields}
\begin{figure}[H]
\centering
\setlength{\tabcolsep}{0.001\textwidth}
\begin{tabular}{ccc}
  \includegraphics[width=0.3\textwidth]{imgs/u_AD1950.png} &
  \includegraphics[width=0.3\textwidth]{imgs/u_JW1933.png} &
 \includegraphics[width=0.3\textwidth]{imgs/u_KK1938.png} 
 
\end{tabular}
\caption{Example coronal slices extracted from three deformation fields to give an impression of the smoothness. \label{fig:deformationfields}} 
\end{figure}

\section{nnUNet settings}
\begin{table}[H]
\caption{Main settings chosen by the nnUNet framework to train the segmentation network}
    \centering
    \begin{tabular}{c|c|c}
     name & description &chosen parameter \\
     \hline
     net\_pool\_per\_axis & number of pooling operations in z,x,y direction & 3,5,5 \\ 
     base\_num\_features & number of features after first conv & 32 \\
     conv\_per\_stage & & 2 \\
     optimizer & & SGD \\
     learning rate &  & $\approx 0.00235$ \\
     
     max\_num\_epochs & maximal number of epochs & 1000 \\
     num\_batches\_per\_epoch & number of batches used in every epoch & 250 \\
     batch\_size & number of images per batch & 5 \\
     patch\_size & z,x,y direction &  56 128 128 \\
     normalization\_schemes & see \cite{isensee2020nnunet} for details on CT scheme& (0,'CT')\\
    \hline
    \end{tabular}
    \label{tab:nnUNet_settings}
\end{table}

\section{Failure cases}
\begin{figure}[H]
\centering
\setlength{\tabcolsep}{0.001\textwidth}
\begin{tabular}{cccccc}
  \includegraphics[width=0.15\textwidth]{imgs/failureCases/AK-1968-13162__5d24653e543e460d94a57ebd83de3c7e__coG_ba.png}
& \includegraphics[width=0.15\textwidth]{imgs/failureCases/AK-1968-13162__5d24653e543e460d94a57ebd83de3c7e__coG_fu.png}
& \includegraphics[width=0.15\textwidth]{imgs/failureCases/AL-1938-15742__2ef86c5b4c4c4604a6463f45bc99721d__coG_ba.png}
&  \includegraphics[width=0.15\textwidth]{imgs/failureCases/AL-1938-15742__2ef86c5b4c4c4604a6463f45bc99721d__coG_fu.png}
& \includegraphics[width=0.15\textwidth]{imgs/failureCases/BD-1972-14964__9ae4269b2f24416b9ee03c73baa5488c__coG_ba.png}
&  \includegraphics[width=0.15\textwidth]{imgs/failureCases/BD-1972-14964__9ae4269b2f24416b9ee03c73baa5488c__coG_fu.png}
\\
  \includegraphics[width=0.15\textwidth]{imgs/failureCases/BP-1971-13644__292eed841b7244f09e4a85f409537745__coG_ba.png}
& \includegraphics[width=0.15\textwidth]{imgs/failureCases/BP-1971-13644__292eed841b7244f09e4a85f409537745__coG_fu.png}
& \includegraphics[width=0.15\textwidth]{imgs/failureCases/DS-1937-13345__62904fb3807b49c098f39df6be4739be__coG_ba.png}
&  \includegraphics[width=0.15\textwidth]{imgs/failureCases/DS-1937-13345__62904fb3807b49c098f39df6be4739be__coG_fu.png}
& \includegraphics[width=0.15\textwidth]{imgs/failureCases/EH-1940-14637__65295b6a679f4336aa8f8eada58d68cc__coG_ba.png}
&  \includegraphics[width=0.15\textwidth]{imgs/failureCases/EH-1940-14637__65295b6a679f4336aa8f8eada58d68cc__coG_fu.png}
\\
  \includegraphics[width=0.15\textwidth]{imgs/failureCases/JW-1933-8202__686d378c1d3d45379f5c903340455ed4__coG_ba.png}
& \includegraphics[width=0.15\textwidth]{imgs/failureCases/JW-1933-8202__686d378c1d3d45379f5c903340455ed4__coG_fu.png}
& \includegraphics[width=0.15\textwidth]{imgs/failureCases/JW-1962-14544__9e41b5be37c54e88b458748ffb5b2114__coG_ba.png}
&  \includegraphics[width=0.15\textwidth]{imgs/failureCases/JW-1962-14544__9e41b5be37c54e88b458748ffb5b2114__coG_fu.png}
& \includegraphics[width=0.15\textwidth]{imgs/failureCases/LP-1958-13685__e3afbbb81a6a42559c8ca845cc5ec3ec__coG_ba.png}
&  \includegraphics[width=0.15\textwidth]{imgs/failureCases/LP-1958-13685__e3afbbb81a6a42559c8ca845cc5ec3ec__coG_fu.png}
\\
  \includegraphics[width=0.15\textwidth]{imgs/failureCases/MA-1958-13002__c0b3baa6e740419eb04e29deb0a8dc7b__coG_ba.png}
& \includegraphics[width=0.15\textwidth]{imgs/failureCases/MA-1958-13002__c0b3baa6e740419eb04e29deb0a8dc7b__coG_fu.png}
& \includegraphics[width=0.15\textwidth]{imgs/failureCases/MB-1938-14928__228e87a893e1460cbed874acf2ac78b6__coG_ba.png}
&  \includegraphics[width=0.15\textwidth]{imgs/failureCases/MB-1938-14928__228e87a893e1460cbed874acf2ac78b6__coG_fu.png}
& \includegraphics[width=0.15\textwidth]{imgs/failureCases/PT-1940-13617__3d84bf043ccc473c8a3830c814152381__coG_ba.png}
&  \includegraphics[width=0.15\textwidth]{imgs/failureCases/PT-1940-13617__3d84bf043ccc473c8a3830c814152381__coG_fu.png}
\\
  \includegraphics[width=0.15\textwidth]{imgs/failureCases/PT-1940-13617__80977dd9067a473bb2ccac0949356a8a__coG_ba.png}
& \includegraphics[width=0.15\textwidth]{imgs/failureCases/PT-1940-13617__80977dd9067a473bb2ccac0949356a8a__coG_fu.png}
& \includegraphics[width=0.15\textwidth]{imgs/failureCases/PT-1940-13617__f0336787cfba4b6898c04efaad09baf1__coG_ba.png}
&  \includegraphics[width=0.15\textwidth]{imgs/failureCases/PT-1940-13617__f0336787cfba4b6898c04efaad09baf1__coG_fu.png}
& \includegraphics[width=0.15\textwidth]{imgs/failureCases/ZH-1940-13224__5430e8bcfa964f658515b855b85e4cea__coG_ba.png}
&  \includegraphics[width=0.15\textwidth]{imgs/failureCases/ZH-1940-13224__5430e8bcfa964f658515b855b85e4cea__coG_fu.png}
\\
  {Baseline }
& {Follow-Up}
& {Baseline}
& {Follow-Up}
& {Baseline}
& {Follow-Up}
\end{tabular}
\caption{All cases in which our pipeline fails to segment the lesion in the follow-up image. Each example includes the baseline and follow-up lesion and therefore consists of two images (left baseline, right follow-up). On the baseline image, the manual annotation (red curve \textcolor{gt}{\rule{.2cm}{.2cm}}) and the nnU-Net annotation trained on the ROI (blue curve \textcolor{nnUnet}{\rule{.2cm}{.2cm}}) are shown. On the follow-up image, the manual annotation (red curve \textcolor{gt}{\rule{.2cm}{.2cm}}), the propagated lesion (yellow curve \textcolor{prop}{\rule{.2cm}{.2cm}}) and the results of the presented pipeline (blue curve \textcolor{nnUnet}{\rule{.2cm}{.2cm}}) are visualized. For these cases, we do not apply the lesion selection and therefore some lesions seem to be correctly segmented, however, they are not selected using our criteria. There are different reasons for these failures. In some cases, the registration was not accurate enough and therefore a wrong or no lesion was segmented. Some lesions are hard to distinguish from surrounding tissue (e.g. last column), but also an untypical shape can be a problem.  } \label{fig:failureCases}
\end{figure}

\section{Diameter Error}

\begin{figure}[H]
\centering
  \includegraphics[width=0.65\textwidth]{imgs/diameter_error.eps}
\caption{Cumulative distribution of diameter error between the manual segmentation and the nnU-Net segmentation. Please note that in clinical routine the diameter would not be calculated from a segmentation but measured directly, which might also introduce some errors. The dotted lines visualize the 90th percentiles of the error, which are 3.6~mm for all lesions, 4.1~mm for the baseline lesions and 3.6~mm for the follow-up lesions.} \label{fig:cumDiameterError}
\end{figure}

\section{Registration Accuracy}
\begin{figure}[H]
\centering
\setlength{\tabcolsep}{0.001\textwidth}
\begin{tabular}{cc}
\includegraphics[width=0.4\textwidth]{imgs/eucDist.eps} &
  \includegraphics[width=0.4\textwidth]{imgs/dist_x.eps} \\
  \includegraphics[width=0.4\textwidth]{imgs/dist_y.eps} &
 \includegraphics[width=0.4\textwidth]{imgs/dist_z.eps} 
\end{tabular}
\caption{Histogram of Euclidean distance and the absolute offset between the center of the propagated lesion and the center of the manually annotated follow-up lesion.} \label{fig:regAcc}
\end{figure}


\end{document}
