% This is samplepaper.tex, a sample chapter demonstrating the
% LLNCS macro package for Springer Computer Science proceedings;
% Version 2.21 of 2022/01/12
%
\documentclass[runningheads]{llncs}
%
\usepackage[T1]{fontenc}
% T1 fonts will be used to generate the final print and online PDFs,
% so please use T1 fonts in your manuscript whenever possible.
% Other font encodings may result in incorrect characters.
%
\usepackage{multirow}
\usepackage{graphicx}
% Used for displaying a sample figure. If possible, figure files should
% be included in EPS format.
%
% If you use the hyperref package, please uncomment the following two lines
% to display URLs in blue roman font according to Springer's eBook style:
%\usepackage{color}
%\renewcommand\UrlFont{\color{blue}\rmfamily}
\usepackage[pagebackref=true,breaklinks=true,colorlinks,bookmarks=false]{hyperref}
%
\begin{document}
%
\title{A Two-Step Deep Learning Approach for Abdominal Organ Segmentation}
%
%\titlerunning{Abbreviated paper title}
% If the paper title is too long for the running head, you can set
% an abbreviated paper title here
%
\author{Jianwei Gao\orcidID{0000-0002-6358-4117} \and
Juan Xu \and Honggao Fei \and Dazhu Liang}
%
\authorrunning{Gao et al.}
% First names are abbreviated in the running head.
% If there are more than two authors, 'et al.' is used.
%
\institute{Digital Health China Technologies Co., LTD, Beijing, China\\
\href{mailto:gaojw@dhctech.com}{gaojw@dhctech.com}  \href{mailto:xujuan@dhctech.com}{xujuan@dhctech.com}  \href{mailto:feihg@dhctech.com}{feihg@dhctech.com}  \href{mailto:liangdz@dhctech.com}{liangdz@dhctech.com}}
%
\maketitle              % typeset the header of the contribution
%
\begin{abstract}
Accurate delineation and analysis of anatomical structures within medical images are essential in various clinical applications, with medical image segmentation playing a key role. In the context of abdominal imaging, the precise segmentation of organs like the liver, spleen, and kidneys holds significant importance for tasks such as diagnosis, treatment planning, and surgical interventions. However, achieving precise and efficient segmentation of abdominal organs poses significant challenges due to the variability in organ shape, size, and appearance across different patients and imaging modalities. This MICCAI FLARE23 segmentation paper presents a solution to the challenging problem of segmenting 13 organs and tumors from CT scans, given 2200 CT scans with partial labels and 1800 CT scans without labels, while balancing model performance and resource consumption. To address these challenges, the paper proposes a two-step segmentation approach that combines organ segmentation and tumor segmentation, both of which are accomplished with the nnU-Net model. We also crop some top and bottom slices for faster processing. Experimental results on the FLARE 2023 test dataset show a mean Dice Similarity Coefficient of 0.0361 and a Normalized Surface Dice of 0.0331 for organs, and a mean Dice Similarity Coefficient of 0.005 and a Normalized Surface Dice of 0 for lesions. In addition, our method required 80.28\,s of running time and 158993\,MB of GPU consumption.

\keywords{Abdominal organ segmentation \and Supervised Learning \and nnU-Net.}
\end{abstract}

\section{Introduction}

Medical image segmentation plays a pivotal role in various clinical applications, enabling the accurate delineation and analysis of anatomical structures within medical images. However, achieving precise and efficient segmentation of abdominal organs poses significant challenges, because it typically requires a large amount of labeled data to train an accurate model, while manually annotating organs from CT scans is a time-consuming and labor-intensive process. Furthermore, abdominal organs may have complex morphological structures and heterogeneous lesions, which makes segmentation a more difficult task.

In recent years, deep learning has become the mainstream method for medical image analysis, demonstrating remarkable capabilities in automated organ segmentation tasks~\cite{Kart2022}. Specifically, the nnU-Net model~\cite{isensee2021nnu} has emerged as a powerful framework for achieving state-of-the-art results in medical image segmentation. nnU-Net combines the strengths of the U-Net architecture with advancements in neural network design and training strategies, allowing for improved accuracy and robustness. Semi-supervised segmentation~\cite{wang2022semisupervised} is a type of segmentation where the training set consists of both labeled and unlabeled data. The goal is to assign pseudo-labels to the pixels of unlabeled images. This approach is useful when obtaining labeled data is expensive or time-consuming, which makes it well suited to this challenge.

Because no training data are labeled with all 14 classes, while data labeled with the 13 organ classes are available, in this paper we break the problem down into two tasks: organ segmentation and tumor segmentation. Therefore, we propose an approach that trains two nnU-Net models on labeled data, which are used to segment organs and tumors, respectively. A post-processing step is used to merge the results of the two deep learning models at inference time.

\section{Method}

%###########################
\subsection{Preprocessing}
We use several pre-processing strategies as follows.
\begin{itemize} 
 \item Data selection and preprocessing

 We select the training cases that have all 13 organ labels and the cases that have tumor labels, thus obtaining 222 cases for organ segmentation and 735 cases for tumor segmentation. We then split them into training and validation sets with an 8:2 ratio. 
 \item Cropping strategy
 
 We use the CT scans as the data source to generate the bounding box of foreground, and then crop only the foreground object of the images.
 \item Resampling method for anisotropic data
 
 We resample the original data to unify the voxel spacing into $[1.0, 1.0, 1.0]$.
  
 \item Intensity normalization method

We collect intensity values from the foreground classes (all but the background and ignore) from all training cases, compute the mean, standard deviation as well as the 0.5 and 99.5 percentile of the values. Then clip to the percentiles, followed by subtraction of the mean and division with the standard deviation. The normalization that is applied is the same for each training case (for this input channel).

\end{itemize}

\subsection{Deep Network}
Figure~\ref{fig:Network} illustrates the applied 3D nnU-Net~\cite{isensee2021nnu}, where a 3D U-Net architecture is adopted. We use the leaky ReLU function with a negative slope of 0.01 as the activation function. Our first 3D nnU-Net has 14 output channels, corresponding to the background and 13 organs, while our second 3D nnU-Net has 2 output channels, corresponding to the background and the tumor. In this case, only data with all 13 organ labels and data with tumor labels are used; the other data are discarded. Unlabeled images were not used.

\begin{figure}[htbp]
\centering
\includegraphics[scale=0.3]{imgs/unet.png}
\caption{Our 3D U-Net architecture}
\label{fig:Network}
\end{figure}


We use the sum of Dice loss (after applying a softmax function) and cross-entropy loss as the loss function, because it is a popular choice of loss function and has been proven to be robust in various
medical image segmentation tasks.

When predicting a single image with the trained segmentation model, we first resample it to a voxel spacing of $[1.0, 1.0, 1.0]$, as we did during training, and then run the prediction. In order to improve inference speed and
reduce resource consumption, we crop the top and bottom slices of the data at prediction time and keep only the middle 50 slices.


\subsection{Post-processing}
During model prediction, we select for each voxel the label (from 0 to 13) corresponding to the largest of the 14 outputs of the organ model. After that, we combine the predictions of the two models to get the final result. When a voxel is predicted as both tumor and organ, it is considered to be tumor. Finally, we add all-zero arrays as the top and bottom slices in place of the slices that were cropped before prediction.

\section{Experiments}
\subsection{Dataset and evaluation measures}
The FLARE 2023 challenge is an extension of the FLARE 2021--2022 challenges~\cite{MedIA-FLARE21,FLARE22}, aiming to promote the development of foundation models in abdominal disease analysis. The segmentation targets cover 13 organs and various abdominal lesions. The training dataset is curated from more than 30 medical centers under the license permission, including TCIA~\cite{TCIA}, LiTS~\cite{LiTS}, MSD~\cite{simpson2019MSD}, KiTS~\cite{KiTS,KiTSDataset}, autoPET~\cite{autoPET-Data,autoPET-MICCAI22}, TotalSegmentator~\cite{TotalSegmentator}, and AbdomenCT-1K~\cite{AbdomenCT-1K}. The training set includes 4000 abdomen CT scans, comprising 2200 CT scans with partial labels and 1800 CT scans without labels. The validation and testing sets include 100 and 400 CT scans, respectively, which cover various abdominal cancer types, such as liver cancer, kidney cancer, pancreas cancer, colon cancer, gastric cancer, and so on. The organ annotation process used ITK-SNAP~\cite{ITKSNAP}, nnU-Net~\cite{nnUNet}, and MedSAM~\cite{MedSAM}.

The evaluation metrics encompass two accuracy measures—Dice Similarity Coefficient (DSC) and Normalized Surface Dice (NSD)—alongside two efficiency measures—running time and area under the GPU memory-time curve. These metrics collectively contribute to the ranking computation. Furthermore, the running time and GPU memory consumption are considered within tolerances of 15 seconds and 4 GB, respectively.

\subsection{Implementation details}
\subsubsection{Environment settings}
The development environments and requirements are presented in Table~\ref{table:env}.

\begin{table}[!htbp]
\caption{Development environments and requirements.}\label{table:env}
\centering
\begin{tabular}{ll}
\hline
Windows/Ubuntu version       & Ubuntu 20.04.4 LTS\\
\hline
CPU   & Intel(R) Xeon(R) Gold 5218R CPU @ 2.10GHz \\
\hline
RAM                         &128G\\
\hline
GPU (number and type)                         & 1* NVIDIA Tesla T4 (16G)\\
\hline
CUDA version                  & 11.6\\                          \hline
Programming language                 & Python 3.9\\ 
\hline
Deep learning framework & Pytorch (Torch 1.13.1, torchvision 0.14.1) \\
\hline
Specific dependencies         & numpy 1.25.2, SimpleITK 2.2.1, nnunetv2 2.1, nibabel 5.1.0 \\  
\hline
\end{tabular}
\end{table}


\subsubsection{Training protocols}
As described below.

Random flipping strategy (only for initial training stage): each image has a 20$\%$ probability of flipping along the x-axis and a 20$\%$ probability of flipping along the y-axis.

Random Gaussian smooth (only for initial training stage): each image has a 10$\%$ probability of being Gaussian smoothed with sigma in (0.5, 1.15) for every spatial dimension.

Random Gaussian noise (only for initial training stage): each image has a 20$\%$ probability of being added with Gaussian noise with mean in (0, 0.5) and standard deviation in (0, 1).

Random intensity change (only for initial training stage): each image has a 10$\%$ probability of changing intensity with gamma in (0.5, 2.5).

Random intensity shift (only for initial training stage): each image has a 10$\%$ probability of shifting intensity with offsets in (0, 0.3).

Patch sampling strategy: 2 patches of size $[128, 128, 128]$ are randomly cropped from each image. The center of each patch has 50$\%$ probability in the foreground and 50$\%$ probability in the background.

As described above, only data with all 13 organ labels and data with tumor labels are used; the other data are discarded. Unlabeled images were not used.

Some details of the initial training stage and the fine-tuning stage are shown in Table~\ref{table:initial} and Table~\ref{table:fine-tuning} respectively.

\begin{table*}[!htbp]
\caption{Training protocols (initial training stage).}
\label{table:initial}
\begin{center}
% \resizebox{0.47\textwidth}{!}{
\begin{tabular}{ll} 
\hline
Network initialization         & ``He'' normal initialization\\
\hline
Batch size                    & 2 \\
\hline 
Patch size & 128$\times$128$\times$128  \\ 
\hline
Total epochs & 1000 \\
\hline
Optimizer          & Adam          \\ \hline
Initial learning rate (lr)  & 0.0001 \\ \hline
Lr decay schedule & initial learning rate$\times(1-\mathrm{epoch}/500)^{0.9}$ \\
\hline
Training time                                           & 20 hours \\  \hline 
Loss function & the sum of dice loss and cross entropy loss \\  \hline 
Number of model parameters    & 31.42M \\ \hline
\end{tabular}
%}
\end{center}
\end{table*}

\begin{table*}[!htbp]
\caption{Training protocols (fine-tuning stage).}
\label{table:fine-tuning}
\begin{center}
% \resizebox{0.47\textwidth}{!}{
\begin{tabular}{ll} 
\hline
Network initialization         & model after initial training\\
\hline
Batch size                    & 2 \\
\hline 
Patch size & 128$\times$128$\times$128  \\ 
\hline
Total epochs & 40 \\
\hline
Optimizer          & Adam          \\ \hline
Initial learning rate (lr)  & 0.00005\\ \hline
Lr decay schedule & initial learning rate$\times(1-\mathrm{epoch}/500)^{0.9}$ \\\hline
Training time                                           & 39 hours \\  \hline 
Loss function & the sum of dice loss and cross entropy loss \\  \hline 
Number of model parameters    & 31.42M \\ \hline
\end{tabular}
\end{center}
\end{table*}


\section{Results and discussion}
\subsection{Quantitative results on validation set}

DSC and NSD results on the validation set are shown in Table~\ref{table:validation_set}. It can be seen from the table that Aorta and LK have the best performance, while the other targets perform worse. A possible reason is that the aorta and LK are larger organs that are more likely to lie in the center, and are therefore not cropped away by the preprocessing. 
\begin{table}[htbp]
\caption{Results on validation set.}
\label{table:validation_set}
\begin{center}
% \resizebox{0.47\textwidth}{!}{
\begin{tabular}{l|cc|cc|cc}
\hline
\multirow{2}{*}{Target} & \multicolumn{2}{c|}{Public Validation} & \multicolumn{2}{c|}{Online Validation} & \multicolumn{2}{c}{Testing} \\ 
\cline{2-7} 
                        & DSC(\%)            & NSD(\%)           & DSC(\%)            & NSD(\%)           & DSC(\%)      & NSD (\%)     \\ 
\hline
\hline
Liver                  &0.600$\pm$ $0.600$&0.000$\pm$ $0.000$& 5.500&4.400& 0.290&0.150 \\
\hline  
RK                  & 1.500$\pm$ $1.500$&1.110$\pm$ $1.110$ &  5.500 &8.900& 2.500&1.970\\
\hline
Spleen                   &0.000$\pm$ $0.000$&0.000$\pm$ $0.000$& 10.000& 10.000&  0.790&0.740\\
\hline
Pancreas                  & 0.440$\pm$ $0.440$ &0.540$\pm$ $0.540$&  5.100&7.500& 0.310&0.380\\ 
\hline
Aorta                   & 8.860$\pm$ $8.860$&9.570$\pm$ $9.570$&  6.870&7.460&  11.840&11.770\\
\hline 
IVC                  & 0.980$\pm$ $0.980$&0.820$\pm$ $0.820$&  14.500&14.000& 3.190&2.650 \\
\hline
RAG                   &4.000$\pm$ $ 4.000$&4.000$\pm$ $ 4.000$& 2.000&2.000& 0.230&0.230 \\
\hline
LAG                  & 2.000$\pm$ $ 2.000$&2.000$\pm$ $ 2.000$ & 1.000 &1.000& 0.930&0.930 \\
\hline 
Gallbladder                   & 8.000$\pm$ $ 8.000$& 8.000$\pm$ $ 8.000$ & 10.000&8.800&8.800&8.800 \\
\hline
Esophagus                & 0.000$\pm$ $0.000$&0.000$\pm$ $0.000$ &   0.0000&0.0000& 0.520&0.540 \\
\hline
Stomach                  & 0.000$\pm$ $0.000$&0.000$\pm$ $0.000$&   0.0000&0.0000& 0.420&0.320 \\  
\hline
Duodenum                  &5.500$\pm$ $5.500$&9.400$\pm$ $9.400$ &  0.280& 0.470& 0.130&0.130\\
\hline  
LK                 & 11.930$\pm$ $11.930$&11.480$\pm$ $11.480$ &  12.210& 11.090& 14.530&12.280 \\
\hline
Tumor                 & 0.000$\pm$ $0.000$&0.000$\pm$ $0.000$ &  0.000&0.000&  0.005&0.000\\
\hline
Average                 & 3.129$\pm$ $3.129$&3.351$\pm$ $3.351$ &  5.211&5.401&  3.178&2.921\\
\hline
\end{tabular}
\end{center}
\end{table}

\begin{table}[htbp]
\caption{Quantitative evaluation of segmentation efficiency in terms of the running time and GPU memory consumption. Total GPU denotes the area under GPU Memory-Time curve.}
\label{table:efficiency}
\centering
\begin{tabular}{ccccc}
\hline
Case ID & Image Size      & Running Time (s) & Max GPU (MB) & Total GPU (MB) \\ \hline
0001    & (512, 512, 55)  &  133.13                & 3746   & 30428   \\
0051    & (512, 512, 100) &  143.65                &  3806            & 14327               \\
0017    & (512, 512, 150) &  121.93                &  3590            & 24825               \\
0019    & (512, 512, 215) & 85.79            &        3323      &       14863         \\
0099    & (512, 512, 334) &      73.7            &      3374        &        11619        \\
0063    & (512, 512, 448) &      72.57            &       3370       &         10082       \\
0048    & (512, 512, 499) &    70.87              &       3316       &       10466         \\
0029    & (512, 512, 554) &       74.38           &         3382     &      11035          \\ \hline
\end{tabular}
\end{table}


\subsection{Qualitative results on validation set}

Two examples of good segmentation are shown in Figure~\ref{fig:goodcases} and two examples of bad segmentation are shown in Figure~\ref{fig:badcases}. Visualization is achieved with ITK-SNAP~\cite{py06nimg} version 3.6.0.

From the perspective of images, some potential reasons for the bad-segmentation cases are listed below. 

(1) The size of the case is very large, so we have to reduce its size by cutting the top and bottom slices in order to process it within 60 seconds.

(2) The case is not clear, distorted, or skewed.

(3) There are rare structures in the case that are not in the training set.

\begin{figure}[htbp]
\centering
\includegraphics[scale=0.3776]{imgs/good cases.png}
\caption{Good segmentation examples}
\label{fig:goodcases}
\end{figure}

\begin{figure}[htbp]
\centering
\includegraphics[scale=0.3776]{imgs/bad cases.png}
\caption{Bad segmentation examples}
\label{fig:badcases}
\end{figure}

\subsection{Segmentation efficiency results on validation set}

Table~\ref{table:efficiency} shows the efficiency results on 8 validation cases. Due to the cropping of the top and bottom slices, the cases with a larger third dimension also have fast running times.

\subsection{Results on final testing set}


\subsection{Limitation and future work}

In terms of model accuracy, first, we do not use pseudo-labels for unlabeled images at present. In the future, we are going to use pseudo-labels for unlabeled images. Second, we will consider using some post-processing methods, such as largest connected component extraction, hole filling, and morphological opening and closing operations, which are not used at present.

To reduce the time consumption, we simply cut the top and bottom slices, which caused a large loss of accuracy. To deal with this, we will consider using some optimization methods to improve the running speed of the model in the future.

\section{Conclusion}
In this paper, we have explored the application of the nnU-Net model for FLARE23 abdominal organ segmentation. Due to the limitation of time, we could not fully leverage the power of deep learning and the
architectural advancements of nnU-Net, but we will explore better deep learning methods in the future.


\subsubsection{Acknowledgements} The authors of this paper declare that the segmentation method they implemented for participation in the FLARE 2023 challenge has not used any pre-trained models nor additional datasets other than those provided by the organizers. The proposed solution is fully automatic without any manual intervention. 
%
% ---- Bibliography ----
%
% BibTeX users should specify bibliography style 'splncs04'.
% References will then be sorted and formatted in the correct style.
%
\bibliographystyle{splncs04}
\bibliography{ref}
\begin{table}[!htbp]
\caption{Checklist Table. Please fill out this checklist table in the answer column.}
\centering
\begin{tabular}{ll}
\hline
Requirements                                                                                                                    & Answer        \\ \hline
A meaningful title                                                                                                              & Yes       \\ \hline
The number of authors ($\leq$6)                                                                                                             & 4        \\ \hline
Author affiliations and ORCID                                                                                           & Yes        \\ \hline
Corresponding author email is presented                                                                                                  &Yes       \\ \hline
Validation scores are presented in the abstract                                                                                 &Yes        \\ \hline
\begin{tabular}[c]{@{}l@{}}Introduction includes at least three parts: \\ background, related work, and motivation\end{tabular} & Yes        \\ \hline
A pipeline/network figure is provided                                                                                           & Figure 1 \\ \hline
Pre-processing                                                                                                                  & Page 2   \\ \hline
Strategies to use the partial label                                                                                             & Page 2   \\ \hline
Strategies to use the unlabeled images.                                                                                         & Page 2   \\ \hline
Strategies to improve model inference                                                                                           & Page 3   \\ \hline
Post-processing                                                                                                                 & Page 3   \\ \hline
Dataset and evaluation metric section is presented                                                                              & Page 3   \\ \hline
Environment setting table is provided                                                                                           & Table 1  \\ \hline
Training protocol table is provided                                                                                             & Table 2  \\ \hline
Ablation study                                                                                                                  & Page 7   \\ \hline
Efficiency evaluation results are provided                                                                                     & Table 5 \\ \hline
Visualized segmentation example is provided                                                                                     & Figure 2/3 \\ \hline
Limitation and future work are presented                                                                                        & Yes        \\ \hline
Reference format is consistent.  & Yes    \\ \hline

\end{tabular}
\end{table}
\end{document}
