% This is samplepaper.tex, a sample chapter demonstrating the
% LLNCS macro package for Springer Computer Science proceedings;
% Version 2.21 of 2022/01/12
%
\documentclass[runningheads]{llncs}
%
\usepackage[T1]{fontenc}
% T1 fonts will be used to generate the final print and online PDFs,
% so please use T1 fonts in your manuscript whenever possible.
% Other font encodings may result in incorrect characters.
%
\usepackage{graphicx}
% Used for displaying a sample figure. If possible, figure files should
% be included in EPS format.
%
% If you use the hyperref package, please uncomment the following two lines
% to display URLs in blue roman font according to Springer's eBook style:
%\usepackage{color}
%\renewcommand\UrlFont{\color{blue}\rmfamily}
\usepackage[pagebackref=true,breaklinks=true,colorlinks,bookmarks=false]{hyperref}
%
\begin{document}
%
\title{An Efficient Coarse-to-Fine Segmentation Framework for Abdominal Organ Segmentation}
%
%\titlerunning{Abbreviated paper title}
% If the paper title is too long for the running head, you can set
% an abbreviated paper title here
%
\author{Cancan Chen \and Weixin Xu \and Rongguo Zhang}
%
% First names are abbreviated in the running head.
% If there are more than two authors, 'et al.' is used.
%
\institute{Infervision Advanced Research Institute, Beijing, China\\
\email{\{ccancan,xweixin,zrongguo\}@infervision.com}}
%
\maketitle              % typeset the header of the contribution
%
\begin{abstract}
U-Net has proven to be the most successful segmentation architecture for medical image processing in recent years. Building on it, ResUNet introduces ResBlocks with skip connections and focuses more on contextual information.
%This work aims to accomplish the task of abdominal multi-organs segmentation by the 3D ResUNet.
In this work, we adopt the 3D ResUNet to build a whole-volume-based coarse-to-fine segmentation framework for the abdominal multi-organ segmentation task. On the FLARE2022 validation set, the segmentation results achieve a mean Dice Similarity Coefficient (DSC) of 87.67\% and a mean Normalized Surface Dice (NSD) of 93.16\%. In addition, for each case in the FLARE2022 validation set, the average running time is 19.5614 seconds and the maximum GPU memory consumption is 2657 MB.

\keywords{Abdominal Organs  \and Segmentation \and FLARE.}
\end{abstract}



\section{Introduction}

Abdominal organ segmentation plays an important role in clinical practice. In recent years, with the development of deep learning, many methods have been proposed to accomplish the segmentation task automatically. In this paper, we focus on multi-organ segmentation from abdominal CT scans. The Fast and Low GPU Memory Abdominal Organ Segmentation (FLARE) challenge requires developing methods that can simultaneously segment 13 abdominal organs: the liver, spleen, pancreas, left and right kidneys (LK and RK), aorta, inferior vena cava (IVC), left and right adrenal glands (LAG and RAG), gallbladder, esophagus, stomach and duodenum. To address this task, we design our method based on the original ResUNet~\cite{diakogiannis2020resunet}.

In this paper, based on the original ResUNet, we propose a whole-volume-based coarse-to-fine framework. In the first stage, i.e., coarse segmentation, we directly use the whole CT volume and resample it to $128\times128\times128$ as the input. In the fine stage, the 13 organs are split into two groups: big organs and small organs. For the big organs, we crop the regions containing the organs based on the coarse segmentation results and resample the cropped volumes to $160\times160\times160$ as the fine-stage input. For the small organs, the volumes are cropped to $64\times256\times256$ or $128\times128\times128$. Specifically, the left adrenal gland (LAG), right adrenal gland (RAG), gallbladder and esophagus are regarded as small organs. The backbone of both stages is a 3D ResUNet with an encoder of 4 down-sampling layers and a decoder of 4 up-sampling layers. Notably, an ASPP~\cite{ChenLC2018} module may be used at the top level to compensate for the information loss caused by repeated down-sampling.

The main contributions of this work are summarized as follows:
\begin{itemize} 
\item We propose a whole-volume-based coarse-to-fine framework, which can effectively complete abdominal organs segmentation.

\item Based on our proposed framework, we fully utilize the relative position information between big organs by grouping neighbouring organs, which helps to better locate and segment these organs, especially the stomach, pancreas, duodenum and esophagus.

\item We evaluate our proposed framework on the FLARE2022 challenge dataset, and the results demonstrate its effectiveness and efficiency.
\end{itemize}

\section{Method}
Our proposed method is a whole-volume-based coarse-to-fine framework. Details about the method are described as follows.

%###########################
\subsection{Preprocessing}
Our proposed method includes the following preprocessing steps:
\begin{itemize} 
 \item Reorienting images to the target direction.
 \item Cropping strategy: None
 \item Resampling method for anisotropic data: 
 
 Constrained by hardware conditions, the original images are resampled to $128 \times 128 \times 128$ for both coarse segmentation and small organs' fine segmentation task. For the fine segmentation of big organs, images are resampled to $160 \times 160 \times 160$.

 \item Intensity normalization method:
 
 Since volumes from different centers have different HU value ranges, and this variation also appears across different organs, images are clipped to the range $[-100, 300]$ and normalized to the range $[0, 1]$.
 \item Others: 
 
To improve training and testing efficiency, mixed precision is adopted throughout our framework.
\end{itemize}

\subsection{Proposed Method}
The process of our framework is shown in Figure~\ref{fig:framework}. In our proposed framework, coarse segmentation tends to mislocate small organs, so a 3D ResUNet is cascaded to relocate them.
%localization models are used for small organs, which are 3D ResUNet. 
In addition, the 9 large organs are divided into 3 groups so that more relative position information can be captured. Figure~\ref{fig:Network} illustrates the applied 3D ResUNet~\cite{diakogiannis2020resunet}, where a U-shaped architecture is adopted.

\begin{figure}[htbp]
\centering
% \includegraphics[scale=0.4]{imgs/framework.pdf}
% \includegraphics[scale=0.4]{imgs/framework1.pdf}
\includegraphics[width=\textwidth]{imgs/framework1.pdf}
\caption{Process of our proposed framework.}
\label{fig:framework}
\end{figure}


Network architecture details: our proposed method is a whole-volume-based coarse-to-fine segmentation framework. For both the coarse and fine segmentation stages, the network consists of 4 down-sampling layers, 4 up-sampling layers that produce the final segmentation results, and an ASPP module.

\begin{figure}[htbp]
\centering
\includegraphics[width=\textwidth]{imgs/ResUNet.pdf}
\caption{Our proposed network architecture. In each Res Block, the final $1\times1\times1$ convolution uses a stride of 2, which is how the volumes are down-sampled.}
\label{fig:Network}
\end{figure}

Loss function: we use the sum of the Dice loss and the cross-entropy loss, because compound loss functions have been shown to be robust in various medical image segmentation tasks~\cite{LossOdyssey}.


\subsection{Post-processing}
To suppress the impact of noise, connected component analysis~\cite{cc3d2021} is applied, and the largest connected component is kept as the final segmentation result.


\section{Experiments}
\subsection{Dataset and evaluation measures}
The FLARE2022 dataset is curated from more than 20 medical groups under the license permission, including MSD~\cite{simpson2019MSD}, KiTS~\cite{KiTS,KiTSDataset}, AbdomenCT-1K~\cite{ma2021abdomenct}, and TCIA~\cite{clark2013TCIA}. The training set includes 50 labelled CT scans with pancreas disease and 2000 unlabelled CT scans with liver, kidney, spleen, or pancreas diseases. The validation set includes 50 CT scans with liver, kidney, spleen, or pancreas diseases.
The testing set includes 200 CT scans, where 100 cases have liver, kidney, spleen, or pancreas diseases and the other 100 cases have uterine corpus endometrial, urothelial bladder, stomach, sarcomas, or ovarian diseases. All the CT scans only have image information and the center information is not available.

The evaluation measures consist of two accuracy measures: Dice Similarity Coefficient (DSC) and Normalized Surface Dice (NSD), and three running efficiency measures: running time, area under GPU memory-time curve, and area under CPU utilization-time curve. All measures will be used to compute the ranking. Moreover, the GPU memory consumption has a 2 GB tolerance.


\subsection{Implementation details}
\subsubsection{Environment settings}
The development environments and requirements are presented in Table~\ref{table:env}.


\begin{table}[!htbp]
\caption{Development environments and requirements.}\label{table:env}
\centering
\begin{tabular}{ll}
\hline
Windows/Ubuntu version       & Ubuntu 18.04.06 LTS\\
\hline
CPU   & Intel(R) Xeon(R) Silver 4210R CPU @ 2.40GHz \\
\hline
RAM                         &4$\times $32GB; 3200MT$/$s\\
\hline
GPU (number and type)                         & Four NVIDIA RTX A6000 48G\\
\hline
CUDA version                  & 11.4\\                          \hline
Programming language                 & Python 3.7\\ 
\hline
Deep learning framework & Pytorch (Torch 1.7.1+cu110, torchvision 0.8.2) \\
\hline
Specific dependencies         &                        \\                                                                      
\hline
(Optional) Link to code     &                                                                \\
\hline
\end{tabular}
\end{table}


\subsubsection{Training protocols}
In our training process, we performed the following data augmentations with the MONAI project~\cite{monai2022}: 1) random cropping of the volumes with a range from $-0.5$ to $0.5$; 2) brightness and contrast adjustment of the volumes with a range from $-0.4$ to $0.4$; 3) random elastic transformation with a probability of 0.5, sigma in the range 3 to 5 and magnitude in the range 100 to 200; 4) clipping of the volumes to the range 0 to 1. Details of our training protocols are shown in Table~\ref{table:training} and Table~\ref{table:training2nd}.

Different organ combinations provide different localization information, which is helpful for fine segmentation. Therefore, we divide the large organs into three groups: (liver, spleen, left and right kidneys); (stomach, pancreas and duodenum); (aorta and IVC). Each of the other four small organs (LAG, RAG, gallbladder, esophagus) is first localized by a 3D ResUNet and then refined by another 3D ResUNet.

\begin{table*}[!htbp]
\caption{Training protocols.}
\label{table:training}
\begin{center}
% \resizebox{0.47\textwidth}{!}{
\begin{tabular}{ll} 
\hline
Network initialization         & ``He'' normal initialization\\
\hline
Batch size                    & 8 \\
\hline 
Patch size & 128$\times$128$\times$128  \\ 
\hline
Total epochs & 100 \\
\hline
Optimizer          & AdamW~\cite{loshchilov2017decoupled} (weight decay = 1e-4)          \\ \hline
Initial learning rate (lr)  & 1e-4 \\ \hline
Lr decay schedule & halved every 20 epochs \\
\hline
Training time                                           & 18 hours \\  \hline 
Loss Function & Summation of Cross Entropy and Dice loss\\  \hline 
Number of model parameters    & 15.17M\\ \hline
Number of flops & 93.43G\\ \hline
\end{tabular}
%}
\end{center}
\end{table*}


\begin{table*}[!htbp]
\caption{Training protocols for the refine model.}
\label{table:training2nd}
\begin{center}
% \resizebox{0.47\textwidth}{!}{
\begin{tabular}{ll} 
\hline
Network initialization         & ``He'' normal initialization\\
\hline
Batch size                    & 8 \\
\hline 
Patch size & 160$\times$160$\times$160  \\ 
\hline
Total epochs & 120 \\
\hline
Optimizer          & AdamW~\cite{loshchilov2017decoupled} (weight decay = 1e-4)          \\ \hline
Initial learning rate (lr)  & 1e-4 \\ \hline
Lr decay schedule & halved every 20 epochs \\
\hline
Training time                                           & 18 hours \\  \hline 
Loss Function & Summation of Cross Entropy and Dice loss\\  \hline 
Number of model parameters    & 15.17M\\ \hline
Number of flops & 182.49G\\ \hline
\end{tabular}
\end{center}
\end{table*}


\section{Results and discussion}
\subsection{Quantitative results on validation set}
As shown in Table~\ref{table:results}, our proposed method achieves a mean DSC of 0.8767 and a mean NSD of 0.9316 on the validation set. The segmentation performance is good, especially for large organs such as the liver, spleen, aorta and kidneys.

\subsubsection{With Unlabelled data. }In the process of our experiments, we tried to train our model by self-supervised learning with the 2000 unlabelled cases. Specifically, we tried classical methods such as MoCo~v2~\cite{chen2020improved} and SimSiam~\cite{chen2021exploring}. We also tried the following strategy: we randomly cropped patches from the original volumes, segmented the unlabelled cases with our trained coarse segmentation network, masked the RAG according to the segmentation results, and then trained a pix2pix GAN to restore the original volumes; the generator of the pix2pix GAN was then used as the pretrained model. However, all these methods had little effect, and trying them consumed much of our time. The best mean DSC on the validation set obtained with unlabelled data is 0.8416, which comes from the strategy of masking the RAG and using the pix2pix GAN generator as the pretrained model. Moreover, we tried to combine the pseudo labels predicted for the 2000 unlabelled images with the 50 labelled images, and the results show that pseudo labels are helpful.

% DSC results comparison without/with unlabelled are shown in table~\ref{table:results_unlabelled}.
\begin{table*}[!htbp]
\caption{Results of our proposed method on validation set.}
\label{table:results}
\begin{center}
\begin{tabular}{ccc} 
\hline
Organ  \qquad\qquad & DSC \qquad\qquad &NSD \qquad\\
\hline
Liver  \qquad\qquad &$0.9695\pm0.0211$ \qquad\qquad &$0.9832\pm0.0350$  \qquad\\
RK  \qquad\qquad &$0.9203\pm0.1899$ \qquad\qquad &$0.9448\pm0.1956$  \qquad\\
Spleen  \qquad\qquad &$0.9420\pm0.0197$ \qquad\qquad &$0.9807\pm0.0421$  \qquad\\
Pancreas  \qquad\qquad &$0.8711\pm0.0459$ \qquad\qquad &$0.9643\pm0.0442$  \qquad\\
Aorta  \qquad\qquad &$0.9426\pm0.0248$ \qquad\qquad &$0.9813\pm0.0312$  \qquad\\
IVC  \qquad\qquad &$0.9049\pm0.0825$ \qquad\qquad &$0.9272\pm0.0885$  \qquad\\
RAG  \qquad\qquad &$0.7703\pm0.2240$ \qquad\qquad &$0.8807\pm0.2313$  \qquad\\
LAG  \qquad\qquad &$0.8009\pm0.2352$ \qquad\qquad &$0.8948\pm0.2315$  \qquad\\
Gallbladder  \qquad\qquad &$0.8070\pm0.2417$ \qquad\qquad &$0.7943\pm0.2475$  \qquad\\
Esophagus  \qquad\qquad &$0.8482\pm0.1205$ \qquad\qquad &$0.9315\pm0.1207$  \qquad\\
Stomach  \qquad\qquad &$0.9122\pm0.1430$ \qquad\qquad &$0.9553\pm0.1245$  \qquad\\
Duodenum  \qquad\qquad &$0.8102\pm0.1023$ \qquad\qquad &$0.9424\pm0.0611$  \qquad\\
LK  \qquad\qquad &$0.8977\pm0.1746$ \qquad\qquad &$0.9302\pm0.1799$  \qquad\\
Average  \qquad\qquad &$0.8767\pm0.1250$ \qquad\qquad &$0.9316\pm0.1256$  \qquad\\
\hline
\end{tabular}
\end{center}
\end{table*}

% \begin{table*}[!htbp]
% \caption{DSC Results of our proposed method on validation set without/with unlabelled data.}
% \label{table:results_unlabelled}
% \begin{center}
% \begin{tabular}{ccc} 
% \hline
% Organ  \qquad\qquad & Without Unlabelled \qquad\qquad &With Unlabelled \qquad\\
% \hline
% Liver  \qquad\qquad &$0.9695\pm0.0211$ \qquad\qquad &$0.9832\pm0.0350$  \qquad\\
% RK  \qquad\qquad &$0.9203\pm0.1899$ \qquad\qquad &$0.9448\pm0.1956$  \qquad\\
% Spleen  \qquad\qquad &$0.9420\pm0.0197$ \qquad\qquad &$0.9807\pm0.0421$  \qquad\\
% Pancreas  \qquad\qquad &$0.8711\pm0.0459$ \qquad\qquad &$0.9643\pm0.0442$  \qquad\\
% Aorta  \qquad\qquad &$0.9426\pm0.0248$ \qquad\qquad &$0.9813\pm0.0312$  \qquad\\
% IVC  \qquad\qquad &$0.9049\pm0.0825$ \qquad\qquad &$0.9272\pm0.0885$  \qquad\\
% RAG  \qquad\qquad &$0.7703\pm0.2240$ \qquad\qquad &$0.8807\pm0.2313$  \qquad\\
% LAG  \qquad\qquad &$0.8009\pm0.2352$ \qquad\qquad &$0.8948\pm0.2315$  \qquad\\
% Gallbladder  \qquad\qquad &$0.8070\pm0.2417$ \qquad\qquad &$0.7943\pm0.2475$  \qquad\\
% Esophagus  \qquad\qquad &$0.8482\pm0.1205$ \qquad\qquad &$0.9315\pm0.1207$  \qquad\\
% Stomach  \qquad\qquad &$0.9122\pm0.1430$ \qquad\qquad &$0.9553\pm0.1245$  \qquad\\
% Duodenum  \qquad\qquad &$0.9102\pm0.1023$ \qquad\qquad &$0.9424\pm0.0611$  \qquad\\
% LK  \qquad\qquad &$0.8977\pm0.1746$ \qquad\qquad &$0.9302\pm0.1799$  \qquad\\
% Average  \qquad\qquad &$0.8767\pm0.1250$ \qquad\qquad &$0.9316\pm0.1256$  \qquad\\
% \hline
% \end{tabular}
% \end{center}
% \end{table*}

\subsection{Qualitative results on validation set}
Figure~\ref{fig:flare_example} shows some failed and successful examples on the validation set. Our proposed method cannot segment the gallbladder well in case \#2 because the gallbladder in this case is too small. In case \#3, the stomach is squeezed and displaced, so it was mistakenly segmented as esophagus. Moreover, in case \#31, some lesions such as liver tumors look similar to the gallbladder, which also degrades gallbladder segmentation performance. In cases \#6, \#8 and \#35, no lesions resemble neighbouring organs and the organ sizes are normal; therefore, the abdominal organs are segmented satisfactorily.

\begin{figure}[htbp]
\centering
\includegraphics[width=\textwidth]{imgs/flare_example.pdf}
\caption{Some failed and successful examples. Columns from left to right are original volumes, ground truth and our predicted results, respectively.}
\label{fig:flare_example}
\end{figure}

\subsection{Segmentation efficiency results on validation set}
Our segmentation efficiency results on the validation set are shown in Table~\ref{table:segmentation_efficiency_results}. The average running time per case in the validation set is 19.5614 seconds, and the maximum GPU memory consumption is 2657 MB. The total AUC of GPU time and CPU time are 1,597,650 and 18,215.44, respectively.
\begin{table*}[!htbp]
\caption{Segmentation efficiency results of our proposed method on validation set.}
\label{table:segmentation_efficiency_results}
\begin{center}
\begin{tabular}{c|c|c|c} 
\hline
Average Running Time   & Max GPU Memory & AUC of GPU Time & AUC of CPU Time \\
\hline
19.5614 Seconds & 2657 MB & 1,597,650 & 18,215.44 \\
\hline
\end{tabular} 
\end{center}
\end{table*}
\subsection{Results on final testing set}
Our test phase results are shown in Table~\ref{table:test}. The average DSC and NSD values over the 13 organs are 0.8774 and 0.9358, respectively.

\begin{table*}[!htbp]
\caption{Results of our proposed method on test set.}
\label{table:test}
\begin{center}
\begin{tabular}{ccc} 
\hline
Organ  \qquad\qquad & DSC \qquad\qquad &NSD \qquad\\
\hline
Liver  \qquad\qquad &$0.9722\pm0.0105$ \qquad\qquad &$0.9893\pm0.0181$  \qquad\\
RK  \qquad\qquad &$0.8979\pm0.2279$ \qquad\qquad &$0.9240\pm0.2359$  \qquad\\
Spleen  \qquad\qquad &$0.9175\pm0.1520$ \qquad\qquad &$0.9587\pm0.1616$  \qquad\\
Pancreas  \qquad\qquad &$0.8394\pm0.0866$ \qquad\qquad &$0.9485\pm0.0886$  \qquad\\
Aorta  \qquad\qquad &$0.9218\pm0.0638$ \qquad\qquad &$0.9621\pm0.0632$  \qquad\\
IVC  \qquad\qquad &$0.9099\pm0.0777$ \qquad\qquad &$0.9400\pm0.0919$  \qquad\\
RAG  \qquad\qquad &$0.8471\pm0.1237$ \qquad\qquad &$0.9488\pm0.1324$  \qquad\\
LAG  \qquad\qquad &$0.8484\pm0.1604$ \qquad\qquad &$0.9429\pm0.1646$  \qquad\\
Gallbladder  \qquad\qquad &$0.8034\pm0.2629$ \qquad\qquad &$0.7977\pm0.2683$  \qquad\\
Esophagus  \qquad\qquad &$0.8292\pm0.1157$ \qquad\qquad &$0.9277\pm0.1132$  \qquad\\
Stomach  \qquad\qquad &$0.9175\pm0.0983$ \qquad\qquad &$0.9553\pm0.0964$  \qquad\\
Duodenum  \qquad\qquad &$0.8020\pm0.1063$ \qquad\qquad &$0.9376\pm0.0955$  \qquad\\
LK  \qquad\qquad &$0.9001\pm0.1950$ \qquad\qquad &$0.9338\pm0.2017$  \qquad\\
Average  \qquad\qquad &$0.8774\pm0.1292$ \qquad\qquad &$0.9358\pm0.1331$  \qquad\\
\hline
\end{tabular}
\end{center}
\end{table*}

\subsection{Limitation and future work}
In this paper, the performance of small organ segmentation is still not satisfactory. In the future, we will focus on the segmentation of these organs, such as the gallbladder and adrenal glands. Moreover, self-supervised learning with unlabelled data will also be considered in our future work, and careful tuning is expected to further improve the segmentation performance.



\section{Conclusion}
The proposed method works well on abdominal organs, especially large organs such as the liver, spleen and kidneys. The performance on the adrenal glands (AGs) and gallbladder is disappointing because of their blurred edges and small size.


\subsubsection{Acknowledgements} The authors of this paper declare that the segmentation method they implemented for participation in the FLARE 2022 challenge has not used any pre-trained models nor additional datasets other than those provided by the organizers. The proposed solution is fully automatic without any manual intervention.


%
% ---- Bibliography ----
%
% BibTeX users should specify bibliography style 'splncs04'.
% References will then be sorted and formatted in the correct style.
%
\bibliographystyle{splncs04}
\bibliography{ref}

\end{document}
