% This is samplepaper.tex, a sample chapter demonstrating the
% LLNCS macro package for Springer Computer Science proceedings;
% Version 2.21 of 2022/01/12
%
\documentclass[runningheads]{llncs}
%
\usepackage[pagebackref=true,breaklinks=true,colorlinks,bookmarks=false]{hyperref} 
\usepackage{marvosym}
\usepackage{makecell}
\usepackage{booktabs}
\usepackage{amsmath}
\usepackage{multirow}
\usepackage[section]{placeins}
\usepackage{wrapfig}
\usepackage{academicons}
\usepackage{xcolor}
\usepackage{orcidlink}
\newcommand{\orcid}[1]{\href{https://orcid.org/#1}{\textcolor[HTML]{A6CE39}{\aiOrcid}}}

% envelope symbol
%
\usepackage[T1]{fontenc}
% T1 fonts will be used to generate the final print and online PDFs,
% so please use T1 fonts in your manuscript whenever possible.
% Other font encondings may result in incorrect characters.
%
\usepackage{graphicx}
% Used for displaying a sample figure. If possible, figure files should
% be included in EPS format.
%
% If you use the hyperref package, please uncomment the following two lines
% to display URLs in blue roman font according to Springer's eBook style:
%\usepackage{color}
%\renewcommand\UrlFont{\color{blue}\rmfamily}
  
%
\begin{document}
%
\title{Unlabeled Abdominal Multi-organ Image Segmentation Based on Semi-supervised Adversarial Training Strategy}
%
\titlerunning{Adversarial Semi-supervision}
% If the paper title is too long for the running head, you can set
% an abbreviated paper title here
% \inst{1,2,\#,\orcidlink{0000-0001-5100-1722}} 
\author{YuanKe Pan\inst{\#, \orcidlink{0000-0002-1062-404X}} \and
Jinxin Zhu\inst{\#, \orcidlink{0000-0003-1247-7415}} \and
Bingding Huang\textsuperscript{\Letter}\inst{,\orcidlink{0000-0002-4748-2882}}}
%
\authorrunning{Y. Pan et al.}
% First names are abbreviated in the running head.
% If there are more than two authors, 'et al.' is used.
%
\institute{College of Big Data and Internet, Shenzhen Technology University, Shenzhen, 518188, China }
%

\maketitle              % typeset the header of the contribution
%

\footnotetext[1]{\#These authors contributed equally to this work.}
\footnotetext[2]{Corresponding author: Bingding Huang(huangbingding@sztu.edu.cn).}


\begin{abstract}

The unlabeled images are helpful to generalize segmentation models. To make full use of the unlabeled images, we develop a generator-discriminator training pipeline based on the EfficientSegNet, which has achieved the best performance and efficiency in previous FLARE 2021 challenge. For the generator, a coarse-to-fine strategy is used to produce segmentations of abdominal organs. Then the labeled image and the ground truth are applied to optimize the generator. The discriminator receives the original unlabeled image or the relevant noised image, together with their generated segmentation results to determine which segmentation is better for the unlabeled image. After the adversarial training, the generator is used to segment the unlabeled images. On the FLARE 2022 final testing set of 200 cases, our method achieved an average dice similarity coefficient (DSC) of 0.8497 and a normalized surface dice (NSD) of 0.8915. In the inference stage, the average inference time is 11.67 seconds per case, and the average GPU (MB) and CPU (\%) consumption per case are 311 and 225.6, respectively. The source code is freely available at \href{ https://github.com/Yuanke-Pan/Adversarial-EfficientSegNet}{https://github.com/Yuanke-Pan/Adversarial-EfficientSegNet}.

\keywords{Unlabeled image segmentation  \and Semi-supervised learning \and Adverarial training}

\end{abstract}



\section{Introduction}

% Introduce the background and difficulty of the segmentation task and your ideas.
The lack of labeled images is a great burden in medical imaging tasks. In the past five years, there are many researches~\cite{chen2021semi,tarvainen2017mean,Deep_Adversarial_Networks} show the potential of semi-supervised algorithms in medical applications. In this paper, we focus on building an abdominal multi-organ segmentation model with only 50 labeled and 2000 unlabeled abdominal CT scans. There are two main difficulties in this task: 1) how to use those 2000 unlabeled CT scans to obtain more generalized results. 2) how to keep GPU memory and computation cost at a low level.


The EfficientSegNet~\cite{Zhang2021EfficientCN} has shown excellent performance and efficiency in FLARE 2021 challenge~\cite{MedIA-FLARE21}. The basic idea of our method is inheriting the network architecture of EfficientSegNet and applying semi-supervised learning algorithms to improve the generalizability of the inference model. In this work, we propose a novel adversarial strategy to use the unlabeled CT images which can improve the segmentation results. The main contributions of this work are:

% Two direction are provied from recent research: 1) Use Mask AutoEncode\cite{He2021MaskedAA} to pretrain a transformer based model such as Unetr.  2) Use Semi-Supervise Learning method to train regular model. Considering the limits of GPU memory size and computation cost, the transformer based model not meet the requirements.

\begin{enumerate}
	\item We propose an adversarial semi-supervised learning pipeline for segmentation of medical images.
	\item We design an image-label fusion discriminator to make full use of unlabeled images.
\end{enumerate}


\section{Method}
%###########################
\subsection{Preprocessing}
% Full description of any pre-processing strategy, how the data is normalized. Please details the following aspects


We use the same preprocessing strategy in both training and prediction. The details are described as follows:
\begin{itemize} 
 	\item Reorienting the images to the left-posterior-inferior (LPI) view by flipping and reordering.
 	\item Resampling the images to the fixed sizes. The sizes of coarse and fine input are [160, 160, 160] and [192, 192, 192], respectively.
\end{itemize}

For labeled images, we implement the following preprocessing steps:
\begin{itemize}
	\item Reorienting the masks to the LPI view by flipping and reordering.
	\item Cropping the images according to the masks with a margin of [20, 20, 20].
\end{itemize}



% \FloatBarrier
\subsection{Proposed Method}

In this work, we combined efficient context-aware network as generator and image-label fusion discriminator to make cross learning between labeled CT images and unlabeled images. The whole workflow of our method is illustrated in Fig. \ref{fig:flow}, 






% Motivation and description of the method details. \textbf{Pre-trained models are not allowed to use in this challenge.}

% Strategies to use the unlabelled cases

% Network architecture details (Based on the winning solutions in FLARE 2021, we recommend using two-stage framework)

% Loss function: we use the summation between Dice loss and cross entropy loss because compound loss functions have been proved to be robust in various medical image segmentation tasks~\cite{LossOdyssey}. 

% Strategies to improve inference speed and reduce resource consumption  (Based on the winning solutions in FLARE 2021, we recommend using ONNX or TensorRT to speed up inference process)

% Please provide a figure to show your network architecture.
% Figure~\ref{fig:Network} illustrates the applied 3D nnU-Net~\cite{isensee2021nnu}, where a U-Net architecture is adopted.

The training phase of our proposed adversarial semi-supervise learning workflow consists of three steps in every iterations:
\begin{itemize}
    \item In step 1, we use the generator to produce segmentation result of the original image. Then we train the discriminator with the original image, the generated segmentation and the ground truth. Here we assume that the ground truth is always the best segmentation of the original image.
    \item In step 2, we train the generator with original image and the ground truth as supervised learning.
    \item In step 3, we first add the Gaussian noise to the unlabeled image to get the relevant noised image. Then we input the unlabeled image and the noised image into the generator to produce segmentation-Unlabeled and segmentation-Noised, respectively. After that, we input the unlabeled image and the noised image separately with the two generated segmentation results (segmentation-Unlabeled and segmentation-Noised) into the discriminator to determine which segmentation is better. The better segmentation will be used as the pseudo label and will be used to train the generator.
\end{itemize}
\vspace{-0.75cm}
\begin{figure}[htbp]
\centering
\includegraphics[scale=0.25]{imgs/flow.pdf}
\caption{The whole workflow of our proposed method. The first step is used to train the discriminator, the second step is used to train the generator, and the third step is used to process unlabeled data. Loop through three steps over multiple iterations.}
\label{fig:flow}
\end{figure}
It is important to emphasize that for the first 50 and the last 50 training iterations, we only use step 2 to fine-tune the network. After the training process, the final generator is used to segment the unlabeled images.




\subsubsection{Generator}
For the coarse-to-fine training strategy, there are two different networks to extract global and local information. The coarse model is a typical 5-layer U-net structure which is employed to locate target organs. The fine model is the EfficientSegNet shown in Fig. \ref{fig:Fine}, which receives the coarse segmentation results from the coarse model for further refinement.




\subsubsection{Discriminator}
The input of the discriminator includes the original image and two segmentation results. For the labeled images, these two segmentation results are the generated segmentation and the ground truth. For the unlabeled images, these two segmentation results are derived from the unlabeled image and the relevant noised unlabeled image. The discriminator is used to decide which segmentation is better for the original image. 

\vspace{-0.5cm}
\begin{figure}[htbp]
\setlength{\abovecaptionskip}{0.2cm}
\setlength{\belowcaptionskip}{0.2cm}
\centering
\includegraphics[scale=0.35]{imgs/fine_unet_1.pdf}
\caption{The architecture of the fine network in the generator. The fine network performs fine segmentation on the coarse segmentation results of the corse network, and applies anisotropic convolution block to speed up the inference process.}
\label{fig:Fine}
\end{figure}


\begin{figure}[!htbp]
\setlength{\abovecaptionskip}{0.2cm}
\setlength{\belowcaptionskip}{0.2cm}
\centering
\includegraphics[scale=0.17]{imgs/fig3.png}
\caption{The architecture of the discriminator network. Discriminator can be regarded as a condition-based binary classification model, which judges which segmentation is better according to the image.}
\label{fig:Discriminator}
\end{figure}

As shown in Fig. \ref{fig:Discriminator}, the image-label fusion discriminator fuses the original image with the two segmentation labels to output which label is better.




% \FloatBarrier
\subsubsection{Optional Operation}
In order to improve the discriminator’s performance, we apply an augmentation to the ground truth called mask poisoning. This augmentation step adds a random noise to the ground truth, which can make the discriminator more concentrate on the data distribution.
% \FloatBarrier

\subsubsection{Accelerate strategy}
The same as the EfficientSegNet, the anisotropic convolution, anisotropic pooling and coarse-to-fine strategy are used to reduce inference time and GPU memory usage.




\subsection{Post-processing}
% \textit{Description of post-processing of the model outputs to get the final output in training stage.}\\

In the last step of the coarse-to-fine model, we apply a connected components analysis~\cite{silversmithconnected} to get the final segmentation results.



\section{Experiments}
\subsection{Dataset and evaluation metrics}
The FLARE 2022 dataset is curated from more than 20 medical groups under the license permission, including MSD~\cite{simpson2019MSD}, KiTS~\cite{KiTS,KiTSDataset}, AbdomenCT-1K~\cite{AbdomenCT-1K}, and TCIA~\cite{clark2013TCIA}. The training set includes 50 labeled CT scans with pancreas disease and 2000 unlabeled CT scans with liver, kidney, spleen, or pancreas diseases. The validation dataset includes 50 CT scans with liver, kidney, spleen, or pancreas diseases.
The test dataset includes 200 CT scans where 100 cases with liver, kidney, spleen, or pancreas diseases and the other 100 cases with uterine corpus endometrial, urothelial bladder, stomach, sarcomas, or ovarian diseases. All the CT scans only have image information and the center information is not available.

The evaluation metrics are two accuracy measurements: Dice Similarity Coefficient (DSC) and Normalized Surface Dice (NSD), and three running efficiency measurements: running time, area under GPU memory-time curve, and area under CPU utilization-time curve. All measurements will be used to compute the ranking. Moreover, the GPU memory consumption has a tolerance of 2 GB.


\subsection{Implementation details}
\subsubsection{Environment settings}
The development environments and requirements are shown in Table~\ref{table:env}.

\subsubsection{Training protocols}
% Please describe at least the following aspects:
During the training process, we apply random shift and brightness to the whole dataset, and Gaussian noise to the unlabeled images. The details of the training protocols are shown in Table \ref{table:training} and Table \ref{table:training2nd}.

% \vspace{-3cm}
\begin{table}[!htbp]
\caption{Development environments and requirements.}\label{table:env}
\centering
\setlength{\tabcolsep}{5mm}{}
\begin{tabular}{ll}
\hline
Windows/Ubuntu version & Ubuntu 20.04.1 LTS\\
\hline
CPU   & AMD EPYC 7742 64-Core Processor \\
\hline
RAM                         &16$\times $4GB\\
\hline
GPU                       & \makecell[l]{coarse: 2*NVIDIA A100 40G \\ refine: 4*NVIDIA A100 40G}\\
\hline
CUDA version                  & 11.2\\                          \hline
Programming language                 & Python 3.9\\ 
\hline
Deep learning framework & Pytorch (Torch 1.11, torchvision 0.12.0) \\
\hline
\end{tabular}
\end{table}

% \vspace{-3cm}
\begin{table*}[htbp]
\setlength{\abovecaptionskip}{0.2cm}
\setlength{\belowcaptionskip}{0.2cm}
\caption{Training protocols for the coarse model.}
\label{table:training}
\begin{center}
\setlength{\tabcolsep}{7mm}{}
\begin{tabular}{ll} 
\hline
Network initialization         & ``he" normal initialization\\
\hline
Batch size                    & \makecell[l]{Labelled data: 4 \\ UnLabelled data: 2} \\
\hline 
Patch size & 160$\times$160$\times$160  \\ 
\hline
Total epochs & 300  \\
\hline
Optimizer          & Adam ($\mu=0.99$)          \\ \hline
Initial learning rate (lr)  & 0.001 \\ \hline
Lr decay schedule & halved by 200 epochs \\
\hline
Training time                                           & 6 hours \\  \hline
Loss function  &  $\mathbf{L}=1-\frac{2 * \sum p_{\text {true }} * p_{\text {pred }}}{\sum p_{\text {true }}^{2}+\sum p_{\text {pred }}^{2}}$\\ \hline
%Number of model parameters    & 41.22M\footnote{https://github.com/sksq96/pytorch-summary} \\ \hline
%Number of flops & 59.32G\footnote{https://github.com/facebookresearch/fvcore} \\ \hline
%CO$_2$eq & 1 Kg\footnote{https://github.com/lfwa/carbontracker/} \\  \hline
\end{tabular}
\end{center}
\vspace{-1cm}
\end{table*}



\begin{table*}[!htbp]
\setlength{\abovecaptionskip}{0.2cm}
\setlength{\belowcaptionskip}{0.2cm}
\caption{Training protocols for the fine model.}
\label{table:training2nd}
\begin{center}
\setlength{\tabcolsep}{7mm}{}
\begin{tabular}{ll} 
\hline
Network initialization         & ``he" normal initialization\\
\hline
Batch size                    & \makecell[l]{Labelled data: 8 \\ UnLabelled data: 4}\\
\hline 
Patch size & 192$\times$192$\times$192  \\ 
\hline
Total epochs & 500 \\
\hline
Optimizer          & Adam ($\mu=0.99$)          \\ \hline
Initial learning rate (lr)  & 0.001 \\ \hline
lr decay schedule & halved by 200 epochs \\
\hline
Training time                                           & 3.4 hours \\  \hline
Loss function  &  $\mathbf{L}=1-\frac{2 * \sum p_{\text {true }} * p_{\text {pred }}}{\sum p_{\text {true }}^{2}+\sum p_{\text {pred }}^{2}}$\\ \hline
%Number of model parameters    & 41.22M\footnote{https://github.com/sksq96/pytorch-summary} \\ \hline
%Number of flops & 59.32G\footnote{https://github.com/facebookresearch/fvcore} \\ \hline
%CO$_2$eq & 1 Kg\footnote{https://github.com/lfwa/carbontracker/} \\  \hline
\end{tabular}
\end{center}
\vspace{-1cm}
\end{table*}


\section{Results and discussion}

% Note: Please describe at least the following aspects:\\
% The effect of using unlabelled cases;\\

% What kind of cases the proposed method works well?\\

% What are the possible reasons for the failed cases or organs?\\
% Segmentation efficiency analysis\\





% Currently, you can report the Dice score on validation set



\FloatBarrier
\subsection{Qualitative results on the validation dataset}
We compare our proposed method with the baseline model on 20 cases of validation set. The qualitative results are shown in Table \ref{table:baselineAndOurs}. 

% Please do ablation study to analysis the effect of unlabelled data.
\begin{table*}[!htbp]
\caption{Qualitative results of baseline and baseline+SS(semi-supervised) on 20 cases of validation set.}
\label{table:baselineAndOurs}
\begin{center}
% \resizebox{0.47\textwidth}{!}{
\setlength{\tabcolsep}{7mm}{}
\begin{tabular}{lllll} 
\hline
\toprule
% Organ & \multicolumn{2}{c}{\makecell[cc]{\multicolumn{2}{c}{Baseline}\\\begin{tabular}{@{}ll@{}}DSC & NSD\\\end{tabular}}} & \multicolumn{2}{c}{\makecell[cc]{\multicolumn{2}{c}{Baseline+SS}\\\begin{tabular}{@{}ll@{}} DSC & NSD\\\end{tabular}}} \\
\multirow{2}{*}{Organ} & \multicolumn{2}{c}{Baseline} & \multicolumn{2}{c}{Baseline+SS} \\
 & DSC & NSD & DSC & NSD \\

\midrule
Liver & 0.936 & 0.915 & 0.961 & 0.955\\
RK & 0.818 & 0.796 & 0.838 & 0.815\\
Spleen & 0.920 & 0.899 & 0.941 & 0.932\\
Pancreas & 0.762 & 0.848 & 0.816 & 0.892\\
Aorta & 0.945 & 0.977 & 0.958 & 0.986\\
IVC & 0.835 & 0.825 & 0.891 & 0.892\\
RAG & 0.709 & 0.820 & 0.660 & 0.745\\
LAG & 0.709 & 0.798 & 0.821 & 0.900\\
Gallbladder & 0.676 & 0.658 & 0.753 & 0.751\\
Esophagus & 0.798 & 0.884 & 0.806 & 0.877\\
Stomach & 0.731 & 0.752 & 0.864 & 0.883\\
Duodenum & 0.675 & 0.829 & 0.718 & 0.811\\
LK & 0.889 & 0.879 & 0.893 & 0.893\\
Average & 0.800 & 0.837 & 0.840 & 0.872\\
\bottomrule
\end{tabular}
\end{center}
\end{table*}


In the case of comparison with the ground truth, Fig. \ref{fig:contrast} and Fig. \ref{fig:contrast_slices} show two examples with good segmentation results and two examples with poor segmentation results, respectively. Table \ref{table:contra_dsc} and Table \ref{table:contra_nsd} shows the corresponding DSC and NSD scores. For all 20 cases in the validation dataset, the average of DSC and NSD of our model are 0.840 and 0.872, respectively. At the same time, we observe that the segmentation results of our proposed model on Right Adrenal Gland (RAG) is the worst in all organs, with the average DSC and average NSD of 0.660 and 0.745, respectively. One reason may be that there are too many organs around it.

\begin{figure}[htbp]
\centering
\includegraphics[scale=0.22]{imgs/contrast.png}
\caption{Comparison of 3D segmentation in two good segmentation results (NO.6 and NO.21) and two poor segmentation results (NO.47 and NO.48).}
\label{fig:contrast}
\end{figure}

\begin{figure}[htbp]
\centering
\includegraphics[scale=0.24]{imgs/contrast_slice.png}
\caption{Comparison of section segmentation of a layer in two good segmentation results (NO.6 and NO.21) and two poor segmentation results (NO.47 and NO.48).}
\label{fig:contrast_slices}
\end{figure}



\begin{table*}[!htbp]
\caption{DSC of each organ in the two good segmentation results (NO.6 and NO.21) and two poor segmentation results (NO.47 and NO.48).}
\label{table:contra_dsc}
\begin{center}
% \resizebox{0.47\textwidth}{!}{
\setlength{\tabcolsep}{7mm}{}
\begin{tabular}{lllll} 
\hline
\toprule
%   & \multicolumn{2}{c}{Baseline} & \multicolumn{2}{c}{Ours} \\
%   \cline{1-5}
Organ & NO.6 & NO.21 & NO.47 & NO.48 \\
\midrule
Liver & 0.983 & 0.978 & 0.917 & 0.860\\
RK & 0.979 & 0.978 & 0.984 & 0.738\\
Spleen & 0.986 & 0.989 & 0.769 & 0.613\\
Pancreas & 0.894 & 0.927 & 0.700 & 0.800\\
Aorta & 0.976 & 0.972 & 0.938 & 0.828\\
IVC & 0.970 & 0.951 & 0.734 & 0.523\\
RAG & 0.898 & 0.807 & 0.000 & 0.788\\
LAG & 0.926 & 0.888 & 0.860 & 0.921\\
Gallbladder & 1.000 & 1.000 & 0.557 & 0.000\\
Esophagus & 0.907 & 0.951 & 0.841 & 0.260\\
Stomach & 0.938 & 0.977 & 0.734 & 0.297\\
Duodenum & 0.826 & 0.939 & 0.000 & 0.708\\
LK & 0.968 & 0.986 & 0.984 & 0.920\\
Average & 0.942 & 0.949 & 0.694 & 0.635\\
\bottomrule

\end{tabular}
\end{center}
\end{table*}


\begin{table*}[!htbp]
\caption{NSD of each organ in the two good segmentation results (NO.6 and NO.21) and two poor segmentation results (NO.47 and NO.48).}
\label{table:contra_nsd}
\begin{center}
% \resizebox{0.47\textwidth}{!}{
\setlength{\tabcolsep}{7mm}{}
\begin{tabular}{lllll} 
\hline
\toprule
%   & \multicolumn{2}{c}{Baseline} & \multicolumn{2}{c}{Ours} \\
%   \cline{1-5}
Organ & NO.6 & NO.21 & NO.47 & NO.48 \\
\midrule
Liver & 0.982 & 0.991 & 0.812 & 0.781\\
RK & 0.988 & 0.986 & 0.998 & 0.658\\
Spleen & 0.989 & 1.000 & 0.638 & 0.510\\
Pancreas & 0.947 & 0.991 & 0.758 & 0.851\\
Aorta & 0.998 & 0.999 & 0.988 & 0.850\\
IVC & 0.996 & 0.962 & 0.688 & 0.581\\
RAG & 0.968 & 0.918 & 0.000 & 0.888\\
LAG & 0.989 & 0.972 & 0.944 & 0.984\\
Gallbladder & 1.000 & 1.000 & 0.610 & 0.000\\
Esophagus & 0.988 & 1.000 & 0.892 & 0.335\\
Stomach & 0.948 & 0.999 & 0.772 & 0.373\\
Duodenum & 0.871 & 0.999 & 0.001 & 0.836\\
LK & 0.934 & 0.998 & 0.985 & 0.876\\
Average & 0.969 & 0.986 & 0.699 & 0.656\\
\bottomrule

\end{tabular}
\end{center}
\end{table*}

Meanwhile, we compare our proposed method with the baseline model on 50 cases of validation set based on DSC. The qualitative results are shown in Table \ref{table:val_50_dsc}.

\begin{table*}[!htbp]
\caption{Qualitative DSC results of baseline and baseline+SS (semi-supervised) on 50 cases in the validation dataset.}
\label{table:val_50_dsc}
\begin{center}
% \resizebox{0.47\textwidth}{!}{
\setlength{\tabcolsep}{7mm}{}
\begin{tabular}{lcc} 
\hline
\toprule
%   & \multicolumn{2}{c}{Baseline} & \multicolumn{2}{c}{Ours} \\
%   \cline{1-5}
Organ & baseline+SS & baseline \\
\midrule
Liver & 0.9555 & 0.9150\\
RK & 0.9021 & 0.8586\\
Spleen & 0.9341 & 0.8603\\
Pancreas & 0.8069 & 0.7422\\
Aorta & 0.9487 & 0.9180\\
IVC & 0.8762 & 0.8065\\
RAG & 0.6678 & 0.6842\\
LAG & 0.7297 & 0.6511\\
Gallbladder & 0.7644 & 0.6652\\
Esophagus & 0.8018 & 0.7412\\
Stomach & 0.8800 & 0.7279\\
Duodenum & 0.7298 & 0.6588\\
LK & 0.9066 & 0.8663\\
Average & 0.8387 & 0.7766\\

\bottomrule

\end{tabular}
\end{center}
\end{table*}
% \subsection{Segmentation efficiency results on validation set}


%\subsection{Results on final testing set}
%This is a placeholder. We will send you testing results after the challenge.

\FloatBarrier
\subsection{Segmentation efficiency results on validation set}
Our docker was validated with NVIDIA QUADRO RTX5000 (16G) and 32G RAM on 50-case validation set. The mean running time per case is 11.6 seconds, the mean maximum GPU memory is 2423MB, the mean area under GPU memory-time curve is 12287 and the mean area under CPU utilization-time curve is 224. 

\FloatBarrier
\subsection{Results on final testing set}
On the final testing set of 200 cases with undisclosed ground truth, the average DSC and NSD of our method were 84.97 and 89.15, respectively. In terms of segmentation efficiency, the mean running time per case is 11.67 seconds, the mean area under GPU memory-time curve is 12155 and the mean area under CPU utilization-time curve is 226. The average DSC and NSD for these 13 organs are shown in Table \ref{table:final_test}.

\begin{table*}[!htbp]
\caption{Average DSC and NSD of 13 organs on the final test set.}
\label{table:final_test}
\begin{center}
% \resizebox{0.47\textwidth}{!}{
\setlength{\tabcolsep}{7mm}{}
\begin{tabular}{lcc} 
\hline
\toprule
%   & \multicolumn{2}{c}{Baseline} & \multicolumn{2}{c}{Ours} \\
%   \cline{1-5}
Organ & DSC & NSD \\
\midrule
Liver & 0.9650  & 0.9653 \\
RK & 0.9166  & 0.9075 \\
Spleen & 0.9307  & 0.9329 \\
Pancreas & 0.7721  & 0.8708 \\
Aorta & 0.9583  & 0.9796 \\
IVC & 0.8688  & 0.8727 \\
RAG & 0.7698  & 0.8804 \\
LAG & 0.7838  & 0.8794 \\
Gallbladder & 0.7705  & 0.7740 \\
Esophagus & 0.7590  & 0.8405 \\
Stomach & 0.8840  & 0.9040 \\
Duodenum & 0.7280  & 0.8474 \\
LK & 0.9396  & 0.9346 \\
Average & 0.8497  & 0.8915 \\


\bottomrule

\end{tabular}
\end{center}
\end{table*}

\subsection{Limitation and future work}
In this work, we separate the training pipeline into three steps. In each step, the parameters of the discriminator and the generator are not updated at the same time, it might cost additional training time. An end-to-end semi-supervised learning algorithm will be our goal in the future. Due to the time limitations, we do not find a way to perform the proper analysis of the dataset. Considering the success of nn-UNet~\cite{isensee2018nnu}, we believe that a proper analysis might improve our final segmentation results.



\section{Conclusion}
In this paper, in order to use the large amount of unlabeled data, we develop an adversarial generator-discriminator training pipeline based on EfficientSegNet. 
For the generator, we employ the coarse-to-fine strategy to generate segmentation results. The labeled images and their ground truth are used to optimize the generator. For the discriminator, we first add noise to the original unlabeled images, then the discriminator receives the original unlabeled image or the relevant noised image together with their generated segmentation results. The better segmentation is determined by the discriminator as the pseudo label of the unlabeled image. On the FLARE 2022 validation dataset, our method achieved an average DSC of 0.840 and a NSD of 0.872 with an average process time of 11.6 seconds per case in the inference phase.

\subsubsection{Acknowledgements} The authors of this paper declare that the segmentation method implemented for participation in the FLARE 2022 challenge has not used any pre-trained models nor additional datasets other than those provided by the organizers. The proposed solution is fully automatic without any manual intervention.


%
% ---- Bibliography ----
%
% BibTeX users should specify bibliography style 'splncs04'.
% References will then be sorted and formatted in the correct style.
%
\bibliographystyle{splncs04}
\bibliography{ref}

\end{document}
