% This is samplepaper.tex, a sample chapter demonstrating the
% LLNCS macro package for Springer Computer Science proceedings;
% Version 2.21 of 2022/01/12
%
\documentclass[runningheads]{llncs}
%
\usepackage[T1]{fontenc}
% T1 fonts will be used to generate the final print and online PDFs,
% so please use T1 fonts in your manuscript whenever possible.
% Other font encondings may result in incorrect characters.
%
\usepackage{graphicx}
% Used for displaying a sample figure. If possible, figure files should
% be included in EPS format.
%
% If you use the hyperref package, please uncomment the following two lines
% to display URLs in blue roman font according to Springer's eBook style:
%\usepackage{color}
%\renewcommand\UrlFont{\color{blue}\rmfamily}
\usepackage[pagebackref=true,breaklinks=true,colorlinks,bookmarks=false]{hyperref}
%
\usepackage{amssymb, amsmath, bm, latexsym, comment}
\usepackage{multirow}
\usepackage{xcolor}
\usepackage{graphicx}
\usepackage{subfigure}
\usepackage{indentfirst}
\usepackage{setspace}
\usepackage{verbatim}
\usepackage{array}
\usepackage{cite}
\usepackage{booktabs}
\usepackage{hyperref}
\usepackage[capitalize]{cleveref}
\usepackage{arydshln}
\usepackage{multirow}
\usepackage{subfigure}

\newcommand{\orcidauthorA}{0000-0001-6677-6677}

%\newcommand{\orcid}[1]{\href{https://orcid.org/#1}{\includegraphics[width=10pt]{imgs/orcid.png}}}
%

\begin{document}
%
\title{Semi-supervised multi-organ segmentation with cross supervision using siamese network}
%
%\titlerunning{Abbreviated paper title}
% If the paper title is too long for the running head, you can set
% an abbreviated paper title here
%
%\author{Jia Dengqiang\inst{1}}
%\author{Jia Dengqiang\inst{1}, \orcid{0000-0000-0000-0000}}
\author{Jia Dengqiang\inst{1}\orcidID{0000-0002-0902-1882}}

%
\authorrunning{Jia Dengqiang}
% First names are abbreviated in the running head.
% If there are more than two authors, 'et al.' is used.
%
\institute{School of Naval Architecture, Ocean and Civil Engineering,
Shanghai Jiao Tong University, Shanghai, China\\
\email{wangxifeng004@163.com}
}
%
\maketitle              % typeset the header of the contribution
%
\begin{abstract}
A large amount of unlabeled data is useful for supervised medical image segmentation when the labeled data is limited.
To leverage all the unlabeled images for efficient abdominal organ segmentation, we developed a semi-supervised framework with cross supervision using a siamese network, i.e., SemiSeg-CSSN.
Cross supervision enables each of the two networks to be optimized using pseudo-labels generated by the other.
Moreover, we applied the cascade strategy for the task because of the large and uncertain locations of the abdomen regions.
To validate the effects of unlabeled data, we employed an unlabeled image filtering strategy to select the unlabeled image and their pseudo label images with low uncertainty.
On the FLARE2022 validation cases, with the help of unlabeled data, our method obtained an average Dice similarity coefficient (DSC) of 77.7\% and an average normalized surface Dice (NSD) of 82.0\%, which is better than the supervised method.
The average running time is 12.9s per case in inference phase and maximum used GPU
memory is  2052 MB.
\keywords{Semi-supervised learning \and Cross supervision \and Label filtering}
\end{abstract}



\section{Introduction}
For supervised learning, limited labeled data tend to lead to the over-fitting problem.
In the task of medical image analysis, however, manual voxel-level labeling is expensive and time-consuming because of the professional domain knowledge.



Semi-supervised learning (SSL) aims to solve the learning problem in scenario of sparsely labeled images and a large number of auxiliary unlabeled images.
These learning methods have been studied in classification problems \cite{fralick1967learning,scudder1965probability}.

Currently, semi-supervised segmentation has raised attention.
Self-training strategy tries to learn from unlabeled data by imputing the labels for samples predicted with high confidence \cite{blum1998combining,chen2020naive, mittal2019semi}.

There are many natural image datasets available for semi-supervised segmentation, such as Pascal VOC 2012~\cite{everingham2015pascal} and Cityscapes~\cite{cordts2016cityscapes}.
For medical image segmentation, the FLARE2022 challenge provides a large-scale abdominal dataset that contains 50 labeled images and 2000 unlabeled images.
Besides, at most 13 organs are annotated in the challenge, which belongs to the standard closed-set SSL setting~\cite{Chen2022}.
The difficulty of this challenge is to segment both large and small organs given a scenario with less labeled data.

Multi-organ segmentation has three main difficulties.
\begin{itemize}
	\item Class imbalance problem. As shown in Figure~\ref{fig:class_ratio}, RAG and LAG have small class ratios, which leads to the class imbalance problem.
	\item Large shape variations and pathology influence. Some organs, e.g., Gallbladder (brown), Pancreas (yellow) and Duodenum (blue) shown in Figure~\ref{fig:shape}, have large variations in shape, and some organs are diseased, such as liver and kidney tumors (see Figure~\ref{fig:tumor}).
	\item \textcolor{black}{Non-uniform} images. Some images have incomplete abdominal regions, and the image information is not normalized.
\end{itemize}




\begin{figure}[htbp]
\centering
\includegraphics[scale=0.4]{imgs/class_ratio}
\caption{Class ratio in the 50 labeled images in FLARE2022.}
\label{fig:class_ratio}
\end{figure}




 \begin{figure}[!htbp]
  \centering
  \includegraphics[scale=0.5]{imgs/shape_vary_tumor_1.pdf}
    \caption{Four selected examples in FLARE2022. Large shape variations of Gallbladder (brown), Pancreas (yellow) and Duodenum (blue).}
 \label{fig:shape}
  \end{figure}

 \begin{figure}[!htbp]
  \centering
  \includegraphics[scale=0.5]{imgs/shape_vary_tumor_2.pdf}
    \caption{A selected example in FLARE2022. The left kidney has a tumor.}
 \label{fig:tumor}
  \end{figure}


In this paper, we proposed a siamese network with cross supervision to train the semi-supervised segmentation network.
\textcolor{black}{Two networks, which have the same architecture and the same number of parameters, are introduced, and they are initialized differently at the beginning of training.}
These two segmentation networks can generate pseudo label images, and supervise each other's training in the \textcolor{black}{manner} of cross supervision.
Moreover, we employed a filtering strategy for unlabeled images.
These selected unlabeled images have pseudo label images with low uncertainty, which can ensure the stability of training.

The main contributions of our work are summarized as
follows:
\begin{itemize}
\item To leverage the unlabeled data, we use cross supervision strategy, which is achieved via a siamese network.
\item To improve the efficiency, we use anisotropic convolution block and strip pooling module.
\item We also employ a filtering strategy to improve the performance.
\item The effectiveness and efficiency of the proposed semi-supervised framework are demonstrated on FLARE2022 challenge dataset, where we
achieve the top 10 with low time cost and less memory usage.
\end{itemize}


\section{Method}
\begin{figure}[htbp]
\centering
\includegraphics[scale=0.45]{imgs/CPS_diagram.pdf}
\caption{Cross supervision framework when using unlabeled data. The siamese network contains two sub-networks, denoted as $P_{\theta_1}$ and $P_{\theta_2}$, whose architectures are the same, and the two sub-networks are initialized differently at the beginning of training.
When using unlabeled images, the two sub-networks predict two segmentation probability maps $P_1$ and $P_2$ for the given intensity image $\boldsymbol{I}_u$.
The two probability maps can be transformed to two different pseudo label images $S_1$ and $S_2$ for the input image.
Thus, these two segmentation sub-networks can generate pseudo label images, and supervise each other's training in the manner of cross supervision.
}
\label{fig:CPS_diagram}
\end{figure}

\begin{figure}[htbp]
\centering
\includegraphics[scale=0.45]{imgs/Sup_diagram.pdf}
\caption{Cross supervision framework when using labeled data. When using labeled images, two loss functions are constructed, i.e., the cross supervision loss and supervision loss.
The two subnetworks can also generate the pseudo label images for intensity image $\boldsymbol{I}_l$.
The two subnetworks can be supervised based on these pseudo label images in the cross-supervision manner.
Besides, the label images can be considered as ground truth of segmentations, therefore, the output probability maps can be also supervised with the ground truth of the segmentation.
}
\label{fig:Sup_diagram}
\end{figure}

Figures~\ref{fig:CPS_diagram} and~\ref{fig:Sup_diagram} show our approach using cross supervision with unlabeled and labeled data, respectively.
%###########################
\subsection{Preprocessing}
% Full description of any pre-processing strategy, how the data is normalized. Please details the following aspects
% \begin{itemize} 
%  \item Cropping strategy
%  \item Resampling method for anisotropic data
%  \item Intensity normalization method
%  \item Others
% \end{itemize}


The labeled images are cropped using their corresponding labels, which avoids selecting patches without any labels.
For the unlabeled images, we use the trained coarse segmentation model to crop the abdominal regions.
All the images are re-sampled to a fixed spacing, i.e., $1\,\mathrm{mm}\times 1\,\mathrm{mm}\times 3\,\mathrm{mm}$.


\subsection{Proposed Method}

We propose a semi-supervised segmentation framework for multi-organ segmentation task, which can leverage large number of unlabeled data.
The framework consists of two sub-networks, which have the same structures.
We separately optimize the \textcolor{black}{sub-networks}, and simultaneously use them to predict the pseudo labels of unlabeled data.
We train the two networks in a cross-supervised \textcolor{black}{manner} \cite{chen2021semi}.

Besides, we employed a cascade strategy, which aims to segment the abdominal organs via a coarse-to-fine procedure~\cite{zhang2021efficient}.
Because the region of interest, i.e., ROI, of abdominal organs is large, we can not efficiently segment all the organs in a single-stage network.
Therefore, we first segmented the organs from downsampling images, which can be seen as a coarse segmentation.
With the help of the coarse segmentation, we segmented the organs from the original images in the second stage.

We can also train a semi-supervised network via selected unlabeled images, which is based on the uncertainty metric of their pseudo label images.


\subsection{Cross supervision using siamese network (CSSN)}\label{cross-supervision}

Let $\mathcal{L}=\left\{(\boldsymbol{I}_{1},\boldsymbol{S}^{*}_{1}),(\boldsymbol{I}_{2},\boldsymbol{S}^{*}_{2}),\dots,(\boldsymbol{I}_{N},\boldsymbol{S}^{*}_{N})\right\}$ and $\mathcal{U}=\left\{\boldsymbol{I}_{N+1},\boldsymbol{I}_{N+2},\dots,\boldsymbol{I}_{M}\right\}$ denote the labeled data and unlabeled data, respectively.
$\boldsymbol{I}$ and $\boldsymbol{S}^{*}$ denote the intensity image and label image.
The aim of the semi-supervised segmentation is to obtain a segmentation plan that can leverage $\mathcal{L}$ and $\mathcal{U}$.
We can use the segmentation plan to predict a probability map $\boldsymbol{P}$ for $\boldsymbol{I}$ as:
\begin{equation}
    \boldsymbol{P}=P_{\theta}(\boldsymbol{I}).
\end{equation}


In particular, we introduce two sub-networks, i.e., $P_{\theta_1}$ and $P_{\theta_2}$, to obtain two probability maps for a fixed image $\boldsymbol{I}$:
\begin{equation}\label{method:segmentation subnetworks}
\begin{array}{cc}
    \boldsymbol{P}_1=P_{\theta_1}(\boldsymbol{I}),\\
    \boldsymbol{P}_2=P_{\theta_2}(\boldsymbol{I}).
\end{array}
\end{equation}

The siamese networks ($P_{\theta_1}$ and $P_{\theta_2}$) have the same structures and \textcolor{black}{the same number of }parameters but are initialized differently at the beginning of training.
As in Equation \ref{method:segmentation subnetworks}, we can obtain two different predictions for one input image because of the two sub-networks with different parameters.
Since we use label information for supervision, we can use any supervised loss function, e.g., cross-entropy loss, dice loss or a combination of them, which we denote as $\ell_{s}$ in this work.

As shown in Figure~\ref{fig:CPS_diagram}, for unlabeled data $\mathcal{U}$, we employ a bidirectional consistent strategy for supervision.
For example, \textcolor{black}{sub-network} $P_{\theta_1}$ can be supervised by the pseudo label images generated by the frozen \textcolor{black}{subnetwork} $P_{\theta_2}$.
For voxel $i$ of an unlabeled image, we can calculate the loss as:
\begin{equation}
 \ell_{s}\left(p_{i|\theta_{1}}, s_{i|\theta_{2}}\right),
\end{equation}
where $p_{i|\theta_{1}}$ denotes the predicted probability for voxel $i$ using sub-network $P_{\theta_1}$, and $s_{i|\theta_{2}}$ is the label for voxel $i$ generated by $P_{\theta_2}$.
The sub-network $P_{\theta_2}$ can be supervised in the similar \textcolor{black}{manner}.

Thus, the cross-supervised (CS) loss function for $\boldsymbol{I}_u \in \mathcal{U}$ can be formulated as follows:
\begin{equation}\label{CS_loss}
\mathcal{C}_{u}^{u}= \frac{1}{V_u} \sum_{i\in \boldsymbol{I}_u}\left(\ell_{s}\left(p_{i|\theta_{1}}, s_{i|\theta_{2}}\right)+\ell_{s}\left(p_{i|\theta_{2}}, s_{i|\theta_{1}}\right)\right),
\end{equation}
\textcolor{black}{where ${V_u}$ is the number of voxels in $\boldsymbol{I}_u$.}

As shown in Figure \ref{fig:Sup_diagram}, for labeled data $\boldsymbol{I}_l \in\mathcal{L}$, we first employ supervised loss functions for the two sub-networks:
\begin{equation}
\mathcal{C}_{s}= \frac{1}{V_l} \sum_{i\in \boldsymbol{I}_l}\left(\ell_{s}\left(p_{i|\theta_{1}}, s_{i}^{*}\right)+\ell_{s}\left(p_{i|\theta_{2}}, s_{i}^{*}\right)\right),
\end{equation}
where $s_{i}^{*}$ is the voxel $i$ in the label image $\boldsymbol{S}^{*}$. \textcolor{black}{The number of voxels in $\boldsymbol{I}_l$ is denoted as 
${V_l}$.}

As shown in Figure \ref{fig:Sup_diagram}, using the pseudo label images, we can also formulate the CS loss for labeled data in the same \textcolor{black}{manner} as Equation (\ref{CS_loss}), i.e., $\mathcal{C}_{u}^{l}$.

The training loss function can be formulated as:

\begin{equation}
    \mathcal{C}=\mathcal{C}_{s}+\mathcal{C}_{u}^{u}+\mathcal{C}_{u}^{l}.
\end{equation}



\subsection{Unlabeled image filtering (UIF) based on uncertainty}
Unlabeled images may contain cases with different distributions than labeled images.
Although we applied strong data augmentations when training with pseudo-label images, errors in pseudo-label images with high uncertainty were still prone to accumulate and degrade the performance.
To solve this problem, we prioritized reliable unlabeled images based on holistic prediction-level stability.

To obtain reliable unlabeled images, we employed UIF on the pseudo-labeled images, as the selection approach of Yang et al.\cite{yang2022st++}.
We selected 200 (top 10\%) unlabeled images and their pseudo-labeled images with the lowest uncertainty from the 2000 unlabeled images.
For an unlabeled image $\boldsymbol{I}_l$, we can compute the uncertainty as:
\begin{equation}
	U_l=1-\frac{1}{9}\sum_{j=1}^{9}{\text{DSC}(\boldsymbol{S}_{l|\mathcal{M}_{j*100}},\boldsymbol{S}_{l|\mathcal{M}_{1000}})},
\end{equation}

where $\boldsymbol{S}_{l|\mathcal{M}_{j*100}}$ denotes the pseudo label image of $\boldsymbol{I}_l$ generated by trained model $\mathcal{M}_{j*100}$.
The trained model $\mathcal{M}_{j*100}$ is saved to the disk in epoch $j*100$ during training the supervised segmentation network.

After we have selected 200 unlabeled images, we can train a new SemiSeg-CSSN with a new mixed dataset containing both labeled and unlabeled images.
We embedded CSSN during training the network.

\subsection{Strategies to improve inference speed and reduce resource consumption}

We take the whole image as input and output a segmentation result of the whole image size, which is more efficient than using a patchwork segmentation result based on patches.
Besides, we employed the strategies from efficientSegNet\cite{zhang2021efficient} to reduce the resource consumption.
An anisotropic convolution with a $k \times k \times 1$ intra-slice convolution and a $1 \times 1 \times k$ inter-slice convolution is used in the decoder module.
In addition, the low-level and high-level feature maps are aggregated by addition rather than concatenation due to the low GPU memory footprint.

\subsection{Post-processing}

For the results of segmentations, we used the maximal union region selection as post-processing steps.
We selected the unique region which has the maximal areas from the candidate regions for each class.

\section{Experiments}
\subsection{Dataset and evaluation measures}
The FLARE2022 dataset is curated from more than 20 medical groups under the license permission, including MSD~\cite{simpson2019MSD}, KiTS~\cite{KiTS,KiTSDataset}, AbdomenCT-1K~\cite{AbdomenCT-1K}, and TCIA~\cite{clark2013TCIA}. 
It is an extension of the FLARE 2021~\cite{MedIA-FLARE21} with more segmentation targets and more diverse abdomen CT scans.
The training set includes 50 labelled CT scans with pancreas disease and 2000 unlabelled CT scans with liver, kidney, spleen, or pancreas diseases. The validation set includes 50 CT scans with liver, kidney, spleen, or pancreas diseases.
The testing set includes 200 CT scans where 100 cases have liver, kidney, spleen, or pancreas diseases and the other 100 cases have uterine corpus endometrial, urothelial bladder, stomach, sarcomas, or ovarian diseases. All the CT scans only have image information and the center information is not available.

The evaluation measures consist of two accuracy measures: Dice Similarity Coefficient (DSC) and Normalized Surface Dice (NSD), and three running efficiency measures: running time, area under GPU memory-time curve, and area under CPU utilization-time curve. All measures will be used to compute the ranking. Moreover, the GPU memory consumption has a 2 GB tolerance.


\subsection{Implementation details}
\subsubsection{Environment settings}
The environments and requirements for training are presented in Table~\ref{table:env}.


\begin{table}[t]
\caption{Environments and requirements for training.}\label{table:env}
\centering
\begin{tabular}{ll}
\hline
Windows/Ubuntu version       &Ubuntu 20.04.4 LTS\\
\hline
CPU   & Platinum 82 series (72vCPU) v5@2.5GHz \\
\hline
RAM                         &16$\times $4GB; 2.67MT$/$s\\
\hline
GPU (number and type)                         & NVIDIA V100 16$\times $32G\\
\hline
CUDA version                  & 11.1\\                          \hline
Programming language                 & Python 3.6\\ 
\hline
Deep learning framework & Pytorch (Torch 1.8.0, torchvision 0.9.0) \\
\hline
Code is publicly
available at & \href{https://github.com/jdq818/SemiSeg-CSSN}{SemiSeg-CSSN}\\
\hline
\end{tabular}
\end{table}


\subsubsection{Training protocols}
We implemented the proposed framework using EfficientSegNet network used in FLARE21 challenge.
Patch-based U-Nets such as nnU-Net~\cite{isensee2021nnu} can also be used as the basic segmentation network; however, we found that they consume a large amount of RAM during prediction.
Brightness, crop, random rotation, random translation and random elastic deformation were used for data augmentation.
We randomly resampled the data to the sizes described in Table~\ref{table:training}.
Besides, we trained the coarse model with the 50 labeled images.
% Please describe at least the following aspects:

% Data augmentation (Based on the winning solutions in FLARE 2021, we recommend using extensive data augmentation)
% patch sampling strategy, optimal model selection criteria


\begin{table*}[h]
\caption{Training protocols for SemiSeg-CSSN.}
\label{table:training}
\begin{center}
% \resizebox{0.47\textwidth}{!}{
\begin{tabular}{ll} 
\hline
Network initialization         & Kaiming normal initialization\\
\hline
Batch size                    & 8(coarse), 1(fine) \\
\hline 
Input size (coarse) & 160$\times$160$\times$160 \\ 
\hline
Input size (fine) & 192$\times$192$\times$192 \\ 
\hline
Total epochs & 500(coarse), 200(fine) \\
\hline
Optimizer         &Adam with betas (0.9, 0.99),
L2 penalty: 0.00001      \\ 
\hline
Loss &Dice loss and focal loss
(alpha = 0.5, gamma = 2)\\
\hline
Initial learning rate (lr)  & 0.01 \\ \hline
Training time
& 6 (coarse), 300 (fine) hours\\ \hline
\end{tabular}
%}
\end{center}
\end{table*}





\section{Results and discussion}
\subsection{Quantitative results on validation set}

We used 50 labeled and 2000 unlabeled images to train the network in cross-supervised \textcolor{black}{manner}.
The results show that the method using unlabeled data improves the Dice score over the method with only 50 labeled images.

Table~\ref{tab:results} shows the results of the proposed methods.
The results of our submitted solution (docker container), which is evaluated by the organizers of FLARE2022, are reported in the last two columns in Table~\ref{tab:results}, i.e., SemiSeg-CSSN$^*$.
The other results are evaluated on the 20 selected validation cases, whose ground truth was sent by the organizers.


Compared to the supervised method, the average DSC of the semi-supervised method (SemiSeg-CSSN) improves from 70.9\% to 78.0\%, while the average NSD improves from 74.8\% to 82.0\%.
The results show that LAG, Gallbladder and RAG are three difficult organs and Liver, Spleen and Aorta are three easy organs for abdominal organ segmentation.
The difficulties may be due to unclear boundaries and class imbalance issues.
Besides, the standard deviations of Gallbladder segmentation are relatively large, which demonstrates that the method achieves poor robustness for Gallbladder.
As shown in Figure~\ref{fig:Case_visual}, Case $\#0047$ has a complete Gallbladder, while Case $\#0048$ does not have one.
Moreover, as shown in Figure~\ref{fig:Case_visual}, the pathologies, such as the tumor in Liver in Case $\#0047$, have negative effects on the segmentation.

Besides, for 2000 unlabeled images, we generated their pseudo label images using trained supervised segmentation network.
Then, we used UIF to select 200 unlabeled images and their pseudo label images with low uncertainty, and trained a new segmentation network via SemiSeg-CSSN.
As shown in Table~\ref{tab:results}, the DSC of semi-supervised segmentation network improves from 78.0\% to 79.1\%.




\begin{table}[t]
\caption{Quantitative results of supervised and semi-supervised methods in terms of DSC and NSD on the validation dataset. The symbol 50(L)+ 2000(U) denotes the method, which used 50 labeled and 2000 unlabeled images. We reported the mean and standard deviation in parentheses.}
\label{tab:results}
\centering
\scriptsize
\begin{tabular}{l c c c c c c |c c}
\hline
\multirow{3}{*}{Organ} & \multicolumn{2}{c}{Supervised} & \multicolumn{2}{c}{SemiSeg-CSSN} & \multicolumn{2}{c}{SemiSeg-CSSN+UIF} & \multicolumn{2}{|c}{SemiSeg-CSSN$^*$} \\ 
{} & \multicolumn{2}{c}{50(L)} & \multicolumn{2}{c}{50(L)+2000(U)} & \multicolumn{2}{c}{50(L)+200(U)} &\multicolumn{2}{|c}{50(L)+2000(U)} \\ 
{} & \multicolumn{2}{c}{DSC(\%), NSD(\%)} & \multicolumn{2}{c}{DSC(\%), NSD(\%)} & \multicolumn{2}{c}{DSC(\%), NSD(\%)} &\multicolumn{2}{|c}{DSC(\%), NSD(\%)}\\ 
\hline
Liver&\multicolumn{2}{c}{90.8(7.3),85.4(13.0)}& \multicolumn{2}{c}{93.7(5.4), 91.3(10.7)} & \multicolumn{2}{c}{96.3(2.0),96.7(4.7)}& \multicolumn{2}{|c}{92.5(14.1),90.6(15.4) }\\
RK&\multicolumn{2}{c}{79.3(33.6) ,77.93(32.8)}& \multicolumn{2}{c}{80.5(32.8),79.0(32.5) } & \multicolumn{2}{c}{79.8(31.9), 79.5(31.9)}& \multicolumn{2}{|c}{87.3(26.2),86.7(26.3) }\\
Spleen&\multicolumn{2}{c}{92.5(7.4),90.0(12.3)}& \multicolumn{2}{c}{89.8(21.9),89.0(23.4)} & \multicolumn{2}{c}{87.6(18.7), 88.6(18.5)}& \multicolumn{2}{|c}{91.6(19.8),91.7(20.9) }\\
Pancreas&\multicolumn{2}{c}{68.8(12.3),76.2(14.5)}& \multicolumn{2}{c}{73.3(13.8),81.1(14.6) } & \multicolumn{2}{c}{78.4(15.3), 88.8(13.8)}& \multicolumn{2}{|c}{75.4(15.8),82.8(16.9) }\\
Aorta&\multicolumn{2}{c}{90.6(4.0),91.8(6.6)}& \multicolumn{2}{c}{94.5(2.6),96.3(3.9)} & \multicolumn{2}{c}{93.8(2.6), 96.5(3.2)}& \multicolumn{2}{|c}{91.8(14.5),93.6(15.2) }\\
IVC&\multicolumn{2}{c}{83.5(11.7),80.4(12.3)}& \multicolumn{2}{c}{87.7(8.6),87.0(8.9)} & \multicolumn{2}{c}{87.0(11.6),87.1(10.6)}& \multicolumn{2}{|c}{82.4(16.2),80.5(17.0) }\\
RAG&\multicolumn{2}{c}{54.1(31.6),63.0(35.0)}& \multicolumn{2}{c}{64.1(33.2),72.6(37.1) } & \multicolumn{2}{c}{77.3(14.6),89.9(10.0)}& \multicolumn{2}{|c}{64.8(31.7),74.5(36.0) }\\
LAG&\multicolumn{2}{c}{24.4(29.6),48.1(34.8)}& \multicolumn{2}{c}{51.7(32.0),71.6(27.8)} & \multicolumn{2}{c}{72.1(20.7),82.4(21.7)}& \multicolumn{2}{|c}{47.5(32.9),70.0(30.8) }\\
Gallbladder&\multicolumn{2}{c}{42.3(40.0),39.3(39.3)}& \multicolumn{2}{c}{60.3(38.0),57.6(38.1) } & \multicolumn{2}{c}{63.9(41.4),61.9(42.2)}& \multicolumn{2}{|c}{68.0(34.8),66.1(35.7)}\\
Esophagus&\multicolumn{2}{c}{75.1(17.1),84.8(18.2)}& \multicolumn{2}{c}{82.4(9.8),91.7(9.5)} & \multicolumn{2}{c}{76.7(12.9),86.5(14.2)}& \multicolumn{2}{|c}{76.0(21.3),84.1(23.2)}\\
Stomach&\multicolumn{2}{c}{71.0(29.0),71.0(29.3)}& \multicolumn{2}{c}{83.0(18.8),84.8(17.9)} & \multicolumn{2}{c}{74.2(28.7),78.5(28.1)}& \multicolumn{2}{|c}{80.8(24.2),82.4(24.4)}\\
Duodenum&\multicolumn{2}{c}{61.7(26.0),76.9(26.0)}& \multicolumn{2}{c}{64.0(23.8),78.2(22.0) } & \multicolumn{2}{c}{59.1(26.8),74.8(24.9)}& \multicolumn{2}{|c}{63.3(23.1),77.0(22.7)}\\
LK&\multicolumn{2}{c}{88.2(22.2),87.2(22.5)}& \multicolumn{2}{c}{88.6(21.1),85.4(22.2)} & \multicolumn{2}{c}{81.7(25.1), 	82.3(25.0) }& \multicolumn{2}{|c}{88.6(19.9),85.4(21.5)}\\
 \hline
Avg.&\multicolumn{2}{c}{70.9(31.0) ,74.8(29.4) }& \multicolumn{2}{c}{78.0(26.6),82.0(25.3)} & \multicolumn{2}{c}{79.1(24.5),84.1(23.7)}& \multicolumn{2}{|c}{77.7(13.1),82.0(7.9)}\\
 \hline
\end{tabular}
\end{table}

\subsection{\textcolor{black}{Segmentation efficiency results on validation set}}

 		 	 	\begin{table}[t]
\caption{Segmentation efficiency results on the validation set.}
\label{tab:efficient_results}
\centering
\scriptsize
\begin{tabular}{c| c| c| c}
\hline
\multirow{1}{*}{Mean runtime (s)} & \multicolumn{1}{c|}{Maximum used GPU memory (MB)}& \multicolumn{1}{c|}{AUC GPU time} & \multicolumn{1}{c}{AUC CPU time}\\ 
\hline
12.9&2052&13776.9&250.6\\
\hline
\end{tabular}
\end{table}


Table~\ref{tab:efficient_results} presents the segmentation efficiency results.
The mean runtime is 12.9 s per case in the prediction step, the maximum used GPU memory is 2052 MB, AUC GPU time is 13776.9, and AUC CPU time is 250.6.

\subsection{\textcolor{black}{Quantitative results on test set}}


 		 	 	\begin{table}[t]
\caption{Quantitative results SemiSeg-CSSN in terms of DSC and NSD on the test dataset. We used 50 labeled and 2000 unlabeled images for training. We reported the mean and standard deviation in parentheses.}
\label{tab:cases_results_test}
\centering
\scriptsize
\begin{tabular}{l c c }
\hline
\multirow{1}{*}{Organ} & \multicolumn{1}{c}{DSC(\%)}& \multicolumn{1}{c}{NSD(\%)} \\ 
\hline
Liver & 94.3(5.2) & 92.7(8.8)\\
RK & 89.1(21.6) & 87.6(22.3)\\
Spleen & 90.6(20.3) & 90.7(21.3)\\
Pancreas & 71.2(17.9) & 79.8(19.3)\\
Aorta & 93.3(8.2) & 95.1(9.0)\\
IVC & 83.6(15.1) & 83.1(16.3)\\
RAG & 75.2(20.4) & 86.2(22.1)\\
LAG & 48.7(31.9) & 74.8(28.7)\\
Gallbladder & 66.5(36.3) & 65.0(36.2)\\
Esophagus & 69.0(22.4) & 77.5(25.3)\\
Stomach & 83.4(17.8) & 84.8(17.7)\\
Duodenum & 65.1(17.3) & 79.0(18.0)\\
LK & 86.4(21.7) & 82.3(23.0)\\
\hline
Avg. & 78.2(13.0) & 83.0(7.7)\\
\hline
\end{tabular}
\end{table}


Table~\ref{tab:cases_results_test} shows the quantitative result of SemiSeg-CSSN on test dataset.
The average DSC of 13-organ segmentation is $78.2\pm 13.0\%$, and the average NSD is $83.0\pm 7.7\%$.
The organs with the highest and lowest DSC were Liver and LAG, respectively. 
The gallbladder has the largest standard deviation.
 	 
 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 			

 	\begin{table}[t]
\caption{Quantitative results of the good (Case $\#$0006 and Case $\#$0035) and bad (Case $\#$0047 and Case $\#$0048) examples.}
\label{tab:cases_results}
\centering
\scriptsize
\begin{tabular}{l c c c c c c c c}
\hline
\multirow{2}{*}{Organ} & \multicolumn{2}{c}{Case $\#0006$} & \multicolumn{2}{c}{Case $\#0035$} & \multicolumn{2}{c}{Case $\#0047$} & \multicolumn{2}{c}{Case $\#0048$} \\ 
{} & \multicolumn{2}{c}{DSC(\%), NSD(\%)} & \multicolumn{2}{c}{DSC(\%), NSD(\%)} & \multicolumn{2}{c}{DSC(\%), NSD(\%)} &\multicolumn{2}{c}{DSC(\%), NSD(\%)}\\ 
\hline
Liver&\multicolumn{2}{c}{96.8, 93.2}& \multicolumn{2}{c}{96.7, 96.8} & \multicolumn{2}{c}{82.2, 67.0}& \multicolumn{2}{c}{87.1	, 82.2}\\
RK&\multicolumn{2}{c}{96.5, 96.7			}& \multicolumn{2}{c}{97.3, 97.1} & \multicolumn{2}{c}{95.7, 93.4}& \multicolumn{2}{c}{84.8, 77.6}\\
Spleen&\multicolumn{2}{c}{97.9, 97.9}& \multicolumn{2}{c}{98.0	, 99.6} & \multicolumn{2}{c}{85.8, 76.4}& \multicolumn{2}{c}{63.6, 50.0}\\
Pancreas&\multicolumn{2}{c}{83.6, 90.5	}& \multicolumn{2}{c}{87.9,	99.0} & \multicolumn{2}{c}{60.9	, 68.0}& \multicolumn{2}{c}{66.6, 75.7}\\
Aorta&\multicolumn{2}{c}{95.8, 97.6			}& \multicolumn{2}{c}{96.4, 99.8} & \multicolumn{2}{c}{92.3, 96.8}& \multicolumn{2}{c}{86.9, 85.2}\\
IVC&\multicolumn{2}{c}{94.5, 97.5			}& \multicolumn{2}{c}{93.5	, 94.4} & \multicolumn{2}{c}{86.0, 84.7}& \multicolumn{2}{c}{53.5, 57.7}\\
RAG&\multicolumn{2}{c}{90.9,	97.6			}& \multicolumn{2}{c}{70.2,	85.8} & \multicolumn{2}{c}{0.0,0.0}& \multicolumn{2}{c}{63.7,	66.0}\\
LAG&\multicolumn{2}{c}{87.1,	95.2			}& \multicolumn{2}{c}{75.3,	80.6} & \multicolumn{2}{c}{11.5, 61.5}& \multicolumn{2}{c}{15.0, 63.9}\\
Gallbladder&\multicolumn{2}{c}{100.0, 100.0		}& \multicolumn{2}{c}{53.9,	52.9} & \multicolumn{2}{c}{52.7,	55.0}& \multicolumn{2}{c}{0.0, 0.0}\\
Esophagus&\multicolumn{2}{c}{85.4, 92.9			}& \multicolumn{2}{c}{88.52, 	96.6} & \multicolumn{2}{c}{87.2	, 99.0}& \multicolumn{2}{c}{57.6, 70.8}\\
Stomach&\multicolumn{2}{c}{89.9,	88.6}& \multicolumn{2}{c}{92.8, 97.7	} & \multicolumn{2}{c}{76.4, 76.8}& \multicolumn{2}{c}{40.3,	 41.0}\\
Duodenum&\multicolumn{2}{c}{72.9, 82.67			}& \multicolumn{2}{c}{85.0, 	96.8} & \multicolumn{2}{c}{4.8, 15.3}& \multicolumn{2}{c}{59.5, 76.4}\\
LK&\multicolumn{2}{c}{95.1, 88.2			}& \multicolumn{2}{c}{97.6, 	98.3} & \multicolumn{2}{c}{98.1	, 98.2}& \multicolumn{2}{c}{93.8, 89.4}\\ \hline
Avg.&\multicolumn{2}{c}{91.3, 	93.6 				 		 	 				 }& \multicolumn{2}{c}{87.2, 	91.6 } & \multicolumn{2}{c}{64.1, 68.3}& \multicolumn{2}{c}{59.4, 63.9}\\ \hline

\end{tabular}
\end{table}

 		 		 			 	
 \begin{figure}[t]
\centering
\includegraphics[scale=0.5]{imgs/Cases_visual.pdf}
\caption{Qualitative results on good (Case $\#$0006 and Case $\#$0035) and bad (Case $\#$0047 and Case $\#$0048) examples. First column is the image, \textcolor{black}{second column is the results achieved by our proposed method, and third column is the ground truth of the segmentation}. The DSC of each case is presented at the top-left corner.}
\label{fig:Case_visual}
\end{figure} 	



 	
 			 	 	 	
\subsection{Ablation study: influence of different number of unlabeled data}	

To further validate the effect of unlabeled images, different numbers of unlabeled images from the training set were selected.
For each case, we trained the SemiSeg-CSSN model using 50 labeled images and different number, i.e., ranging from 0 to 2000, of unlabeled images. 
Note that supervised model used 0 unlabeled images.
Figure~\ref{fig:ablation_study} shows the segmentation results.
As the number of unlabeled images increases, the performance of the SemiSeg-CSSN models improves, and all the models with unlabeled images perform better than the supervised method.
Moreover, it is clear that the SemiSeg-CSSN model tends to converge when trained with more than 1000 unlabeled images.	 	 	 	 	 	 	 			 	 	 	 	 	 	 	
 \begin{figure}[!htbp]
\centering
\includegraphics[scale=0.5]{imgs/AS.pdf}
\caption{Performance plot of our semi-supervised approach with 50 labeled images and different number (from 0 to 2000) of unlabeled images. Note the case with 0 unlabeled images denotes the fully supervised method.}
\label{fig:ablation_study}
\end{figure} 	

 	 	 	 	 	 	 	 	 	 	 	 	
 	 	 	 	 	 	 	 	 	 	 	 	 		 	 	 		 	 	 	 	 	 	 	  	 	 	 	 	 	 	 	 	 	 	  	 	 	 	 	 
	 	 	 	 	 	 	 	 	 	 	 	 
 	 	 	 	 	 	 	 	 	 	 	 	 








\section{Discussion and conclusion}


 
% The main finding and results
Using unlabeled data, the proposed semi-supervised method achieved better results than the results of the supervised method.
Whether using supervised or semi supervised methods, the segmentation of some organs is still challenging.
LAG segmentation obtained disappointing performance because of unclear boundaries and class imbalance issues.
The existence of seriously pathology-affected organs, such as livers and kidneys, is a critical factor for the poor segmentation performance.
Besides, further research is needed to identify accurate boundaries and suppress pathological effects.

The SemiSeg-CSSN+UIF model used only 200 unlabeled images, yet achieved higher DSC and NSD than the model trained with all 2000 unfiltered unlabeled images.
It means that quality counts more than quantity when using unlabeled images.
Moreover, because UIF can be applied to any trained segmentation network to select unlabeled images, a progressive training strategy is possible, which needs to be explored in the future.

\subsection{\textcolor{black}{Limitation and future work}}
We summarize the limitations and potential improvement as follows:

\begin{itemize}
	\item Address the difficulties of multi-organ segmentation with class imbalance problem.
	\item Robust algorithms for shape variation of organs and presence of pathologies.
	\item Normalization of ROI and image information.
	\item The quality of pseudo labels needs more attention than quantity.

\end{itemize}

\subsubsection{Acknowledgements} The authors of this paper declare that the segmentation method they implemented for participation in the FLARE 2022 challenge has not used any pre-trained models nor additional datasets other than those provided by the organizers.


%
% ---- Bibliography ----
%
% BibTeX users should specify bibliography style 'splncs04'.
% References will then be sorted and formatted in the correct style.
%
\bibliographystyle{splncs04}
\bibliography{ref}

\end{document}
