% This is samplepaper.tex, a sample chapter demonstrating the
% LLNCS macro package for Springer Computer Science proceedings;
% Version 2.21 of 2022/01/12
%
\documentclass[runningheads]{llncs}
%
\usepackage[T1]{fontenc}
% T1 fonts will be used to generate the final print and online PDFs,
% so please use T1 fonts in your manuscript whenever possible.
% Other font encondings may result in incorrect characters.
%
\usepackage{multirow}
\usepackage{graphicx}
% Used for displaying a sample figure. If possible, figure files should
% be included in EPS format.
%
% If you use the hyperref package, please uncomment the following two lines
% to display URLs in blue roman font according to Springer's eBook style:
%\usepackage{color}
%\renewcommand\UrlFont{\color{blue}\rmfamily}
\usepackage[pagebackref=true,breaklinks=true,colorlinks,bookmarks=false]{hyperref}
%
\usepackage{bbding}
\usepackage{amsmath}
% add ORCID
\usepackage{tikz,xcolor,hyperref}

% Fill colour for the ORCID icon (presumably the ORCID brand green -- confirm).
% NOTE(review): xcolor already ships a base colour called `lime'; this
% redefinition replaces it for the rest of the document.
\definecolor{lime}{HTML}{A6CE39}
% \orcidicon: draws a small TikZ picture consisting of a filled circle of
% radius 0.16 in the colour `lime' with a white, \tiny `\.{I}D' label set in
% font family `qag', followed by a -2mm horizontal shift.
% Declared robust so that it is not expanded prematurely in moving arguments.
% NOTE(review): the line ends inside the definition are not protected with `%',
% so they contribute inter-word spaces; the negative \hspace appears to
% compensate for them -- do not add `%' without re-checking the author line.
\DeclareRobustCommand{\orcidicon}{
\begin{tikzpicture}
\draw[lime, fill=lime] (0,0)
circle[radius=0.16]
node[white]{{\fontfamily{qag}\selectfont \tiny \.{I}D}}; 
\end{tikzpicture}
\hspace{-2mm}
}
% For every letter X in A..Z, globally define \orcidX as
%   \href{https://orcid.org/\orcidauthorX}{\orcidicon}
% i.e. the ORCID icon hyperlinked to the author's ORCID page.
% \xdef is used so the definition survives the \foreach group; \noexpand keeps
% \href and \orcidicon from being expanded while the definition is built.
% NOTE(review): the \orcidauthorX macros are only defined after this loop, so
% they appear to be stored unexpanded and resolved when \orcidX is used --
% confirm before reordering these definitions.
\foreach \x in {A, ..., Z}{%
\expandafter\xdef\csname orcid\x\endcsname{\noexpand\href{https://orcid.org/\csname orcidauthor\x\endcsname}{\noexpand\orcidicon}}
}
% ORCID identifiers consumed by \orcidA and \orcidB in the \author block
% (\orcidA is attached to Bingjiang Qiu, \orcidB to Zhihong Chen).
\newcommand{\orcidauthorA}{0000-0002-3962-7535}
\newcommand{\orcidauthorB}{0000-0002-7056-1335}
%
% \usepackage[marginal]{footmisc}
% \renewcommand{\thefootnote}{}



\begin{document}
%
\title{AdaptNet: Adaptive Learning from Partially Labeled Data for Abdomen Multi-Organ and Tumor Segmentation}
% AdaptNet: Adaptive Learning from Partially Labeled Data for Accurate Abdomen Multi-Organ and Tumor Segmentation 
% \title{AdaptNet: Adaptive Learning to Partially Labeled Dataset for Abdomen Multi-Organ and Tumor Segmentation}
% \title{AdaptNet: Abdomen Multi-Organ and Tumor Segmentation by Adaptive Learning on Partially Labeled Dataset} 
% AdaptNet: Adaptive Learning of Abdomen Multi-Organ and Tumor Segmentation from Partially Labeled Dataset 
% Adaptive Learning from Imbalanced Annotation (Imbalanced Labeled data/partially labeled dataset) for Abdomen Multi-Organ and Tumor Segmentation

%
%\titlerunning{Abbreviated paper title}
% If the paper title is too long for the running head, you can set
% an abbreviated paper title here
%
\author{
JiChao Luo \inst{1,2} \and
Zhihong Chen \inst{1,2}\hspace{-1.5mm}\orcidB{}  \and
Wenbin Liu \inst{1} \and
Zaiyi Liu \inst{2,4} \and
Bingjiang Qiu \inst{2,3,4}   \hspace{-1.5mm}\orcidA{}\textsuperscript{\Envelope}     \and 
Gang Fang \inst{1} \textsuperscript{\Envelope} 
}
\titlerunning{AdaptNet: Adaptive Learning for Segmentation}
%
\authorrunning{J.~Luo, B.~Qiu et al.}
% First names are abbreviated in the running head.
% If there are more than two authors, 'et al.' is used.
%
\institute{
Institute of Computing Science and Technology, Guangzhou University, Guangzhou, 510006, China 
\and
Department of Radiology, Guangdong Provincial People's Hospital (Guangdong Academy of Medical Sciences), Southern Medical University, Guangzhou 510080, China 
\and
Guangdong Cardiovascular Institute, Guangdong Provincial People's Hospital, Guangdong Academy of Sciences, Guangzhou 510080, China
\and
Guangdong Provincial Key Laboratory of Artificial Intelligence in Medical Image Analysis and Application, Guangzhou 510080, China\\
\email{qiubingjiang@gdph.org.cn},
\email{gangf@gzhu.edu.cn}\\
 % $^*$ These authors contributed equally to this work
}
%

\maketitle              % typeset the header of the contribution
% \footnote{First Author and Second Author contribute equally to this work.\\}
%
\begin{abstract}
Due to the high costs associated with the labor and expertise required for annotating 3D medical images at the voxel level, most public and in-house datasets only include annotations of a single (or a few) organ or tumor. 
This limitation results in what is commonly referred to as the `partial labeling/annotation problem'. 
In order to tackle this issue, we introduce an adaptive learning network, AdaptNet, to effectively segment multiple organs and tumors within partially labeled data from abdomen CT images. 
AdaptNet comprises three key components: a segmentation network, a pseudo-label generation network, and an adaptive controller responsible for generating dynamic weights. 
% Prior to instructing the model on its expected task, information regarding the current segmentation task is encoded in a task-aware manner. Unlike existing methods that fix kernels after training, 
AdaptNet generates adaptive weights dynamically through the controller, which takes into account the balance of the partial labels and the corresponding pseudo-labels. 
This approach enables AdaptNet to efficiently and flexibly learn multiple organ and tumor information from the partial labeling/annotation dataset, which is typically performed by multiple or multi-head networks. 
We conduct validation on a large-scale partially annotated dataset under MICCAI FLARE 2023 challenge and demonstrate that the proposed AdaptNet outperforms the baseline method across the 13 different organ and tumor segmentation tasks. 
Our method achieves a mean organ Dice Similarity Coefficient (DSC) of 88.61\% and a Normalized Surface Dice (NSD) of 94.94\%, and a tumor DSC and NSD of 39.16\% and 30.52\% on the FLARE 2023 online validation. 
Additionally, on the Final Testing dataset, our method achieves a mean organ DSC and NSD of 89.34\% and 95.26\% and a tumor DSC and NSD of 54.59\% and 40.78\%, with an average running time of 33.35\,s and an area under the GPU memory-time curve of 84276\,MB. 
The code is available at \href{https://github.com/Prech-start/FLARE23_AdaptNet}{https://github.com/Prech-start/FLARE23\_AdaptNet}. 
\keywords{Adaptive learning \and partial labeling/annotation  \and Abdomen organ segmentation.} 
\end{abstract}



\section{Introduction}
%  Background (Issuee, [treatment, diagosis], difficuilt/time-consuming, proposed computer-aided tech)
% Cancer has become a leading cause of death globally, including in China, posing a significant threat to people's well-being. Accurately segmenting tumor areas in medical images is vital for radiotherapy and diagnosis\cite{jiang2022deep}. Delineation of abdominal organs from medical images is an essential step to analyze and visualize anatomically or/and functionally important regions before diagnosis, therapy and surgery\cite{7424482}, and accurate segmentation of organs and tumor providing essential information\cite{oliveira2011segmentation}. Manual segmentation, not only needs to be performed by experienced radiologists, but is time-consuming and labor-intensive. Thus, automated methods for tumor segmentation are indispensable to mitigate these challenges. 
Abdominal organs are quite common cancer sites, such as colon and rectum cancer, and pancreas cancer, which are the 2nd and 3rd most common cause of cancer death \cite{SEERCSF}. 
Computed tomography (CT) provides doctors with valuable prognostic information. 
During the diagnosis process, the doctor evaluates the lesion or organ by manual annotation on two-dimensional planes of the CT, which leads to a tedious procedure in clinical practice. 
Moreover, the structural complexity of abdominal organs and their cancers make the annotation process challenging \cite{joskowicz2019inter}. 
% and this process requires experience, time, and suffers from inter- and intra- observer variability\cite{10.1007/s00330-018-5695-5}. 
Currently, there are many high-quality publicly available tumor datasets, such as liver cancer segmentation~\cite{bilic2023liver}, lung nodule segmentation~\cite{armato2011lung}, etc. However, each of them covers only a single type of tumor. In terms of organ segmentation, several multi-organ segmentation datasets with all the organ labels have been released, e.g., BTCV~\cite{landman2015miccai}, AMOS~\cite{ji2022amos}, etc. 
However, this kind of dataset with all organs or tumors annotated is almost unachievable in real clinical workflow. 
Utilizing these datasets inevitably creates partial labeling/annotation problems. 
Furthermore, there is still no general and publicly available dataset with `partial labeling/annotation problems' for universal abdominal organ and pan-cancer segmentation. 
FLARE2023 challenge, an extension of FLARE2021 and FLARE2022 challenges, provides such an opportunity, which aims to promote the development of universal organ and tumor segmentation in abdominal CT scans. 
FLARE2023 showcases a rich variety of tumor types and a combination of multiple different organ annotations, as shown in Fig.~\ref{fig:datasample}. 
This imbalanced labeling could potentially lead to the failure of the segmentation methods. 

% it is important to note that its annotations are imbalanced
\begin{figure}
    \centering
    \includegraphics[width=\textwidth, trim=0 70 0 80, clip]{imgs/samples.pdf}
    \caption{Samples of imbalanced annotations in FLARE2023. Partial organ annotations are observed in some cases, as depicted in case (a). In other cases, all organs are annotated, but tumors are absent, as shown in case (b). There are also cases with annotations covering both tumors and organs, as demonstrated in cases (c) and (d).}
\label{fig:datasample}
\end{figure}
% \textbf{need to make a description of the challenge about 'partial labeling problem'? 
% I suggest make a figure. figure (a): only have one organ with its annotation, figure (b): have two or three annotation organs, figure (c): have full annations in the figure.
% \\
% results: figure (a) and (b) are normal in the real world, but figure (c) not. 
% }



% traditional method -> DL method
Formerly, researchers proposed traditional segmentation methods, such as gray-level based methods~\cite{kobashi1995knowledge}, live-wire segmentation approaches~\cite{schenk2000efficient}, and mathematical fitting procedures~\cite{gao1998abdominal}, which are more efficient than manual segmentation. However, the traditional methods rely on manually designed features. Compared with the traditional methods, Deep Learning (DL) methods demonstrate enhanced accuracy and much better generalization capacity. 
% DL has advantages in generalization capacity, which requires manual feature creation and extraction. 
% Previous A DL Method (disadvantage) -> B DL method (finish A problem, new problem)
In recent years, although various works based on fully supervised learning~\cite{li2023lvit} have achieved State-of-the-Art (SOTA) performance, many of them are developed on small datasets collected from a single data center~\cite{ma2021abdomenct}. 
Furthermore, most of the SOTA methods cannot be easily verified and generalized in other datasets with imbalanced annotations. 
% As previously mentioned, the issue of the single type dataset exposes phenomena that using the fully supervised methods to train a model with an Imbalance annotated dataset results in accuracy loss when transferring it to a different dataset. 


To address the problem, this study intends to use the core concept of semi-supervised learning to effectively use unlabeled organ samples to improve model performance. 
Semi-supervised learning potentially learns wrong information from incorrect pseudo-labels, which would lead to performance degradation. 
% Semi-supervised learning exists a potential issue as the incorrect pseudo-labels contains the wrong or inaccurate annotations, which would cause performance degradation
Normally, selecting high-confidence predictions can fix the problem of performance degradation. However, this way would exclude a large amount of unlabeled data from the training process, resulting in insufficient model training. 
Furthermore, this way leads to the low-quality pseudo-labels not being utilized in training. 
Therefore, based on that, we propose an adaptive learning segmentation method to efficiently utilize and learn pseudo-labels. 

% Inspired by these issue, we proposed a method, which consists of xxxx....
In this paper, we propose an automatic segmentation method, AdaptNet, for abdominal organs and cancers based on FLARE2023 dataset with imbalanced partial labeling. 
The proposed framework AdaptNet mainly contains three components: a pseudo-label generation network that creates the class-wise annotations which do not exist in the true labels, a controller responsible for generating dynamic weights, and a segmentation network that segments lesions and organs based on the adaptive weights generated by the dynamic weights controller. The main contributions of this work are summarized as follows: 
(1) Through the proposed AdaptNet, pseudo-labels have been effectively utilized and learned, which introduces the unlabeled organ information, while also avoiding the misleading from incorrect pseudo-labels. 
(2) To balance the pseudo-labels and the original label, dynamic weights are generated automatically by a controller. 
(3) To mitigate the misleading of incorrect pseudo-labels, an adaptive loss approach is employed to train the segmentation model. Experiments show the effectiveness of the proposed AdaptNet for the partial labeling problem. 



\section{Method}
%###########################
\subsection{Preprocessing}
% 解释或者标题
% \subsubsection{Pseudo-label selection} We using pseudo-labeled data provided by the \cite{huang2022revisiting} as pseudo-labels for the work (\textbf{comment: this is not the part of the preprocessing. I suggest moving it to the 2.2 Proposed Method}). 

\subsubsection{Resample and normalization} 
We resample the pixel spacing to (2.2838, 1.8709, 1.8709) for all cases, and clip the pixel value based on the Hounsfield units to $[-160, 240]$, and normalize all the cases in $[0, 1]$ to ensure data stability and consistency. 

\subsubsection{Cropping the data} 
To reduce redundant or irrelevant information and save computing resources, every original CT volume is cropped according to the foreground markers generated from the original labels and pseudo-labels (the details are in the next section). 

\subsubsection{Data augmentation}
In order to prevent the model from over-fitting, data augmentation is used in this study. The augmentation approaches of nnU-Net methodology 
\cite{isensee2021nnu} have been utilized. 

\subsection{Proposed Method} 
% AdaptNet comprises three key components: a segmentation network, a pseudo-label generation network, and an adaptive controller responsible for generating dynamic weights. 

% a pseudo-label generation network that creates the class-wise annotations which not exist in the true labels, a controller responsible for generating dynamic weights, and a segmentation network that segments lesions and organs based on adaptive weights generated by dynamic weights controller. 

Specifically, the proposed AdaptNet contains a pseudo-label generator network which is followed by a label filling module, a baseline network which is to make a segmentation prediction, and a dynamic weights controller which is mainly made up of an adaptive weight calculation (AWC) module, as shown in Fig.~\ref{fig:overview}. 


\begin{figure}[htbp]
\centering
\includegraphics[width=1\textwidth, trim=20 40 5 40, clip]{imgs/overview_1.1.pdf}
\caption{Overview of our proposed AdaptNet. 
Green block: Generate the mix label and ROI bounding box by Label Filling Module, and weight of adaptive loss calculation by AWC. 
Pink block: Baseline segmentation network. 
Blue block: Adaptive weight calculation (AWC) module: calculate the weight according to the unique object class between the pseudo-label and the true label. 
Yellow Block: Label Filling Module filters the interfering information and combines the pseudo-label and true label into a mix-label. }
\label{fig:overview}
\end{figure}

\subsubsection{Pseudo-Label Generator} \label{pseudo}
Pseudo-labels contain valuable information about the location and boundary of target organs and tumors during training, which enhances the model's discriminative ability. By incorporating pseudo-labels, the pseudo-label generator augments the datasets and encourages the model to learn more boundary information from organs that are unlabeled in the true labels. 
Here, we apply the segmentation network from \cite{huang2022revisiting} as the pseudo-label generator network, which has achieved remarkable results in the FLARE2022 challenge. 

\subsubsection{Segmentation Network}
    
The baseline network is built upon nnU-Net \cite{isensee2021nnu}, utilizing parameters generated using the nnU-Net methodology. 


\subsubsection{Label Filling Module} 
% After generating pseudo-labels, we observed a significant imbalance in the data annotations. 
% To address this issue, we applied a Label Filter to the labels and performed ROI cropping on the data. 
% The specific process entailed excluding the target classes present in both the true labels and pseudo-labels from the pseudo-labels, and then combining them to form a mixed-labels(ML). 
% Subsequently, based on the mixed label, we acquire ROI bounding boxes rich in informative content. 

To incorporate pseudo-labels into the true labels, the Label Filling Module is applied after the pseudo-labels are generated. The details are illustrated in Fig.~\ref{fig:overview}. 
In general, the Label Filling Module can be expressed in
the following equation: 
\begin{equation}
    ML = R(PL, U_{TL} \cap U_{PL}, 0) + TL
\end{equation}
where ${U_{TL}}$ and ${U_{PL}}$ denote the sets of classes present in the true label (TL) and the pseudo-label (PL), respectively. The expression $R(S,I,0)$ signifies replacing with the value 0 every element of $S$ whose class belongs to the intersection $I$; the filtered PL is then combined with TL to form a mixed-label (ML).  

% \textbf{The Label Filling module primarily eliminates annotations in the pseudo-labels that might interfere with the true labels and calculates the Region of Interest (ROI) bounding box from processed filling labels. }



% To alleviate the issue of label imbalance, we filtered out the annotations that were common between the true labels and pseudo-labels before incorporating filtered pseudo-labels into the true labels, achieving filtering and filling. 


\subsubsection{Adaptive Weight Calculation} 



% For sparsely labeled data, we employ pseudo-labeled data for data augmentation. However, 

Considering the potentially misleading effects of pseudo-labels during training, we introduce an adaptive loss to impose constraints. 
The main idea of adaptive loss is to automatically weaken learning efficiency from pseudo-labels while amplifying the guiding capability of true labels during training. 
For the purpose of this paper, we define symbol $C_O$ as $[1, class\_count]$, where the $class\_count$ means the total class counts for the segmentation task, and 
suppose $C_{pseudo}$ represents the unique indexes collection of pseudo-label. Its definition is 
\[C_{pseudo} = \{c_i \mid c_i \in C_O, {c_i \text{ is pseudo label}}\}, \]
where $i$ is the $i$-th class index. 
Then, indexes collection of true label $C_{true}$ is 
% $C_{true} = [1,{class\_count}] \setminus C_{pseudo}$, where $C_{pseudo}$ is a set minus operation. 
\[C_{true} = \{c_i \mid c_i \in C_O, {c_i \text{ is true label}}\}. \]
Then, the updated steps of loss weight for each target class are as follows: 
\begin{equation}
   w^o_i =  \frac{1}{class\_count} , i \in C_O, 
\end{equation}
\begin{equation}
    w^p_i =  \xi_{ada} \ast w^o_i , i \in C_{pseudo}, 
\end{equation}
\begin{equation}
    w^t_i = w^o_i + \frac{\sum_{c \in C_{pseudo}} (w^o_c - w^p_c)}{\lvert C_{true} \rvert}, i \in C_{true}, 
\end{equation}
where 
% $i$ is the $i$-th class index, $i \in [1,class\_count]$, and $class\_count$ means the total class counts for the segmentation task. 
$w^p_i$ and $w^t_i$ represent the weight of the $i$-th class in pseudo-label and true label, respectively. 
The $\xi_{ada}$ is an adjustable parameter to control attention to the true label. It is initialized to a default value of 0.5. $\lvert C_{true} \rvert$ is the number of classes in the true label. 

In general, the weight $w_i$ of the $i$-th class can be defined as follows: 
% \begin{equation}
%     w_i = \begin{cases}
%         w^p_i, & \text{if } i\text{-th class is pseudo-label} \\
%         w^t_i, & \text{if } i\text{-th class is true label} \\
%     \end{cases}
% \end{equation}
\begin{equation}
    w_i = \begin{cases}
        w^p_i, &  i \in{C_{pseudo}} \\
        w^t_i, &  i \in{C_{true}} \\
    \end{cases}
\end{equation}

In this way, the model can pay more attention to the organ with real labels and also learn the shape or location information of unlabeled organs via their corresponding pseudo-labels. 
In other words, the true label gains a dynamic higher loss score than pseudo-labels according to the label status of each patch. 
Therefore, the Adaptive Weight module suppresses gradients generated by features in the filled labels that could disrupt training and enhances the learning capacity for the true annotations. 

Then, we combine the adaptive weight with the $ComboLoss$ function, which is a combination of $DiceLoss$ and $CELoss$. The $ComboLoss$ converges considerably faster than cross-entropy loss during training~\cite{taghanaki2019combo}. It is defined as: 
\begin{equation}
    L_{CE}(y,\hat{y}, w) = \sum_{i \in C_O} w_i \left(-\frac{1}{N} \sum^N_{j=1} \left[ y_j^i \log(\hat{y}_j^i) + (1-y_j^i) \log(1-\hat{y}_j^i) \right] \right), 
\end{equation}
\begin{equation}
    L_{Dice}(y,\hat{y}, w) = \sum_{i \in C_O} w_i \left(1 - \frac{2\sum_{j=1}^N y_j^i\hat{y}_j^i}{\sum_{j=1}^N \left(y_j^i+\hat{y}_j^i\right)}\right), 
\end{equation}
\begin{equation}
    loss(y,\hat{y}, w) =  \alpha_{ce} * L_{CE}(y, \hat{y}, w) + \alpha_{dc} * L_{Dice}(y, \hat{y}, w), 
\end{equation}
where $y_j^i$ and $\hat{y}_j^i$ denote the ground truth and the predicted probability of pixel $j$ for class $i$, respectively, and $N$ is the number of pixels.
$\alpha_{ce}$ and $\alpha_{dc}$ are the hyper-parameters to balance the contribution of $DiceLoss$ and $CELoss$. $\alpha_{ce}$ and $\alpha_{dc}$ are set to 0.5 in this study. 
% The $C$ is the 


% \subsubsection{Strategies to improve inference speed and reduce resource consumption} 
\subsubsection{Training Strategies} 

One of the obstacles to training 3D networks is the problem of ``insufficient memory''. 
A common solution is to train a 3D network from smaller sub-volumes (3D patches) and test it by sliding window. 
We set the step of the sliding window and use multithreaded preprocessing of CT image to reduce our inference time. The shape of the sliding window is consistent with the patch as shown in Table \ref{table:training}. 
Here, to reduce the inference time, the length of the step is $[5/6, 7/8, 9/10]$ times the window width for each axis instead of the default parameter $[1/2, 1/2, 1/2]$ of nnU-Net. 
Consequently, the inference time significantly decreases, e.g., from 72s to 48s for case 0048 in the environment of this study. 



\subsection{Post-processing}
In the post-processing stage, we employ a connected component-based method after the segmentation prediction. Particularly in organ image segmentation, it helps remove the disconnected voxels, consequently, reducing false positives. In the study, the largest connected component of each segmented organ volume is simply selected. 
% Applied to both coarse and fine model outputs, it enhances accuracy in segmenting multiple organs. 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%$
\section{Experiments}
\subsection{Dataset and evaluation measures}
The FLARE 2023 challenge is an extension of the FLARE 2021-2022~\cite{MedIA-FLARE21}\cite{FLARE22}, aiming to promote the development of foundation models in abdominal disease analysis. The segmentation targets cover 13 organs (liver, spleen, pancreas, right kidney (RK), left kidney (LK), stomach, gallbladder, esophagus, aorta, inferior vena cava (IVC), right adrenal gland (RAG), left adrenal gland (LAG), and duodenum) and various abdominal lesions, which cover various abdominal cancer types, such as liver cancer, kidney cancer, pancreas cancer, colon cancer, gastric cancer, and so on. The organ annotation process used ITK-SNAP~\cite{ITKSNAP}, nnU-Net~\cite{isensee2021nnu}, and MedSAM~\cite{MedSAM}. The training dataset is curated from more than 30 medical centers under the license permission, including TCIA~\cite{TCIA}, LiTS~\cite{LiTS}, MSD~\cite{simpson2019MSD}, KiTS~\cite{KiTS,KiTSDataset}, autoPET~\cite{autoPET-Data,autoPET-MICCAI22}, TotalSegmentator~\cite{TotalSegmentator}, and AbdomenCT-1K~\cite{ma2021abdomenct}. The training set includes 4000 abdomen CT scans where 2200 CT scans with partial labels and 1800 CT scans without labels. The validation and testing sets include 100 and 400 CT scans. 
In this study, only the 2200 scans with partial labels have been used due to the computational resource limitation; the 1800 unlabeled images are not used. 
The frequency statistics about the 2200 cases regarding organ and tumor annotations are provided in Table~\ref{tab:oaops}. 
5-fold cross-validation has been performed, in which 1760 cases are chosen as the training dataset, and the rest 440 cases are as the internal validation dataset in each fold. 

\begin{table}[!ht]
    \centering
    \caption{Organ annotation occurrence frequency(\%) summary}
    \label{tab:oaops}
    \renewcommand{\arraystretch}{1.2}
    \setlength{\tabcolsep}{3pt}
    \begin{tabular}{c|c|c|c|c|c|c|c}
    \hline
        Target & Liver & RK & Spleen & Pancreas & Aorta & IVC & RAG  \\ 
        Frequency& 59.6 & 59.1 & 59.4 & 59.6 & 11.3 & 11.3 & 11.3\\ \hline
        Target & LAG & Gallbladder & Esophagus & Stomach & Duodenum & LK & Tumor \\ 
        Frequency& 11.2 & 10.2 & 11.3 & 11.3 & 11.3 & 59.0 & 68.0 \\ \hline
    \end{tabular}
\end{table}



The evaluation metrics encompass two accuracy measures—Dice Similarity Coefficient (DSC) and Normalized Surface Dice (NSD)—alongside two efficiency measures—running time and area under the GPU memory-time curve. These metrics collectively contribute to the ranking computation. Furthermore, the running time and GPU memory consumption are considered within tolerances of 15 seconds and 4 GB, respectively.


\subsection{Implementation details}
\subsubsection{Environment settings}
The development environments and requirements are presented in Table~\ref{table:env}. 

\begin{table}[!htbp]
\caption{Development environments and requirements.}\label{table:env}
\centering
\begin{tabular}{ll}
\hline
System       & Ubuntu 23.04\\
\hline
CPU   & Intel(R) Core(TM) i9-10900X CPU@3.70GHz \\
\hline
RAM                         &4$\times $32GB; 2933MT$/$s\\
\hline
GPU                        & NVIDIA GeForce RTX\texttrademark{} 3090 24G\\
\hline
CUDA version                  & 12.0\\                          \hline
Programming language                 & Python 3.9.16\\ 
\hline
Deep learning framework & Pytorch (Torch 2.0.1) \\
\hline
Code     &     \href{https://github.com/Prech-start/FLARE23_AdaptNet}{https://github.com/Prech-start/FLARE23\_AdaptNet}                                                           \\
\hline
\end{tabular}
\end{table}


\subsubsection{Training protocols}
During the training phase, we set the batch size to 2 and randomly select all samples within each epoch. 
For each sample, we perform random patch cropping with patch sizes of $(96, 128, 160)$. 
As for the optimizer, we utilize AdamW with a learning rate of 1e-3 and a weight decay of 1e-5. The learning rate updating follows the default mechanism of AdamW. Additional details are presented in Table~\ref{table:training}. 


\begin{table*}[!htbp]
\caption{Training protocols.}
\label{table:training}
\begin{center}
% \resizebox{0.47\textwidth}{!}{
\begin{tabular}{ll} 
\hline
Network initialization         & ``he'' normal initialization\\
\hline
Batch size                    & 2 \\
\hline 
Patch size & 96$\times$128$\times$160  \\ 
\hline
Total epochs & 120 \\
\hline
Optimizer          & AdamW with weight decay ($\mu=10^{-5}$)          \\ \hline
Initial learning rate (lr)  & 0.001 \\ \hline
Lr decay schedule & halved by 200 epochs \\
\hline
Training time                                           & 11 hours per fold \\  \hline 
Loss function &  Adaptive Loss \\  \hline
Number of model parameters    & 30.8M\footnote{https://github.com/sksq96/pytorch-summary} \\ \hline
Number of flops & 838.6116 G\footnote{https://github.com/facebookresearch/fvcore} \\ \hline
CO$_2$eq & 3.91908 Kg\footnote{https://github.com/lfwa/carbontracker/} \\  \hline
\end{tabular}
\end{center}
\end{table*}


\section{Results and discussion}
% Note: Please describe at least the following aspects in this section


% 1. The effect of using unlabelled cases;


% 2. In what kind of cases the proposed method works well?

% 3. What are the possible reasons for the failed cases or organs?


% 4. Segmentation efficiency analysis


The best fold was selected via the results in the Public validation, as shown in Table~\ref{tab:five_fold}. 
% The submitted method is selected by result in Table \ref{tab:five_fold}. 
The results of the Public Validation are calculated on the 50 publicly available cases of the 100-case validation set. 
The result for Online Validation is collected from FLARE2023 website. 
It is worth noting that in the metrics for public validation, we have included the standard deviation, represented as evaluation score$\pm$std. 
% The metrics of the online validation are not available, we have not added the standard deviation. 
The std of the online validation is not available since it is not reported online. The results for the validation are listed in Table \ref{tab:final-results}.  

\begin{table}[!htbp]
    \centering
    \caption{Segmentation DSC(\%) of the five folds on the Public Validation.}
    \renewcommand{\arraystretch}{1.2}
    \label{tab:five_fold}
    \setlength{\tabcolsep}{3pt}
    \begin{tabular}{l|cc|cc|cc}
    \hline
        \multirow{2}{*}{Target} & \multicolumn{2}{c|}{baseline} & \multicolumn{2}{c|}{label filling} & \multicolumn{2}{c}{proposed}\\ \cline{2-7} 
         & Organ & Tumor & Organ& Tumor & Organ& Tumor \\ \hline
        fold0 & 34.84  & 36.89  & 89.25  & 40.94 & 88.96  & 45.12  \\ 
        fold1 & 36.18  & 37.20  & 89.30  & 40.75 & 88.97  & 43.24  \\ 
        fold2 & 35.79  & 34.15  & 89.05  & 42.02 & 89.02  & 45.35  \\ 
        fold3 & 35.57  & 40.17  & 89.20  & 44.19 & 89.09  & 43.75  \\ 
        fold4 & 35.07  & 39.61  & 89.22  & 41.73 & 88.94  & 45.04  \\ \hline
        mean  & 35.49  & 37.60  & 89.20  & 41.92 & 88.96  & 44.50 \\ \hline
    \end{tabular}
\end{table}


\begin{table}[htbp]
% \caption{Quantitative evaluation results. \textbf{The results should correspond to your final docker submission. The public validation denotes the performance on the 50 validation cases with ground truth. Please present both the mean score and standard deviation. The online validation denotes the leaderboard results. The Testing results will be released during MICCAI. Please leave them blank at present.} You can use a similar 
% Table format to present the ablation study results of the public and online validation. A useful online tool to create latex table 
% \url{https://www.tablesgenerator.com/latex_tables.}
% }
\caption{Result in Public Validation, Online Validation and Final Testing. }
\label{tab:final-results}
\renewcommand{\arraystretch}{1.2}
    \setlength{\tabcolsep}{3pt}
\centering
\begin{tabular}{l|cc|cc|cc}
\hline
\multirow{2}{*}{Target} & \multicolumn{2}{c|}{Public Validation} & \multicolumn{2}{c|}{Online Validation} & \multicolumn{2}{c}{Testing} \\ \cline{2-7} 
                        & DSC(\%)            & NSD(\%)           & DSC(\%)            & NSD(\%)           & DSC(\%)      & NSD (\%)     \\ \hline
        Liver       & 97.74$ \pm$0.44& 99.28$\pm$0.73 & 97.60 & 99.07 & 96.95 & 98.27 \\ 
        RK          & 94.44$\pm$7.76 & 95.92$\pm$8.61 & 93.83 & 95.36 & 93.73 & 94.75 \\ 
        Spleen      & 96.88$\pm$0.94 & 99.12$\pm$1.75 & 96.94 & 99.19 & 96.48 & 98.94 \\ 
        Pancreas    & 86.06$\pm$5.58 & 97.06$\pm$4.01 & 84.70 & 96.18 & 88.64 & 97.01 \\ 
        Aorta       & 94.74$\pm$1.25 & 98.78$\pm$2.29 & 94.74 & 98.72 & 94.97 & 99.53 \\ 
        IVC         & 88.62$\pm$7.60  & 91.26$\pm$7.90 & 88.30 & 90.60 & 88.83 & 92.09 \\ 
        RAG         & 81.41$\pm$12.23& 94.97$\pm$13.68 & 81.43 & 95.51 & 81.65 & 95.30 \\ 
        LAG         & 82.64$\pm$5.66 & 95.96$\pm$4.34 & 80.86 & 94.37 & 81.94 & 94.79 \\ 
        Gallbladder & 86.53$\pm$18.94& 88.38$\pm$20.43 & 84.11 & 85.80 & 83.38 & 86.32 \\ 
        Esophagus   & 79.95$\pm$16.67& 90.77$\pm$16.92 & 81.14 & 92.53 & 86.56 & 96.88 \\ 
        Stomach     & 93.14$\pm$3.20  & 97.25$\pm$4.10 & 93.67 & 97.59 & 93.53 & 97.17 \\ 
        Duodenum    & 81.51$\pm$7.90  & 94.80$\pm$5.91 & 81.43 & 94.54 & 84.02 & 94.27 \\ 
        LK          & 93.45$\pm$6.72 & 94.73$\pm$8.90 & 93.18 & 94.81 & 92.90 & 94.40 \\ \hline
        
        Organ Average & 89.01$\pm$6.13 & 95.25$\pm$3.23 & 88.61 & 94.94 & 89.34 & 95.26 \\ \hline
        Tumor         & 43.75$\pm$35.21& 35.46$\pm$29.93 & 39.16 & 30.52 & 54.59 & 40.78 \\ \hline
\end{tabular}
\end{table}

\subsection{Quantitative results on validation set}
As shown in Table~\ref{tab:quantitative-results}, quantitative experiments were carried out as ablation studies on the pseudo-label filling and the adaptive weight calculation. 
% As a result, we have found that our method performs better on tumor DSC. 
For the tumor segmentation, the proposed method performs better than both the baseline model and the label-filling-based model, with improvements of at least 2.58 and 2.95 percentage points in DSC and NSD, respectively. 
For the organ segmentation, the result of our proposed method is slightly worse (a decline of only 0.27 percentage points in DSC) than that of the model that uses pseudo-label filling. 
Specifically, comparing the quantitative evaluation in Table~\ref{tab:quantitative-results} with the annotation statistics in Table~\ref{tab:oaops} shows that the baseline model fails to segment the organs with low annotation frequency, i.e., aorta (0.113), IVC (0.113), RAG (0.113), LAG (0.112), gallbladder (0.102), esophagus (0.113), stomach (0.113), and duodenum (0.113). The model's ability is stronger for tumors and organs with high annotation frequency (e.g., liver and spleen). This also demonstrates the effectiveness of pseudo-label filling in segmentation tasks with imbalanced annotations. 
The proposed AdaptNet approach improves the segmentation of the objects with high annotation frequency (i.e., RK (0.591), LK (0.590), and tumor (0.680)), while for the organs with low frequency its results are not as good as those of the model that uses pseudo-label filling. 
Given the weight calculation algorithm and the annotation frequency of each organ, this behavior is reasonable: the lower the labeling frequency, the less the training is guided by the real annotations. 
% when there is a low frequency of organ annotations and a high frequency of tumor annotations. 
% For the baseline model, the DSC of small organs is consistently very low because of the extremely low frequency of annotations for the small organs. 
% This is not what we expected, 

\begin{table}[!ht]
    \centering
    \caption{Overview of Ablation Experiment Results. Note: Label filling: baseline + Label filling module. Proposed: baseline + Label filling module + Adaptive weight calculation.}
    \label{tab:quantitative-results}
    \renewcommand{\arraystretch}{1.2}
    \setlength{\tabcolsep}{3pt}
    \begin{tabular}{l|cc|cc|cc}
    \hline
        \multirow{2}{*}{Target} & \multicolumn{2}{c|}{Baseline} & \multicolumn{2}{c|}{Label filling} & \multicolumn{2}{c}{Proposed}\\ \cline{2-7} 
                        & DSC(\%) & NSD(\%) & DSC(\%) & NSD(\%) & DSC(\%) & NSD(\%) \\ \hline
        Liver           & 90.78  & 91.85  & 97.76  & 99.26 & 97.72  & 99.22  \\ 
        RK              & 89.08  & 90.35  & 93.84  & 95.25 & 94.02  & 95.39  \\ 
        Spleen          & 91.71  & 93.57  & 96.90  & 99.22 & 96.85  & 99.10  \\ 
        Pancreas        & 80.25  & 91.39  & 85.87  & 96.93 & 85.85  & 96.90  \\ 
        Aorta           & 1.73   &  1.59  & 94.78  & 98.90 & 94.68  & 98.69  \\ 
        IVC             & 2.29   &  2.12  & 89.32  & 92.12 & 88.62  & 91.25  \\ 
        RAG             & 6.96   &  7.61  & 81.24  & 94.86 & 81.26  & 94.87  \\ 
        LAG             & 6.23   &  7.26  & 82.33  & 95.73 & 81.85  & 95.35  \\ 
        Gallbladder     & 10.24  & 10.03  & 85.08  & 87.02 & 83.96  & 85.74  \\ 
        Esophagus       &  3.78  &  4.50  & 80.54  & 91.37 & 80.25  & 91.13  \\ 
        Stomach         &  3.54  &  3.85  & 93.80  & 97.70 & 93.40  & 97.69  \\ 
        Duodenum        & 0.46   & 0.59   & 82.06  & 94.88 & 81.31  & 94.68  \\ 
        LK              & 88.64  & 89.76  & 93.15  & 94.19 & 93.37  & 94.59  \\ \hline
        Organ Average   & 36.60  & 38.04  & 88.97  & 95.19 & 88.70  & 94.97  \\ \hline
        Tumor           & 37.60  & 27.68  & 41.93  & 32.34 & 44.51  & 35.29  \\ \hline
    \end{tabular}
\end{table}


% \begin{table}[!ht]
%     \centering
%     \renewcommand{\arraystretch}{1.2}
%     \setlength{\tabcolsep}{3pt}
%     \begin{tabular}{l|cc|cc|cc|cc}
%     \hline
%         \multirow{2}{*}{Target} & \multicolumn{2}{c|}{baseline} & \multicolumn{2}{c|}{op} & \multicolumn{2}{c|}{28} & \multicolumn{2}{c}{01}\\ \cline{2-9} 
%          & DSC & NSD & DSC & NSD & DSC & NSD & DSC & NSD \\ \hline
%         Liver & 0.9142  & 0.9267  & 0.9773  & 0.9918  & 0.9764  & 0.9912  & 0.9771  & 0.9919  \\ 
%         RK & 0.8952  & 0.9070  & 0.9372  & 0.9531  & 0.9332  & 0.9472  & 0.9371  & 0.9520  \\ 
%         Spleen & 0.9323  & 0.9526  & 0.9658  & 0.9882  & 0.9695  & 0.9930  & 0.9684  & 0.9912  \\ 
%         Pancreas & 0.7908  & 0.9082  & 0.8482  & 0.9620  & 0.8430  & 0.9586  & 0.8491  & 0.9633  \\ 
%         Aorta & 0.0133  & 0.0137  & 0.9494  & 0.9906  & 0.9447  & 0.9836  & 0.9480  & 0.9885  \\ 
%         IVC & 0.0220  & 0.0216  & 0.8927  & 0.9209  & 0.8837  & 0.9101  & 0.8873  & 0.9142  \\ 
%         RAG & 0.0557  & 0.0684  & 0.8208  & 0.9583  & 0.8068  & 0.9462  & 0.8187  & 0.9564  \\ 
%         LAG & 0.0481  & 0.0584  & 0.8212  & 0.9513  & 0.7999  & 0.9315  & 0.8144  & 0.9453  \\ 
%         Gallbladder & 0.1227  & 0.1216  & 0.8587  & 0.8781  & 0.8491  & 0.8630  & 0.8528  & 0.8709  \\ 
%         Esophagus & 0.0375  & 0.0476  & 0.8166  & 0.9278  & 0.8087  & 0.9232  & 0.8141  & 0.9262  \\ 
%         Stomach & 0.0274  & 0.0299  & 0.9396  & 0.9762  & 0.9272  & 0.9711  & 0.9358  & 0.9759  \\ 
%         Duodenum & 0.0084  & 0.0125  & 0.8215  & 0.9479  & 0.7968  & 0.9377  & 0.8135  & 0.9441  \\ 
%         LK & 0.8859  & 0.9016  & 0.9276  & 0.9421  & 0.9277  & 0.9419  & 0.9311  & 0.9469  \\ 
%         Tumor & 0.3126  & 0.2337  & 0.3543  & 0.2652  & 0.3689  & 0.2794  & 0.3740  & 0.2896  \\ \hline
%         Average & 0.3656  & 0.3823  & 0.8905  & 0.9530  & 0.8821  & 0.9460  & 0.8883  & 0.9513  \\ \hline
%     \end{tabular}
% \end{table}


\subsection{Qualitative results on validation set}
In this section, we show two good segmentation cases and two bad segmentation cases. 

\subsubsection{Good segmentation cases}
% As shown in the case of case-0031, the segmentation of pancreatic lesions is notably superior, and there are no erroneous stomach segmentation within the duodenum. Compared to the label filling method, our approach yields superior results in this instance. 
% As shown in case-0033 of Fig.~\ref{fig:good_case}, the baseline method is not available to segment the kidney lesions, duodenum, aorta and inferior vena cava. Compared to the over-segmentation of the label filling method in the kidney, our method avoids segmenting the surrounding of connecting tissues and performs better in results. 
As shown in case-0087 of Fig.~\ref{fig:good_case}, the baseline method is unable to segment the IVC, aorta, stomach, duodenum and RAG. Meanwhile, the baseline method misclassifies part of the LK as spleen. 
The label filling method can only segment part of the duodenum. 
Compared with the under-segmentation by the baseline method and the label filling method in the kidney, our method performs much better on the tumor. 
% In case-0093 of Fig.~\ref{fig:good_case}, the kidney lesions, stomach, duodenum, aorta, and inferior vena cava are not segmented in the baseline method. The example shows that the kidney lesion is missed, and the lesion is misclassified as part of kidney by the label filling method, while the proposed AdaptNet can tackle this confusing tissue of the kidney. 
% Compared to the label filling method, our approach exhibits a better ability to highlight tumor segmentation. It demonstrates improved tumor segmentation performance. 
In case-0057 of Fig.~\ref{fig:good_case}, the tumor in the RK, the stomach, the aorta, the LK and the IVC are not segmented by the baseline method. 
With the label filling method, part of the LK is misclassified as tumor and the lesion in the LK is under-segmented, whereas the proposed AdaptNet segments almost the entire tumor in the LK, although a small part of the LK is misclassified as pancreas. 
Compared to the label filling method, our approach exhibits a better ability to highlight tumor segmentation. It demonstrates improved tumor segmentation performance. 

\begin{figure}[!htbp]
\centering
\includegraphics[width=\textwidth,  trim=30 110 30 110, clip]{imgs/good_case.pdf}
\caption{Good segmentation cases from the 50-case validation set.}
\label{fig:good_case}
\end{figure}

\subsubsection{Bad segmentation cases}
In case-0067 of Fig.~\ref{fig:bad_case}, the baseline has trouble in segmenting the IVC and aorta. 
% the segmentation of liver tumors is not apparent due to the lesions' similarity in morphology to intrahepatic bile ducts after preprocessing, as well as low gray-scale contrast. 
In addition, all three methods fail to segment the esophagus. 
A possible explanation is that the location of the esophagus confuses all the methods. 
% Additionally, the segmentation of the duodenum is inconspicuous due to the indistinct intestinal wall in the current image pixel value range.
% It is noted that the Tumor DSC is 0.87 in the proposed method, but the liver lesion is not segmented by any method. 
% The reason is that this small lesion is just one of the lesions, while the other big lesion is in good segmented in this case. 
In case-0095, as shown in Fig.~\ref{fig:bad_case}, the baseline model does not segment the duodenum, IVC, gallbladder and aorta. 
% although there are subtle grayscale differences in the lesion area compared to the surrounding tissue, the model correctly identifies these differences and does not misclassify it as part of the liver. 
All three methods misclassify the LK as tumor. The duodenum and pancreas have similar gray-scale intensities, so the boundary between these organs is not clear in the predicted segmentation. 
% Our method misclassifies the stomach compared to label filling. 
% Moreover, the liver lesions are also not segmented by all three methods, in which the baseline method misclassifis the lesion as liver, and the other two methods do not recognize the lesion. 

\begin{figure}[!htbp]
\centering
\includegraphics[width=\textwidth,  trim=15 100 15 120, clip]{imgs/bad_case.pdf}
\caption{Bad segmentation cases from the 50-case validation set.}
\label{fig:bad_case}
\end{figure}


\subsection{Segmentation efficiency results on validation set}
We have submitted the Docker container encapsulating our model to the official challenge. We tested it on 20 cases, and the efficiency metrics were as follows: an average execution time of 40.673 seconds, an average maximum GPU memory usage of 4499.8\,MB, and an average area under the CPU curve of 124628 seconds. The efficiency results for 8 cases are shown in Table~\ref{table:efficiency}. 

\begin{table}[htbp]
\caption{Quantitative evaluation of segmentation efficiency in terms of the running time and GPU memory consumption. Total GPU denotes the area under the GPU memory--time curve. Evaluation GPU platform: NVIDIA QUADRO RTX5000 (16G).}
\label{table:efficiency}
\centering
\begin{tabular}{ccccc}
\hline
Case ID & Image Size      & Running Time (s) & Max GPU (MB) & Total GPU (MB) \\ \hline
0001    & (512, 512, 55)  & 33.39            & 4088         & 75893          \\
0051    & (512, 512, 100) & 43.83            & 4850         & 154144         \\
0017    & (512, 512, 150) & 46.19            & 4938         & 161893          \\
0019    & (512, 512, 215) & 41.23            & 4394         & 122667         \\
0099    & (512, 512, 334) & 51.92            & 4686         & 155622         \\
0063    & (512, 512, 448) & 53.18            & 4674         & 154248         \\
0048    & (512, 512, 499) & 59.8             & 4658         & 175999         \\
0029    & (512, 512, 554) & 75.38            & 5202         & 231308         \\ \hline

\end{tabular}
\end{table}


\subsection{Results on final testing set}
% This is a placeholder. We will send you the testing results during MICCAI (2023.10.8). 
The Docker container of our solution was officially evaluated by the challenge organizers on the final testing set, and the results are shown in Table~\ref{tab:final-results}.



\subsection{Limitation and future work}
Upon reflecting on our study, it becomes evident that we encounter certain limitations in the following aspects. 

\textbf{Calculation of Adaptive Weights: }
The computation of adaptive weights did not take into consideration the issue of small organ volumes, resulting in a lack of differentiation in loss weights between small organs. 
% This is an unreasonable limitation. 
Moreover, we observe that a lower label frequency results in a loss of segmentation accuracy: in our approach, the tumor DSC improves, whereas the mean DSC for organs decreases slightly.

\textbf{Effect of Different Preprocessing Strategies: }
Different preprocessing strategies were found to impact the contrast of the images. Future work may involve training on a fusion of images processed using various preprocessing methods. 

% \textbf{Challenges with Large Spatial Span Tumor Segmentation: }
% Some unrepresented segmentation results indicate that our method struggles with tumor segmentation across large spatial spans. It may be beneficial to incorporate spatial awareness modules or attention mechanisms to address this limitation. 

\textbf{Frequency is not fully taken into account in modeling: }
The frequency of each object is different in the dataset. Considering the frequency of each object would improve the segmentation performance of the model. 




\section{Conclusion}
In order to tackle the `partial labeling/annotation problem', we develop an adaptive learning network, AdaptNet, to effectively segment multiple organs and tumors within partially labeled datasets from abdomen CT images. 
% AdaptNet generates adaptive weights dynamically through an adaptive controller, which takes into account the balance of the partial labels and the corresponding pseudo-labels. 
The quantitative and qualitative results show that AdaptNet can efficiently and flexibly learn multiple organ and tumor information from the partial labeling/annotation dataset, which is typically performed by multiple or multi-head networks. 
We conducted validation on the large-scale partially annotated dataset of the MICCAI FLARE 2023 challenge and demonstrated that the proposed AdaptNet outperforms baseline segmentation methods across the 13 different organ and tumor segmentation tasks. 

\subsubsection{Acknowledgements} 
The authors of this paper declare that the segmentation method they implemented for participation in the FLARE 2023 challenge has not used any pre-trained models nor additional datasets other than those provided by the organizers. The proposed solution is fully automatic without any manual intervention. We thank all the data owners for making the CT scans publicly available and CodaLab~\cite{pavao2022codalab} for hosting the challenge platform. 
We also acknowledge Dr.~Zheng Yunlin for kindly sharing clinical knowledge and supporting some of the analysis of the segmentation results. This research was supported by the National Natural Science Foundation of China [No. 61972107]; Regional Innovation and
Development Joint Fund of National Natural Science Foundation of China [No. U22A20345]; National Science Foundation for Young Scientists of China [No. 82202142]; China Postdoctoral Science Foundation [No. 2022M720857]; Guangdong Provincial Key Laboratory of Artificial Intelligence in Medical Image Analysis and Application [No. 2022B1212010011]; High-level Hospital Construction Project [No. DFJHBF202105]; and Open Project of Guangdong Provincial Key Laboratory of Artificial Intelligence in Medical Image Analysis and Application [No. 2022B1212010011].

%
% ---- Bibliography ----
%
% BibTeX users should specify bibliography style 'splncs04'.
% References will then be sorted and formatted in the correct style.
%
\bibliographystyle{splncs04}
\bibliography{ref}

\newpage


% Please add the following required packages to your document preamble:
% \usepackage[normalem]{ulem}
% \useunder{\uline}{\ul}{}
\begin{table}[!htbp]
\caption{Checklist Table. Please fill out this checklist table in the answer column.}
\centering
\begin{tabular}{ll}
\hline
Requirements                                                                                                                    & Answer        \\ \hline
A meaningful title                                                                                                              & Yes        \\ \hline
The number of authors ($\leq$6)                                                                                                             & 6        \\ \hline
Author affiliations and ORCID                                                                                           & Yes        \\ \hline
Corresponding author email is presented                                                                                                  & Yes        \\ \hline
Validation scores are presented in the abstract                                                                                 & Yes        \\ \hline
\begin{tabular}[c]{@{}l@{}}Introduction includes at least three parts: \\ background, related work, and motivation\end{tabular} & Yes        \\ \hline
A pipeline/network figure is provided                                                                                           & 2  \\ \hline
Pre-processing                                                                                                                  & 3-4   \\ \hline
Strategies to use the partial label                                                                                             & 4-7   \\ \hline
Strategies to use the unlabeled images.                                                                                         & 7   \\ \hline
Strategies to improve model inference                                                                                           & 6   \\ \hline
Post-processing                                                                                                                 & 6   \\ \hline
Dataset and evaluation metric section is presented                                                                              & 7   \\ \hline
Environment setting table is provided                                                                                           & 2
\\ \hline
Training protocol table is provided                                                                                             & 3  \\ \hline
Ablation study                                                                                                                  & 11  \\ \hline
Efficiency evaluation results are provided                                                                                     & 7 \\ \hline
Visualized segmentation example is provided                                                                                    & 3, 4 \\ \hline
Limitation and future work are presented                                                                                       & Yes  \\ \hline
Reference format is consistent.  & Yes        \\ \hline

\end{tabular}
\end{table}

\end{document}
