\documentclass{midl} % Include author names
% \documentclass[anon]{midl} % Anonymized submission

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution

\usepackage{mwe} % to get dummy images
\usepackage{url}
\usepackage{booktabs}
\usepackage{multirow}
\usepackage{amssymb}
\usepackage{amsmath}
\usepackage{graphicx}
\usepackage{soul}
\usepackage{color, xcolor} 
\usepackage{caption}

\jmlrvolume{-- 31}
\jmlryear{2024}
\jmlrworkshop{Full Paper -- MIDL 2024 submission}
\editors{Accepted for publication at MIDL 2024}

\title[In-context medical images segmentation]{
  ICL-SAM: Synergizing In-context Learning Model and SAM in Medical Image Segmentation}

 % Use \Name{Author Name} to specify the name.
 % If the surname contains spaces, enclose the surname
 % in braces, e.g. \Name{John {Smith Jones}} similarly
 % if the name has a "von" part, e.g \Name{Jane {de Winter}}.
 % If the first letter in the forenames is a diacritic
 % enclose the diacritic in braces, e.g. \Name{{\'E}louise Smith}

 % Two authors with the same address
 % \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\and
 %  \Name{Author Name2} \Email{xyz@sample.edu}\\
 %  \addr Address}

 % Three or more authors with the same address:
 % \midlauthor{\Name{Author Name1} \Email{an1@sample.edu}\\
 %  \Name{Author Name2} \Email{an2@sample.edu}\\
 %  \Name{Author Name3} \Email{an3@sample.edu}\\
 %  \addr Address}


% Authors with different addresses:
% \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\\
% \addr Address 1
% \AND
% \Name{Author Name2} \Email{xyz@sample.edu}\\
% \addr Address 2
% }

%\footnotetext[1]{Contributed equally}

% More complicate cases, e.g. with dual affiliations and joint authorship
\midlauthor{\Name{Jiesi Hu\nametag{$^{1,2}$}} \Email{405323011@qq.com}\\
\Name{Yang Shang\nametag{$^{1}$}} \Email{23s052012@stu.hit.edu.cn}\\
\Name{Yanwu Yang\nametag{$^{1,2}$}} \Email{20b952019@stu.hit.edu.cn}\\
\Name{Xutao Guo\nametag{$^{1,2}$}} \Email{18B952052@stu.hit.edu.cn}\\
\Name{Hanyang Peng\nametag{$^{2}$}} \Email{philoso\_phy0922@163.com}\\
% \(^{*}\)
\Name{Ting Ma\midljointauthortext{Corresponding authors}\nametag{$^{1,2,3,4}$}} \Email{tma@hit.edu.cn}\\
\addr $^{1}$ Electronic and Information Engineering School, Harbin Institute of Technology (Shenzhen), Shenzhen, China \\
\addr $^{2}$ Peng Cheng Laboratory, Shenzhen, China\\
\addr $^{3}$ Guangdong Provincial Key Laboratory of Aerospace Communication and Networking Technology, Harbin Institute of Technology (Shenzhen), Shenzhen, China \\
\addr $^{4}$ International Research Institute for Artificial Intelligence, Harbin Institute of Technology (Shenzhen), Shenzhen, China}

\begin{document}

\maketitle

\begin{abstract}
Medical image segmentation, a field facing domain shifts due to diverse imaging modalities and biomedical domains, has made strides with the development of robust models. In-Context Learning (ICL) models, such as UniverSeg, demonstrate robustness to domain shifts with support image-label pairs in varied medical imaging segmentation tasks. However, their performance is still unsatisfactory. On the other hand, the Segment Anything Model (SAM) stands out as a powerful universal segmentation model. In this work, we introduce a novel methodology, ICL-SAM, that integrates the superior performance of SAM with the ICL model to create more effective segmentation models within the in-context learning paradigm. Our approach employs SAM to refine segmentation results from the ICL model and leverages the ICL model to generate prompts for SAM, eliminating the need for manual prompt provision. Additionally, we introduce a semantic confidence map generation method into our framework to guide the prediction of both the ICL model and SAM, thereby further enhancing segmentation accuracy. Our method has been extensively evaluated across multiple medical imaging contexts, including fundus, MRI, and CT images, spanning five datasets. The results demonstrate significant performance improvements, particularly in settings with few support pairs, where our method can achieve over a 10\% increase in the absolute Dice coefficient compared to cutting-edge ICL models. Our code will be publicly available.
% Our code is available at: \url{https://github.com/jiesihu/In-context-MedSAM}.
\end{abstract}

  
\begin{keywords}
  Segmentation, In-context learning, SAM
\end{keywords}

\section{Introduction}
Image segmentation represents a pivotal challenge in medical image analysis, and deep learning has increasingly become the predominant approach for this task \cite{ronneberger2015u,isensee2021nnu,chen2021transunet,dolz2018hyperdense}. The field grapples with domain shift due to the heterogeneity of imaging modalities and the diversity within biomedical domains \cite{liu2023memory}. While existing segmentation tools are technologically advanced, they are often restricted to specific tasks or related domains. This specialization limits their ability to address domain shifts and diverse scenarios, particularly in the context of evolving clinical and scientific requirements in medical imaging. Despite various fine-tuning techniques being proposed \cite{liu2023memory, chen2021source, yang2022source,hu2023chebyshev}, the necessity for extensive computational resources and specialized machine learning expertise poses significant barriers in actual applications.

Originating from the field of natural language processing, In-Context Learning (ICL) has recently emerged as a promising methodology for developing universal segmentation models robust to domain shifts. Models such as UniverSeg \cite{butoi2023universeg} and Neuralizer \cite{czolbe2023neuralizer} applied this approach, constructing models that can adapt to new tasks or domains by leveraging the support set present in input data. This circumvents the need for extensive retraining and achieves promising results in few-shot scenarios. However, current state-of-the-art ICL models like UniverSeg \cite{butoi2023universeg} still face challenges due to their suboptimal performance, particularly with limited support set data.
\begin{figure}
  \centering
  \includegraphics[width=0.9\textwidth]{framework.pdf}
  \caption{Illustration of the Universal models cooperation framework, Semantic confidence map generation module and Adding semantic confidence map in SAM module. }
  \label{framework}
\end{figure}

On the other hand, the Segment Anything Model (SAM) \cite{kirillov2023segment} represents another universal segmentation model capable of generating satisfactory segmentation masks with bounding box or point prompts. Since SAM's introduction, numerous studies have explored its application in image segmentation \cite{zhang2023personalize}. \citet{huang2023segment} assessed SAM's effectiveness across various medical datasets, noting that bounding box prompts generally outperform point prompts. \citet{ma2023segment} achieved promising results by fine-tuning SAM with a substantial annotated medical segmentation dataset. However, the usage paradigm of SAM differs from that of ICL models. SAM necessitates providing prompts for each query image, whereas ICL models require only a set of fixed image-label pairs.

In this paper, we propose a method called ICL-SAM that utilizes SAM to refine the suboptimal segmentation results produced by the ICL model and allows the ICL model to provide targeted prompts for SAM, thereby eliminating the necessity for manual prompt provision. Furthermore, we harness semantic confidence maps derived from SAM's feature maps, which are replete with rich semantic information, to guide the generation of results and, thus, improve accuracy. ICL-SAM has been comprehensively evaluated in various contexts, including fundus, MRI, and CT images. It achieves significant performance improvements over cutting-edge ICL models, notably in few-shot scenarios, where it achieves an increase of over 10\% in the Dice coefficient. The key contributions of our work are summarized as follows:

\begin{itemize}
    \item We introduce an innovative method that employs the powerful SAM model to bolster and refine in-context learning models in medical image segmentation.
    \item We propose a novel semantic confidence map generation technique and its integration within our framework to enhance segmentation performance, based on the in-context learning framework.
    \item Our methodology is extensively evaluated across three types of images and five datasets, demonstrating significant improvements in model performance, especially in situations with limited support sets.
\end{itemize}



\begin{figure}[h]
  \centering
  \includegraphics[width=0.6\textwidth]{framework-1.pdf}
  \caption{Workflows of the ICL model and SAM during inference.}
  \label{framework-1}
\end{figure}

\section{Method}

Consider the set \( \{ (x_i, y_i) \}_{i=1}^n \) of image-label pairs for a segmentation task. We utilize the advanced ICL model UniverSeg \cite{butoi2023universeg} in our experiments. The UniverSeg model learns a function \( \hat{y}_{ \text{ICL}} =  \text{Dec}_{ \text{ICL}}( \text{Enc}_{ \text{ICL}}(x, S)) \), utilizing a CNN encoder and decoder architecture. It predicts a label map for an input \( x \) in accordance with the task-specified support set \( S = \{ (x_j, y_j) \}_{j=1}^m \), which consists of available example image-label pairs, where \( m \) denotes the number of pairs in the support set. For the SAM model, the prediction map is defined as \( \hat{y}_{ \text{SAM}} =  \text{Dec}_{\text{SAM}}( \text{Enc}_{\text{SAM}}(x), \text{prompt}) \). Figure \ref{framework-1} shows the workflow of these two models during inference.

% It is noteworthy that in our model, the bounding box prompt for SAM is autonomously generated from the output \( \hat{y}_{ \text{ICL}}\), thereby eliminating the need for manual prompt creation.

\subsection{Universal Models Cooperation}

The architecture of our cooperative methodology is depicted in Figure \ref{framework}. This module operates by iteratively generating bounding boxes from the segmentation outputs of the ICL model. These bounding boxes are then employed as prompts within the SAM framework, thereby achieving refined segmentation outcomes. Empirical studies, such as those by \cite{ma2023segment,huang2023segment}, have substantiated the effectiveness of bounding boxes as potent prompts for medical image segmentation. In addition, to fully harness the capabilities of the SAM encoder, we generate a semantic confidence map from its feature map outputs, specifically designed to emphasize the foreground regions. This confidence map plays a pivotal role in concurrently enhancing the segmentation performance of both the ICL model and SAM.

The final output map fuses results from both SAM and ICL models, formulated by the following equation:
\begin{equation}
  \hat{y} = \gamma \hat{y}_{ \text{ICL}} + (1-\gamma)\hat{y}_{ \text{SAM}},
\end{equation}
where \(\hat{y}\) denotes the prediction of our method. Considering that the performance of the ICL model improves as the support set size \( m \) increases, we dynamically set \(\gamma = a(1-e^{-\tau m})\) so that the weight of the ICL prediction grows with \( m \), where \(a\) is a scaling coefficient and \(\tau\) is a temperature factor.

\subsection{Semantic Confidence Map Generation}

Given that the support set comprises labeled data, it is feasible to employ the universal features extracted by SAM to train a Logistic Regression (LR) model. This model is adept at generating a semantic confidence map for the query image that accentuates the target region, as delineated in Figure \ref{framework}. In the training phase, features \(  \text{Enc}_{ \text{SAM}}(S) \in \mathbb{R}^{m \times 64 \times 64 \times 256} \) from the support set are utilized as training data, where \( m \) denotes the size of the support set. These features are reshaped into \(  \text{Enc}_{ \text{SAM}}(S) \in \mathbb{R}^{4096m \times 256} \) to serve as the input for the LR model, with the corresponding labels in the support set acting as the ground truth \( Y \in \mathbb{R}^{4096m} \). The SAGA algorithm \cite{defazio2014saga}, known for its efficiency with large training sets, alongside binary cross-entropy loss, is utilized for parameter optimization. After training, the semantic confidence map can be computed as follows:


\begin{equation}
  C = \text{sigmoid}(\delta\cdot  \text{Enc}_{ \text{SAM}}(x)+\delta_{0}),
\end{equation}
where \(\delta\) and \(\delta_{0}\) represent the weights and the bias of the LR model, respectively, and \(\cdot\) denotes the dot product along the feature dimension. Furthermore, since the dot product in feature dimensions can be replaced by a convolution layer with a \(1 \times 1\) kernel, the parameters of the trained logistic regression model are assigned to a convolution layer during actual inference. This approach enables the model to exploit GPU parallelism, thus ensuring inference speed. During inference, the confidence map for a query image \( x \) is computed by \( C = \text{sigmoid}(\text{conv}_{1\times 1}( \text{Enc}_{ \text{SAM}}(x))) \). The confidence map is subsequently interpolated to \( C \in \mathbb{R}^{h \times w} \), where \(h\) and \(w\) represent the spatial size of the target layer to which \(C\) will be applied. Note that other pretrained models can be used to build confidence maps instead of SAM using this method.


\subsection{Adding Semantic Confidence Map in SAM}

The bounding boxes provided by the ICL model may lack precision. To alleviate this, we introduce explicit semantic guidance into the decoder of SAM, enhancing its focus on foreground regions. As depicted in Figure \ref{framework}, we incorporate the generated confidence map into the second cross-attention block of the image-to-token attention and the final token-to-image attention blocks within SAM's decoder. While there are other attention blocks in SAM's decoder, we have found that changing just these two blocks suffices. Specifically, we have modified the original attention matrices \( I \in \mathbb{R}^{h \times w} \) corresponding to the mask token for these blocks as follows:

\begin{equation}
  I_{\text{ICL-SAM}} = \text{softmax}(I\odot e^{\beta \text{Z-Norm}(C)}),
\end{equation}
where \(\text{Z-Norm}(C)\) denotes the z-score normalization, and \(\beta=2\) serves as a balancing factor. A larger value of \(\beta\) exerts greater influence of the confidence map on the original attention matrices. This modification allows SAM to more effectively concentrate on foreground features even when the bounding box is not accurate, thereby enhancing overall segmentation accuracy.

\subsection{Adding Semantic Confidence Map in ICL Model}

The spatial information provided by the semantic confidence map can be beneficial for the ICL model, particularly when its segmentation is imprecise. Therefore, we incorporate the confidence map as spatial attention into the decoder of the ICL model. Specifically, the process of adding the confidence map in UniverSeg is defined as follows:

% \begin{equation}
%   Dec_{UniSeg}^{i}(x)' = \text{Norm}\left(Dec_{UniSeg}^{i}(x) \odot (\alpha C+1) \middle| Dec_{UniSeg}^{i}(x)\right),
% \end{equation}

\begin{equation}
   \text{Dec}_{ \text{ICL-SAM}}^{i}(x) = \text{Norm}\left( \text{Dec}_{ \text{ICL}}^{i}(x) \odot e^{\alpha\text{Z-Norm}(C)} \middle|  \text{Dec}_{ \text{ICL}}^{i}(x)\right),
\end{equation}
where \( \text{Dec}_{ \text{ICL}}^{i}(x)\) represents the feature map at the \(i\)th decoder layer of the UniverSeg model. The function \(\text{Norm}\) is employed to ensure the consistency of the L2 norm of the feature map, defined as \(\text{Norm}(B \mid A) = \frac{B\lVert A\rVert_{2}}{\lVert B\rVert_{2}}\). This attention mechanism is incorporated at each layer of the decoder in UniverSeg, analogous to the Attention U-Net \cite{oktay2018attention}. The larger the value of \(\alpha\), the more significant is the impact of the confidence map on the UniverSeg model. When \(\alpha=0\), the outcome is equivalent to the vanilla UniverSeg. We dynamically adjust the \(\alpha\) value as \(\alpha = be^{-\tau m}\), where \(b\) is a scaling coefficient, so that the effect of the semantic confidence map is reduced when the support set size is larger. 

\subsection{Iterative Bounding Box Generation}

We generate the bounding box prompts for SAM using the output pseudo-label of the ICL model. To eliminate noise within the prediction map, we first apply morphological erosion to the pseudo-label, reducing its area to 90\% of the original size. This step helps eliminate some of the finer noise. We then apply morphological dilation to restore the retained pseudo-label regions to their original size, ensuring the accuracy of the bounding boxes generated afterward. Finally, we iteratively select foreground components and generate a bounding box around each component, which is then fed as a prompt into the SAM decoder. The final output map from the SAM decoder is the union of the maps generated for each bounding box: \(  \text{Dec}_{ \text{SAM}}( \text{Enc}_{ \text{SAM}}(x)) =  \bigcup\limits_{i}  \text{Dec}_{ \text{SAM}}( \text{Enc}_{ \text{SAM}}(x), \text{prompt}_{i})\). This method can effectively handle scenarios involving multiple separated targets within an image.



\section{Experiments and Discussion}

\subsection{Datasets}

Our methodology was evaluated across three distinct scenarios, encompassing the segmentation of fundus, brain MRI, and kidney CT images. Each dataset consists of a designated meta support set and a query set. We randomly selected image-label pairs to constitute the support set from the meta support set and conducted inference on the query set.

(1) For the segmentation of the optic disc and cup in retinal fundus images, we utilized datasets from the \textbf{REFUGE} challenge \cite{orlando2020refuge}, \textbf{RIM-ONE-r3} \cite{fumero2011rim}, and \textbf{Drishti-GS} \cite{sivaswamy2015comprehensive}. The composition of the meta support and query sets for these datasets was 320/80, 99/60, and 50/51 images, respectively.

(2) Whole tumor segmentation was performed on T1, T1ce, T2, and FLAIR modalities using the \textbf{BraTS2020} dataset \cite{bakas2018identifying}, focusing on low-grade glioma cases. The meta support set and the query set were randomly divided with 53 and 23 cases.

(3) The \textbf{Kits23} dataset \cite{heller2023kits21} was utilized for combined segmentation of kidney and tumor, with the dataset being randomly partitioned into meta support and query subsets containing 245 and 244 cases, respectively.

For the 3D MRI and CT datasets, we extracted 2D slices that contained the segmentation targets. The preprocessing of images was aligned with the protocols in UniverSeg \cite{butoi2023universeg} and SAM \cite{kirillov2023segment, ma2023segment}. A detailed description of the datasets can be found in the appendix.

\subsection{Implementation Details and Comparison Models}
Our experiments were conducted on NVIDIA V100 GPUs equipped with 32GB of memory. For each inference scenario, we randomly selected support sets 10 times, calculating their mean results to derive the final outcome. The Dice coefficient, which quantifies the overlap between the predicted segmentation and the ground truth, was employed for evaluation. In these experiments, the parameters \(a\), \(b\), \(\tau\) were set to 0.5, 0.3, and 0.1, respectively, values that were determined to be optimal through our testing. Note that our model does not require fine-tuning the parameters of the ICL and SAM models. Thus, ICL-SAM can be directly applied to other tasks without retraining.

To ascertain the efficacy of our approach, comparisons were made with both UniverSeg~\cite{butoi2023universeg} and Neuralizer~\cite{czolbe2023neuralizer}, which are state-of-the-art in-context learning models in the realm of medical imaging. UniverSeg is a universal segmentation model, and Neuralizer, trained on neuroimaging data, is versatile in performing a variety of tasks beyond segmentation. Regarding the SAM model, we evaluated both the original SAM~\cite{kirillov2023segment} checkpoint and the MedSAM~\cite{ma2023segment} checkpoint of the ViT-B model.


% Painter and SegGPT are ICL models trained on natural images. Painter can only accept one support pair, and SegGPT can support multiple support pairs, but due to our GPU memory constraints, we were only able to test up to 16 support pairs. In our experiments, we used the UniverSeg and SegGPT as the ICL model in our method. 

\begin{table}[htbp]
  \centering
  % \captionsetup{font=scriptsize}
  \caption{Performance comparison of different models on multiple datasets.}
  \label{tab:model_performance}
  \resizebox{0.8\textwidth}{!}{
  \begin{tabular}{lcccccccc}
  \toprule
  \multirow{2}{*}{Dataset} & \multirow{2}{*}{Model} & \multicolumn{7}{c}{Support set size} \\
  \cmidrule{3-9}
   & & \textbf{1} & \textbf{2} & \textbf{4} & \textbf{8} & \textbf{16} & \textbf{32} & \textbf{64} \\
  \midrule
  \multirow{6}{*}{Fundus}  
                  & UniverSeg & 0.5797 & 0.7081 & 0.7529 & 0.7896 & 0.8148 & 0.8290 & 0.8344 \\
                  & Neuralizer & 0.6774 & 0.7109 & 0.7314 & 0.7325 & 0.7531 & 0.7515 & 0.7512 \\
                  % & UniverSeg + MedSAM attention & 0.6276 & 0.7268 & 0.7606 & 0.7952 & 0.8180 & 0.8296 & 0.8346 \\
                  % & UniverSeg prompted MedSAM & 0.7385 & 0.7880 & \textbf{0.8060} & \textbf{0.8275} & 0.8387 & 0.8456 & 0.8486 \\
                  & UniverSeg+SAM & 0.6909	& 0.7347	& 0.7716	& 0.8030	& 0.8173	& 0.8293	& 0.8339\\
                  & UniverSeg+MedSAM & \textbf{0.7391} & \textbf{0.7889} & \textbf{0.8057} & \textbf{0.8264} & \textbf{0.8391} & \textbf{0.8467} & \textbf{0.8499} \\
                  \cmidrule{3-9} 
                  & SAM+GT & \multicolumn{7}{c}{0.7161} \\
                  & MedSAM+GT & \multicolumn{7}{c}{0.8873} \\
  \midrule
  \multirow{6}{*}{BraTs} 
                  & UniverSeg & 0.2078 & 0.3059 & 0.4706 & 0.5780 & 0.6704 & 0.7277 & 0.7747 \\
                  & Neuralizer & 0.2161 & 0.2284 & 0.2540 & 0.2563 & 0.3203 & 0.3760 & 0.4750 \\
                  % & UniverSeg + MedSAM attention & 0.2719 & 0.3848 & 0.5131 & 0.6006 & 0.6796 & 0.7301 & 0.7748 \\
                  % & UniverSeg prompted MedSAM & \textbf{0.3415} & \textbf{0.4622} & \textbf{0.5925} & \textbf{0.6632} & 0.7188 & 0.7510 & 0.7740 \\
                  & UniverSeg+SAM & 0.2874	& 0.3993	& 0.5269	& 0.6184	& 0.6857	& 0.7454	& 0.7799 \\
                  & UniverSeg+MedSAM & \textbf{0.3387} & \textbf{0.4584} & \textbf{0.5879} & \textbf{0.6579} & \textbf{0.7191} & \textbf{0.7586} & \textbf{0.7899} \\
                  \cmidrule{3-9} 
                  & SAM+GT & \multicolumn{7}{c}{0.8398} \\
                  & MedSAM+GT &  \multicolumn{7}{c}{0.8447} \\
  \midrule
  \multirow{6}{*}{Kits23} 
                  & UniverSeg & 0.4487 & 0.5671 & 0.7179 & 0.7838 & 0.8340 & 0.8500 & 0.8646 \\
                  & Neuralizer & 0.3715 & 0.5278 & 0.5510 & 0.6384 & 0.6515 & 0.6694 & 0.6728 \\
                  % & UniverSeg + MedSAM attention & 0.5287 & 0.6143 & 0.7410 & 0.7940 & 0.8376 & 0.8510 & 0.8647 \\
                  % & UniverSeg prompted MedSAM & 0.6269 & 0.6450 & 0.7317 & 0.7866 & 0.8288 & 0.8402 & 0.8518 \\
                  & UniverSeg+SAM &\textbf{0.6455}	&\textbf{0.7182}	&\textbf{0.8065}	&\textbf{0.8439}	&\textbf{0.8629}	&\textbf{0.8764}	&\textbf{0.8843}\\
                  & UniverSeg+MedSAM & 0.6272 & 0.6493 & 0.7438 & 0.8053 & 0.8504 & 0.8633 & 0.8757 \\
                  \cmidrule{3-9} 
                  & SAM+GT & \multicolumn{7}{c}{0.9510} \\
                  & MedSAM+GT & \multicolumn{7}{c}{0.9371} \\
  \bottomrule
  \end{tabular}
  }
\end{table}

\subsection{Comparison Results}
Table~\ref{tab:model_performance} displays the Dice score of our model across various support set sizes and datasets. In the case of the Fundus dataset, we present the mean segmentation values for the optic disc and cup across three datasets. For the BraTS2020 dataset, the average segmentation outcomes across all modalities are shown. For the Kits23 dataset, we illustrate the results of the combined segmentation of the kidney and tumor. Detailed results are available in the appendix. SAM+GT denotes the performance obtained when bounding boxes generated from the ground-truth maps are used as prompts for SAM, which represents the upper bound of the corresponding SAM model. In the appendix, we also present the Average Symmetric Surface Distance (ASSD) of our model.

A notable enhancement in performance is evident when the support set size is small. Employing our methodology with a support set size of 1 yields improvements of 15.94\%, 13.09\%, and 19.68\% in the Dice coefficient across the three datasets, respectively. Furthermore, our approach consistently outperforms the compared ICL models across all support set sizes and datasets. The integration of SAM and MedSAM demonstrates differential impacts in various scenarios. MedSAM is particularly advantageous for fundus and brain MRI, and the addition of SAM is more effective for CT kidney segmentation. This disparity is likely due to SAM's limited specialization in the medical field, leading to suboptimal performance in cases with less distinct boundaries like Fundus and BraTs, and superior results in kidney segmentation where boundaries are more obvious. For the Fundus dataset, although the performance of SAM+GT is poor, UniverSeg+SAM can still bring improvements. We attribute this to the benefits brought by the proposed confidence map. Although the current efficacy is yet to match that of MedSAM+GT, our method has successfully narrowed this gap, showcasing potential within the in-context learning framework.


\begin{figure}
  \centering
  \includegraphics[width=0.9\textwidth]{Curve.pdf}
  \caption{Comparison of models under different support sizes.}
  \label{Curve}
\end{figure}

Figure \ref{Curve} presents our results in the form of a curve graph, where the shaded areas represent the standard deviation of the corresponding performances. Notably, for the Fundus and Kits23 datasets, our method with only 16 support instances achieves performance comparable to that of UniverSeg with a support size of 64. In the context of the BraTs2020 dataset, a support size of 32 achieves performance equivalent to UniverSeg with 64 support instances. This underscores the potential of our approach to significantly reduce the annotation burden on clinicians for in-context inference while maintaining robust performance.

Figure \ref{examples} demonstrates the enhancement process of UniverSeg through our methodology. It shows that UniverSeg's segmentation can be incomplete and coarse. The incorporation of the semantic confidence map leads to better segmentation outcomes. By processing the bounding boxes through SAM, we achieve a more comprehensive and accurate segmentation. 


\noindent % Ensures there is no indentation for the line
\begin{minipage}{0.65\textwidth}
  \centering
  \includegraphics[width=\textwidth]{examples.pdf}
  \captionof{figure}{The process of refining the results from UniverSeg.}
  \label{examples}
\end{minipage}%
\begin{minipage}{0.35\textwidth}
  \centering
  \captionof{table}{Ablation study.}
  \label{tab:universeg_variants}
  \resizebox{\textwidth}{!}{
  \begin{tabular}{lcc}
  \toprule
  Variant & Dice (\%) & Gain \\
  \midrule
  UniverSeg (ICL model) & 67.19 & - \\
  \cmidrule{1-1}
  +  \begin{tabular}[x]{@{}c@{}}Confidence map\\in UniverSeg\end{tabular} & 69.42 & +2.23 \\
  \hspace{2pt} + MedSAM & 70.10 & +0.68 \\
  \hspace{4pt}\hspace{4pt} + \begin{tabular}[x]{@{}c@{}}Confidence map\\in MedSAM\end{tabular} & 72.89 & +2.79 \\
  \hspace{4pt}\hspace{4pt}\hspace{4pt} + Fusion & 73.43 & +0.53 \\
  \bottomrule
  \end{tabular}
  }
\end{minipage}

\subsection{Ablation Study}

Table~\ref{tab:universeg_variants} presents an ablation study, showcasing the mean performance across all three datasets and for every support set size. The task-specific ablation analyses are in the appendix. It is evident that incorporating SAM enhances the UniverSeg model's performance. Furthermore, the addition of semantic confidence into either UniverSeg or SAM markedly elevates the overall model efficacy. The Fusion approach, which combines the predictive outcomes of both the ICL and SAM models, also demonstrates improvement, particularly when the support set is large, because, in such scenarios, UniverSeg is capable of providing a relatively accurate segmentation mask.

\section{Conclusion}
Our proposed methodology capitalizes on SAM's high precision in segmentation and the ICL model's ability to provide contextual support. The integration of a semantic confidence map further enhances segmentation accuracy. Our comprehensive evaluations demonstrate the effectiveness of the proposed framework, particularly in scenarios with limited support sets. Additionally, this framework reduces the need for extensive manual input for SAM. Our research highlights the significant potential of the in-context learning paradigm, suggesting opportunities for future enhancements.

\midlacknowledgments{This work was supported in part by grants from the National Natural Science Foundation of P.R. China (62276081), and The Major Key Project of PCL (PCL2021A06)}


\bibliography{midl-fullpaper}

\appendix
\section{Dataset Details}
Table~\ref{tab:dataset_summary} provides a comprehensive summary of the datasets utilized in the study, detailing their modality, object of interest, and the distribution of images across the meta support and query sets.

\section{Experiment Details}
Tables~\ref{tab:model_performance_all} and~\ref{tab:model_performance_all_II} show the detailed performance of various models across different support set sizes and datasets, specifically focusing on medical image segmentation tasks like Fundus (Disc and Cup) and BraTs2020 (FLAIR, T1, T1CE, T2), as well as the Kits23 dataset. Each model, including Neuralizer and UniverSeg, is evaluated for its segmentation efficacy at support set sizes ranging from 1 to 64, consistent with the naming in Table~\ref{tab:model_performance}. Additionally, we present ablation study data for different tasks to provide readers with a clearer understanding of our method's performance in various scenarios. The naming convention for the ablation study section is in line with that used in Table~\ref{tab:universeg_variants}, and ``+ Fusion'' is actually the same model as ``MedSAM+UniverSeg''. 

A notable observation is the consistent performance improvement when confidence maps are integrated into both UniverSeg and MedSAM models, illustrating the value of semantic information in refining segmentation results. The Fusion approach, which combines the strengths of UniverSeg and MedSAM, generally yields the highest performance across most datasets and support sizes, underscoring the effectiveness of leveraging multiple models' capabilities in concert. Moreover, the comparison with SAM+UniverSeg and ground truth-enhanced versions (SAM+GT and MedSAM+GT) provides a benchmark, showing the potential ceiling of segmentation performance with these methodologies. 

Overall, these results highlight the potential of advanced in-context learning models and their combinations to address the challenges of medical image segmentation, especially in scenarios with limited labeled data. The consistent performance gains across different datasets and support set sizes underscore the robustness and adaptability of the proposed methodologies.

\begin{table}[htbp]
\centering
\resizebox{\textwidth}{!}{
\begin{tabular}{lcccccc}
\toprule
\multirow{2}{*}{\textbf{Dataset}} & \multirow{2}{*}{\textbf{Modality}} & \multirow{2}{*}{\textbf{Object}} & \multicolumn{2}{c}{\textbf{Meta support set}} & \multicolumn{2}{c}{\textbf{Query set}} \\ 
\cmidrule{4-7}
                  &                   &                 & \textbf{\# 3D images} & \textbf{\# 2D images} & \textbf{\# 3D images} & \textbf{\# 2D images} \\ 
\hline
REFUGE           & RGB               & Fundus          & -                      & 320                   & -                      & 80                    \\ 
RIM-ONE-r3       & RGB               & Fundus          & -                      & 99                    & -                      & 60                    \\ 
Drishti-GS       & RGB               & Fundus          & -                      & 20                    & -                      & 51                    \\ 
BraTs2020        & T1, T1ce, T2, FLAIR & Brain          & 53\(\times\)4                     & 3439\(\times\)4                  & 23\(\times\)4                     & 1487\(\times\)4                  \\ 
Kits23           & CT                & Kidney          & 245                    & 6396                  & 244                    & 5630                  \\ 
\bottomrule
\end{tabular}
}
\caption{Summary of datasets used in the study.}
\label{tab:dataset_summary}
\end{table}
  
\begin{table}[htbp]
\centering
\caption{Performance comparison across varying support set sizes and datasets, Part 1.}
\label{tab:model_performance_all}
\resizebox{\textwidth}{!}{
\begin{tabular}{lcccccccc}
\toprule
\multirow{2}{*}{Dataset} & \multirow{2}{*}{Model} & \multicolumn{7}{c}{Support set size} \\
\cmidrule{3-9}
  & & \textbf{1} & \textbf{2} & \textbf{4} & \textbf{8} & \textbf{16} & \textbf{32} & \textbf{64} \\
\midrule
\multirow{6}{*}{Fundus: Disc} 
  & Neuralizer	& 0.8012	& 0.8514	& 0.8629	& 0.8716	& 0.8804	& 0.8851	& 0.8867\\
  & UniverSeg	& 0.6611	& 0.8163	& 0.8648	& 0.8937	& 0.9117	& 0.9203	& 0.9235\\
  & SAM+UniverSeg	&0.8291	&0.8664	&0.8911	&0.9066	&0.9173	&0.9240	&0.9260\\
  & MedSAM+UniverSeg	&0.8716	&0.9060	&0.9157	&0.9266	&0.9331	&0.9355	&0.9373\\
  \cmidrule{2-9}
  & + Confidence map in UniverSeg	& 0.7330	& 0.8441	& 0.8756	& 0.8999	& 0.9146	& 0.9208	& 0.9236\\
  & + MedSAM	& 0.8525	& 0.8862	& 0.8931	& 0.9055	& 0.9216	& 0.9227	& 0.9261 \\
  & + Confidence map in MedSAM	&0.8710	&0.9049	&0.9151	&0.9262	&0.9319	&0.9344	&0.9349\\
  & + Fusion	&0.8716	&0.9060	&0.9157	&0.9266	&0.9331	&0.9355	&0.9373\\
  \cmidrule{2-9}
  & SAM + GT	& \multicolumn{7}{c}{0.7706} \\ 		
  & MedSAM + GT	& \multicolumn{7}{c}{0.9441} \\ 						

\midrule
\multirow{6}{*}{Fundus: Cup} 
  &Neuralizer	& 0.5537	& 0.5704	& 0.5999	& 0.5935	& 0.6259	& 0.6180	& 0.6157\\
  &UniverSeg	& 0.4983	& 0.5998	& 0.6410	& 0.6856	& 0.7179	& 0.7377	& 0.7453\\
  &SAM+UniverSeg	& 0.5528	& 0.6029	& 0.6521	& 0.6994	& 0.7173	& 0.7346	& 0.7418\\
  &MedSAM+UniverSeg	& 0.6066	& 0.6719	& 0.6956	& 0.7263	& 0.7451	& 0.7579	& 0.7625\\
  \cmidrule{2-9}
  &+ Confidence map in UniverSeg	& 0.5222	& 0.6095	& 0.6455	& 0.6905	& 0.7214	& 0.7384	& 0.7455\\
  &+ MedSAM	& 0.5630	& 0.6257	& 0.6454	& 0.6715	& 0.6936	& 0.7131	& 0.7226\\
  &+ Confidence map in MedSAM	& 0.6060	& 0.6710	& 0.6968	& 0.7288	& 0.7455	& 0.7568	& 0.7622\\
  &+ Fusion	& 0.6066	& 0.6719	& 0.6956	& 0.7263	& 0.7451	& 0.7579	& 0.7625\\
  \cmidrule{2-9}
  & SAM + GT	& \multicolumn{7}{c}{0.6615} \\ 		
  & MedSAM + GT	& 	\multicolumn{7}{c}{0.8304} \\							
		
\midrule		
\multirow{6}{*}{BraTs: FLAIR} 
 & Neuralizer	& 0.2252	& 0.2519	& 0.2944	& 0.3026	& 0.3882	& 0.4460	& 0.5467\\ 
 & UniverSeg	& 0.2566	& 0.3880	& 0.6193	& 0.7292	& 0.8024	& 0.8402	& 0.8668\\
 & SAM+UniverSeg	& 0.3342	& 0.5095	& 0.6765	& 0.7810	& 0.8279	& 0.8598	& 0.8791\\
 & MedSAM+UniverSeg	& 0.4299	& 0.5776	& 0.7430	& 0.8076	& 0.8502	& 0.8715	& 0.8874\\
 \cmidrule{2-9}
 & + Confidence map in UniverSeg	& 0.3311	& 0.4825	& 0.6547	& 0.7480	& 0.8091	& 0.8417	& 0.8669\\
 & + MedSAM	& 0.3407	& 0.4974	& 0.7016	& 0.7512	& 0.8060	& 0.8214	& 0.8408\\
 & + Confidence map in MedSAM	& 0.4337	& 0.5819	& 0.7462	& 0.8093	& 0.8479	& 0.8631	& 0.8729\\
 & + Fusion	& 0.4299	& 0.5776	& 0.7430	& 0.8076	& 0.8502	& 0.8715	& 0.8874\\
 \cmidrule{2-9}
 & SAM + GT	& \multicolumn{7}{c}{0.8743} \\ 		
 & MedSAM + GT	& 	\multicolumn{7}{c}{0.8905	} \\								
\midrule		
\multirow{6}{*}{BraTs: T1} 
& Neuralizer                 & 0.2125 & 0.2181 & 0.2458 & 0.2478 & 0.2989 & 0.3592 & 0.4640 \\
& UniverSeg                  & 0.1736 & 0.2368 & 0.3735 & 0.4719 & 0.5687 & 0.6415 & 0.7032 \\
& SAM+UniverSeg              & 0.2549 & 0.3257 & 0.4255 & 0.5054 & 0.5750 & 0.6545 & 0.7051 \\
& MedSAM+UniverSeg                  & 0.2819 & 0.3780 & 0.4872 & 0.5580 & 0.6199 & 0.6730 & 0.7171 \\
\cmidrule{2-9}
& + Confidence map in UniverSeg  & 0.2314 & 0.3080 & 0.4174 & 0.4977 & 0.5785 & 0.6439 & 0.7033 \\
& + MedSAM                    & 0.2596 & 0.3537 & 0.3675 & 0.5223 & 0.5825 & 0.6338 & 0.6660 \\
& + Confidence map in MedSAM     & 0.2846 & 0.3831 & 0.4928 & 0.5647 & 0.6205 & 0.6633 & 0.6980 \\
& + Fusion                   & 0.2819 & 0.3780 & 0.4872 & 0.5580 & 0.6199 & 0.6730 & 0.7171 \\
\cmidrule{2-9}
& SAM + GT	& \multicolumn{7}{c}{0.8192} \\ 		
& MedSAM + GT                   &        \multicolumn{7}{c}{0.8125}\\
\bottomrule
\end{tabular}
}
\end{table}

\begin{table}[htbp]
\centering
\caption{Performance comparison across varying support set sizes and datasets, Part 2.}
\label{tab:model_performance_all_II}
\resizebox{\textwidth}{!}{
\begin{tabular}{lcccccccc}
\toprule
\multirow{2}{*}{Dataset} & \multirow{2}{*}{Model} & \multicolumn{7}{c}{Support set size} \\
\cmidrule{3-9}
& & \textbf{1} & \textbf{2} & \textbf{4} & \textbf{8} & \textbf{16} & \textbf{32} & \textbf{64} \\
\midrule		
\multirow{6}{*}{BraTs: T1CE} 
& Neuralizer                 & 0.2059 & 0.2014 & 0.2182 & 0.2201 & 0.2696 & 0.3058 & 0.3997 \\
& UniverSeg                  & 0.1907 & 0.2498 & 0.3557 & 0.4448 & 0.5585 & 0.6342 & 0.6951 \\
& SAM+UniverSeg              & 0.2667 & 0.3242 & 0.4019 & 0.4884 & 0.5678 & 0.6546 & 0.6985 \\
& MedSAM+UniverSeg                    & 0.2850 & 0.3767 & 0.4675 & 0.5248 & 0.6087 & 0.6680 & 0.7092 \\
\cmidrule{2-9}
& + Confidence map in UniverSeg & 0.2394 & 0.3163 & 0.4041 & 0.4733 & 0.5714 & 0.6378 & 0.6953 \\
& + MedSAM                   & 0.2570 & 0.3394 & 0.4434 & 0.4912 & 0.5883 & 0.6398 & 0.6615 \\
& + Confidence map in MedSAM & 0.2874 & 0.3808 & 0.4737 & 0.5322 & 0.6102 & 0.6613 & 0.6925 \\
& + Fusion                   & 0.2850 & 0.3767 & 0.4675 & 0.5248 & 0.6087 & 0.6680 & 0.7092 \\
\cmidrule{2-9}
& SAM + GT	& \multicolumn{7}{c}{0.8007} \\ 		
& MedSAM + GT & \multicolumn{7}{c}{0.8228} \\
\midrule
\multirow{6}{*}{BraTs: T2} 
 & Neuralizer                       & 0.2207 & 0.2422 & 0.2578 & 0.2546 & 0.3245 & 0.3930 & 0.4896 \\
 & UniverSeg                        & 0.2104 & 0.3489 & 0.5337 & 0.6659 & 0.7521 & 0.7950 & 0.8336 \\
 & SAM+UniverSeg                    & 0.2937 & 0.4378 & 0.6038 & 0.6989 & 0.7721 & 0.8126 & 0.8368 \\
 & MedSAM+UniverSeg                 & 0.3579 & 0.5013 & 0.6538 & 0.7412 & 0.7975 & 0.8219 & 0.8461 \\
 \cmidrule{2-9}
 & + Confidence map in UniverSeg    & 0.2855 & 0.4326 & 0.5763 & 0.6834 & 0.7592 & 0.7971 & 0.8336 \\
 & + MedSAM                         & 0.3433 & 0.4300 & 0.6056 & 0.6689 & 0.7377 & 0.7650 & 0.7897 \\
 & + Confidence map in MedSAM       & 0.3603 & 0.5032 & 0.6574 & 0.7465 & 0.7967 & 0.8161 & 0.8325 \\
 & + Fusion                         & 0.3579 & 0.5013 & 0.6538 & 0.7412 & 0.7975 & 0.8219 & 0.8461 \\
 \cmidrule{2-9}
 & SAM + GT	& \multicolumn{7}{c}{0.8649} \\ 		
 & MedSAM + GT                         &        \multicolumn{7}{c}{0.8528}  \\
\midrule
\multirow{6}{*}{Kits23} 
& Neuralizer                       & 0.3715 & 0.5278 & 0.5510 & 0.6384 & 0.6515 & 0.6694 & 0.6728 \\
& UniverSeg                        & 0.4487 & 0.5671 & 0.7179 & 0.7838 & 0.8340 & 0.8500 & 0.8646 \\
& SAM+UniverSeg                    & 0.6455 & 0.7182 & 0.8065 & 0.8439 & 0.8629 & 0.8764 & 0.8843 \\
& MedSAM+UniverSeg                        & 0.6272 & 0.6493 & 0.7438 & 0.8053 & 0.8504 & 0.8633 & 0.8757 \\
\cmidrule{2-9}
& + Confidence map in UniverSeg    & 0.5287 & 0.6143 & 0.7410 & 0.7940 & 0.8376 & 0.8510 & 0.8647 \\
& + MedSAM                         & 0.5864 & 0.6349 & 0.7320 & 0.7934 & 0.8296 & 0.8460 & 0.8515 \\
& + Confidence map in MedSAM       & 0.6269 & 0.6450 & 0.7317 & 0.7866 & 0.8228 & 0.8402 & 0.8518 \\
& + Fusion                        & 0.6272 & 0.6493 & 0.7438 & 0.8053 & 0.8504 & 0.8633 & 0.8757 \\
\cmidrule{2-9}
& SAM + GT	& \multicolumn{7}{c}{0.9510} \\ 		
& MedSAM + GT                         &        \multicolumn{7}{c}{0.9371}    \\
\bottomrule
\end{tabular}
}
\end{table}

To provide a comprehensive demonstration of our model's performance, we also include the Average Symmetric Surface Distance (ASSD) in Table \ref{tab:model_ASD}. ASSD measures the mean distance between corresponding points on the surfaces of two objects, and is widely used in fundus and tumor segmentation. For consistency in comparison, all images were resized to \(512 \times 512\) when computing the ASSD. The ASSD is reported in pixels. As indicated in the table, the results largely align with those in Table \ref{tab:model_performance}. UniverSeg+MedSAM achieves superior performance for the fundus and BraTS2020 datasets, while UniverSeg+SAM demonstrates the best outcomes for the Kits23 dataset. It is also noted that model improvements are more pronounced at smaller context sizes, with performance gains diminishing as context size increases. Nonetheless, even at a context size of 64, a significant improvement is observed for the Kits23 dataset, highlighting the exceptional performance of our method. Furthermore, there is a gap between our approach and MedSAM+GT, suggesting potential for further enhancement of in-context learning models.


\begin{table}[htbp]
  \centering
  % \captionsetup{font=scriptsize}
  \caption{Performance comparison of ASSD (pixel) across multiple datasets.}
  \label{tab:model_ASD}
  \resizebox{0.8\textwidth}{!}{
  \begin{tabular}{lcccccccc}
  \toprule
  \multirow{2}{*}{Dataset} & \multirow{2}{*}{Model} & \multicolumn{7}{c}{Support set size} \\
  \cmidrule{3-9}
   & & \textbf{1} & \textbf{2} & \textbf{4} & \textbf{8} & \textbf{16} & \textbf{32} & \textbf{64} \\
  \midrule
  \multirow{6}{*}{Fundus}  
                  & UniverSeg & 23.18 &	17.83 &	14.45 &	12.44 &	11.13 &	10.41 &	10.17 \\
                  & Neuralizer & 22.38 &	19.43 &	16.03 &	14.65 &	13.97 &	13.64 &	13.36 \\
                  & UniverSeg+SAM & 17.78 &	15.80 &	13.80 &	12.23 &	11.02 &	10.52 &	10.02 \\
                  & UniverSeg+MedSAM & \textbf{16.99} &	\textbf{13.41} &	\textbf{10.86} &	\textbf{10.01} &	\textbf{9.50 }&	\textbf{8.91 }&	\textbf{8.71 }\\
                  \cmidrule{3-9} 
                  & SAM+GT & \multicolumn{7}{c}{16.24} \\
                  & MedSAM+GT & \multicolumn{7}{c}{6.48} \\
  \midrule
  \multirow{6}{*}{BraTs} 
                  & UniverSeg & 46.12 &	41.55 &	36.18 &	26.59 &	20.74	 &17.88 &	14.23 \\
                  & Neuralizer & 63.80 &	52.92 &	51.75 &	46.76 &41.71 &	33.71 &	27.04 \\
                  & UniverSeg+SAM & 48.25 &	39.37 &	34.35 &	26.61 &	20.27	 &17.48 &	14.32 \\
                  & UniverSeg+MedSAM & \textbf{39.74} &	\textbf{35.10} &	\textbf{30.32} &	\textbf{23.06} &	\textbf{18.64} &	\textbf{16.29} &	\textbf{13.34} \\
                  \cmidrule{3-9} 
                  & SAM+GT & \multicolumn{7}{c}{6.74} \\
                  & MedSAM+GT &  \multicolumn{7}{c}{6.58} \\
  \midrule
  \multirow{6}{*}{Kits23} 
                  & UniverSeg & 39.62 &	32.80 &	27.52 &	22.46 &	22.99 &	19.71 &	18.08 \\
                  & Neuralizer & 61.69 &	43.80 &	37.29 &	35.53 &	31.79 &	29.59 &	28.84 \\
                  & UniverSeg+SAM & \textbf{34.25} &	\textbf{27.00} &	\textbf{19.80} &	\textbf{16.26} & \textbf{18.02} &	\textbf{15.50} &	\textbf{14.45} \\
                  & UniverSeg+MedSAM &34.57 &	29.47 &	22.41 &	19.08 &	19.35 &	16.14 &	14.73 \\
                  \cmidrule{3-9} 
                  & SAM+GT & \multicolumn{7}{c}{4.19} \\
                  & MedSAM+GT & \multicolumn{7}{c}{5.47} \\
  \bottomrule
  \end{tabular}
  }
\end{table}

Figure~\ref{examples-2} presents additional visualization results, primarily illustrating how our outcomes evolve from the original ICL model (UniverSeg). The evolution process adheres to the naming convention of the ablation study in Table~\ref{tab:universeg_variants}. To distinctly showcase the effects, we mainly selected smaller context sizes (2 for the fundus dataset and 4 for others). From the figure, it is evident that incorporating the confidence map into the ICL model or integrating SAM significantly enhances the results. This also reveals that the main reason for the improvement offered by our method is the utilization of the powerful SAM to rectify inaccuracies produced by the ICL model. Moreover, our approach tends to yield good results when the segmentation from UniverSeg provides an approximate location of the target and generates precise bounding boxes. However, cases where UniverSeg's segmentation map misses the target result in SAM's inability to produce accurate results as well. This limitation is a primary factor in the persisting performance gap between our method and MedSAM+GT.

\begin{figure}
\centering
\includegraphics[width=0.9\textwidth]{examples_2.pdf}
\captionof{figure}{Details of refining the results from UniverSeg.}
\label{examples-2}
\end{figure}

\begin{figure}
\centering
\includegraphics[width=\textwidth]{figs_sens.pdf}
\captionof{figure}{Sensitivity analysis of hyperparameters.}
\label{fig:sense-2}
\end{figure}

Figure \ref{fig:sense-2} displays the hyperparameter sensitivity analysis. We assessed the effects of different hyperparameters on fundus datasets and found that optimal performance is attained when \(a\), \(b\), \(\tau\), and \(\beta\) are set to 0.5, 0.1, and 2.0, respectively. Given their effectiveness on the fundus datasets, these hyperparameters were also applied to other datasets, demonstrating the robustness and generalizability of our algorithm.


\end{document}

