\documentclass{midl} % Include author names

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution

\usepackage{mwe} % to get dummy images


% Please add the following required packages to your document preamble:
\usepackage{multirow}
% \usepackage[table,xcdraw]{xcolor}
% Beamer presentation requires \usepackage{colortbl} instead of \usepackage[table,xcdraw]{xcolor}
\usepackage[normalem]{ulem}
\useunder{\uline}{\ul}{}
\usepackage{microtype}
\usepackage{booktabs}
\usepackage{hyperref}


\jmlryear{2024}
\jmlrworkshop{Full Paper -- MIDL 2024}
\jmlrvolume{-- nnn}
\editors{Accepted for publication at MIDL 2024}

\title[Edge-Guided Single-source Domain Generalization for Medical Image Segmentation]{Train Once, Deploy Anywhere: Edge-Guided Single-source Domain Generalization for Medical Image Segmentation}

\midlauthor{\Name{Jun Jiang \nametag{$^{1}$}} \Email{junjiang.steve@gmail.com}\\
\addr $^{1}$ Shenzhen Institute for Advanced Study, UESTC, Shenzhen, China\\
\Name{Shi Gu \nametag{$^{1,2}$}} \Email{gus@uestc.edu.cn}\\
\addr $^{2}$ School of Computer Science and Engineering, UESTC, Chengdu, China
}

% \midlauthor{\Name{Jun Jiang \nametag{$^{1}$}} \Email{junjiang.steve@gmail.com}\\
% \Name{Shi Gu \nametag{$^{1}$}} \Email{gus@uestc.edu.cn}\\
% \addr $^{1}$ Shenzhen Institute for Advanced Study, University of Electronic Science and Technology of China
% }

%  % Two authors with the same address
% \midlauthor{\Name{Jun Jiang} \Email{junjiang.steve@gmail.com}\and
% \Name{Shi Gu} \Email{gus@uestc.edu.cn} \\
% \addr Shenzhen Institute for Advanced Study, University of Electronic Science and Technology of China}



\begin{document}

\maketitle

\begin{abstract}
In medical image analysis, unsupervised domain adaptation models require retraining when receiving samples from a new data distribution, and multi-source domain generalization methods might be infeasible when there is only a single source domain. These pose formidable obstacles to model deployment. To this end, we take ``Train Once, Deploy Anywhere'' as our objective and consider a challenging but practical problem: Single-source Domain Generalization (SDG). Meanwhile, we note that (i) in medical image segmentation applications, generalization errors often come from imprecise predictions at the ambiguous boundary of anatomies and (ii) the edge of the image is domain-invariant, which can reduce the domain shift between the source and target domain in all network layers. Specifically, we borrow the prior knowledge from Digital Image Processing and take the edge of the image as input to enhance the model attention at the boundary of anatomies and improve the generalization performance on unknown target domains. Extensive experiments on three typical medical image segmentation datasets, which cover cross-sequence, cross-center, and cross-modality settings with various anatomical structures, demonstrate that our method achieves superior generalization performance compared to the state-of-the-art SDG methods. The code is available at \href{https://github.com/thinkdifferentor/EGSDG}{https://github.com/thinkdifferentor/EGSDG}.
\end{abstract}

\begin{keywords}
Domain Generalization, Transfer Learning, Medical Image Segmentation.
\end{keywords}

% ---------------------------------------------------------
\section{Introduction}

Medical image segmentation is a crucial task in clinical applications. In recent years, deep segmentation networks have achieved remarkable progress \cite{Butoi_2023_ICCV,isensee2021nnu}. However, when there is a domain shift between the training and testing data, caused by differences in scanning technique, acquisition parameters, device manufacturers, etc., the performance of data-driven deep models degrades dramatically. Recently, many efforts of Domain Generalization (DG) and Unsupervised Domain Adaptation (UDA) have been made to improve the model's generalization ability on the target domain \cite{su2023rethinking, feng2023unsupervised}. Further, domain generalization can be divided into Multi-source Domain Generalization (MDG) and Single-source Domain Generalization (SDG). On the one hand, MDG models \cite{dou2019domain,li2019episodic} are designed with multiple source domains to learn domain-invariant representations, and they may not work when there is only a single source domain. On the other hand, the high cost of medical image labeling and the strict regulations of privacy protection make it difficult to obtain large amounts of medical data. Besides, previous UDA works \cite{feng2023unsupervised,chen2019synergistic} require retraining when receiving the samples from a new data distribution, which leads to the high cost of model deployment. To this end, we take ``\textit{Train Once, Deploy Anywhere}'' as our objective and consider a challenging but practical setting: single-source domain generalization.

\begin{figure}[t]
\centering
\begin{minipage}[t]{0.295\linewidth}
\includegraphics[width=0.9\linewidth]{figs/edge_brats_single.png}
\end{minipage}
\begin{minipage}[t]{0.695\linewidth}
\includegraphics[width=0.99\linewidth]{figs/framework.png}
\end{minipage}
\caption{Example case with corresponding Sobel gradient map of BraTS'19, which can be used as domain-invariant information to guide the training process (left). Overview of our proposed \textit{Edge-Guided Single-source Domain Generalization} (EGSDG) for medical image segmentation (right).}
\label{fig:framework}
\end{figure}

For segmentation tasks, models often make inaccurate predictions at target boundaries. This weakness is more pronounced for domain generalization or adaptation segmentation tasks in medical images due to the domain gap and ambiguous boundary of anatomies. Recently, some works have proposed corresponding solutions and made significant progress \cite{liu2022single, you2023learning, feng2023unsupervised}. However, there are several limitations to them. First, they do not directly take the image edge as the model input, which weakens the supervision of edge information during the training process. Second, they acquire the image edge or shape priors by learning, which will take lots of training time. Based on these insights, we employ the edge detection algorithm to get image edge maps and use them as input to train the model directly. This effectively filters out domain-specific information and significantly improves the generalization ability. The major contributions of this work are as follows: (i) We make a comprehensive analysis of the impact of image edge on the model generalization ability, including the positions of edge supervision signals, such as shallow, deep, or output layers; the fusion strategies of edge map and feature map, such as concatenating it with shallow or deep features and directly as network input. (ii) For the challenging yet essential SDG problem of medical image segmentation, we propose a simple yet effective approach, EGSDG, which significantly improves the generalization performance on unknown target domains. (iii) We conducted extensive experiments on three typical medical image segmentation datasets that cover various anatomical structures. With only a single source domain, our method achieves superior generalization performance on the unknown target domain compared to the state-of-the-art SDG methods.

% \begin{itemize}
% 		\item[$\bullet$] We make a comprehensive analysis to the impact of image edge on the model generalization ability, including the positions of edge supervision signals, such as shallow, deep, or output layers; the fusion strategies of edge map and feature map, such as concatenating it with shallow or deep features and directly as network input.
% 		\item[$\bullet$] For the challenging yet essential SDG problem of medical image segmentation, we propose a simple yet effective approach, which significantly improves the generalization performance on unknown target domains.
% 		\item[$\bullet$] We conducted extensive experiments on three typical medical image segmentation datasets that cover various anatomical structures. With only a single-source domain, our method achieves superior generalization performance on the unknown target domain compared to the state-of-the-art SDG methods.
% \end{itemize}


% ---------------------------------------------------------
\section{Related Works}

\subsection{SDG of Natural Images}

SDG models of natural images can be divided into two mainstream methods: (1) \textit{the image-level method}, which improves the model generalization by data augmentation with the help of existing large datasets (e.g., ImageNet) \cite{Yue2019DomainRA, Lee2022WildNetLD}, and (2) \textit{the feature-level method}, which aims to learn domain-invariant segmentation network by removing the style information of feature maps with normalization or whitening strategy \cite{Choi2021RobustNetID, Peng2022SemanticAwareDG}. However, these models may not work well on grayscale medical images, because there are significant differences in texture, structure, and data privacy policies between medical and natural images.

%-------------------------------------------------------------------------
\subsection{SDG of Medical Images}

In medical images, there are fewer works on the SDG segmentation task compared to natural images. Most of these models \cite{liu2022single, ouyang2022causality, su2023rethinking} conduct data augmentation on the source domain to improve the model’s robustness. Different from the previous works, we introduce an edge-guided model, which filters the domain-specific information effectively and improves the generalization ability significantly.

%-------------------------------------------------------------------------
\subsection{Edge-Guided Methods}

Recently, many efforts of edge-guided methods have been made to raise attention to the segmentation boundary and improve the generalization ability of models. Cardace et al. \cite{Cardace2021ShallowFG} presented a novel low-level adaptation strategy with semantic edges and displacement maps from shallow features to obtain sharp predictions. CIConv \cite{Lengyel2021ZeroShotDD} exploited a visual inductive prior derived from physics-based reflection models and cast a number of color-invariant edge detectors as trainable layers for domain adaptation. In contrast to existing methods, we utilize the edge detector to extract edge maps of images and take them as input to train the model directly.



% ---------------------------------------------------------
\section{Methodology}

\subsection{Preliminaries}

Edge detectors significantly filter out useless information, while preserving the important structural properties of an image. There are a large number of edge detection algorithms available, each designed to be sensitive to specific types of edges like edge orientation, noise environment, and edge structure. We take the most classic ones for exploring, including Canny \cite{canny1986computational}, AutoCanny \cite{rong2014improved}, Roberts \cite{Roberts1963MachinePO}, Prewitt \cite{prewitt1970object}, Sobel \cite{kittler1983accuracy}, and Laplacian \cite{lecun1998gradient}. For an image, its gradient at $(x,y)$ is defined as the vector $\nabla f(x,y)$, which is composed of the partial derivatives of the image in the $x$ and $y$ directions:
\begin{equation}
\nabla f(x,y) =
\left[ G_x, G_y \right] = \left[ \frac{\partial f}{\partial x}, \frac{\partial f}{\partial y} \right].
\label{eq:gradient}
\end{equation}
The modulus and direction of the gradient are defined by:
\begin{equation}
\left| \nabla f(x,y) \right| = \sqrt{G_x^2+G_y^2}, \quad \theta(x,y)=\arctan\left(\frac{G_y}{G_x}\right).
\label{eq:modulus and direction}
\end{equation}
For the Laplacian, the second derivative is defined as:
\begin{equation}
\nabla^2 f(x,y) = \frac{\partial^2 f(x,y)}{\partial x^2} +
\frac{\partial^2 f(x,y)}{\partial y^2}.
\label{eq:second derivative}
\end{equation}
Note that digital images are discrete, and different edge detection algorithms differ in how $G_x$ and $G_y$ are calculated; the details are provided in Appendix \ref{appendix:A}. The details of the Canny and AutoCanny algorithms can be found in Appendix \ref{appendix:B} and \cite{rong2014improved}. Compared to the Canny algorithm, AutoCanny does not need to manually set Gaussian smoothing parameters and the double thresholds.


%-------------------------------------------------------------------------
\subsection{Problem Definition}
In single-source domain generalization, we are given a single source domain $D^s=\left \{(x_i^s,y_i^s )\right \}_{i=1}^{N_s}$, where $s$ represents the domain ID, $x_i^s \in \mathbb{R}^{H \times W \times 3}$ is the $i$-th image in the source domain $s$, $y_i^s \in \mathbb{R}^{H \times  W}$ is the corresponding ground truth mask, and $N_s$ is the total number of samples. Given an unseen target domain $D^t=\left \{ (x_i^t,y_i^t) \right \}_{i=1}^{N_t}$, which is not accessible during the training process, we aim to minimize the error between prediction $\hat y_i^t$ and ground truth mask $y_i^t$.



%-------------------------------------------------------------------------
\subsection{Edge-Guided SDG}

Edge or gradient information is one of the most important features of an image. The edge of an image is domain-invariant, which can reduce the domain shift between the source and target domain in all network layers \cite{Lengyel2021ZeroShotDD}. Different from previous works, such as TASD \cite{liu2022single}, which establishes dictionary learning to extract explicit shape priors, and CIConv \cite{Lengyel2021ZeroShotDD}, which is derived from the complex Kubelka-Munk theory to build a learnable edge detector layer, our model borrows from classical edge detection algorithms, with less computational complexity and more stable performance. A visual comparison of classical edge detection algorithms and CIConv is given in Appendix \ref{appendix:C}.

In addition, data augmentation can enrich the gradient information of the training samples, which will bring huge performance gains to our edge-guided model's generalization ability (see Appendix \ref{appendix:D} for details). For medical images, we expect to map the source image to diverse grayscale value distribution and keep the appearance of the anatomic structures perceivable at the same time. Motivated by Model Genesis \cite{Zhou2019ModelsGG}, we employ the Bézier Curve \cite{Mortenson1999MathematicsFC} as our data augmentation method, which is generated from two end points ($P_0$ and $P_3$) and two control points ($P_1$ and $P_2$), defined as:
\begin{equation}
B(t)=\sum_{i=0}^{n}\binom{n}{i}P_i(1-t)^{n-i}t^i,n=3,t\in [0,1]
\label{eq:BézierCurve}
\end{equation}
where $t$ is a fractional value along the length of the line.

The learning process of our EGSDG is shown in \figureref{fig:framework}. Firstly, we perform the BézierCurve data augmentation on source samples ($x_i^s \in \mathbb{R}^{H \times W \times 3}$) before the training stage. Then, the edge detector is exploited to extract the edge maps ($e_i^s \in \mathbb{R}^{H \times W}$) of the augmented samples. Finally, we take the edge maps $e_i^s$ as input to train the segmentation network $\phi^w$ with parameters $w$ by minimizing cross-entropy loss:
\begin{equation}
\mathcal{L}_{ce}(\phi^w;D^{s})=- {\textstyle \sum_{i}\left\langle y_i^s,\log(\phi^w(e_i^s)) \right\rangle}
\label{eq:cross-entropy}
\end{equation}
We use the edge detector to compress the image into a single-channel edge map. It effectively filters domain-specific information and trains a model with high generalization performance. The network locates the segmentation object by the gradient change (Roberts, Prewitt, Sobel, and Laplacian) or the anatomy contour (Canny and AutoCanny) of the target area.



\begin{table}[t]
\centering
\caption{Quantitative comparison of different methods on BraTS'19 (left) and Prostate (right) datasets. Note that CIConv* indicates training with BézierCurve augmented dataset and the result of SADN is reported by that method on BraTS'18 dataset.}
\begin{minipage}{0.435\linewidth}
\resizebox{.85\linewidth}{!}{    
\begin{tabular}{l|cccc}
% \hline
\toprule[1.5pt]
\multicolumn{1}{c|}{}                                  & \multicolumn{4}{c}{\textbf{Source Domain: T2}}                                                                                                                    \\
\multicolumn{1}{c|}{\multirow{-2}{*}{\textbf{Method}}} & \multicolumn{1}{l}{\textbf{T1}}        & \multicolumn{1}{l}{\textbf{T1ce}}      & \multicolumn{1}{l}{\textbf{Flair}}     & \multicolumn{1}{l}{\textbf{Avg.}}   \\ \hline
\textbf{Upper Bound}                                   & 74.42                                  & 71.64                                  & 82.75                                  & 76.27                                  \\
\textbf{Lower Bound}                                   & 13.82                                  & 11.58                                  & 66.61                                  & 30.67                                  \\ \hline
\textbf{IBN-Net}                                       & 34.37                                  & 48.27                                  & 42.33                                  & 41.66                                  \\
\textbf{SW}                                            & 31.83                                  & 40.48                                  & 34.95                                  & 35.75                                  \\
\textbf{RobustNet}                                     & 8.59                                   & 10.14                                  & 68.29                                  & 29.01                                  \\
\textbf{SADN}                                          & 49.36                                  & 38.09                                  & 75.87                                  & 54.44                                  \\
\textbf{CSDG}                                          & 46.76                                  & 44.99                                  & 60.20                                  & 50.65                                  \\ 
\textbf{CIConv}                                        & 15.36                                  & 20.83                                  & {\ul 76.07}                          & 37.42                                  \\ 
\textbf{CIConv*}                       & {\ul 53.82}                      & {\ul 52.69}                           & 74.05                                  & {\ul 60.19}                    \\ \hline
\textbf{EGSDG w/o Aug.}                & 51.38                                  & 50.35                                  & 71.63                                  & 57.79  \\
\textbf{EGSDG w/ Aug.}                              & \textbf{62.59} & \textbf{54.68} & \textbf{77.07} & \textbf{64.78} \\
% \hline
\bottomrule[1.5pt]
\end{tabular}}
\end{minipage}
\begin{minipage}[t]{0.547\linewidth}
\resizebox{.99\linewidth}{!}{
\begin{tabular}{l|cccccc}
% \hline
\toprule[1.5pt]
\multicolumn{1}{c|}{}                                  & \multicolumn{6}{c}{\textbf{Source Domain: Site B}}                                                                                                                              \\
\multicolumn{1}{c|}{\multirow{-2}{*}{\textbf{Method}}} & \textbf{Site A}               & \textbf{Site C}               & \textbf{Site D}               & \textbf{Site E}               & \textbf{Site F} & \textbf{Avg.}              \\ \hline
\textbf{Upper   Bound}                                 & 89.13                         & 89.96                         & 89.31                         & 87.76                         & 89.34           & 89.10                         \\
\textbf{Lower Bound}                                   & 63.62                         & 19.42                         & 81.06                         & 83.89                         & 71.17           & 63.83                         \\ \hline
\textbf{IBN-Net}                                       & 67.36                         & 46.79                         & 65.09                         & 71.45                         & 76.88           & 65.51                         \\
\textbf{SW}                                            & 70.83                         & 51.71                         & 70.89                         & 51.96                         & 68.97           & 62.87                         \\
\textbf{RobustNet}                                        & 73.27 & 55.04 & 77.41 & 54.79 & 70.21           & 66.14 \\
\textbf{CSDG}                                          & 69.75                         & 61.47                         & 74.27                         & 76.31                   & 70.54           & 70.47                         \\ 

\textbf{CIConv}                                        & 73.48                       & {\ul 63.51 }                        & 80.80                         & 62.15                         & 74.93           & 70.97                         \\ 
\textbf{CIConv*}                                       & {\ul 76.41}                   & 59.74                         & 76.63                         & \textbf{78.10}                &  77.17     & {\ul 73.61}                         \\ \hline 
\textbf{EGSDG w/o Aug.}                            & 72.70                                  & 59.54                                  & \textbf{83.00}                                  & 70.36                                  & \textbf{81.11}                                  & 73.34  \\
\textbf{EGSDG w/ Aug.}                            & \textbf{78.51}                & \textbf{64.16}                         & {\ul 82.95}                   & { \ul 77.34}                & {\ul 78.20}  & \textbf{76.23}                \\
% \hline
\bottomrule[1.5pt]
\end{tabular}}
\end{minipage}
\label{tab:tab1}
\end{table}

\begin{table}[t]
\centering
\caption{Quantitative comparison of different methods on MMWHS dataset. Note that CIConv* indicates training with the BézierCurve augmented dataset and the result of SADN is reported by that method.}
\resizebox{.8\linewidth}{!}{
\begin{tabular}{l|ccccc|ccccc}
% \hline
\toprule[1.5pt]
\multicolumn{1}{c|}{}                                  & \multicolumn{5}{c|}{\textbf{Source Domain: MRI}}                                                                                                                                                          & \multicolumn{5}{c}{\textbf{Source Domain: CT}}                                                                                                                                                          \\
\multicolumn{1}{c|}{\multirow{-2}{*}{\textbf{Method}}} & \textbf{AA}                            & \textbf{LAC}                           & \textbf{LVC}                           & \textbf{MYO}                           & \multicolumn{1}{l|}{\textbf{Avg.}} & \textbf{AA}                            & \textbf{LAC}                        & \textbf{LVC}                           & \textbf{MYO}                           & \textbf{Avg.}                       \\ \hline
\textbf{Upper   Bound}                                 & 89.74                                  & 84.99                                  & 87.44                                  & 83.34                                  & 86.38                                 & 80.76                                  & 82.29                               & 92.38                                  & 78.23                                  & 83.42                                  \\
\textbf{Lower   Bound}         & 32.18                                  & 35.92                                  & 19.53                                  & 9.42                                   & 24.26                                 & 18.44          & 8.84        & 38.72          & 9.65           & 18.91          \\ \hline
\textbf{IBN-Net}                                       & 59.04                                  & 67.63                                  & 67.34                            & 45.49                                  & 59.88                                 & 31.23                                  & 42.36                               & 59.91                                  & 34.63                                  & 42.03                                  \\
\textbf{SW}                                            & 52.94                                  & 69.52                                  & 64.28                                  & 44.64                                  & 57.84                                 & 38.95                                  & 47.62                               & 62.82                                  & 33.30                                  & 45.67                                  \\
\textbf{RobustNet}                                        & 68.07                            & 74.68                            & 62.56                                  & 46.09                                  & 62.85                                 & 52.27                            & \textbf{60.08}                      & 67.26                            & 32.97                                  & {\ul 53.14}                            \\
\textbf{SADN}                                          & 51.42          & 50.20          & 52.86          & 52.31                            & 51.70         & 33.38                                  & 31.65                               & 33.29                                  & 30.45                                  & 32.19                                  \\
\textbf{CSDG}                                          & 66.91                                  & 68.06                                  & 64.43                                  & 52.24                                  & 62.91                           & 37.10                                  & 51.76                               & 70.64                                  & { \ul 41.38}                                  & 50.22                                  \\
\textbf{CIConv}                                        & 67.42                                  & 70.83                                  & 65.19                                  & 42.77                                  & 61.55                           & 45.40                                  & 45.38                               & 57.08                                  & 32.44                                  & 45.08                                  \\ 
\textbf{CIConv*}                                       & \textbf{78.38}                                  & { \ul 75.67}                                  & {\ul 69.33}                                  & 55.92                                  & {\ul 69.83}                           & 45.75                                  & 50.64                               & {\ul 71.93}                                  & 35.33                                  & 50.91                                  \\ \hline
\textbf{EGSDG w/o Aug.}  & {\ul 73.67}               & 72.45    & 57.31     & {\ul 57.42}    & 65.21   & {\ul 54.11} & 53.41 & 62.74 & 32.86   & 50.78 \\
\textbf{EGSDG w/ Aug.}                            &  73.45 & \textbf{78.48} & \textbf{71.94} & \textbf{60.13} & \textbf{71.00} & \textbf{55.14} & {\ul 57.34} & \textbf{72.50} & \textbf{45.84}    & \textbf{57.71} \\
% \hline
\bottomrule[1.5pt]
\end{tabular}}
\label{tab:tab2}
\end{table}



% ---------------------------------------------------------
\section{Experiments and Results}
\subsection{Experimental Setup}



\textbf{Datasets and Preprocessing}
In our experiments, we employ three datasets for evaluation: the cross-sequence brain tumor segmentation dataset (BraTS’19, T2 as source domain) \cite{Menze2015TheMB}, the cross-center prostate dataset (Prostate, Site B as source domain) \cite{liu2020ms}, and the cross-modality cardiac dataset (MMWHS, CT and MRI as source domain respectively) \cite{Zhuang2016MultiscalePA}. In particular, we first shuffle all the volumes and divide them into four equal parts for each sequence to prevent ground-truth leakage, because the mask of each case is shared by the four sequences in BraTS’19. Details are given in Appendix \ref{appendix:E}. 

\noindent\textbf{Network and Training Details} 
Following CSDG \cite{ouyang2022causality}, we utilize U-Net \cite{Ronneberger2015UNetCN} with an EfficientNet-b2 \cite{Tan2019EfficientNetRM} backbone as our segmentation model and implement our model with the PyTorch framework on one NVIDIA TITAN XP GPU. We use Adam optimizer \cite{kingma2014adam} with an initial learning rate of $3\times10^{-4}$ and batch size of 8 to train the model. For all experiments, the learning rate is decayed according to the polynomial rule for stable training.

\noindent\textbf{Evaluation Metrics} 
We take the Dice coefficient (Dice) as our evaluation metric, which measures the overlapping ratio between prediction and ground truth. The higher the Dice value, the better the segmentation performance.


\begin{figure}[t]
\centering
\begin{minipage}[t]{0.495\linewidth}
\includegraphics[width=0.95\linewidth]{figs/visualization_brats_mmwhs.png}
\end{minipage}
\begin{minipage}[t]{0.495\linewidth}
\centering
\includegraphics[width=0.97\linewidth]{figs/visualization_prostate.png}
% \caption{Qualitative comparison of the Prostate samples segmentation (best viewed in color).}
% \label{fig:visualization_prostate}
\end{minipage}
\caption{Qualitative comparison of BraTS'19 (top) and MMWHS (bottom) samples segmentation (left) and Prostate samples segmentation (right). MRI means CT$\rightarrow$MRI domain generalization and CT means MRI$\rightarrow$CT domain generalization.}
% \label{fig:visualization_brats_mmwhs}
\label{fig:visualization_brats_mmwhs_prostate}
\end{figure}



%-------------------------------------------------------------------------
\subsection{Comparison Experiments}
\label{sec:4.2}
We compare our method with SOTA single-source domain generalization methods including IBN-Net \cite{Pan2018TwoAO}, SW \cite{Pan2019SwitchableWF}, RobustNet \cite{Choi2021RobustNetID}, SADN \cite{zhou2022generalizable}, CSDG \cite{ouyang2022causality}, and CIConv \cite{Lengyel2021ZeroShotDD}. For a fair comparison, we employ the same segmentation network to train the CIConv model. Besides, we also provide the results without domain generalization by directly applying the model learned in the source domain to unknown target domains (Lower Bound) and with supervised training on the target domain (Upper Bound). In addition, the comprehensive comparison between CIConv and ours is given in Appendix \ref{appendix:F}.

\tableref{tab:tab1} and \tableref{tab:tab2} report the comparison results on the BraTS’19, Prostate, and MMWHS datasets respectively. Overall, our model outperforms others, especially in the large distribution shift datasets (BraTS’19 and MMWHS). For the normalization and whitening-based models (IBN-Net, SW, and RobustNet), which are designed for natural images, the performance is significantly lower than our model in each evaluation dataset. For the data augmentation-based methods (SADN and CSDG), the performance is unstable on different datasets. The distribution of grayscale values varies across different datasets and the level of domain shift varies among different SDG segmentation settings, so the augmented samples fail to cover the unseen target domain distribution on specific tasks, which leads to poor generalization performance. In addition, the performance of CIConv is lower than our model on all three datasets. Qualitative examples are shown in \figureref{fig:visualization_brats_mmwhs_prostate}. As we can see, our model can produce accurate and sharp predictions at the boundary of anatomies. Enlarged qualitative results are provided in Appendix \ref{appendix:G}.


%-------------------------------------------------------------------------
\subsection{Edge-guided Strategy}

To enhance the model's attention at the boundary of targets, previous works \cite{Lengyel2021ZeroShotDD, liu2021feddg, liu2022single, you2023learning} have tried different strategies. Here, we conduct a comprehensive analysis of the impact of the image edge on the model's generalization ability. We employ the vanilla U-Net \cite{Ronneberger2015UNetCN} as the segmentation network, which is borrowed from \href{https://github.com/milesial/Pytorch-UNet}{Pytorch-UNet}. Other configurations are the same as in the main experiment. The visualization of different edge-guided strategies and corresponding explanations are provided in \figureref{fig:ablations}. These edge-guided strategies can be divided into two categories: (i) using the edge map as the supervision signal to increase the model's attention at the boundary of segmentation targets, and (ii) concatenating the feature map and edge map to force the model to learn domain-invariant representations and enhance its generalization ability. Note that we apply the same processing at the testing stage for the second-category experiments (Exp. 1, 2, 3, and 4).

\tableref{tab:tab3} and \tableref{tab:tab4} report the comparison results on the BraTS'19, Prostate, and MMWHS datasets. Overall, adopting the edge map as an additional guided signal can improve model generalization performance compared to the Lower Bound. Notably, employing the image edge as input to train the model directly brings tremendous generalization ability gains on the three datasets. We note that (i) for the former, the performance is lower than ours, probably because this category of strategies weakens the supervision of the edge information via the segmentation head at the training stage, and (ii) for the latter, the performance is lower than ours, possibly due to the grayscale information making the model overfit on the source domain.

\begin{figure}[t]
\centering
\includegraphics[width=0.7\linewidth]{figs/ablations.png}
\caption{Visualization of different edge-guided strategies with vanilla U-Net framework. Exp. 1 means taking the image edge as input. Exp. 2 means concatenating the image and edge map as input. Exp. 3 means concatenating the feature map $X_{En}^1$ and the edge map as the next layer's input. Exp. 4 means concatenating the feature map $X_{De}^1$ and the edge map as the segmentation layer's input. Exp. 5 means employing the image edge as the supervision of feature map $X_{En}^1$ with a single Conv2d segmentation layer. Exp. 6 means employing the image edge as the supervision of feature map $X_{De}^1$ with a single Conv2d segmentation layer. Exp. 7 means employing the image edge as the supervision of feature map $X_{De}^2$ with a single Conv2d segmentation layer. AutoCanny is employed in all experiments.}
\label{fig:ablations}
\end{figure}

\begin{table}[t]
\centering
\caption{Performance of different edge-guided strategies on BraTS'19 (left) and Prostate (right) with vanilla U-Net.}
\begin{minipage}[t]{0.435\linewidth}
\resizebox{.86\linewidth}{!}{
\begin{tabular}{l|cccc}
\toprule[1.5pt]
% \hline
\multicolumn{1}{c|}{}                                       & \multicolumn{4}{c}{{\color[HTML]{000000} \textbf{Source Domain: T2}}} \\
\multicolumn{1}{c|}{\multirow{-2}{*}{\textbf{Experiments}}} & \textbf{T1}   & \textbf{T1ce}   & \textbf{Flair}  & \textbf{Avg.}  \\ \hline
\textbf{Lower   Bound}              & 10.72         & 5.86            & 58.34           & 24.97             \\
\textbf{Exp. 1 (Ours)}              & 37.86         & 39.95           & 51.10           & \textbf{42.97}    \\
\textbf{Exp. 2}                                             & 11.65         & 11.42           & 61.02           & 28.03             \\
\textbf{Exp. 3}                                             & 13.90         & 11.37           & 58.88           & 28.05             \\
\textbf{Exp. 4}                                             & 11.58         & 9.22            & 59.48           & 26.76             \\
\textbf{Exp. 5}                                             & 10.14         & 9.35            & 64.84           & {\ul 28.11}       \\
\textbf{Exp. 6}                                             & 13.28         & 10.57           & 58.93           & 27.59             \\
\textbf{Exp. 7}                                             & 10.60         & 8.92            & 64.48           & 28.00             \\ 
% \hline
\bottomrule[1.5pt]
\end{tabular}}
\end{minipage}
\begin{minipage}[t]{0.547\linewidth}
\resizebox{.99\linewidth}{!}{
\begin{tabular}{l|cccccc}
% \hline
\toprule[1.5pt]
\multicolumn{1}{c|}{}                                       & \multicolumn{6}{c}{{\color[HTML]{000000} \textbf{Source Domain: Site B}}}                                  \\
\multicolumn{1}{c|}{\multirow{-2}{*}{\textbf{Experiments}}} & \textbf{Site A} & \textbf{Site C} & \textbf{Site D} & \textbf{Site E} & \textbf{Site F} & \textbf{Avg.} \\ \hline
\textbf{Lower   Bound}              & 42.25           & 25.79           & 59.91           & 14.88           & 37.12           & 35.99            \\
\textbf{Exp. 1 (Ours)}              & 47.79           & 31.39           & 48.68           & 52.01           & 51.34           & \textbf{46.24}   \\
\textbf{Exp. 2}                                             & 58.65           & 42.94           & 46.88           & 12.81           & 31.87           & 38.63            \\
\textbf{Exp. 3}                                             & 38.31           & 17.44           & 61.78           & 21.68           & 41.35           & 36.11            \\
\textbf{Exp. 4}                                             & 32.33           & 24.74           & 43.97           & 32.14           & 36.14           & 33.86            \\
\textbf{Exp. 5}                                             & 48.3            & 37.14           & 66.96           & 20.48           & 35.02           & 41.58            \\
\textbf{Exp. 6}                                             & 43.56           & 25.73           & 78.45           & 20.3            & 55.23           & {\ul 44.65}      \\
\textbf{Exp. 7}                                             & 51.87           & 27.15           & 61.72           & 18.16           & 49.23           & 41.63            \\ 
% \hline
\bottomrule[1.5pt]
\end{tabular}}
\end{minipage}
\label{tab:tab3}
\end{table}



\begin{table}[t]
\centering
\caption{Performance of different edge-guided strategies on MMWHS with vanilla U-Net.}
\vspace{3mm}
\resizebox{.8\linewidth}{!}{
\begin{tabular}{l|ccccc|ccccc}
% \hline
\toprule[1.5pt]
\multicolumn{1}{c|}{}                                       & \multicolumn{5}{c|}{{\color[HTML]{000000} \textbf{Source Domain: MRI}}}                                                                          & \multicolumn{5}{c}{\textbf{Source Domain: CT}}                              \\
\multicolumn{1}{c|}{\multirow{-2}{*}{\textbf{Experiments}}} & \textbf{AA}                   & \textbf{LAC}                  & \textbf{LVC}                  & \textbf{MYO}                  & \textbf{Avg.} & \textbf{AA} & \textbf{LAC} & \textbf{LVC} & \textbf{MYO} & \textbf{Avg.} \\ \hline
\textbf{Lower   Bound}              & 22.60                         & 36.71                         & 23.51                         & 13.08                         & 23.98            & 12.84       & 37.94        & 23.44        & 5.45         & 19.92            \\
\textbf{Exp. 1 (Ours)}              & 63.37                         & 64.05                         & 39.76                         & 38.50                          & \textbf{51.42}   & 43.74       & 52.79        & 60.39        & 34.59        & \textbf{47.88}   \\
\textbf{Exp. 2}                                             & 53.95                         & 51.45                         & 42.73                         & 31.10                          & {\ul 44.81}      & 23.78       & 38.40        & 36.22        & 14.02        & 28.10            \\
\textbf{Exp. 3}                     & 49.67 & 54.07 & 39.19 & 25.58 & 42.13            & 21.70       & 38.73        & 43.90        & 13.28        & 29.40            \\
\textbf{Exp. 4}                                             & 50.63                         & 53.15                         & 45.02                         & 25.90                          & 43.68            & 29.92       & 41.07        & 39.01        & 12.59        & 30.65            \\
\textbf{Exp. 5}                                             & 50.26                         & 52.04                         & 44.76                         & 29.86                         & 44.23            & 30.11       & 48.25        & 41.46        & 13.62        & {\ul 33.36}      \\
\textbf{Exp. 6}                                             & 50.14                         & 49.72                         & 34.54                         & 21.74                         & 39.03            & 25.52       & 38.94        & 44.98        & 11.54        & 30.24            \\
\textbf{Exp. 7}                                             & 57.50                         & 52.66                         & 36.08                         & 26.36                         & 43.15            & 19.80       & 46.68        & 25.99        & 10.41        & 25.72            \\ 
% \hline
\bottomrule[1.5pt]
\end{tabular}}
\label{tab:tab4}
\end{table}








% ---------------------------------------------------------
\section{Conclusion and Discussion}
In this work, we use the edge of the image as input to train a network. It improves the model's generalization performance significantly on unknown target domains. Extensive experiments on three typical medical image segmentation datasets demonstrate that our approach achieves superior generalization performance compared to the state-of-the-art SDG methods. However, our model has the following limitations: (i) The optimal edge extractor is different in diverse segmentation scenarios, which brings great challenges to choosing the best one for an unseen dataset. (ii) In low-contrast images (like Ultrasound or CT), the model cannot extract valuable edge information well, which may lead to poor segmentation performance. (iii) When the segmentation target is small (like multiple sclerosis lesions or the cochlea), the extracted edge information may be similar to the surrounding noise, which will lead to wrong segmentation results. In addition, there are limitations in the experimental setup of BraTS'19: because different sequences focus on different structures, the tumor may not be well visible in a specific modality.





% ---------------------------------------------------------
\bibliography{midl24_313} % reference




% ---------------------------------------------------------
\appendix


% ---------------------------------------------------------
\newpage
\section{Edge detector operators}
\label{appendix:A}

Digital images are discrete, and different edge detection algorithms differ in how $G_x$ and $G_y$ are calculated. Different edge detector operators are given in \figureref{fig:operators}.

\begin{figure}[htbp]
  \centering
   \includegraphics[width=0.7\linewidth]{figs/operators.png}
   \caption{Overview of classic edge detection algorithms' definitions and corresponding operators of the partial derivative of the image in the X and Y directions.}
   \label{fig:operators}
\end{figure}



% ---------------------------------------------------------
\newpage
\section{Details and configuration of Canny algorithm}
\label{appendix:B}

\subsection{Detail of Canny algorithm}
The steps of the Canny algorithm \cite{canny1986computational} include Image Smoothing, Gradient Calculation, Non-maximum Suppression, and Edges Checking.

\textbf{Image Smoothing} A Gaussian filter is used to smooth the image and suppress noise. It is defined as:
\begin{equation}
  % G(x,y)=\frac{1}{2\pi\sigma^2}e^{-\frac{x^2+y^2}{2\sigma^2} } 
  G(x,y)=\frac{1}{2\pi\sigma^2}\exp\left(-\frac{x^2+y^2}{2\sigma^2}\right),
  \label{eq:Gaussian}
\end{equation}
where $\sigma$ stands for the size of the Gaussian Kernel, which controls the extent of smoothing the image. This critical parameter needs to be set manually based on experience.

\textbf{Gradient Calculation} The traditional Canny algorithm adopts a finite difference over a $2\times2$ neighboring area to calculate the magnitude and direction of the image gradient. The operators of the partial derivatives of the image in the X and Y directions are defined by:
\begin{equation}
  G_x = \begin{bmatrix}
 -1 & 1 \\
 -1 & 1
\end{bmatrix}
,
  G_y = \begin{bmatrix}
 1 & 1 \\
 -1 & -1
\end{bmatrix}
  \label{eq:canny_operator}
\end{equation}

\textbf{Non-maximum Suppression} After acquiring the gradient magnitude image, non-maximum suppression (NMS) is performed on the image to accurately locate edges. The process of NMS helps guarantee that each edge is one pixel wide.

\textbf{Edges Checking} Canny adopts a double-threshold method to select edge points after performing non-maximum suppression. The pixels whose gradient magnitude is above the high threshold will be marked as edge points, those whose gradient magnitude is under the low threshold will be marked as non-edge points, and the rest will be marked as candidate edge points. Those candidate edge points that are connected with edge points will be marked as edge points. This method reduces the influence of noise on the final edge image. The low and high thresholds need to be set manually based on experience.


\subsection{Configuration of Canny algorithm}
We have manually designed the corresponding Canny detector parameters for each domain of each dataset. For all scenarios, the size of the Gaussian kernel is $3\times3$. The configuration of the double-threshold is provided in \tableref{tab:tab5}.


\begin{table}[htbp]
\centering
\caption{The configuration of Canny algorithm's double-threshold.}
\vspace{3mm}
\resizebox{.3\linewidth}{!}{
\begin{tabular}{llll}
% \hline
\toprule[1.5pt]
\multicolumn{1}{c}{\textbf{Dataset}} & \textbf{Domain} & \textbf{Low} & \textbf{High} \\ \hline
\multirow{4}{*}{\textbf{BraTS'19}}   & T2              & 40           & 80            \\
                                     & T1              & 20           & 60            \\
                                     & Flair           & 40           & 100           \\
                                     & T1ce            & 20           & 50            \\ \hline
\multirow{6}{*}{\textbf{Prostate}}   & Site A          & 50           & 200           \\
                                     & Site B          & 100          & 200           \\
                                     & Site C          & 50           & 150           \\
                                     & Site D          & 50           & 140           \\
                                     & Site E          & 20           & 40            \\
                                     & Site F          & 30           & 70            \\ \hline
\multirow{2}{*}{\textbf{MMWHS}}      & MRI             & 30           & 80            \\
                                     & CT              & 70           & 120           \\ 
% \hline
\bottomrule[1.5pt]
    \end{tabular}}
\label{tab:tab5}
\end{table}









% ---------------------------------------------------------
\newpage
\section{Visualization of edge detectors and CIConv}
\label{appendix:C}

The visual comparison of different classic edge detection algorithms \cite{canny1986computational, rong2014improved, Roberts1963MachinePO, prewitt1970object, kittler1983accuracy, lecun1998gradient} and CIConv \cite{Lengyel2021ZeroShotDD} is shown in \figureref{fig:detectors}. As we can see, there are large differences in the edge or gradient map extracted by different edge detection algorithms with the same image. Compared with CIConv, the classic edge detection algorithms can filter more useless information with less computation.

\begin{figure}[htbp]
  \centering
   \includegraphics[width=0.75\linewidth]{figs/edge_detectors.png}
   \caption{Visualization comparison of different edge detection algorithms and CIConv.}
   \label{fig:detectors}
\end{figure}


% ---------------------------------------------------------
% \newpage
\section{Edge-guided Model with Data Augmentation}
\label{appendix:D}

\subsection{Visualization of BézierCurve augmentation}
The visualization examples of BézierCurve augmentation are shown in \figureref{fig:augmentation_brats}. As introduced in the main text, this augmentation method maps the source image to diverse grayscale value distribution and keeps the appearance of the anatomic structures perceivable at the same time.

\begin{figure}[htbp]
  \centering
   \includegraphics[width=0.75\linewidth]{figs/augmentation_brats.png}
   \caption{Visualization of the generated BézierCurve and the corresponding augmented image on the BraTS'19 samples.}
   \label{fig:augmentation_brats}
\end{figure}

\subsection{Edge and gradient map of BézierCurve augmented image}
We observe that, for the same case, the edge or gradient maps extracted by the same edge detector differ across domains. Therefore, we use data augmentation to simulate the data distribution of the unknown target domain before edge extraction to enrich the gradient information of edge-guided training. The visualization of the BézierCurve augmented image and corresponding edge and gradient map is shown in \figureref{fig:edge_augmentation}.

\begin{figure}[htbp]
  \centering
  \includegraphics[width=0.8\linewidth]{figs/edge_augmentation.png}
   \caption{Visualization of augmented image and corresponding edge (AutoCanny) and gradient (Sobel) map.}
   \label{fig:edge_augmentation}
\end{figure}


\subsection{Results of Edge-guided models with BézierCurve}
\noindent\textbf{Edge Detector} Different edge detectors will extract distinct image edges or gradients for the same image, which affects the training process and testing performance. Accordingly, we conducted comprehensive comparison experiments on classical edge detectors, including  Canny \cite{canny1986computational}, AutoCanny \cite{rong2014improved}, Roberts \cite{Roberts1963MachinePO}, Prewitt \cite{prewitt1970object}, Sobel \cite{kittler1983accuracy}, and Laplacian \cite{lecun1998gradient}. 

\noindent\textbf{BézierCurve Augmentation} For edge-guided models, data augmentation is supposed to simulate the edge or gradient information of the unseen target samples to train a model with great generalization ability. To this end, we also explore the above edge-guided models on the BézierCurve \cite{Zhou2019ModelsGG} augmented samples, which is a simple idea to generate different styles by adjusting the gray value distribution of images. 

\tableref{tab:tab6} and \tableref{tab:tab7} show the results. As we can see, taking the edge as input promotes the generalization ability of the model remarkably and the BézierCurve can further improve its performance. We note that different edge extractors are sensitive to specific types of edges like edge orientation, noise environment, and edge structure. In different segmentation scenarios, the texture, intensity, and noise of medical images are diverse. This leads to the discrepancy between the valuable edge extraction and the irrelevant noise filtering by different edge extractors, which makes the optimal edge detector on each domain different. This will bring great challenges to choosing the best one for an unseen dataset.

% We note that on different datasets, the optimal edge detection algorithm is different, which will bring great challenges to choosing the best one for an unseen dataset.


\begin{table}[t]
\centering
\caption{Ablation study of edge detectors with original (first group) and BézierCurve augmented (second group\dag) samples on BraTS'19 (left) and Prostate (right).}
\begin{minipage}[t]{0.435\linewidth}
\resizebox{.85\linewidth}{!}{
\begin{tabular}{l|cccc}
% \hline
\toprule[1.5pt]
\multicolumn{1}{c|}{}                                         & \multicolumn{4}{c}{\textbf{Source Domain: T2}}                                                                                                                    \\
\multicolumn{1}{c|}{\multirow{-2}{*}{\textbf{Edge Detector}}} & \multicolumn{1}{l}{\textbf{T1}}        & \multicolumn{1}{l}{\textbf{T1ce}}      & \multicolumn{1}{l}{\textbf{Flair}}     & \multicolumn{1}{l}{\textbf{Avg.}}   \\ \hline
\textbf{Canny}                                                & 40.30                                  & 50.18                                  & 61.96                                  & 50.81                                  \\
\textbf{AutoCanny}                                            & 48.67                                  & 56.25                                  & 66.07                                  & 57.00                                  \\
\textbf{Roberts}                                              & 43.70                                  & 48.93                                  & 67.40                                  & 53.34                                  \\
\textbf{Prewitt}                                              & 50.28                                  & 50.31                                  & 72.38                                  & 57.66                                  \\ 
\textbf{Sobel}                                                & 51.38                                  & 50.35                                  & 71.63                                  & \textbf{57.79}  \\
\textbf{Laplacian}                                            & 31.39                                  & 43.39                                  & 61.86                                  & 45.55                                  \\ \hline
\textbf{Canny\dag}                                                & 56.58                                  & 53.48                                  & 62.39                                  & 57.48                                  \\
\textbf{AutoCanny\dag}                                            & 56.28                                  & 53.76                                  & 63.84                                  & 57.96                                  \\
\textbf{Roberts\dag}                                              & 58.66                                  & 55.60                                  & 68.84                                  & 61.03                                  \\
\textbf{Prewitt\dag}                                              & 55.26                                  & 55.82                                  & 72.59                                  & 61.22                                  \\
\textbf{Sobel\dag}             & 62.59            & 54.68             & 77.07                & \textbf{64.78}                            \\
\textbf{Laplacian\dag}                                            & 54.55          & 55.78          & 58.68          & 56.34             \\ 
% \hline
\bottomrule[1.5pt]
\end{tabular}}
\end{minipage}
\begin{minipage}[t]{0.547\linewidth}
\resizebox{.99\linewidth}{!}{
\begin{tabular}{l|cccccc}
% \hline
\toprule[1.5pt]
\multicolumn{1}{c|}{}                                         & \multicolumn{6}{c}{\textbf{Source Domain: Site B}}                                                                                                                                                                                                  \\
\multicolumn{1}{c|}{\multirow{-2}{*}{\textbf{Edge Detector}}} & \textbf{Site A}                        & \textbf{Site C}                        & \textbf{Site D}                        & \textbf{Site E}                        & \textbf{Site F}                        & \textbf{Avg.}                       \\ \hline
\textbf{Canny}                                                & 72.12                                  & 46.13                                  & 64.82                                  & 63.93                                  & 62.74                                  & 61.95                                  \\
\textbf{AutoCanny}                                            & 72.70                                  & 59.54                                  & 83.00                                  & 70.36                                  & 81.11                                  & \textbf{73.34}                                  \\
\textbf{Roberts}                                              & 68.04                                  & 49.73                                  & 75.93                                  & 71.82                                  & 78.79                                  & 68.86                                  \\
\textbf{Prewitt}                                              & 73.39                                  & 48.83                                  & 81.08                                  & 80.27                                  & 69.05                                  & 70.52                                  \\
\textbf{Sobel}                                                & 73.35          & 48.36          & 84.13          & 79.95          & 71.07                                  & 71.37          \\
\textbf{Laplacian}                                            & 73.48          & 50.19          & 81.20          & 79.92          & 81.74                                  & 73.31          \\ \hline
\textbf{Canny\dag}                                                & 66.28                                  & 56.55                                  & 58.59                                  & 70.29                                  & 66.43                                  & 63.63                                  \\
\textbf{AutoCanny\dag}         & 78.51          & 64.16           & 82.95              & 77.34       & 78.20    & \textbf{76.23} \\
\textbf{Roberts\dag}                                              & 75.23                                  & 57.62                                  & 80.43                                  & 79.45                                  & 71.26                                  & 72.80                                  \\
\textbf{Prewitt\dag}                                              & 71.40                                  & 56.59                                  & 76.44                                  & 79.50                                  & 77.95                                  & 72.38                                  \\
 
\textbf{Sobel\dag}                                                & 75.48                                  & 55.87                                  & 75.53                                  & 84.10                                  & 70.60                                  & 72.32                                  \\
 
\textbf{Laplacian\dag}                                            & 75.66                                  & 42.04                                  & 85.28                                  & 83.30                                  & 82.89                                  & 73.83                                  \\ 
% \hline
\bottomrule[1.5pt]
\end{tabular}}
\end{minipage}
\label{tab:tab6}
\end{table}




\begin{table}[t]
\centering
\caption{Ablation study of edge detectors with original (top) and BézierCurve augmented (bottom\dag) samples on MMWHS.}
\resizebox{.8\linewidth}{!}{
\begin{tabular}{l|ccccc|ccccc}
% \hline
\toprule[1.5pt]
\multicolumn{1}{c|}{}                                         & \multicolumn{5}{c|}{\textbf{Source Domain: MRI}}                                                                                                                      & \multicolumn{5}{c}{\textbf{Source Domain: CT}}                                                                                                                \\
\multicolumn{1}{c|}{\multirow{-2}{*}{\textbf{Edge Detector}}} & \textbf{AA}                   & \textbf{LAC}                  & \textbf{LVC}                  & \textbf{MYO}                  & \multicolumn{1}{l|}{\textbf{Avg.}} & \textbf{AA}                   & \textbf{LAC}                  & \textbf{LVC}                  & \textbf{MYO}                  & \textbf{Avg.}              \\ \hline
\textbf{Canny}                                                & 67.91                         & 69.87                         & 67.63                         & 50.15                         & 63.89                                 & 54.11 & 53.41 & 62.74 & 32.86                         & \textbf{50.78}                         \\
\textbf{AutoCanny}                    & 65.22                         & 71.51                         & 64.22                         & 51.58                         & 63.13                                 & 44.96 & 56.68 & 58.79 & 34.93 & 48.84 \\
\textbf{Roberts}                                              & 72.17                         & 70.72                         & 59.21                         & 55.41                         & 64.38                                 & 32.21                         & 49.15                         & 52.98                         & 18.96                         & 38.33                         \\
\textbf{Prewitt}                                              & 68.76                         & 70.82                         & 65.87                         & 51.67                         & 64.28                                 & 37.98                         & 49.31                         & 61.04                         & 21.92                         & 42.56                         \\
\textbf{Sobel}                                                & 73.67                         & 72.45                         & 57.31                         & 57.42                         & \textbf{65.21}                                 & 40.80                         & 54.55                         & 64.92                         & 21.94                         & 45.55                         \\
\textbf{Laplacian}                                            & 67.43 & 72.02 & 62.72 & 56.52                         & 64.67         & 36.27                         & 48.94                         & 74.07                         & 30.79                         & 47.52                         \\ \hline
\textbf{Canny\dag}                                                & 66.03                         & 73.65                         & 71.11                         & 52.69                         & 65.87                                 & 55.14 & 57.34 & 72.50 & 45.84 & \textbf{57.71} \\
\textbf{AutoCanny\dag}                                            & 70.63                         & 69.81                         & 67.15                         & 52.39                         & 65.00                                 & 52.89                         & 62.96                         & 65.15                         & 34.48                         & 53.87                         \\
\textbf{Roberts\dag}                                              & 72.04                         & 73.90                         & 65.43                         & 54.89                         & 66.57                                 & 46.38                         & 46.66                         & 65.67                         & 29.47                         & 47.04                         \\
\textbf{Prewitt\dag}                                              & 71.05                         & 75.04                         & 68.31                         & 55.07                         & 67.37                                 & 49.40                         & 54.18                         & 60.27                         & 32.12                         & 48.99                         \\
\textbf{Sobel\dag}                                                & 73.45 & 78.48 & 71.94 & 60.13 & \textbf{71.00}         & 35.57 & 52.98 & 60.45 & 31.96 & 45.24 \\
\textbf{Laplacian\dag}                                            & 70.74 & 70.23 & 64.51 & 54.79 & 65.07         & 49.99 & 49.18 & 67.71 & 32.65 & 49.88 \\ 
% \hline
\bottomrule[1.5pt]
\end{tabular}}
\label{tab:tab7}
\end{table}


\subsection{Results of Edge-guided models with RandConv}
To further validate the effectiveness of data augmentation for edge-guided models, we conducted an experiment on edge-guided models with RandConv \cite{Xu2020RobustAG}, which applies a transformation by randomly initializing the weights of the first convolution layer. \tableref{tab:tab10} reports the results on three datasets, which show that the performance improves compared to using only the edge-guided model. However, it is generally lower than that of edge-guided models trained with BézierCurve-augmented samples.

\begin{table}[htbp]
\centering
\caption{Results of edge-guided methods with RandConv-augmented samples on the BraTS'19 (left), Prostate (middle), and MMWHS (right) datasets.}
\begin{minipage}[t]{0.25\linewidth}
\resizebox{.99\linewidth}{!}{
\begin{tabular}{l|cccc}
% \hline
\toprule[1.5pt]
\multicolumn{1}{c|}{}                                                                                                  & \multicolumn{4}{c}{\textbf{Source Domain: T2}}                                                                                                  \\
\multicolumn{1}{c|}{\multirow{-2}{*}{\textbf{\begin{tabular}[c]{@{}c@{}}Edge Detector \end{tabular}}}} & \multicolumn{1}{l}{\textbf{T1}} & \multicolumn{1}{l}{\textbf{T1ce}} & \multicolumn{1}{l}{\textbf{Flair}} & \multicolumn{1}{l}{\textbf{Avg.}} \\ \hline
\textbf{Canny}     & 53.48                         & 53.00                         & 57.80                         & 54.76                         \\
\textbf{AutoCanny} & 38.85                         & 40.13                         & 65.22                         & 48.07                         \\
\textbf{Roberts}   & 51.43                         & 46.49                         & 67.82                         & 55.25                         \\
\textbf{Prewitt}   & 46.81                         & 51.46                         & 68.37                         & 55.55                         \\
\textbf{Sobel}     & 56.32                         & 51.90                         & 67.68                         & \textbf{58.63}                \\
\textbf{Laplacian} & 51.30                         & 53.67                         & 68.32                         & 57.76                         \\
% \hline
\bottomrule[1.5pt]
\end{tabular}}
\end{minipage}
\begin{minipage}[t]{0.37\linewidth}
\resizebox{.99\linewidth}{!}{
\begin{tabular}{l|cccccc}
% \hline
\toprule[1.5pt]
\multicolumn{1}{c|}{}                                                                                                  & \multicolumn{6}{c}{\textbf{Source Domain: Site B}}                                                                                                                                            \\
\multicolumn{1}{c|}{\multirow{-2}{*}{\textbf{\begin{tabular}[c]{@{}c@{}}Edge Detector\end{tabular}}}} & \textbf{Site A}               & \textbf{Site C}               & \textbf{Site D}               & \textbf{Site E}               & \textbf{Site F}               & \textbf{Avg.}              \\ \hline
\textbf{Canny}     & 57.30 & 52.65 & 58.57 & 58.73 & 45.65 & 54.58          \\
\textbf{AutoCanny} & 76.27 & 59.08 & 80.09 & 74.93 & 72.56 & \textbf{72.59} \\
\textbf{Roberts}   & 70.75 & 50.53 & 76.69 & 52.93 & 65.61 & 63.30          \\
\textbf{Prewitt}   & 62.70 & 52.44 & 54.53 & 54.46 & 46.41 & 54.11          \\
\textbf{Sobel}     & 73.72 & 53.78 & 75.41 & 64.95 & 54.90 & 64.55          \\
\textbf{Laplacian} & 65.99 & 60.79 & 61.20 & 56.81 & 32.22 & 55.40          \\
% \hline
\bottomrule[1.5pt]
\end{tabular}}
\end{minipage}
\begin{minipage}[t]{0.29\linewidth}
\resizebox{.99\linewidth}{!}{
\begin{tabular}{l|ccccc}
% \hline
\toprule[1.5pt]
\multicolumn{1}{c|}{}                                                                                                 & \multicolumn{5}{c}{\textbf{Source Domain: MRI}}                                                                                                                      \\
\multicolumn{1}{c|}{\multirow{-2}{*}{\textbf{\begin{tabular}[c]{@{}c@{}} Edge Detector\end{tabular}}}} & \textbf{AA}                   & \textbf{LAC}                  & \textbf{LVC}                  & \textbf{MYO}                  & \multicolumn{1}{l}{\textbf{Avg.}} \\ \hline
\textbf{Canny}     & 63.99                         & 70.52                         & 58.85                         & 52.10                         & 61.37          \\                         
\textbf{AutoCanny} & 68.18                         & 68.96                         & 59.79                         & 48.04                         & 61.24          \\                         
\textbf{Roberts}   & 69.44                         & 69.65                         & 57.90                         & 47.74                         & 61.18          \\                         
\textbf{Prewitt}   & 65.77                         & 65.04                         & 49.12                         & 55.01                         & 58.74          \\                         
\textbf{Sobel}     & 72.58                         & 69.04                         & 66.96                         & 57.96                         & \textbf{66.64} \\
\textbf{Laplacian} & 75.93                         & 68.83                         & 65.15                         & 55.59                         & 66.38          \\ 
% \hline
\bottomrule[1.5pt]
\end{tabular}}
\end{minipage}
\label{tab:tab10}
\end{table}


% ---------------------------------------------------------
\newpage
\section{Details of datasets and preprocessing}
\label{appendix:E}

BraTS'19 contains 335 cases, which were acquired with different clinical protocols and various scanners from multiple institutions. Each case is composed of four sequences of MR images (T2, T1, Flair, and T1CE). Because experts always annotate the whole tumor on T2, we use T2 as the source domain and the others as unknown target domains. Prostate contains prostate T2-weighted MRI data collected from six different data sources. We follow the previous work \cite{liu2020ms} to partition the data into six datasets, A to F, according to the clinical centers where the data were collected. Consistent with our previous work, we take Site B as the source domain and the others as unknown target domains. The MMWHS dataset consists of 20 MRI and 20 CT unpaired volumes collected at different clinical sites, with ground-truth masks of four cardiac structures, including the ascending aorta (AA), the left atrium blood cavity (LAC), the left ventricle blood cavity (LVC), and the myocardium of the left ventricle (MYO). We perform domain generalization in both directions (MRI$\rightarrow$CT and CT$\rightarrow$MRI).

For data preprocessing, each volume was normalized to zero mean and unit variance. Then, we extract the slices from each volume in the axial (BraTS'19 and Prostate) or coronal (MMWHS) plane and normalize each image to $[-1, 1]$ before feeding it to the network. For BraTS'19 and MMWHS, we center-crop each image and then resize it to $256\times256$. For Prostate, the image size is $384\times384$. Each domain was randomly split into 80\% of the samples for training and 20\% for testing. It is worth noting that (i) there are three sub-structures in BraTS'19 (the Enhancing Tumor (ET), the Tumor Core (TC), and the Whole Tumor (WT)), which we merge into one label for segmentation, consistent with SADN \cite{zhou2022generalizable}, and (ii) we first shuffle all the volumes and divide them into four equal parts, one for each sequence, to prevent ground-truth leakage, because the mask of each case is shared across the four sequences in BraTS'19.





% ---------------------------------------------------------
\newpage
\section{Comparison between ours and CIConv}
\label{appendix:F}

In essence, Color Invariant Convolution (CIConv) \cite{Lengyel2021ZeroShotDD} is a learnable edge detector that is derived from physics-based reflection models \cite{Geusebroek2001ColorI}. On the one hand, the computational process of the Color Invariant theory \cite{Geusebroek2001ColorI} is very complicated, which greatly increases the training and inference time. On the other hand, the performance of different variants of CIConv is unstable. Therefore, we compare the performance of CIConv and edge detection algorithms using RefineNet \cite{lin2017refinenet}, which is utilized in the CIConv model. \tableref{tab:tab9} reports the comparison results, where we can see that the performance of the different CIConv variants varies greatly, while the performance of all edge detectors is stable and superior to that of CIConv.


\begin{table}[htbp]
\centering
\caption{Comparison results of CIConv (first group) and the Edge-guided model (second group) on the BraTS'19 (left), Prostate (middle), and MMWHS (right) datasets with RefineNet. The best results of CIConv and of the Edge-guided model are underlined and bolded, respectively.}
\begin{minipage}[t]{0.26\linewidth}
\resizebox{.99\linewidth}{!}{
\begin{tabular}{l|cccc}
\hline
\multicolumn{1}{c|}{}                                      & \multicolumn{4}{c}{\textbf{Source Domain: T2}}                                                                                                  \\
\multicolumn{1}{c|}{\multirow{-2}{*}{\textbf{Experiment}}} & \multicolumn{1}{l}{\textbf{T1}} & \multicolumn{1}{l}{\textbf{T1ce}} & \multicolumn{1}{l}{\textbf{Flair}} & \multicolumn{1}{l}{\textbf{Avg.}} \\ \hline
\textbf{invariant-E}                                       & 34.95                           & 41.08                             & 62.06                              & {\ul 46.03}                                \\
\textbf{invariant-W}                                       & 44.79                           & 31.15                             & 57.10                              & 44.35                                \\
\textbf{invariant-C}                                       & 10.93                           & 18.98                             & 25.00                              & 18.30                                \\
\textbf{invariant-N}                                       & 0.00                            & 0.00                              & 0.00                               & 0.00                                 \\
\textbf{invariant-H}                                       & 7.98                            & 10.78                             & 10.80                              & 9.85                                 \\ \hline
\textbf{Canny}                                             & 42.82                           & 45.62                             & 51.34                              & 46.59                                \\
\textbf{AutoCanny}                                         & 39.87                           & 45.45                             & 51.69                              & 45.67                                \\
\textbf{Roberts}                                           & 38.21                           & 42.95                             & 51.71                              & 44.29                                \\
\textbf{Prewitt}                                             & 36.62                           & 36.73                             & 60.55                              & 44.63                                \\
\textbf{Sobel}                                           & 38.41                           & 44.14                             & 59.36                              & \textbf{47.30}                                \\

\textbf{Laplacian}                                         & 19.60   & 22.54     & 49.32      & 30.49        \\ 
\hline
\end{tabular}}
\end{minipage}
\begin{minipage}[t]{0.385\linewidth}
\resizebox{.99\linewidth}{!}{
\begin{tabular}{l|cccccc}
\hline
\multicolumn{1}{c|}{}                                         & \multicolumn{6}{c}{\textbf{Source Domain: Site B}}                                                                                                                                            \\
\multicolumn{1}{c|}{\multirow{-2}{*}{\textbf{Experiment}}} & \textbf{Site A}               & \textbf{Site C}               & \textbf{Site D}               & \textbf{Site E}               & \textbf{Site F}               & \textbf{Avg.}              \\ \hline
\textbf{invariant-E}                                          & 58.61                         & 55.81                         & 68.07                         & 46.46                         & 58.25                         & 57.44                         \\
\textbf{invariant-W}                                          & 57.71                         & 42.28                         & 70.78                         & 37.61                         & 56.84                         & 53.04                         \\
\textbf{invariant-C}                                          & 52.59                         & 40.85                         & 68.56                         & 63.49                         & 64.06                         & {\ul 57.91}                         \\
\textbf{invariant-N}                                          & 0.00                          & 0.00                          & 0.00                          & 0.00                          & 0.00                          & 0.00                          \\
\textbf{invariant-H}                                          & 55.06 & 45.26 & 56.62 & 57.28 & 63.96                         & 55.64 \\ \hline
\textbf{Canny}                                                & 60.02                         & 52.19                         & 63.12                         & 74.62                         & 64.56                         & 62.90                         \\
\textbf{AutoCanny}                                            & 77.10 & 62.35 & 71.05 & 71.00 & 66.54 & \textbf{69.61} \\
\textbf{Roberts}                                              & 63.31                         & 40.56                         & 66.35                         & 39.74                         & 42.55                         & 50.50                         \\
\textbf{Prewitt}                                              & 62.25                         & 47.78                         & 71.78                         & 60.48                         & 64.69                         & 61.40                         \\

\textbf{Sobel}                                                & 61.25                         & 36.48                         & 69.08                         & 51.35                         & 42.87                         & 52.21                         \\

\textbf{Laplacian}                                            & 64.70                         & 36.69                         & 69.26                         & 53.29                         & 51.85                         & 55.16                         \\ 
\hline
\end{tabular}}
\end{minipage}
\begin{minipage}[t]{0.3\linewidth}
\resizebox{.99\linewidth}{!}{
\begin{tabular}{l|ccccc}
\hline
\multicolumn{1}{c|}{}                                      & \multicolumn{5}{c}{\textbf{Source Domain: MRI}}                                                                                                                      \\
\multicolumn{1}{c|}{\multirow{-2}{*}{\textbf{Experiment}}} & \textbf{AA}                   & \textbf{LAC}                  & \textbf{LVC}                  & \textbf{MYO}                  & \multicolumn{1}{l}{\textbf{Avg.}} \\ \hline
\textbf{invariant-E}                                       & 55.55                         & 57.07                         & 52.77                         & 26.79                         & 48.04                                \\
\textbf{invariant-W}               & 70.35                         & 76.32                         & 58.81                         & 42.45                         & {\ul 61.98}                                \\
\textbf{invariant-C}                                       & 60.17                         & 60.09                         & 45.79                         & 30.00                         & 49.01                                \\
\textbf{invariant-N}                                       & 0.00                          & 0.00                          & 0.00                          & 0.00                          & 0.00                                 \\
\textbf{invariant-H}                                       & 60.52 & 63.39 & 53.77 & 31.59                         & 52.32        \\ \hline
\textbf{Canny}                                             & 70.10                         & 73.30                         & 63.79                         & 43.81                         & 62.75                                \\
\textbf{AutoCanny}                                         & 67.56                         & 71.43                         & 54.27                         & 44.45                         & 59.43                                \\
\textbf{Roberts} & 71.93 & 70.81 & 64.53 & 54.83 & 65.53                                \\
\textbf{Prewitt}                                           & 66.75                         & 72.95                         & 65.67                         & 54.29                         & 64.92                                \\
\textbf{Sobel}                                           & 64.85                         & 75.37                         & 66.26                         & 56.15                         & \textbf{65.66}        \\

\textbf{Laplacian}                                         & 71.99 & 72.48 & 64.33 & 51.75 & 65.14        \\ 
\hline
\end{tabular}}
\end{minipage}
\label{tab:tab9}
\end{table}





% ---------------------------------------------------------
\newpage
\section{The enlarged qualitative results}
\label{appendix:G}


\begin{figure}[htbp]
\centering
\includegraphics[width=0.81\linewidth]{figs/visualization_brats_mmwhs.png}
\caption{Qualitative comparison of BraTS'19 (top) and MMWHS (bottom) samples. MRI means CT$\rightarrow$MRI domain generalization and CT means MRI$\rightarrow$CT domain generalization.}
\label{fig:visualization_brats_mmwhs}
\end{figure}


\begin{figure}[htbp]
\centering
\includegraphics[width=0.81\linewidth]{figs/visualization_prostate.png}
\caption{Qualitative comparison of Prostate samples.}
\label{fig:visualization_prostate}
\end{figure}


\end{document}
