\documentclass{midl} % Include author names
%\documentclass[anon]{midl} % Anonymized submission

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution










%% Stylefile to load YCVIU template

%% The amssymb package provides various useful mathematical symbols
\usepackage{amssymb}
\usepackage{latexsym}

\usepackage{lineno,hyperref}
\modulolinenumbers[5]
\usepackage{hyperref}
\usepackage{multicol}
\usepackage{graphicx}
\usepackage{xcolor,cancel,mathrsfs,amscd}
%\usepackage{subfigure}
\usepackage{subfig}
%\usepackage{subcaption,graphicx}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{amsfonts}
\usepackage{multirow}
\usepackage{rotating}
\usepackage{subfig}
\usepackage{color}
\usepackage{float}
\usepackage{url}
\usepackage{breakcites}
\usepackage{array}

\newcolumntype{L}{>{\centering\arraybackslash}m{3cm}}

\usepackage[nameinlink,capitalize]{cleveref}

% \usepackage[table]{xcolor} http://ctan.org/pkg/xcolor

\usepackage[T1]{fontenc}% optional T1 font encoding

\usepackage{supertabular}
\usepackage{hyperref}
\usepackage{xcolor}
\usepackage[graphicx]{realboxes}
\usepackage{pdflscape}
\usepackage{amsmath}
\usepackage{ulem}


\usepackage[cmintegrals]{newtxmath}
\definecolor{LightCyan}{rgb}{0.88,1,1}
\usepackage{multirow}


\definecolor{myblue}{rgb}{.0,.0,.8}
\newcommand{\veronika}[1]{\textcolor{myblue}{Veronika: #1}}
\newcommand{\Rosana}[1]{\textcolor{purple}{Rosana's comment: #1}}
\definecolor{myg}{rgb}{.0,.3,0}
\newcommand{\caro}[1]{\textcolor{myg}{Caro: #1}}


\definecolor{normal}{rgb}{.0,.0,.0}
\definecolor{myred}{rgb}{.8,.0,.0}
\definecolor{myblue}{rgb}{.0,.0,.8}

\newcommand{\todo}[1]{\textcolor{myred}{TODO: #1}}
\newcommand{\added}[1]{\textcolor{myblue}{#1}}

\newcommand{\PH}[1]{\textcolor{myblue}{{\bf PH:} #1}}







\usepackage{multicol}

\usepackage{mwe} % to get dummy images
\jmlrvolume{-- Under Review}
\jmlryear{2021}
\jmlrworkshop{Full Paper -- MIDL 2021 submission}
\editors{Under Review for MIDL 2021}

\title[A Soft Contour-Based-Loss for Medical Image Segmentation]{A Soft Contour-Based-Loss for Medical Image Segmentation}

 % Use \Name{Author Name} to specify the name.
 % If the surname contains spaces, enclose the surname
 % in braces, e.g. \Name{John {Smith Jones}} similarly
 % if the name has a "von" part, e.g \Name{Jane {de Winter}}.
 % If the first letter in the forenames is a diacritic
 % enclose the diacritic in braces, e.g. \Name{{\'E}louise Smith}

 % Two authors with the same address
 % \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\and
 %  \Name{Author Name2} \Email{xyz@sample.edu}\\
 %  \addr Address}

 % Three or more authors with the same address:
 % \midlauthor{\Name{Author Name1} \Email{an1@sample.edu}\\
 %  \Name{Author Name2} \Email{an2@sample.edu}\\
 %  \Name{Author Name3} \Email{an3@sample.edu}\\
 %  \addr Address}


% Authors with different addresses:
% \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\\
% \addr Address 1
% \AND
% \Name{Author Name2} \Email{xyz@sample.edu}\\
% \addr Address 2
% }

%\footnotetext[1]{Contributed equally}

% More complicate cases, e.g. with dual affiliations and joint authorship


\begin{document}

% %
% % author names and IEEE memberships
% % note positions of commas and nonbreaking spaces ( ~ ) LaTeX will not break
% % a structure at a ~ so this keeps an author's name from being broken across
% % two lines.
% % use \thanks{} to gain access to the first footnote area
% % a separate \thanks must be used for each paragraph as LaTeX2e's \thanks
% % was not built to handle multiple paragraphs
% %

% % <-this % stops a space
% %\thanks{Manuscript received April 19, 2005; revised August 26, 2015.}

% % note the % following the last \IEEEmembership and also \thanks - 
% % these prevent an unwanted space from occurring between the last author name
% % and the end of the author line. i.e., if you had this:
% % 
% % \author{....lastname \thanks{...} \thanks{...} }
% %                     ^------------^------------^----Do not want these spaces!
% %
% % a space would be appended to the last name and could cause every name on that
% % line to be shifted left slightly. This is one of those "LaTeX things". For
% % instance, "\textbf{A} \textbf{B}" will typeset as "A B" not "AB". To get
% % "AB" then you have to do: "\textbf{A}\textbf{B}"
% % \thanks is no different in this regard, so shield the last } of each \thanks
% % that ends a line with a % and do not let a space in before the next \thanks.
% % Spaces after \IEEEmembership other than the last one are OK (and needed) as
% % you are supposed to have spaces between the names. For what it is worth,
% % this is a minor point as most people would not even notice if the said evil
% % space somehow managed to creep in.

% %\markboth{Journal of \LaTeX\ Class Files,~Vol.~14, No.~8, August~2015}%
% %{Shell \MakeLowercase{\textit{et al.}}: Bare Demo of IEEEtran.cls for IEEE Communications Society Journals}
% % The only time the second header will appear is for the odd numbered pages
% % after the title page when using the twoside option.
% % 
% % *** Note that you probably will NOT want to include the author's ***
% % *** name in the headers of peer review papers.                   ***
% % You can use \ifCLASSOPTIONpeerreview for conditional compilation here if
% % you desire.


% % If you want to put a publisher's ID mark on the page you can do it like
% % this:
% %\IEEEpubid{0000--0000/00\$00.00~\copyright~2015 IEEE}
% % Remember, if you use this you must call \IEEEpubidadjcol in the second
% % column for its text to clear the IEEEpubid mark.

% % use for special paper notices
% %\IEEEspecialpapernotice{(Invited Paper)}
% % make the title area
% \maketitle
% % As a general rule, do not put math, special symbols or citations
% % in the abstract or keywords.
% \begin{abstract}

% \end{abstract}

% % Note that keywords are not normally used for peerreview papers.
% \begin{IEEEkeywords}

% \end{IEEEkeywords}


% % For peer review papers, you can put extra information on the cover
% % page as needed:
% % \ifCLASSOPTIONpeerreview
% % \begin{center} \bfseries EDICS Category: 3-BBND \end{center}
% % \fi
% %
% % For peerreview papers, this IEEEtran command inserts a page break and
% % creates the second title. It will be ignored for other modes.
% \IEEEpeerreviewmaketitle




% \subsection{Common Annotations:}
% \begin{enumerate}
%     \item $T_1(g) $: groundtruth variable/attribute
%     \item $ T_2(p)$: Predicted variable/attribute
%     \item $ p(y_p)  $: probability of pixel p 
%     \item $y_p $: label of pixel p
%     \item $ N:$ total number of pixels 
%     \item $K=|\mathcal{K}|$ number of classes of classes, 
%     \item $\mathcal{K}$ the set of classes.
%     \item $\Omega$ the image space,
%     \item $D:=dim(\Omega)$ the number of dimensions (2D,  3D here).
%     \item $s_\theta \in [0,1]^{K\times |\Omega|} $ the network output for image $I$ (network params: $\theta$). 
%     \item $N_p$: 4 connected neighborhood pixels of pixel $p$
%     \item $D_G: \Omega \xrightarrow[]{} \mathbb{R^+} $ distance maps 
%     \item  $Z_G(q)$ the nearest point on the contour  $\partial_G$ to point $y_q$
%     \item $y_s(p)$:  corresponding point on the predicted boundary $\partial_S$ gotten along the normal direction to $\partial_G$  at p.\\
%     \item $S_r$: predicted region: b
%     \item $ \hat{\Omega}_p$: The region between the predicted S and groundtruth contours
%     \item $ S :$ predicted boundary
%     \item $ G: $ ground-truth boundary
% \end{enumerate}


\section{Tabulating state-of-the-art losses}
\begin{table}[h!]
    \caption{Overview of state-of-the-art boundary- and shape-aware segmentation losses.}
    \smallskip
    \centering
    \renewcommand{\arraystretch}{2.2}
    \begin{tabular}{l|c}
      Loss & Equation \\ \hline
      Boundary \cite{Kervadec2019_BoundaryLoss} &
        $\displaystyle L_{Boundary} = \sum_{q \in \Omega} \phi_g(q) \cdot p(y_q)$ \\ \hline
      Hausdorff \cite{karimi_reducing_2019} &
        $\displaystyle L_{HD}(q,p) = \frac{1}{|\Omega|} \sum_{\Omega}(p - q)^2 \cdot (d_p^{\alpha} - d_q^{\alpha})$ \\ \hline
      Boundary Enhanced \cite{Yang2018} &
        $\displaystyle L_{BE} = \lVert \mathcal{L}(p(y_p)) - \mathcal{L}(y_p) \rVert_2$ \\ \hline
      Boundary Loss 2 \cite{caliva2019_MIDL} &
        $\displaystyle L_{\text{Dist-Penalty}} = \frac{1}{N} \sum_{i = 1}^{N} (1+ D_G) \circ \sum_{r = 1}^{K} -y_p^r \cdot \log(\hat{y}_p^r)$ \\ \hline
      Shape-aware Loss \cite{Arif2018} &
        $\displaystyle l_{shape} = - \sum_{p \in \hat{\Omega}_p}\sum_{r =1}^{K} y_{p}^r \cdot E_i \cdot \log(P(y_p^r \mid x_p; w))$ \\ \hline
    \end{tabular}
    \label{tab:Multi-HS}
\end{table}













\section{Brief Description of the Losses}
\begin{enumerate}
    \item \textbf{Boundary  loss \cite{Kervadec2019_BoundaryLoss}}
    \begin{equation}
    L_{Boundary} = \sum_{q \in \Omega} \phi_g(q) \cdot p(y_q)
    \label{kervadec_boundary_dist_brief}
\end{equation}
where
\[
  \phi_g(q) =
  \begin{cases}
    -D_g(q), & q \in S_r \\
    D_g(q), & \text{otherwise.}
  \end{cases}
\]

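\item \textbf{Hausdorff Loss \cite{karimi_reducing_2019}}
% NOTE(review): the cited paper appears to combine the two distance maps with a sum
% (d_p^alpha + d_q^alpha) rather than a difference -- confirm against the original.
\begin{equation}
    L_{HD}(q,p) = \frac{1}{|\Omega|} \sum_{\Omega}(p - q)^2 \cdot (d_p^{\alpha} - d_q^{\alpha})
    \label{HDLOSS}
\end{equation}
where $p$ is the predicted segmentation map, $q$ is the ground-truth segmentation map, and $d_p$ and $d_q$ are the distance maps of the predicted and ground-truth segmentations, respectively, each defined as the unsigned distance to the corresponding boundary ($\delta_{p}$ or $\delta_{q}$).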



\item \textbf{Boundary Enhanced Loss or Laplacian Loss by \cite{yang2019_MIDL}}
\begin{equation}
    L_{BE} = || \mathcal{L}(p(y_p)) - \mathcal{L}(y_p)||_2 
\end{equation}

where $\mathcal{L}$ is the Laplacian Operator

\item \textbf{Kervadec-like Loss \cite{caliva2019_MIDL}}

\begin{equation}
    L_{\text{Dist-Penalty}} = \frac{1}{N} \sum_{i = 1}^{N} (1+ D_G) \circ \sum_{r = 1}^{K} -y_p^r \cdot \log(\hat{y}_p^r)
    \label{Caliva_dist_brief}
\end{equation}

where $\circ$ is the Hadamard (element-wise) product between a vector and a matrix, $K=|\mathcal{K}|$ is the number of classes, and $r$ indexes the classes.

\item \textbf{ Shape-aware Loss \cite{Arif2018}}

\begin{equation}
    l_{shape} = - \sum_{p \in \hat{\Omega}_p}\sum_{r =1}^{K} y_{p}^r \cdot E_i \cdot \log(P(y_p^r \mid x_p; w))
\label{Arif_shape_aware_brief}
\end{equation}
where
\[ E_i = D(S, G) \]
and $D(\cdot)$ is the average point-to-curve Euclidean distance between the predicted shape $S$ and the ground-truth shape $G$.
    

\end{enumerate}



\bigskip
\section{Everything to know about the Papers for the losses}
\subsection{\textbf{Boundary Enhanced Loss by \cite{yang2019_MIDL}}}
In this paper, the authors exploit Laplacian filters to develop a boundary-enhanced loss term that encourages the network to generate strong responses around boundary areas while producing a zero response for pixels that are farther from the peripheries.
The proposed loss function is an extension of the $\lVert\cdot\rVert_2$ norm in which the optimized variables are the Laplacian-filtered probabilities. The designated loss function is as follows:

\begin{equation}
    L_{BE} = || \mathcal{L}(p(y_p)) - \mathcal{L}(y_p)||_2 
\end{equation}


To obtain the Laplacian filters, a set of 3D non-trainable convolutions was utilized in order to perform 3D segmentation of both structural (task02: spleen) and non-structural (task01: brain tumors) anatomical objects. The datasets under consideration were obtained from the Decathlon2019 challenge for medical image segmentation. The proposed loss term was added to the regular Dice loss, weighted by a static factor $\lambda$, and trained with regular stochastic gradient descent with the Adam optimizer.

The effectiveness of this loss lies in its ability to accommodate a variety of tasks and objectives, i.e., it can segment structural objects (organs) as well as non-structural objects (brain tumors). Moreover, it accommodates different modalities (CT, MRI) and is easy to integrate into any 3D backbone architecture. At the segmentation-evaluation level, the proposed loss suppresses false positives as well as remote outliers located far from the regions under study; however, no significant improvement is registered in terms of the Dice evaluation metric (about 0.1\%), and it performs comparably to \cite{Kervadec2019_BoundaryLoss} in the case of tumors. Moreover, it cannot be applied independently of the Dice loss. This is mainly because it cannot differentiate between the interior and exterior of organs, thus distinguishing only the boundaries of the objects rather than their interiors as well.



\subsection{\textbf{Boundary Loss \cite{Kervadec2019_BoundaryLoss}} }
\begin{equation}
    Dist(\partial_G) = \int_{\partial_G} \lVert y_{S}(p) - p \rVert_2^2 \, dp
    \label{kervadec_boundary}
\end{equation}
where $y_S(p)$ is the corresponding point on the boundary $\partial_S$ obtained along the direction normal to $\partial_G$ at $p$, and $p \in \Omega$.

In \cite{Kervadec2019_BoundaryLoss}, the authors consider curve evolution by evaluating boundary variations as a solution to class imbalance within fully supervised segmentation networks. The original form of the loss function (expressed in \equationautorefname~\ref{kervadec_boundary}) is an extension of the $\lVert\cdot\rVert_2$ norm that optimizes contour-position features. The main objective is to decrease the distance between the segmented contour and the ground-truth contour. However, given that such distances cannot be integrated with region-based loss functions, as explained by \cite{Kervadec2019_BoundaryLoss}, the authors transformed the above expression, based on the theory presented by \cite{Boykov2006}, into a distance-map-based loss function with the same objective (see \equationautorefname~\ref{kervadec_boundary_dist}). In doing so, the boundary loss was reduced to a simple process of weighting probability functions via the computed level set $\phi_g(q)$. In summary, \cite{Kervadec2019_BoundaryLoss} weighted the pixel probabilities based on their distance error.

% A distance map evaluates the distance between point $q \in \Omega$ and the nearest point $Z_G(q)$ on the contour $\partial_G$. 

% $$
%     Dist(\partial _G) =|| q - Z_G(q)|| = 2 \int_{S_r}\phi_G(q).d(q) - \intt_{G}\phi_G(q).d_q 
% $$
% which is further reduced it to the following compact loss form: 

% \begin{equation}
%   Dist(\partial _G, \partial_S)= \int_{\Omega}{} \phi_g(q).s(q)d_q 
% \end{equation}
% where
% $$
%   \phi_g(q) = 
% \begin{cases}
% -D_g(q) & , q \in S_r\\
% D_g(q) & Else

% \end{cases}

%  and  s: \Omega \xrightarrow{} \{\, 0,1 \} = 
% \begin{cases}
% 1,  & q \in S_r  \\
% 0, &  Else
% \end{cases}
% $$

% TO arrive to a loss function, \cite{Kervadec2019_BoundaryLoss} then replaces s which represents the groundtruth labels with soft probabilities yielding the function to be:

\begin{equation}
    L_{Boundary} = \sum_{q \in \Omega} \phi_g(q) \cdot p(y_q)
    \label{kervadec_boundary_dist}
\end{equation}
where
\[
  \phi_g(q) =
  \begin{cases}
    -D_g(q), & q \in S_r \\
    D_g(q), & \text{otherwise.}
  \end{cases}
\]
\[ L = \alpha \cdot L_{CE} + (1 - \alpha) \cdot L_{Boundary} \]


Validation of the proposed loss was done on two main tasks: the ischemic stroke lesion (ISLES) and White Matter Hyperintensities (WMH) datasets. To train the network, the boundary loss of \cite{Kervadec2019_BoundaryLoss} was added to the regular pixel-wise cross-entropy and weighted by a dynamic factor $\alpha$. In order to achieve a balance between the pixel-wise intensity loss and the boundary loss, \cite{Kervadec2019_BoundaryLoss} set the value of $\alpha$ to 1 and decremented it by 0.01 after each epoch. In this way, the impact of the boundary loss is increased progressively throughout training while that of the regional loss is decreased. The main reason for this training strategy is to overcome general optimization problems, such as saddle points and local minima, encountered when using the proposed loss independently.

Despite the significance of the proposed method, which registered improvements in Dice similarity coefficient and Hausdorff distance, multiple challenges remain to be addressed. First, the loss encounters a variety of optimization problems, as mentioned above, including saddle points and local minima. Thus, the use of the Adam optimizer or stochastic gradient descent may not be optimal in such cases. Second, these optimization problems hinder the possibility of training with the boundary loss independently of pixel-wise regional losses. Indeed, as \cite{Kervadec2019_BoundaryLoss} reported, training based on the boundary loss alone often causes the network to collapse to empty foreground regions midway through training.



\subsection{\textbf{Penalty Distance Maps:}}
Similar to \cite{Kervadec2019_BoundaryLoss}, \cite{caliva2019_MIDL} exploited distance maps as weighting factors for the cross-entropy loss in order to improve the extraction of shape biomarkers and enable the network to focus on hard-to-segment boundary regions. By integrating the distance maps into the cross-entropy baseline, \cite{caliva2019_MIDL} gave more weight to pixels lying in close proximity to the bones than to those that are far away. Their proposed loss is described in \equationautorefname~\ref{Caliva_dist}.
\begin{equation}
    L_{\text{Dist-Penalty}} = \frac{1}{N} \sum_{i = 1}^{N} (1+ D_G) \circ \sum_{r = 1}^{K} -y_p^r \cdot \log(\hat{y}_p^r)
    \label{Caliva_dist}
\end{equation}

where $\circ$ is the Hadamard (element-wise) product between a vector and a matrix.

To obtain the distance maps, distance transforms were applied to the segmentation masks and then inverted by voxel-wise subtraction from the overall maximum distance value. Inter-distance maps inside the bones were also generated separately and then combined in order to accommodate the size imbalance between the bones. The method was validated on the public Osteoarthritis Initiative dataset and trained using mini-batch gradient descent with the Adam optimizer.

Despite the similarity of the method to \cite{Kervadec2019_BoundaryLoss}, multiple differences can be noted. First, Kervadec et al.\ dealt with single-lesion 2D segmentation (one organ) and exploited the loss to resolve the problem of class imbalance. In contrast, \cite{caliva2019_MIDL} applied their loss to multi-class segmentation in order to distinguish between bone classes, including the distal femur, the proximal tibia, and the patella, and addressed 3D knee MRI segmentation. In addition, whereas \cite{Kervadec2019_BoundaryLoss} adopted a dynamic weighting strategy that gradually increases the impact of the boundary loss during training as a solution to optimization challenges, \cite{caliva2019_MIDL} were able to train the network with their boundary loss from the start. However, \cite{caliva2019_MIDL} did not register an improvement in Dice accuracy as significant as that of \cite{Kervadec2019_BoundaryLoss}. Moreover, \cite{Kervadec2019_BoundaryLoss} did not require post-processing steps, unlike \cite{caliva2019_MIDL}.





\subsection{\textbf{Shape-Aware Loss function}}

In the same context, \cite{Arif2018} integrated shape into vertebrae segmentation through a shape-aware loss function that constrains predictions to conform to permissible shapes. In order to do so, it extends the logarithmic cross-entropy baseline by introducing an average point-to-curve Euclidean distance factor between the predicted contour $S$ and the ground-truth contour $G$ (\equationautorefname~\ref{Arif_shape_aware}).

\begin{equation}
    l_{shape} = - \sum_{p \in \hat{\Omega}_p}\sum_{r =1}^{K} y_{p}^r \cdot E_i \cdot \log(P(y_p^r \mid x_p; w))
\label{Arif_shape_aware}
\end{equation}
where
\[ E_i = D(S, G) \]
and $D(\cdot)$ is the average point-to-curve Euclidean distance between the predicted shape $S$ and the ground-truth shape $G$.
    
In comparison to \cite{Kervadec2019_BoundaryLoss}, both proposed losses weight probabilities with distance error maps. However, \cite{Arif2018} made use of the contours as shape descriptors/features and summed only over the erroneous pixels $p \in \hat{\Omega}_p$, where $\hat{\Omega}_p$ is the error pixel space between the ground-truth boundary and the segmented boundary. In this way, \cite{Arif2018} penalized predicted areas outside the ground-truth boundary. The proposed loss was added to the pixel-wise cross-entropy in such a way that both terms contributed equally. The proposed method was evaluated on lateral cervical spine X-ray images using the pixel-wise accuracy (pA) metric and the Dice similarity coefficient, as well as shape-aware metrics including the loss itself and the fit failure coefficient.

The significance of the proposed shape-aware loss does not lie in outperforming other methods with respect to regular region accuracies (Dice + pA), where it was consistent with the presented state of the art. Rather, it lies in producing anatomically plausible segmentations, as revealed by the superior performance of the proposed model when tested against shape metrics (fit failure + loss coefficient), especially on hard cases.





\bibliography{midl-samplebibliography}

\end{document}