\documentclass{midl} % Include author names

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution

\usepackage{mwe} % to get dummy images
\usepackage{hyperref}
\usepackage{url}
\usepackage{graphicx}
\usepackage{multirow}
\usepackage{booktabs}  
\usepackage{enumitem}
\usepackage{array}
\usepackage[normalem]{ulem}
\setlist{noitemsep, topsep=2pt}
\usepackage{siunitx}
\sisetup{detect-weight=true, detect-inline-weight=math}

% Venue metadata consumed by the midl/jmlr class.
% The camera-ready values below are the active ones; the under-review variants
% are kept commented out so the volume string is set in exactly one place.
% \jmlrvolume{-- Under Review}
\jmlryear{2026}
% \jmlrworkshop{Full Paper -- MIDL 2026 submission}
% \editors{Under Review for MIDL 2026}

\jmlrworkshop{Full Paper -- MIDL 2026}
% NOTE(review): "nnn" looks like a placeholder for the proceedings volume number -- confirm before publication.
\jmlrvolume{-- nnn}
\editors{Accepted for publication at MIDL 2026}
% \title[Short Title]{Full Title of Article}



\title[Subgroup Discovery Methods for Segmentation]{SEG4SEG: Identifying Systematic Failure Modes in Segmentation by Subgroup Discovery Methods}


\midlauthor{\Name{Nina Weng\nametag{$^{1}$}} \orcid{0009-0006-4635-0438} \Email{ninwe@dtu.dk}\\
\Name{Eike Petersen\nametag{$^{2,3}$}} \Email{eike.petersen@mevis.fraunhofer.de}\\
\Name{Alceu Bissoto\nametag{$^{4,5}$}} \Email{alceu.bissoto@unibe.ch}\\
\Name{Susu Sun\nametag{$^{6}$}} \Email{susu.sun@uni-tuebingen.de}\\
\Name{Lisa M. Koch\nametag{$^{4,5}$}} \Email{lisa.koch@unibe.ch}\\
\Name{Aasa Feragen\nametag{$^{1}$}} \Email{afhar@dtu.dk}\\
\Name{Siavash Bigdeli\nametag{$^{1}$}} \Email{sarbi@dtu.dk}\\
\Name{Christian F. Baumgartner\nametag{$^{7}$}} \Email{christian.baumgartner@unilu.ch}\\
{
\addr $^{1}$ Technical University of Denmark, Kongens Lyngby, Denmark\\
\addr $^{2}$ Fraunhofer Institute for Digital Medicine MEVIS, Bremen, Germany\\
\addr $^{3}$ Hannover Medical School, Institute for Diagnostic and Interventional Radiology, Hanover, Germany\\
\addr $^{4}$ Department of Diabetes, Endocrinology, Nutritional Medicine and Metabolism UDEM, Inselspital, Bern University Hospital, University of Bern, Switzerland\\
\addr $^{5}$ Department of Digital Medicine, University of Bern, Switzerland\\
\addr $^{6}$ Cluster of Excellence: Machine Learning -- New Perspectives for Science, University of Tübingen, Germany\\
\addr $^{7}$ Faculty of Health Sciences and Medicine, University of Lucerne, Switzerland
}
}


% \ninwe{<text>}: inline author note, typeset in purple as "[nina: <text>]".
\newcommand{\ninwe}[1]{%
  \textcolor{purple}{[nina: #1]}%
}
% \rebuttal{<text>}: typesets <text> in orange.
% Deliberately not the starred \newcommand* so the argument may span paragraphs.
\newcommand{\rebuttal}[1]{%
  \textcolor{orange}{#1}%
}

\begin{document}

\maketitle

\begin{abstract}
Deep learning models for medical image segmentation can achieve high overall performance but fail systematically on critical subgroups. While Slice Discovery Methods (SDMs) have shown promise in revealing classification failures, their effectiveness for segmentation remains unexplored.
Moreover, although various systematic failures have been reported in segmentation tasks, no prior work has systematically categorized them.
In this work, we address both gaps. 
First, we categorize potential sources of systematic errors in medical image segmentation. 
Second, we empirically investigate whether SDMs can identify problematic slices in each of those categories without manual annotations. 
Our evaluation covers four controlled failure types and two real-world failure cases, using medical imaging datasets and explicit success criteria for SDM evaluation.
Our experiments show that SDMs adapted for segmentation can identify systematic errors, demonstrating their potential for failure analysis in medical imaging. Our code is publicly available at \href{https://nina-weng.github.io/seg4seg.github.io}{nina-weng.github.io/seg4seg.github.io}.
\end{abstract}

\begin{keywords}
Subgroup discovery, segmentation, systematic error, shortcut learning
\end{keywords}


\input{sections/01_introduction}

\input{sections/02_problem_definition}
\input{sections/04_taxonomy}
\input{sections/03_sdm4seg}

\input{sections/05_experiments}
\input{sections/06_discussion_conclusion}

\bibliography{midl26_157}


\input{sections/07_appendix}



\end{document}
