\documentclass{midl} % Include author names

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution

\usepackage{mwe} % to get dummy images
\raggedbottom
\usepackage{hyperref}
\usepackage[nolist,nohyperlinks]{acronym}
\usepackage{multirow}
\usepackage{enumitem}
\usepackage{diagbox}
\usepackage[format=plain]{caption}
\jmlryear{2025}
\jmlrworkshop{Full Paper -- MIDL 2025}
\jmlrvolume{-- 116}
\editors{Accepted for publication at MIDL 2025}

\title[Curriculum Learning for Language-guided Detection]{Curriculum Learning for Language-guided,
Multi-modal Detection of Various Pathologies}

\midlauthor{
\hspace{-3pt}\Name{Laurenz Adrian Heidrich\midljointauthortext{Contributed equally}\nametag{$^{1,2}$}} \Email{laurenz.heidrich@tum.de} \\
\Name{Aditya Rastogi\midlotherjointauthor\nametag{$^{2}$}} \Email{aditya.rastogi@ukbonn.de}\\%
\Name{Priyank Upadhya\nametag{$^{2}$}} \Email{priyank.upadhya@ukbonn.de}\\%
\Name{Gianluca Brugnara\nametag{$^{2,3}$}} \Email{gianluca.brugnara@ukbonn.de}\\%
\Name{Martha Foltyn-Dumitru\nametag{$^{2}$}} \Email{Martha.Foltyn-Dumitru@ukbonn.de}\\%
\Name{Benedikt Wiestler\nametag{$^{1,4}$}} \Email{b.wiestler@tum.de}\\%
\Name{Philipp Vollmuth\nametag{$^{2,3}$}} \Email{philipp.vollmuth@ukbonn.de} \\ \\
\addr $^{1}$ AI for Image-Guided Diagnosis and Therapy (AI-IDT), School of Medicine and Health, Technical University of Munich, Munich, Germany \\
\addr $^{2}$ Division for Computational Radiology \& Clinical AI (CCIBonn.ai), University Hospital Bonn, Bonn, Germany \\
\addr $^{3}$ Division for Medical Image Computing (MIC), German Cancer Research Center (DKFZ), Heidelberg, Germany \\
\addr $^{4}$ Munich Center for Machine Learning, Munich, Germany
}

\begin{document}

\begin{acronym}
  \input{./chapters/acronym_entries.tex}
\end{acronym}

\maketitle

\begin{abstract}
Pathology detection in medical imaging is crucial for radiologists, yet current approaches that train specialized models for each region of interest often lack efficiency and robustness. Furthermore, the scarcity of annotated medical data, particularly for diverse phenotypes, poses significant challenges in achieving generalizability. To address these challenges, we present a novel language-guided object detection pipeline that leverages curriculum learning strategies, chosen for their ability to progressively train models on increasingly complex samples, thereby improving generalization across pathologies, phenotypes, and modalities. We developed a unified pipeline to convert segmentation datasets into bounding box annotations, and applied two curriculum learning approaches---teacher curriculum and bounding box size curriculum---to train a Grounding DINO model. Our method was evaluated on different tumor types in MRI and CT scans and showed significant improvements in detection accuracy. The teacher and bounding box size curriculum learning approaches yielded a 4.9\% AP and 5.2\% AP increase over the baseline, respectively. The results highlight the potential of curriculum learning to optimize medical image analysis and clinical workflow. The code is available at \url{https://github.com/CCI-Bonn/CL4OD}.


\end{abstract}

\begin{keywords}
Medical Image Analysis, Deep Learning, Tumor Detection, Curriculum Learning 
\end{keywords}

\input{chapters/1_introduction_and_related}
\input{chapters/2_method}
\input{chapters/3_experiments}
\input{chapters/4_conclusion}





\clearpage  % Acknowledgements, references, and appendix do not count toward the page limit (if any)
% Acknowledgments---Will not appear in anonymized version
\midlacknowledgments{P.V. is funded through the Else Kröner Clinician Scientist Endowed Professorship (reference number: 2022\_EKCS.17). A.R. is funded through the Bonfor Startup Postdoc Fellowship (reference number: 2024-1B-10).}


\bibliography{midl25_116}

\input{chapters/appendix.tex}

\end{document}

