\documentclass{midl} % Include author names

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution

% --- Placeholder content ---------------------------------------------------
\usepackage{mwe} % to get dummy images
% --- Math ------------------------------------------------------------------
% amsmath and amssymb appear in the class's auto-loaded list above; they are
% requested again here explicitly. mathtools builds on amsmath.
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{mathtools}
% --- Tables ----------------------------------------------------------------
% booktabs supplies \toprule / \midrule / \bottomrule for rule-based tables.
\usepackage{booktabs}

% --- Typography, graphics, lists, input encoding ---------------------------
% microtype: character protrusion and font expansion.
\usepackage{microtype}
% graphicx also appears in the class's auto-loaded list above.
\usepackage{graphicx}
% \usepackage{subfigure}
% enumitem: key-value customisation of list environments.
\usepackage{enumitem}
\usepackage[utf8]{inputenc}
\usepackage{pmboxdraw}  % For box-drawing characters


% \usepackage{amsthm}

% --- Check / cross marks -----------------------------------------------------
% \cmark and \xmark expand to pifont dingbat glyphs number 51 and 55.
% NOTE(review): presumably used as yes/no markers in tables inside the
% \input section files -- not visible from this file, confirm.
\usepackage{pifont}
\newcommand{\cmark}{\ding{51}}%
\newcommand{\xmark}{\ding{55}}%


% --- Diagrams ----------------------------------------------------------------
% TikZ with the `positioning` and `shapes.geometric` libraries.
% NOTE(review): no TikZ picture appears in this file; presumably needed by the
% \input section files -- confirm before removing.
\usepackage{tikz}
\usetikzlibrary{positioning, shapes.geometric}

% if you use cleveref..
% \usepackage[capitalize,noabbrev]{cleveref}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% THEOREMS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\jmlrvolume{-- 20}
\jmlryear{2026}
\jmlrworkshop{Validation Studies -- MIDL 2026}

\editors{Accepted for publication at MIDL 2026}

% \title[ReX-MLE]{AI Agents Fail at Medical Imaging Challenges} 

\title[ReX-MLE]{ReX-MLE: The Autonomous Agent Benchmark \\ \vspace{1pt} for Medical Imaging Challenges} 

\midlauthor{
\Name{Roshan Kenia\midljointauthortext{Contributed equally}\nametag{$^{1}$}} \Email{roshan\_kenia@fas.harvard.edu}\\
\Name{Xiaoman Zhang\midlotherjointauthor\nametag{$^{1}$}} \Email{xiaoman\_zhang@hms.harvard.edu}\\
\Name{Pranav Rajpurkar\nametag{$^{1}$}} \\%\Email{pranav\_rajpurkar@hms.harvard.edu}\\
\addr $^{1}$ Department of Biomedical Informatics, Harvard Medical School, Boston, MA
}


 % Use \Name{Author Name} to specify the name.
 % If the surname contains spaces, enclose the surname
 % in braces, e.g. \Name{John {Smith Jones}} similarly
 % if the name has a "von" part, e.g \Name{Jane {de Winter}}.
 % If the first letter in the forenames is a diacritic
 % enclose the diacritic in braces, e.g. \Name{{\'E}louise Smith}

 % Two authors with the same address
 % \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\and
 %  \Name{Author Name2} \Email{xyz@sample.edu}\\
 %  \addr Address}

 % Three or more authors with the same address:
 % \midlauthor{\Name{Author Name1} \Email{an1@sample.edu}\\
 %  \Name{Author Name2} \Email{an2@sample.edu}\\
 %  \Name{Author Name3} \Email{an3@sample.edu}\\
 %  \addr Address}


% Authors with different addresses:
% \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\\
% \addr Address 1
% \AND
% \Name{Author Name2} \Email{xyz@sample.edu}\\
% \addr Address 2
% }

%\footnotetext[1]{Contributed equally}

% More complicated cases, e.g. with dual affiliations and joint authorship
% \midlauthor{\Name{Author Name1\midljointauthortext{Contributed equally}\nametag{$^{1,2}$}} \orcid{1111-2222-3333-4444} \Email{abc@sample.edu}\\
% \addr $^{1}$ Address 1 \\
% \addr $^{2}$ Address 2 \AND
% \Name{Author Name2\midlotherjointauthor\nametag{$^{1}$}} \Email{xyz@sample.edu}\\
% \Name{Author Name3\nametag{$^{2}$}} \Email{alphabeta@example.edu}\\
% \Name{Author Name4\midljointauthortext{Contributed equally}\nametag{$^{3}$}} \Email{uvw@foo.ac.uk}\\
% \addr $^{3}$ Address 3 \AND
% \Name{Author Name5\midlotherjointauthor\nametag{$^{4}$}} \Email{fgh@bar.com}\\
% \addr $^{4}$ Address 4
% }

\begin{document}

\maketitle


% Main-text sections, in the order in which they appear in the paper.
% NOTE(review): the numeric file prefixes do not match the inclusion order --
% 02-related is included after 04-results, and there is a jump to 07-discussion.
% Presumably intentional; confirm before renumbering or reordering.
\input{sections/00-abstract}
% Manual layout tweak: removes 5pt of vertical space before the introduction.
\vspace{-5pt}
\input{sections/01-introduction}
% Ends the current page (flushing pending floats) so the method section
% starts on a new page.
\clearpage
\input{sections/03-method}
\input{sections/04-results}
\input{sections/02-related}
\input{sections/07-discussion}


\section{Conclusion}
% In this work, we introduced ReX-MLE, a 20-challenge medical imaging benchmark revealing a large gap between general-purpose autonomous ML agents and the domain expertise required for medical AI. Leading agents achieve only 3.95–12.15\% mean percentile rank, with many submissions at the 0th percentile. Our analysis shows these failures stem from missing scientific and engineering practices rather than limits in time or model scale; agents rarely employ the strategies used by human experts, even when given solution reports. ReX-MLE highlights that scaling general agents alone is insufficient for scientific competence and underscores the need for architectures that incorporate domain knowledge, structured reasoning, and robust experimentation.

In this paper, we introduced ReX-MLE, a 20-challenge medical imaging benchmark that exposes a substantial gap between general-purpose autonomous ML agents and the domain expertise required for medical AI. We showed that leading agents achieve a mean percentile rank of only 4.53--12.15\% on ReX-MLE, with most submissions falling at the 0th percentile. Our capability analysis shows that these failures arise from missing scientific and engineering practices rather than insufficient time or model scale. Agents rarely demonstrate the core strategies used by human competition winners, even when provided with solution reports. These results challenge the notion that scaling general agents will naturally yield scientific competence and highlight the need for architectures that integrate domain knowledge, structured reasoning, and robust experimental workflows. We release ReX-MLE to support systematic progress toward autonomous systems capable of credible medical imaging research.

% \clearpage  % Acknowledgements, references, and appendix do not count toward the page limit (if any)
% % Acknowledgments---Will not appear in anonymized version
% \midlacknowledgments{We thank a bunch of people.}


\bibliography{midl26_20}


\input{sections/appendix}
% \appendix

% \section{Proof of Theorem 1}

% This is a boring technical proof of
% \begin{equation}\label{eq:example}
% \cos^2\theta + \sin^2\theta \equiv 1.
% \end{equation}

% \section{Proof of Theorem 2}

% This is a complete version of a proof sketched in the main text.

\end{document}
