\documentclass{midl} % Include author names

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution

\usepackage{mwe} % to get dummy images
\usepackage{caption}
\usepackage{booktabs}

% \stoptocwriting: append \setcounter{tocdepth}{-5} to the .toc file.
% A depth of -5 is below every standard sectioning level, so entries that
% are recorded after this point are not printed when the table of contents
% is typeset (until a later line in the .toc raises tocdepth again).
\newcommand*{\stoptocwriting}{%
  \addtocontents{toc}{\protect\setcounter{tocdepth}{-5}}%
}
% \resumetocwriting: append a \setcounter{tocdepth}{<n>} line to the .toc
% file. \setcounter is \protect-ed and therefore written verbatim, whereas
% \arabic{tocdepth} is not, so <n> is the value the counter holds when the
% entry is written out rather than when the .toc is read back.
\newcommand*{\resumetocwriting}{%
  \addtocontents{toc}{%
    \protect\setcounter{tocdepth}{\arabic{tocdepth}}%
  }%
}

\jmlryear{2025}
\jmlrworkshop{Full Paper -- MIDL 2025}
\jmlrvolume{-- 153}
\editors{Accepted for publication at MIDL 2025}

\title[MedVAE: Efficient Automated Interpretation of Medical Images]{MedVAE: Efficient Automated Interpretation of Medical Images with Large-Scale Generalizable Autoencoders}

 % Use \Name{Author Name} to specify the name.
 % If the surname contains spaces, enclose the surname
 % in braces, e.g. \Name{John {Smith Jones}} similarly
 % if the name has a "von" part, e.g \Name{Jane {de Winter}}.
 % If the first letter in the forenames is a diacritic
 % enclose the diacritic in braces, e.g. \Name{{\'E}louise Smith}

 % Two authors with the same address
 % \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\and
 %  \Name{Author Name2} \Email{xyz@sample.edu}\\
 %  \addr Address}

 % Three or more authors with the same address:
 % \midlauthor{\Name{Author Name1} \Email{an1@sample.edu}\\
 %  \Name{Author Name2} \Email{an2@sample.edu}\\
 %  \Name{Author Name3} \Email{an3@sample.edu}\\
 %  \addr Address}


% Authors with different addresses:
% \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\\
% \addr Address 1
% \AND
% \Name{Author Name2} \Email{xyz@sample.edu}\\
% \addr Address 2
% }

%\footnotetext[1]{Contributed equally}

% More complicated cases, e.g. with dual affiliations and joint authorship
\midlauthor{
\Name{Maya Varma\midljointauthortext{Equal Contribution}\nametag{$^{1}$}} \Email{mayavarma@cs.stanford.edu}\\
\Name{Ashwin Kumar\midlotherjointauthor\nametag{$^{1}$}} \Email{akkumar@stanford.edu}\\
\Name{Rogier van der Sluijs\midlotherjointauthor\nametag{$^{1}$}} \Email{sluijs@stanford.edu}\\
\Name{Sophie Ostmeier\nametag{$^{1}$}} \Email{sostm@stanford.edu}\\
\Name{Louis Blankemeier\nametag{$^{1}$}} \Email{lblankem@stanford.edu}\\
\Name{Pierre Chambon\nametag{$^{1}$}} \Email{pchambon@stanford.edu}\\
\Name{Christian Bluethgen\nametag{$^{1}$}} \Email{bluethgen@stanford.edu}\\
\Name{Jip Prince\nametag{$^{2}$}} \Email{jipfprince@gmail.com}\\
\Name{Curtis Langlotz\nametag{$^{1}$}} \Email{langlotz@stanford.edu}\\
\Name{Akshay Chaudhari\nametag{$^{1}$}} \Email{akshaysc@stanford.edu}\\
\addr $^{1}$ Stanford Center for Artificial Intelligence in Medicine and Imaging, Stanford University, USA \\
\addr $^{2}$ UMC Utrecht, Netherlands
}

\begin{document}
\stoptocwriting

\maketitle

\begin{abstract}
Medical images are acquired at high resolutions with large fields of view in order to capture fine-grained features necessary for clinical decision-making. Consequently, training deep learning models on medical images can incur large computational costs. In this work, we address the challenge of downsizing medical images in order to improve downstream computational efficiency while preserving clinically relevant features. We introduce \textit{MedVAE}, a family of six large-scale 2D and 3D autoencoders capable of encoding medical images as downsized latent representations and decoding latent representations back to high-resolution images. We train MedVAE autoencoders using a novel two-stage training approach with 1,052,730 medical images. Across diverse tasks obtained from 20 medical image datasets, we demonstrate that (1) utilizing MedVAE latent representations in place of high-resolution images when training downstream models can lead to efficiency benefits (up to 70$\times$ improvement in throughput) while simultaneously preserving clinically relevant features and (2) MedVAE can decode latent representations back to high-resolution images with high fidelity. Our work demonstrates that large-scale, generalizable autoencoders can help address critical efficiency challenges in the medical domain.\footnote{Code: \url{https://github.com/StanfordMIMI/MedVAE}}
\end{abstract}

\begin{keywords}
computer-aided detection and diagnosis, variational autoencoders, efficiency
\end{keywords}

\input{sections/1_intro}

\input{sections/1_related_works}
\input{sections/2_methods}
\input{sections/3_results}
\input{sections/4_discussion}

\clearpage  % Acknowledgements, references, and appendix do not count toward the page limit (if any)
% Acknowledgments---Will not appear in anonymized version
\midlacknowledgments{MV is supported by graduate fellowship awards from the Department of Defense (NDSEG), the Knight-Hennessy Scholars program at Stanford University, and the Quad program. AK is supported by graduate fellowships from the Knight-Hennessy Scholars program at Stanford University and the Tau Beta Pi society. RS was supported by the Rubicon fellowship of the Dutch National Research Council (NWO). AC is supported by NIH grants R01 HL167974, R01 HL169345, R01 AR077604, R01 EB002524, R01 AR079431, P41 EB027060, AY2 AX000045, and 1AYS AX0000024-01; ARPA-H grants AY2AX000045 and 1AYSAX0000024-01; and NIH contracts 75N92020C00008 and 75N92020C00021. AC has provided consulting services to Patient Square Capital, Chondrometrics GmbH, and Elucid Bioimaging; is co-founder of Cognita; and has equity interest in Cognita, Subtle Medical, LVIS Corp, and Brain Key. CL is supported by NIH grants R01 HL155410 and R01 HL157235, by AHRQ grant R18HS026886, and by the Gordon and Betty Moore Foundation. CL is also supported by the Medical Imaging and Data Resource Center (MIDRC), which is funded by the National Institute of Biomedical Imaging and Bioengineering (NIBIB) under contract 75N92020C00021 and through the Advanced Research Projects Agency for Health (ARPA-H).

\noindent This research was funded, in part, by the Advanced Research Projects Agency for Health (ARPA-H). The views and conclusions contained in this document are those of the authors and should not be interpreted as representing the official policies, either expressed or implied, of the U.S. Government.}


\bibliography{midl25_153}

\clearpage
\appendix
\tableofcontents

\resumetocwriting


\input{appendix/1_extendedmethods}
\input{appendix/2_extendedresults}
\input{appendix/3_extendeddiscussion}

\end{document}
