\documentclass{midl} % Include author names

\makeatletter
% User-level entry point \subfigure[<caption>]: opens a group, stores the
% optional caption text in \@subfigcap and hands over to \@subfigure, which
% reads the remaining arguments and closes the group again with \egroup.
% The automatic (a), (b), ... label is left out by the redefined \@subfigure.
\renewcommand*{\subfigure}[1][]{\bgroup\def\@subfigcap{#1}\@subfigure}

% Worker behind \subfigure: typesets the subfigure contents with the caption
% text (if any) centred underneath, without an automatic label.
%   #1 (optional, default b): vertical alignment handed to the tabular
%   #2: the subfigure contents
% Expects \@subfigcap to be set and a group to be open (both done by
% \subfigure); the trailing \egroup closes that group.
\renewcommand*{\@subfigure}[2][b]{%
  % Not \global, so the change to the figure counter ends with the group.
  \advance\c@figure\@ne
  \refstepcounter{subfigure}%
  % An empty \@subfigcap expands to nothing, which leaves a zero-width box.
  \sbox\@subfloatcapbox{\@subfigcap}%
  \sbox\@subfloatcontsbox{#2}%
  % Single centred column without side padding: contents first, then the
  % caption row, which is only emitted when the caption box has some width.
  \begin{tabular}[#1]{@{}c@{}}%
    \usebox\@subfloatcontsbox
    \ifdim\wd\@subfloatcapbox>\z@
      \\ \usebox\@subfloatcapbox
    \fi
  \end{tabular}%
  \egroup
}
\makeatother

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution

\usepackage{mwe} % to get dummy images
\usepackage{algorithm}
\usepackage{algorithmic}
\usepackage{multirow}
%\usepackage{caption}
%\usepackage{subcaption}
\usepackage{tablefootnote}
\usepackage{placeins}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{xcolor}
%\usepackage{subfigure}
%\renewcommand{\thesubfigure}{}

% Proceedings metadata: volume, year, workshop name and editors line.
% NOTE(review): presumably consumed by the midl/jmlr class for the title page
% and running heads -- confirm against the class documentation.
\jmlrvolume{-- 37}
\jmlryear{2026}
\jmlrworkshop{Full Paper -- MIDL 2026}
\editors{Accepted for publication at MIDL 2026}

% \title[<short title>]{<full title>}
\title[Impact of Reconstruction on Downstream AI Fairness and Performance]{Evaluating the Impact of Medical Image Reconstruction on Downstream AI Fairness and Performance}

 % Use \Name{Author Name} to specify the name.
 % If the surname contains spaces, enclose the surname
 % in braces, e.g. \Name{John {Smith Jones}} similarly
 % if the name has a "von" part, e.g \Name{Jane {de Winter}}.
 % If the first letter in the forenames is a diacritic
 % enclose the diacritic in braces, e.g. \Name{{\'E}louise Smith}

 % Two authors with the same address
 % \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\and
 %  \Name{Author Name2} \Email{xyz@sample.edu}\\
 %  \addr Address}

 % Four authors with several affiliations: the superscripts in \nametag
 % refer to the numbered \addr lines below.
 \midlauthor{\Name{Matteo Wohlrapp\nametag{$^{1,2}$}} \Email{matteo.wohlrapp@cdtm.de}\\
  \Name{Niklas Bubeck\nametag{$^{2,3}$}} \Email{niklas.bubeck@tum.de}\\
  \Name{Daniel Rueckert\nametag{$^{2,3,4}$}} \Email{daniel.rueckert@tum.de}\\
  \Name{William Lotter\nametag{$^{1,5}$}} \Email{lotterb@ds.dfci.harvard.edu}\\
  \addr $^{1}$Department of Data Science, Dana-Farber Cancer Institute, Boston, MA, USA\\
  \addr $^{2}$AI in Medicine, Technical University of Munich, Munich, Germany\\
  \addr $^{3}$Munich Center for Machine Learning (MCML), Munich, Germany\\
  \addr $^{4}$Department of Computing, Imperial College London, London, UK\\
   \addr $^{5}$Harvard Medical School, Boston, MA, USA
  }


% Authors with different addresses:
% \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\\
% \addr Address 1
% \AND
% \Name{Author Name2} \Email{xyz@sample.edu}\\
% \addr Address 2
% }

%\footnotetext[1]{Contributed equally}

% More complicated cases, e.g. with dual affiliations and joint authorship
% \midlauthor{\Name{Author Name1\midljointauthortext{Contributed equally}\nametag{$^{1,2}$}} \orcid{1111-2222-3333-4444} \Email{abc@sample.edu}\\
% \addr $^{1}$ Address 1 \\
% \addr $^{2}$ Address 2 \AND
% \Name{Author Name2\midlotherjointauthor\nametag{$^{1}$}} \Email{xyz@sample.edu}\\
% \Name{Author Name3\nametag{$^{2}$}} \Email{alphabeta@example.edu}\\
% \Name{Author Name4\midljointauthortext{Contributed equally}\nametag{$^{3}$}} \Email{uvw@foo.ac.uk}\\
% \addr $^{3}$ Address 3 \AND
% \Name{Author Name5\midlotherjointauthor\nametag{$^{4}$}} \Email{fgh@bar.com}\\
% \addr $^{4}$ Address 4
% }

\begin{document}

\maketitle

\begin{abstract}
AI-based image reconstruction models are increasingly deployed in clinical workflows to improve image quality from noisy data, such as low-dose X-rays or accelerated MRI scans. However, these models are typically evaluated using pixel-level metrics like PSNR, leaving their impact on downstream diagnostic performance and fairness unclear. We introduce a scalable evaluation framework that applies reconstruction and diagnostic AI models in tandem, which we apply to two tasks (classification, segmentation), three reconstruction approaches (U-Net, GAN, diffusion), and two data types (X-ray, MRI) to assess the potential downstream implications of reconstruction. We find that conventional reconstruction metrics poorly track task performance, with diagnostic accuracy remaining largely stable even as reconstruction PSNR declines with increasing image noise. Fairness metrics exhibit greater variability, with reconstruction sometimes amplifying demographic biases, particularly regarding patient sex. However, the overall magnitude of this additional bias is modest compared to the inherent biases already present in diagnostic models. To explore potential bias mitigation, we adapt two strategies from the classification literature to the reconstruction setting, but observe limited efficacy. Overall, our findings emphasize the importance of holistic performance and fairness assessments throughout the entire medical imaging workflow, especially as generative reconstruction models are increasingly deployed.

%Reconstruction models designed to improve image quality from noisy or undersampled data, such as low-dose X-rays or accelerated MRI scans, are increasingly deployed in clinical workflows. However, these models are typically evaluated using pixel-level metrics like PSNR, leaving their impact on downstream diagnostic performance and fairness unclear. We introduce a scalable evaluation framework that simulates a realistic clinical workflow by chaining reconstruction with segmentation and classification models to address this gap. Using publicly available MRI and X-ray datasets, we demonstrate that conventional reconstruction metrics poorly track downstream performance, where diagnostic accuracy remains mainly stable even as reconstruction PSNR declines with increasing image noise. Conversely, fairness metrics exhibit greater variability, with reconstruction sometimes amplifying demographic biases, particularly regarding patient sex. However, the overall magnitude of this additional bias is modest compared to the inherent biases already present in diagnostic models. To explore potential bias mitigation, we adapt three established classification bias-reduction strategies to the reconstruction setting, but observe limited efficacy. Overall, our findings emphasize the importance of holistic performance and bias assessments throughout the entire medical imaging workflow, providing insights toward developing fairer and more effective AI systems in healthcare.
\end{abstract}

\begin{keywords}
Fairness, Image Reconstruction, GANs, Diffusion Models
\end{keywords}

% Overview figure of the pipeline, placed after the keywords and ahead of the
% first section.
% NOTE(review): the placement specifier is `h' only; if the figure does not
% fit at this point LaTeX may change it to `ht' with a warning -- confirm the
% position in the compiled PDF.
\begin{figure*}[h]
\centering
\includegraphics[width=\linewidth]{figs/method.pdf}
% Manual negative space between the graphic and the caption (layout tuning);
% re-check this value whenever figs/method.pdf changes.
\vspace{-20pt}
\caption{Combined pipeline for downstream bias evaluation and mitigation in medical image reconstruction. MRI and X-ray images undergo realistic simulated degradation and are subsequently reconstructed with three approaches before serving as input to downstream prediction models. Reconstruction quality, downstream performance, and fairness are evaluated. Subsequently, two bias mitigation strategies are applied exclusively during reconstruction fine-tuning.}
\label{fig:method}
\end{figure*}

% Main body: one file per section under sections/, included in reading order.
\input{sections/introduction}
\input{sections/related_work}
\input{sections/methods}
\input{sections/results}
\input{sections/discussion}
\input{sections/conclusion}


\clearpage  


% References are read from the midl26_37 bibliography database.
\bibliography{midl26_37}

% The appendix starts on a fresh page after the references.
\clearpage
% NOTE(review): \appendix is commented out here; unless sections/appendix
% issues it itself, the appendix headings will continue the main section
% numbering -- confirm this is intended.
%\appendix
\input{sections/appendix}

\end{document}
