\documentclass{midl} % Include author names
% \documentclass[anon]{midl} % Anonymized submission

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution

\usepackage{mwe} % to get dummy images
\usepackage{physics}
\usepackage{pifont}
\usepackage{mathtools}


% Custom named colours, given as RGB triples with 0-255 components.
% None of these names is referenced in this file; presumably they are used by
% the \input section files (e.g. for table/figure highlighting) -- verify
% before removing any of them.
\definecolor{LightYellow}{RGB}{218, 165, 32}
\definecolor{LightRed}{RGB}{255, 127, 127}
\definecolor{LightGreen}{RGB}{144, 238, 144}
\definecolor{LightBlue}{RGB}{173, 216, 230}
\definecolor{SkyBlue}{RGB}{135, 206, 235}

% 255, 174, 66



% \jmlrvolume{-- Under Review}
% \jmlryear{2024}
% \jmlrworkshop{Full Paper -- MIDL 2024 submission}
% \editors{Under Review for MIDL 2024}

\jmlryear{2024}\jmlrworkshop{Full Paper -- MIDL 2024}\jmlrvolume{-- 073}\editors{Accepted for publication at MIDL 2024}

% Colour-coded inline annotation helpers: \mb typesets its argument in red,
% \pp typesets its argument in blue. Neither macro is used in this file;
% NOTE(review): the names look like author initials used for draft comments --
% confirm they are unused in the \input files before dropping them.
\newcommand{\mb}[1]{\textcolor{red}{#1}}
\newcommand{\pp}[1]{\textcolor{blue}{#1}}

\title[GazeDiff]{GazeDiff: A radiologist visual attention guided diffusion model for zero-shot disease classification}

 % Use \Name{Author Name} to specify the name.
 % If the surname contains spaces, enclose the surname
 % in braces, e.g. \Name{John {Smith Jones}} similarly
 % if the name has a "von" part, e.g \Name{Jane {de Winter}}.
 % If the first letter in the forenames is a diacritic
 % enclose the diacritic in braces, e.g. \Name{{\'E}louise Smith}

 % Two authors with the same address
 % \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\and
 %  \Name{Author Name2} \Email{xyz@sample.edu}\\
 %  \addr Address}

 % Three or more authors with the same address:
 % \midlauthor{\Name{Author Name1} \Email{an1@sample.edu}\\
 %  \Name{Author Name2} \Email{an2@sample.edu}\\
 %  \Name{Author Name3} \Email{an3@sample.edu}\\
 %  \addr Address}


% Authors with different addresses:
% \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\\
% \addr Address 1
% \AND
% \Name{Author Name2} \Email{xyz@sample.edu}\\
% \addr Address 2
% }

%\footnotetext[1]{Contributed equally}

% More complicated cases, e.g. with dual affiliations and joint authorship
\midlauthor{\Name{Moinak Bhattacharya} \Email{moinak.bhattacharya@stonybrook.edu}\\
% \addr $^{1}$ Address 1 \\
% \addr $^{2}$ Address 2 \AND
\Name{Prateek Prasanna} \Email{prateek.prasanna@stonybrook.edu}\\
\addr Department of Biomedical Informatics, Stony Brook University, NY, US \\
% \Name{Author Name3\nametag{$^{2}$}} \Email{alphabeta@example.edu}\\
% \Name{Author Name4\midljointauthortext{Contributed equally}\nametag{$^{3}$}} \Email{uvw@foo.ac.uk}\\
% \addr $^{3}$ Address 3 \AND
% \Name{Author Name5\midlotherjointauthor\nametag{$^{4}$}} \Email{fgh@bar.com}\\
% \addr $^{4}$ Address 4
}

\begin{document}

\maketitle

\begin{abstract}
We present \textit{GazeDiff}, a novel architecture that leverages radiologists' eye gaze patterns as controls for 
% pretrained 
text-to-image diffusion models for zero-shot classification. Eye-gaze patterns provide important cues during the visual exploration process; existing diffusion-based models do not harness the valuable insights derived from these patterns during image interpretation. 
% We show that these patterns are crucial in image generation, by harnessing the complexity of visual search. 
% Leveraging this inherent complexity, 
\textit{GazeDiff} utilizes a novel expert visual attention-conditioned diffusion model to generate robust medical images. This model offers more than just image generation capabilities;
% , extending to the provision of conditional density estimates
 the density estimates derived from the gaze-guided diffusion model can effectively improve zero-shot classification performance.
%and perform zero-shot disease classification, without any additional training. 
We show the zero-shot classification efficacy of \textit{GazeDiff} on four publicly available datasets for two common pulmonary disease types, namely pneumonia and tuberculosis. Code is available at \url{https://github.com/lordmoinak1/GazeDiff}.
% outperforms the baselines on all the four datasets.
\end{abstract}

\begin{keywords}
Eye-gaze, diffusion, chest x-rays, disease classification, zero-shot.
\end{keywords}

\input{1_introduction}
\begin{figure}[htbp]
\floatconts
  {fig:main}
  {\caption{\textbf{Overview of the \textit{GazeDiff} architecture.} Radiologists' eye gaze patterns and the corresponding transcripts are collected. First, the Stable Diffusion block (SD-CXR) is locked and a trainable copy, connected with zero convolution blocks, is created and trained with radiologists' eye gaze patterns as conditions. Then, a chest x-ray image $\chi$ and a classifier prompt are fed to the finetuned gaze-conditioned model (CN-Gaze), which is used as a zero-shot classifier via the classifier objective.}}
  {\includegraphics[width=0.85\linewidth]{figures/midl2024_main.pdf}}
\end{figure}
\input{2_methods}
\input{3_experiments_and_results}
\input{4_conclusion}

% \section{Introduction}

% This is where the content of your paper goes.  Some random
% notes\footnote{Random footnote are discouraged}:
% \begin{itemize}
% \item You should use \LaTeX \cite{Lamport:Book:1989}.
% \item JMLR/PMLR uses natbib for references. For simplicity, here, \verb|\cite|  defaults to
%   parenthetical citations, i.e. \verb|\citep|. You can of course also
%   use \verb|\citet| for textual citations.
% \item Eprints such as arXiv papers can of course be cited \cite{Hinton:arXiv:2015:Distilling}. We recomend using a \verb|@misc| bibtex entry for these as shown in the sample bibliography.
% \item You should follow the guidelines provided by the conference.
% \item Read through the JMLR template documentation for specific \LaTeX
%   usage questions.
% \item Note that the JMLR template provides many handy functionalities
% such as \verb|\figureref| to refer to a figure,
% e.g. \figureref{fig:example},  \verb|\tableref| to refer to a table,
% e.g. \tableref{tab:example} and \verb|\equationref| to refer to an equation,
% e.g. \equationref{eq:example}.
% \end{itemize}

% \begin{table}[htbp]
%  % The first argument is the label.
%  % The caption goes in the second argument, and the table contents
%  % go in the third argument.
% \floatconts
%   {tab:example}%
%   {\caption{An Example Table}}%
%   {\begin{tabular}{ll}
%   \bfseries Dataset & \bfseries Result\\
%   Data1 & 0.12345\\
%   Data2 & 0.67890\\
%   Data3 & 0.54321\\
%   Data4 & 0.09876
%   \end{tabular}}
% \end{table}

% \begin{figure}[htbp]
%  % Caption and label go in the first argument and the figure contents
%  % go in the second argument
% \floatconts
%   {fig:example}
%   {\caption{Example Image}}
%   {\includegraphics[width=0.5\linewidth]{example-image}}
% \end{figure}

% \begin{algorithm2e}
% \caption{Computing Net Activation}
% \label{alg:net}
%  % older versions of algorithm2e have \dontprintsemicolon instead
%  % of the following:
%  %\DontPrintSemicolon
%  % older versions of algorithm2e have \linesnumbered instead of the
%  % following:
%  %\LinesNumbered
% \KwIn{$x_1, \ldots, x_n, w_1, \ldots, w_n$}
% \KwOut{$y$, the net activation}
% $y\leftarrow 0$\;
% \For{$i\leftarrow 1$ \KwTo $n$}{
%   $y \leftarrow y + w_i*x_i$\;
% }
% \end{algorithm2e}

% Acknowledgments---Will not appear in anonymized version
%\midlacknowledgments{Reported research was partly supported by NIH 1R21CA258493-01A1, and the OVPR and IEDM seed grants at Stony Brook University. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. We also thank Saumya Gupta for her insightful comments on the paper.}
\midlacknowledgments{The reported research was partly supported by NIH 1R21CA258493-01A1, NIH 75N92020D00021 (subcontract), and the OVPR and IEDM seed grants at Stony Brook University. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health.}


\bibliography{midl24_073}
\input{appendix}

\end{document}