%\documentclass{midl} % Include author names
\documentclass{midl} % Anonymized submission

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution
\usepackage{multicol}
\usepackage{multirow}
\usepackage{movie15}
\usepackage{adjustbox}
% \usepackage{media9}
\usepackage{mwe} % to get dummy images
\jmlrvolume{-- Accepted}
\jmlryear{2024}
\jmlrworkshop{Full Paper -- MIDL 2024}
%\editors{Under Review for MIDL 2024}

\title[HARP]{HARP: Unsupervised Histopathological Artifact Restoration}

 % Use \Name{Author Name} to specify the name.
 % If the surname contains spaces, enclose the surname
 % in braces, e.g. \Name{John {Smith Jones}} similarly
 % if the name has a "von" part, e.g \Name{Jane {de Winter}}.
 % If the first letter in the forenames is a diacritic
 % enclose the diacritic in braces, e.g. \Name{{\'E}louise Smith}

 % Two authors with the same address
 % \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\and
 %  \Name{Author Name2} \Email{xyz@sample.edu}\\
 %  \addr Address}

 % Three or more authors with the same address:
 % \midlauthor{\Name{Author Name1} \Email{an1@sample.edu}\\
 %  \Name{Author Name2} \Email{an2@sample.edu}\\
 %  \Name{Author Name3} \Email{an3@sample.edu}\\
 %  \addr Address}


% Authors with different addresses:
% \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\\
% \addr Address 1
% \AND
% \Name{Author Name2} \Email{xyz@sample.edu}\\
% \addr Address 2
% }

%\footnotetext[1]{Contributed equally}

% More complicated cases, e.g. with dual affiliations and joint authorship
\midlauthor{\Name{Moritz Fuchs\nametag{$^{1}$}} \Email{moritz.fuchs@gris.tu-darmstadt.de}\\
\addr $^{1}$ Technical University Darmstadt
\AND
\Name{Ssharvien Kumar\nametag{$^{1}$}} \Email{ssharvien.kumar.sivakumar@gris.tu-darmstadt.de}\\
\Name{Mirko Schöber\nametag{$^{1}$}} \Email{mirko.schöber@gris.tu-darmstadt.de}\\
\Name{Niklas Woltering\nametag{$^{2}$}} \Email{woltering@med.uni-frankfurt.de}\\
\addr $^{2}$ Johann Wolfgang Goethe University Medical School
\AND
\Name{Marie-Lisa Eich\nametag{$^{3}$}} \Email{marie-lisa.eich@charite.de}\\
\addr $^{3}$ Universitätsmedizin Berlin - Charité 
\AND
\Name{Leonille Schweizer\nametag{$^{2,3}$}} \Email{leonille.schweizer@med.uni-frankfurt.de}\\
\Name{Anirban Mukhopadhyay\nametag{$^{1}$}} \Email{anirban.mukhopadhyay@gris.tu-darmstadt.de}\\
}
%\author{Moritz Fuchs \IEEEmembership{Graduate Student Member, IEEE}, Ssharvien Kumar,Mirko Schöber, Leonille Schweizer, Yuri Tolkach, Anirban Mukhopadhyay%,\IEEEmembership{Fellow, IEEE}
%\thanks{This paragraph of the first footnote will contain the date on which
%you submitted your paper for review.
%This work was supported by the Bundesministerium für Gesundheit (BMG), Germany with grant [ZMVI1-2520DAT03A] and by the Bundesministerium für Bildung und Forschung (BMBF) with grant [01KD2210B].}
%\thanks{M. Fuchs, M. Konstantin, N. Schrade, and A. Mukhopadhyay are with Technical University Darmstadt, Darmstadt, 64283 Germany. (e-mail: moritz.fuchs@gris.informatik.tu-darmstadt.de).}
%\thanks{L. Schweizer is with the Institute of Neurology at Johann Wolfgang Goethe University Medical School, Frankfurt am Main, 60528 Germany.}
%\thanks{Y. Tolkach is with University Hospital Cologne, 50937 Germany.}}
%Ssharvien Kumar R Sivakumar, Mirko Schöber, Niklas Woltering, Marie-Lisa Eich, Leonille Schweizer, Anirban Mukhopadhyay
\begin{document}

\maketitle

\begin{abstract}
Histopathological analysis, vital for medical diagnostics, is often challenged by artifacts in sample preparation and imaging, such as staining inconsistencies and physical obstructions. Addressing this, our work introduces a novel, fully unsupervised histopathological artifact restoration pipeline (HARP). HARP integrates artifact detection, localization, and restoration into one pipeline. The first step to make artifact restoration applicable is an analysis of anomaly detection algorithms. Then, HARP leverages the power of unsupervised segmentation techniques to propose localizations for potential artifacts, for which we select the best localization based on our novel inpainting denoising diffusion model. Finally, HARP employs an inpainting model for artifact restoration while conditioning it on the artifact localizations. We evaluate the artifact detection quality along with the image reconstruction quality, surpassing the state of the art in artifact restoration. Furthermore, we demonstrate that HARP improves the robustness and reliability of downstream models and show that pathologists cannot tell the difference between clean images and images restored through HARP. This demonstrates that HARP significantly improves image quality and diagnostic reliability, enhancing histopathological examination accuracy for AI systems.
\end{abstract}

\begin{keywords}
Histopathology, Artifact Restoration, Diffusion Models, Unsupervised
\end{keywords}

\input{chapters/Introduction}
\input{chapters/RelatedWork}
\input{chapters/Method}
\input{chapters/Experiments}
\input{chapters/Conclusion}



% Acknowledgments---Will not appear in the anonymized version
\midlacknowledgments{This work was supported by the Bundesministerium für Gesundheit (BMG), Germany with grant [ZMVI1-2520DAT03A] and by the Bundesministerium für Bildung und Forschung (BMBF) with grant [01KD2210B]. Special thanks to Simon Streit for his valuable contribution to this study.}


\bibliography{bibliography}

\newpage
\appendix
%
\section{Anomalib Preliminary Evaluation}
\begin{table}[htbp]
\floatconts
  {tab:anomalib}%
  {\caption{Anomalib preliminary method evaluation: This table compares the anomaly detection algorithms included in the Anomalib framework. The results show how well the trained algorithms perform on a test dataset augmented with the four local artifacts from~\cite{schomig2021quality}. The algorithms are compared using the metrics AUROC, F1-Score, and accuracy. Algorithms marked with * were trained on a smaller dataset of 650 images due to hardware limitations. Algorithms marked with ** were limited to 30 training epochs due to time constraints. The experiment was run three times with different random seeds. We select the three highlighted methods (DRÆM, FastFlow, and STFPM) for further evaluation.}}%
  {\begin{tabular}{|l|c|c|c|}
  \hline
  \bfseries Method & \bfseries AUROC $\uparrow$ & \bfseries F1-Score $\uparrow$ &  \bfseries Accuracy $\uparrow$ \\\hline\hline
    CFA                     & 0.742 ± 0.031 & 0.689 ± 0.016 & 0.648 ± 0.026\\
    CFLOW                   & 0.702 ± 0.022 & 0.671 ± 0.018 & 0.573 ± 0.029\\
    DFM                     & 0.655 ± 0.014 & 0.743 ± 0.008 & 0.656 ± 0.014\\
    \bfseries DRÆM **       & \textbf{0.872 ± 0.015} & \textbf{0.819 ± 0.009} & \textbf{0.823 ± 0.010}\\
    Efficient AD **         & 0.627 ± 0.020 & 0.667 ± 0.001 & 0.501 ± 0.002\\
    \bfseries FastFlow **   & \textbf{0.869 ± 0.025} & \textbf{0.784 ± 0.020} & \textbf{0.777 ± 0.021}\\
    PaDiM *                 & 0.683 ± 0.017 & 0.695 ± 0.004 & 0.563 ± 0.009\\
    PatchCore *             & 0.683 ± 0.002 & 0.699 ± 0.000 & 0.569 ± 0.000\\
    Reverse Distillation    & 0.489 ± 0.059 & 0.663 ± 0.004 & 0.496 ± 0.004\\
    \bfseries STFPM **      & \textbf{0.769 ± 0.033} & \textbf{0.688 ± 0.014} & \textbf{0.624 ± 0.085}\\\hline
  \end{tabular}}
\end{table}
\FloatBarrier
\section{Artifact Localization Evaluation}

\begin{table}[htbp]
\centering
\caption{DICE scores between the ground truth (GT) masks from Stieber et al. (2022) and our unsupervised localizations for each type of artifact. We further compare the \textbf{undilated (u.)} HARP localizations with the GT masks to assess whether our initial localization under-segments the GT; the results show that the dilation in HARP is necessary. With dilation, the DICE score only decreases significantly for dark spots. However, we see in Table~\ref{tab:ArtifactRestoration1} that this has a positive impact on the image quality.}
\begin{adjustbox}{max width=\linewidth}
  {\begin{tabular}{|p{2.2cm}|p{1cm}|p{1.8cm}|p{1.3cm}|p{1cm}|p{1.1cm}|p{1cm}|p{0.9cm}|p{1.4cm}|p{1cm}|p{1cm}|}
  \hline
   \bfseries Artifact:              & \bfseries Dark Spot  & \bfseries Squamous Epithelia  & \bfseries Thread   & \bfseries Blood Cells  & \bfseries Blood Group  & \bfseries Com-press-ion  & \bfseries Cut & \bfseries Air Bubble & \bfseries Over-lap & \bfseries Fold-ing\\\hline
   \bfseries Metric:                & \multicolumn{10}{c|}{DICE [\%]} \\\hline\hline
    \bfseries GT                    &               &               &               &               &               &               &               &               &               &               \\
    \bfseries vs. (Ours)            & 66.6          & \textbf{31.1} & \textbf{83.8} & \textbf{20.9} & \textbf{50.9} & \textbf{60.4} & \textbf{42.9} & \textbf{68.1} & \textbf{58.7} & \textbf{62.3} \\
    \bfseries HARP                  &               &               &               &               &               &               &               &               &               &               \\\hline
    \bfseries GT                    &               &               &               &               &               &               &               &               &               &               \\
    \bfseries vs.                   & \textbf{68.4} & 30.0          & 80.7          & 18.4          & 48.7          & 60.2          & 40.5          & \textbf{68.2} & 57.8          & 61.0          \\
    \bfseries HARP (u.)             &               &               &               &               &               &               &               &               &               &               \\\hline
  \end{tabular}}
\end{adjustbox}
\label{tab:ArtifactLocalization}
\end{table}
\FloatBarrier
\section{Qualitative Examples}

\begin{figure}[htbp]
    \centering
    \includegraphics[width=\textwidth]{figures/aritfact_studysample.png}
    \caption{Qualitative samples from the study: We used pairs of images (top and bottom), where one is from the original training distribution and the other is a test sample whose artifact was removed by HARP. We then asked the participants to identify which image originally contained an artifact and had been processed by the pipeline. \\ Spoiler: Correct answers from left to right:\\ Bottom, Top, Top, Bottom, Top}
    \label{fig:study}
\end{figure}

\begin{figure}[htbp]
    \centering
    \includegraphics[width=\textwidth]{figures/aritfact_samples.png}
    \caption{Qualitative samples from HARP for each artifact: We selected random HARP samples. The blood cell and blood group examples clearly show the failure case in which the localization mask does not cover the entire artifact, usually due to the lack of a better localization proposal. The Air Bubble case is a rare sample with a natural fold/compression artifact that was segmented together with the air bubble and removed.}
    \label{fig:samples}
\end{figure}
\FloatBarrier
\section{Video of HARP in Action}

\begin{figure}[htbp]
    \centering
    %\movie[width=9cm,height=7cm, poster]{}{figures/HARP_Paper_Video.mp4}
    \includemovie[poster,autoplay,externalviewer, text={Load HARP Video \quad\quad\quad\quad\quad\quad\quad\quad\quad\quad\quad\quad\quad\quad\quad\quad\quad\quad  Click the symbol}]{6cm}{6cm}{figures/HARP_Paper_Video.mp4}
    \caption{Illustrative movie of the whole HARP pipeline. Playback works with Adobe Acrobat.}
    \label{fig:movie}
\end{figure}
\end{document}
