% !TEX root = ../../main.tex

\begin{figure}
    % Teaser figure: one image on the top row, two side-by-side panels below.
    % \centering (rather than a center environment) is used so that no extra
    % vertical list spacing is inserted inside the float and so that BOTH rows
    % are centred, not just the top image.
    \centering
    \includegraphics[width=0.48\linewidth]{figures/first_figure/fox_living17.pdf}
    % End the first row explicitly; \medskip keeps a modest gap between the rows.
    \par\medskip
    % Bottom row: two panels of 0.48\linewidth each. The line break between the
    % two minipage environments yields a normal inter-word space separating them.
    \begin{minipage}{0.48\linewidth}
      \includegraphics[width=\linewidth]{figures/first_figure/fox_fl2.pdf}
    \end{minipage}
    \begin{minipage}{0.48\linewidth}
      \includegraphics[width=\linewidth]{figures/first_figure/fox_fl1.pdf}
    \end{minipage}
    % The trailing % after \caption{ and after the last sentence prevent stray
    % spaces at the start and end of the caption text.
    \caption{%
    % --- Earlier caption drafts, kept for reference (not typeset) ---
    % Visualization of two detected failure modes on class ``fox" over Living17 dataset\citep{santurkar2020breeds}. 
    % Accuracy of model over images of class ``fox" is $81.96\%$ but those two groups show a significant lower accuracy. e.g., when tags ``grass", ``stand", ``field", and ``dry" appear in the images, model's accuracy is $47.83\%$.
    % Appearance of tags ``white" and ``zoo" also represent a~harder subpopulation where model's accuracy drops to $35.29\%$.
    % We refer to Appendix~\ref{sec:app-vis} for more visualization over different datasets.
    % While the overall accuracy is $81.96\%$,
    % we detect and describe two coherent subsets of images for which accuracy significantly drops: accuracy for foxes standing in dry grass fields drops to $47.83\%$,
    % while white foxes in a zoo are classified with only $35.29\%$ accuracy. See Appendix~\ref{sec:app-vis} for more examples.
    % --- Current caption ---
    Visualization of two detected failure modes of class ``fox'' on a model trained on Living17.
    Overall accuracy for images of class ``fox'' is $81.96\%$. However,
    we identify two coherent subsets of images with significant accuracy drops:
    foxes standing in dry grass fields ($47.83\%$ accuracy) and foxes in a zoo where a white object (a fox or another object) is detected ($35.29\%$ accuracy).
    See Appendix~\ref{sec:app-vis} for more examples.%
    }
    % \label must follow \caption so that it refers to the figure number.
    \label{fig:teaser}
\end{figure}