% \begin{figure}
%     \centering
%     \begin{minipage}[t]{.48\textwidth}
%       \centering
%       \includegraphics[width=1.\linewidth]{figures/generation/bar-generation-living-17-tagging-10x8.pdf}
%       \captionof{figure}{The difference in classifier accuracy between images generated from success mode and failure mode captions on Living17.}
%       \label{fig:generation_gap}
%     \end{minipage}%
%     \hspace{0.2cm}
%     \begin{minipage}[t]{.48\textwidth}
%       \centering
%       \includegraphics[width=1.\linewidth]{figures/generation/generation-example.pdf}
%       \captionof{figure}{The accuracies and example images corresponding to one success mode and one failure mode caption for the ``bear'' and ``parrot'' classes from Living17.}
%       \label{fig:generation_examples}
%     \end{minipage}
%     \label{fig:generation_results}
% \end{figure}


\begin{figure}
    \centering
    \includegraphics[width=1.\linewidth]{figures/generation/images.pdf}
    \caption{
    Accuracy of the model over $50$ generated images corresponding to one of the success modes and one of the failure modes for the classes ``bear'', ``parrot'', and ``fox'' from Living17.
    The accuracy gap shows that our method can identify hard and easy subpopulations. The images show that the extracted tags are capable of describing detailed images.
    }
    \label{fig:generation_imgs}
\end{figure}

