% !TEX root = ../../main.tex

\begin{figure}
    % Two side-by-side panels, each 0.48\linewidth wide:
    % Living17 on the left and CelebA on the right, matching the caption.
    \begin{minipage}{0.48\linewidth}
      \includegraphics[width=\linewidth]{figures/generalization/living17_train_test_n30_t50_d30_fixed_rad.pdf}
    \end{minipage}%  <- trailing % suppresses the interword space so \hfill alone sets the gap
    \hfill % push the panels to the left/right margins; the leftover 0.04\linewidth becomes the gutter
    \begin{minipage}{0.48\linewidth}
      \includegraphics[width=\linewidth]{figures/generalization/celeba_age_train_test_n50_t200_d30_fixed_rad.pdf}
    \end{minipage}
    \caption{
    Evaluating detected failure modes on unseen data. \textbf{(Left)}: We extract failure modes on the Living17 dataset using $s=30$ and $a=30\%$.
    $132$ failure groups (over $17$ classes) are detected, and around $86.01\%$ of the detected failure modes exhibit at least a $25\%$ drop in accuracy on unseen data, which shows a significant degree of generalization.
    \textbf{(Right)}: The same evaluation on the CelebA dataset, where the parameters for failure mode detection are $s=50$ and $a=30\%$. Around $79.31\%$ of the failure modes show a drop of at least $20\%$.
    Both plots exhibit a $y=x$ trend.
    }
    \label{fig:generalization}
\end{figure}