\begin{figure}[!t]
    \centering
    \includegraphics[width=0.92\textwidth]{figures/hard/bel_goal_combined.pdf}
    \caption{Performance of humans and language agents equipped with the advanced memory method upon the introduction of harder social scenarios. The black vertical line marks the beginning of the harder scenarios. (Left) \believability scores over successive episodes. (Right) \goalcompletion scores. The models maintain their performance on \believability despite the harder scenarios, but their \goalcompletion scores drop significantly, unlike humans, who maintain consistent performance.}
    \label{fig:hard}
\end{figure}