\begin{figure}[!t]
    \centering
    \includegraphics[width=0.92\textwidth]{figures/main/bel_goal_combined.pdf}
    
    \caption{Performance of language agents and humans across multiple episodes. (Left) Evolution of \believability{} scores as the number of episodes increases. (Right) Corresponding evolution of \goalcompletion{} scores. Scores of all models decline on both dimensions with the simpler memory method, whereas the advanced memory method leads to a significant improvement. Humans consistently demonstrate excellent performance.}
    \label{fig:main}
\end{figure}