% Correlation coefficients between base-LLM benchmark scores (rows) and the
% VirtualHome goal-interpretation F1 metrics (columns: node, edge, action, all).
% Requires booktabs (\toprule/\midrule/\bottomrule) and threeparttable
% (threeparttable/tablenotes environments).
\begin{table}[htbp]
\centering
\caption{Correlation between Base LLM Benchmarks and VirtualHome Goal Interpretation Task Performance}
\label{tab:virtualhome_goal_interpretation_correlation}
\footnotesize
% The caption stays outside threeparttable so its width is unaffected; the
% threeparttable wrapper ties the notes below to the width of the tabular.
\begin{threeparttable}
\begin{tabular}{lcccc}
\toprule
Base LLM Metrics & Node F1 & Edge F1 & Action F1 & All F1 \\
\midrule
% Highlighting convention (see the table note): italic marks
% 0.5 <= |r| < 0.7; bold would mark |r| >= 0.7, but no value in this
% table reaches that threshold, so no cell is bold.
GPQA & 0.495 & \textit{0.531} & 0.339 & \textit{0.554} \\
MUSR & 0.237 & 0.305 & 0.175 & 0.289 \\
IFEval & 0.087 & 0.163 & 0.314 & 0.162 \\
MMLU-PRO & 0.372 & 0.456 & 0.250 & 0.427 \\
BBH & 0.459 & \textit{0.567} & 0.343 & \textit{0.526} \\
MATH Lvl 5 & 0.136 & 0.274 & 0.163 & 0.187 \\
\bottomrule
\end{tabular}
\begin{tablenotes}
\footnotesize
\item Note: Bold values indicate strong correlations ($|r| \geq 0.7$); italic values indicate moderate correlations ($0.5 \leq |r| < 0.7$).
\end{tablenotes}
\end{threeparttable}
\end{table}
