% Main results table: one row per verification method, one column per metric.
% Rows and values below are typeset exactly as given; nothing is computed here.
\begin{table}[t]
  \centering
  \caption{Verification performance on BioDimBench. Values are computed over all candidate solutions.}
  % The label comes after \caption so that references pick up the table number.
  \label{tab:main_results}
  % \resizebox scales the tabular so its width is exactly \linewidth; the `!`
  % height keeps the aspect ratio. The trailing % signs suppress stray spaces
  % inside the scaled box.
  % NOTE(review): scaling also changes the effective font size of the table;
  % consider tabular*/tabularx if a consistent font size is required -- confirm
  % against the target column width before changing.
  \resizebox{\linewidth}{!}{%
    \begin{tabular}{lcccc}
      \toprule
      % Header row. The \uparrow presumably marks higher-is-better metrics, and
      % the "Invalid" columns presumably score the invalid class -- verify
      % against the paper text.
      Method         & Accuracy $\uparrow$ & Invalid Precision $\uparrow$ & Invalid Recall $\uparrow$ & Invalid F1 $\uparrow$ \\
      \midrule
      Answer only    & 0.667               & 1.000                        & 0.600                     & 0.750                 \\
      Unit only      & 0.333               & 1.000                        & 0.200                     & 0.333                 \\
      Numeric + unit & 1.000               & 1.000                        & 1.000                     & 1.000                 \\
      Step-aware     & 1.000               & 1.000                        & 1.000                     & 1.000                 \\
      \bottomrule
    \end{tabular}%
  }
\end{table}
