% Across-task summary table (auto-generated)
% Uses table* for full-width to fit all columns
\begin{table*}[t]
% Full-width summary of every model on the three tasks. Each task contributes a
% group of four columns under a spanning header; groups are separated with
% trimmed \cmidrule segments rather than vertical bars, which break against
% booktabs rules.
% NOTE(review): the parenthesised numbers in the task headers look like
% per-task instance counts (the Acc denominator) -- confirm and state in the caption.
% NOTE(review): the caption defines @+25 only for EC, but FullObs and CI carry
% an @+25 column too -- confirm it is the same budgeted metric.
% NOTE(review): Cov + Parse does not reach 100% in some rows (e.g. Grok4 on
% FullObs); presumably the remainder is missing outputs -- verify in the generator.
\centering
\caption{\textbf{Across-task v1 summary (snapshot).}
Tasks: \textsc{FullObs} (full observation), \textsc{CI} (contrastive induction), and \textsc{EC} (existential completion).
Acc reports \emph{Acc\_all}, exact-match accuracy with denominator = \emph{all} instances
(missing or unparsable outputs count as incorrect).
\textsc{EC} reports \emph{Validity} (Valid; unbounded existential-completion success)
and budgeted accuracy Acc@gold$+25$ (@+25).
Cov = coverage (fraction with parseable formula); Parse = parse error rate.}
\label{tab:summary_across_tasks}
\footnotesize
\begin{tabular}{@{}l rrrr rrrr rrrr@{}}
\toprule
 & \multicolumn{4}{c}{\textsc{FullObs} (375)} & \multicolumn{4}{c}{\textsc{CI} (200)} & \multicolumn{4}{c}{\textsc{EC} (200)} \\
\cmidrule(lr){2-5} \cmidrule(lr){6-9} \cmidrule(l){10-13}
Model & Acc & @+25 & Cov & Parse & Acc & @+25 & Cov & Parse & Valid & @+25 & Cov & Parse \\
\midrule
Grok4 & \textbf{50.7\%} & \textbf{46.7\%} & 67.2\% & 0.3\% & 78.0\% & \textbf{75.5\%} & 85.0\% & 0.0\% & 53.0\% & 53.0\% & 99.5\% & 0.0\% \\
GPT-5.2 & 43.7\% & 19.5\% & 100.0\% & 0.0\% & \textbf{82.5\%} & 73.0\% & 100.0\% & 0.0\% & \textbf{78.0\%} & \textbf{59.5\%} & 100.0\% & 0.0\% \\
Grok4.1f & 17.9\% & 17.9\% & 98.4\% & 1.6\% & 60.5\% & 60.5\% & 98.0\% & 2.0\% & 41.0\% & 41.0\% & 98.5\% & 1.5\% \\
Gemini 3 & 16.0\% & 15.2\% & 100.0\% & 0.0\% & 55.0\% & 55.0\% & 100.0\% & 0.0\% & 53.5\% & 52.0\% & 100.0\% & 0.0\% \\
DSR & 10.4\% & 10.4\% & 99.7\% & 0.3\% & 41.5\% & 41.5\% & 99.0\% & 0.5\% & 33.0\% & 33.0\% & 100.0\% & 0.0\% \\
Opus 4.5 & 8.5\% & 8.5\% & 98.7\% & 1.3\% & 34.0\% & 34.0\% & 100.0\% & 0.0\% & 30.0\% & 30.0\% & 99.0\% & 1.0\% \\
Hermes4 & 2.7\% & 2.7\% & 99.5\% & 0.5\% & 2.5\% & 2.5\% & 99.5\% & 0.5\% & 15.5\% & 15.5\% & 100.0\% & 0.0\% \\
GPT-4o & 0.0\% & 0.0\% & 100.0\% & 0.0\% & 0.5\% & 0.5\% & 99.0\% & 1.0\% & 2.0\% & 2.0\% & 100.0\% & 0.0\% \\
\bottomrule
\end{tabular}
\end{table*}