% FullObs Appendix Tables (auto-generated)

% Table 1: FullObs overall accuracy
\begin{table}[htbp]
% Overall per-model results, one row per model, ordered by the @+25 column
% (descending), as stated in the caption.
% Placement is [htbp] rather than a bare [h]: LaTeX silently rewrites [h] to
% [ht] with a warning, and a float that fits neither spot blocks every later
% float until the end of the document.
\centering
\caption{\textbf{FullObs v1 overall accuracy} (sorted by Acc@+25). Bold = best per column.}
\label{tab:fo_overall}
\small
\begin{tabular}{@{}lrrrr@{}}
\toprule
Model & @+25 & Acc\_all & Cov & Bloat \\
\midrule
% Bolding in the rows below: column maximum for @+25, Acc_all and Cov;
% column minimum for Bloat. Every tied entry is bolded.
Grok4 & \textbf{46.7\%} & \textbf{50.7\%} & 67.2\% & 4.0\% \\
GPT-5.2 & 19.5\% & 43.7\% & \textbf{100.0\%} & 24.3\% \\
Grok4.1f & 17.9\% & 17.9\% & 98.4\% & \textbf{0.0\%} \\
Gemini 3 & 15.2\% & 16.0\% & \textbf{100.0\%} & 0.8\% \\
DSR & 10.4\% & 10.4\% & 99.7\% & \textbf{0.0\%} \\
Opus 4.5 & 8.5\% & 8.5\% & 98.7\% & \textbf{0.0\%} \\
Hermes4 & 2.7\% & 2.7\% & 99.5\% & \textbf{0.0\%} \\
GPT-4o & 0.0\% & 0.0\% & \textbf{100.0\%} & \textbf{0.0\%} \\
\bottomrule
\end{tabular}
\end{table}

% Table 2: FullObs family breakdown
\begin{table}[htbp]
% Acc@+25 split by formula family (rows) for five models (columns).
% Placement is [htbp] rather than a bare [h]: LaTeX silently rewrites [h] to
% [ht] with a warning, and a float that fits neither spot blocks every later
% float until the end of the document.
\centering
\caption{\textbf{FullObs v1 Acc@+25 by formula family.}}
\label{tab:fo_family}
\small
\begin{tabular}{@{}lrrrrr@{}}
\toprule
Family & Grok4 & GPT-5.2 & Grok4.1f & Gemini 3 & DSR \\
\midrule
% Bolding here is per row: the highest value in each family is bold.
% Row Z, where every model scores 0.0%, carries no bold entry.
A & \textbf{100.0\%} & 66.7\% & 77.8\% & 77.8\% & 0.0\% \\
B & \textbf{15.3\%} & 13.6\% & 5.1\% & 10.2\% & 8.5\% \\
C & \textbf{11.5\%} & 7.7\% & 0.0\% & 7.7\% & 3.8\% \\
D & \textbf{81.2\%} & 8.3\% & 22.9\% & 4.2\% & 16.7\% \\
F & \textbf{65.7\%} & 2.9\% & 8.6\% & 0.0\% & 2.9\% \\
G & \textbf{9.1\%} & 0.0\% & 0.0\% & 0.0\% & 0.0\% \\
H & \textbf{67.1\%} & 27.1\% & 24.7\% & 16.5\% & 8.2\% \\
M & \textbf{28.6\%} & 0.0\% & 0.0\% & 0.0\% & 0.0\% \\
oth & 59.5\% & \textbf{64.3\%} & 52.4\% & 57.1\% & 38.1\% \\
Z & 0.0\% & 0.0\% & 0.0\% & 0.0\% & 0.0\% \\
\bottomrule
\end{tabular}
\end{table}

% Table 3: FullObs formula size breakdown
\begin{table}[htbp]
% Per-model split of predictions into four size bins relative to the gold
% formula; the bin boundaries are defined in the caption.
% Placement is [htbp] rather than a bare [h]: LaTeX silently rewrites [h] to
% [ht] with a warning, and a float that fits neither spot blocks every later
% float until the end of the document.
\centering
\caption{\textbf{FullObs v1 formula size breakdown for valid predictions.}
Compact = AST $<$ gold; Equal = gold $\leq$ AST $\leq$ gold+1;
Longer = gold+1 $<$ AST $\leq$ gold+25; Bloat = AST $>$ gold+25.}
\label{tab:fo_failures}
\small
\begin{tabular}{@{}lrrrr@{}}
\toprule
Model & Compact & Equal & Longer & Bloat \\
\midrule
% Bolding in the rows below: column maximum for Compact and Equal; column
% minimum (0.0%) for Longer and Bloat, with every tied entry bolded.
% Consistency check when regenerating: Compact + Equal + Longer in each row
% matches that model's @+25 value in tab:fo_overall up to rounding, and the
% Bloat column repeats the Bloat column of that table.
Grok4 & 2.7\% & \textbf{38.1\%} & 5.9\% & 4.0\% \\
GPT-5.2 & 2.4\% & 9.6\% & 7.5\% & 24.3\% \\
Grok4.1f & 2.4\% & 13.9\% & 1.6\% & \textbf{0.0\%} \\
Gemini 3 & \textbf{3.2\%} & 6.4\% & 5.6\% & 0.8\% \\
DSR & 1.9\% & 6.1\% & 2.4\% & \textbf{0.0\%} \\
Opus 4.5 & 2.7\% & 4.0\% & 1.9\% & \textbf{0.0\%} \\
Hermes4 & 0.8\% & 1.9\% & \textbf{0.0\%} & \textbf{0.0\%} \\
GPT-4o & 0.0\% & 0.0\% & \textbf{0.0\%} & \textbf{0.0\%} \\
\bottomrule
\end{tabular}
\end{table}