% CI Appendix Tables (auto-generated)

% CI overall table (label: tab:ci_overall).
% Rows are sorted by the @+25 column in descending order; that column is the
% Acc@+25 metric named in the caption.
% Bold marks the best value of each column: the highest for @+25, Acc_all and
% Cov, and the lowest for Bloat. Tied best values are all set in bold.
% NOTE: the file header marks this file as auto-generated, so any change made
% here should be mirrored in the generator.
% Placement is [htbp] rather than a bare [h]: LaTeX rewrites a lone [h] to [ht]
% with a warning, and a float that fits in neither place gets stuck and holds
% back every later table.
\begin{table}[htbp]
  \centering
  \caption{\textbf{CI v1 overall accuracy} (sorted by Acc@+25). Bold = best per column.}
  \label{tab:ci_overall}
  \small
  \begin{tabular}{@{}lrrrr@{}}
    \toprule
    Model & @+25 & Acc\_all & Cov & Bloat \\
    \midrule
    Grok4 & \textbf{75.5\%} & 78.0\% & 85.0\% & 2.5\% \\
    GPT-5.2 & 73.0\% & \textbf{82.5\%} & \textbf{100.0\%} & 9.5\% \\
    Grok4.1f & 60.5\% & 60.5\% & 98.0\% & \textbf{0.0\%} \\
    Gemini 3 & 55.0\% & 55.0\% & \textbf{100.0\%} & \textbf{0.0\%} \\
    DSR & 41.5\% & 41.5\% & 99.0\% & \textbf{0.0\%} \\
    Opus 4.5 & 34.0\% & 34.0\% & \textbf{100.0\%} & \textbf{0.0\%} \\
    Hermes4 & 2.5\% & 2.5\% & 99.5\% & \textbf{0.0\%} \\
    GPT-4o & 0.5\% & 0.5\% & 99.0\% & \textbf{0.0\%} \\
    \bottomrule
  \end{tabular}
\end{table}

% CI family breakdown (label: tab:ci_family).
% The generator tagged this "Table 6"; the printed number is assigned by LaTeX,
% so refer to the table through \ref{tab:ci_family} instead of a fixed number.
% One row per formula family, one column per model; each cell is Acc@+25.
% Bold marks the best (highest) value within each row; ties are all bolded.
% NOTE(review): Gemini 3 shows 0.0% on family G while every other listed model
% shows 100.0% -- verify against the raw results before publication.
% Placement is [htbp] rather than a bare [h]: LaTeX rewrites a lone [h] to [ht]
% with a warning, and a float that fits in neither place gets stuck and holds
% back every later table.
\begin{table}[htbp]
  \centering
  \caption{\textbf{CI v1 Acc@+25 by formula family.} Bold = best per row.}
  \label{tab:ci_family}
  \small
  \begin{tabular}{@{}lrrrrr@{}}
    \toprule
    Family & Grok4 & GPT-5.2 & Grok4.1f & Gemini 3 & DSR \\
    \midrule
    A & \textbf{100.0\%} & \textbf{100.0\%} & \textbf{100.0\%} & 85.7\% & \textbf{100.0\%} \\
    B & \textbf{36.4\%} & 27.3\% & 22.7\% & 18.2\% & 13.6\% \\
    C & 5.3\% & \textbf{10.5\%} & 0.0\% & 0.0\% & 0.0\% \\
    D & 70.0\% & \textbf{80.0\%} & 60.0\% & 55.0\% & 55.0\% \\
    F & \textbf{89.5\%} & \textbf{89.5\%} & 78.9\% & 57.9\% & 63.2\% \\
    G & \textbf{100.0\%} & \textbf{100.0\%} & \textbf{100.0\%} & 0.0\% & \textbf{100.0\%} \\
    H & \textbf{92.3\%} & 84.6\% & 71.8\% & 59.0\% & 25.6\% \\
    oth & \textbf{100.0\%} & \textbf{100.0\%} & 90.0\% & 50.0\% & 70.0\% \\
    Z & \textbf{90.3\%} & 85.5\% & 69.4\% & 80.6\% & 50.0\% \\
    \bottomrule
  \end{tabular}
\end{table}

% CI band failure breakdown, core band (label: tab:ci_band_core).
% The generator tagged this "Table 8"; the printed number is assigned by LaTeX,
% so refer to the table through \ref{tab:ci_band_core} instead of a fixed number.
% Rows are sorted by the Correct column in descending order.
% Bold marks the best value of each column: the highest Correct rate and the
% lowest YES-fail / NO-fail rates.
% NOTE(review): rows need not sum to 100% (e.g. Grok4: 80.0 + 3.3 + 3.3 = 86.6);
% presumably the remainder is an outcome not listed here -- confirm with the
% generator before describing it in the text.
% Placement is [htbp] rather than a bare [h]: LaTeX rewrites a lone [h] to [ht]
% with a warning, and a float that fits in neither place gets stuck and holds
% back every later table.
\begin{table}[htbp]
  \centering
  \caption{\textbf{CI v1 failure modes: core band.} YES-fail = formula doesn't match positive examples; NO-fail = formula accidentally matches negative examples. Bold = best per column.}
  \label{tab:ci_band_core}
  \small
  \begin{tabular}{@{}lrrr@{}}
    \toprule
    Model & Correct & YES-fail & NO-fail \\
    \midrule
    GPT-5.2 & \textbf{85.0\%} & 14.2\% & \textbf{0.8\%} \\
    Grok4 & 80.0\% & \textbf{3.3\%} & 3.3\% \\
    Grok4.1f & 61.7\% & 20.8\% & 14.2\% \\
    Gemini 3 & 58.3\% & 27.5\% & 14.2\% \\
    DSR & 41.7\% & 43.3\% & 15.0\% \\
    Opus 4.5 & 34.2\% & 37.5\% & 28.3\% \\
    Hermes4 & 3.3\% & 85.8\% & 10.8\% \\
    GPT-4o & 0.8\% & 95.0\% & 3.3\% \\
    \bottomrule
  \end{tabular}
\end{table}

% CI band failure breakdown, lift_mix band (label: tab:ci_band_lift_mix).
% Rows are sorted by the Correct column in descending order.
% Bold marks the best value of each column: the highest Correct rate and the
% lowest YES-fail / NO-fail rates.
% NOTE(review): rows need not sum to 100% (e.g. Grok4: 75.0 + 6.2 + 1.2 = 82.4);
% presumably the remainder is an outcome not listed here -- confirm with the
% generator before describing it in the text.
% Placement is [htbp] rather than a bare [h]: LaTeX rewrites a lone [h] to [ht]
% with a warning, and a float that fits in neither place gets stuck and holds
% back every later table.
\begin{table}[htbp]
  \centering
  \caption{\textbf{CI v1 failure modes: lift\_mix band.} YES-fail = formula doesn't match positive examples; NO-fail = formula accidentally matches negative examples. Bold = best per column.}
  \label{tab:ci_band_lift_mix}
  \small
  \begin{tabular}{@{}lrrr@{}}
    \toprule
    Model & Correct & YES-fail & NO-fail \\
    \midrule
    GPT-5.2 & \textbf{78.8\%} & 21.2\% & \textbf{0.0\%} \\
    Grok4 & 75.0\% & \textbf{6.2\%} & 1.2\% \\
    Grok4.1f & 58.8\% & 31.2\% & 8.8\% \\
    Gemini 3 & 50.0\% & 36.2\% & 13.8\% \\
    DSR & 41.2\% & 42.5\% & 13.8\% \\
    Opus 4.5 & 33.8\% & 41.2\% & 25.0\% \\
    Hermes4 & 1.2\% & 81.2\% & 16.2\% \\
    GPT-4o & 0.0\% & 88.8\% & 10.0\% \\
    \bottomrule
  \end{tabular}
\end{table}
