% CI Generalization vs Bloat Bins Table (auto-generated)
\begin{table}[t]
% Float holding one booktabs tabular: one row per model, one column per
% AST delta bin (compact / moderate / bloated, as named in the caption).
% NOTE(review): the header comment marks this file as auto-generated, so
% presumably any edit made here must also be ported to the generating script.
\centering
% NOTE(review): every data cell has the form "rate (k)", but the caption only
% explains the rate. The parenthesized k looks like the number of
% train-correct instances in that bin -- confirm against the generator and
% then say so in the caption.
\caption{\textbf{CI generalization by AST delta bin.}
For train-correct predictions, we report holdout exact-match rate (\%).
Bins: $\leq +1$ (compact), $+2$..$+25$ (moderate), $> +25$ (bloated).
--- indicates no train-correct instances in that bin.}
\label{tab:ci_generalization_bins}
\small
% Column spec: left-aligned model name, then three right-aligned bin columns;
% @{} strips the outer column padding so the rules end flush with the text.
\begin{tabular}{@{}lrrr@{}}
\toprule
% Signed bin bounds are set entirely in math mode so the relation symbol, the
% sign and the digits share one font and get TeX's relation spacing.
Model    & $\leq +1$ & $+2$..$+25$ & $> +25$  \\
\midrule
Grok4    & 8.5 (111) & 2.5 (40)    & 0.0 (5)  \\
GPT-5.2  & 3.5 (92)  & 1.2 (54)    & 0.0 (19) \\
Grok4.1f & 5.2 (100) & 1.9 (21)    & ---      \\
Gemini3  & 6.5 (92)  & 1.1 (18)    & ---      \\
DSR      & 6.0 (67)  & 0.0 (16)    & ---      \\
Opus4.5  & 4.1 (58)  & 2.0 (10)    & ---      \\
Hermes4  & 0.0 (5)   & ---         & ---      \\
GPT-4o   & 0.0 (1)   & ---         & ---      \\
\bottomrule
\end{tabular}
\end{table}