% Within-problem bloat control (auto-generated)
\begin{table}[tb]
\centering
\caption{\textbf{Within-problem bloat control (FullObs).}
For problems with $\geq 2$ train-correct predictions across models,
we compare holdout generalization of the shortest vs.\ longest formulas.
$\Delta$ = short $-$ long, in percentage points; positive means the shorter formula generalizes better.
(Problems with neither $\Delta > 0$ nor $\Delta < 0$ are ties with $\Delta = 0$.)
Fraction $\Delta < 0$: Short--Long 6\%, Near--Above 5\%.}
\label{tab:within_problem_bloat_fo}
\small
% Seven columns: one left-aligned row label followed by six centered cells.
% Outer padding is stripped with @{} and every inter-column gap is a fixed 4pt.
\begin{tabular}{@{}l*{6}{@{\hspace{4pt}}c}@{}}
  \toprule
  % Header, upper tier: group heading spanning the Short and Long columns (3--4).
  & & \multicolumn{2}{c}{Holdout Gen} & & & \\
  \cmidrule(lr){3-4}
  % Header, lower tier: one label per column.
  Comparison  & $n$ & Short  & Long   & $\Delta$ [CI]  & $\Delta>0$ & $p$      \\
  \midrule
  % Data rows: one per comparison.
  Short--Long & 100 & 73.5\% & 39.4\% & +34.1 [25, 44] & 43\%       & $<$0.001 \\
  % NOTE(review): 86.0 - 9.9 = 76.1, but this row reports +76.2; presumably the
  % generator subtracts unrounded values before rounding -- confirm.
  Near--Above & 42  & 86.0\% & 9.9\%  & +76.2 [64, 86] & 88\%       & $<$0.001 \\
  \bottomrule
\end{tabular}
\end{table}