% Equality Usage Analysis (auto-generated)
% Shows frequency and validity of formulas containing equality predicates

% Table: equality usage in FullObs predictions, one row per model followed by
% an aggregate row (the Total column of the per-model rows sums to 2863).
% Placement is [htbp] instead of a bare [h]: LaTeX rewrites a lone [h] to [ht]
% with a warning, and the broader specifier lets the float algorithm place the
% table without stalling the float queue.
\begin{table}[htbp]
  \centering
  \small
  % In the caption and header, the "=" that ends "\% Using =" is the equality
  % symbol being counted, not a definition separator.
  \caption{\textbf{Equality usage in FullObs predictions.} \emph{Total} = number of responses returned by model; \emph{\% Using =} = fraction of predictions containing equality; \emph{Avg AST} = mean AST size of equality-containing formulas; \emph{Valid \%} = validity rate among equality-containing predictions.}
  \label{tab:eq_fullobs}
  \begin{tabular}{@{}lrrrr@{}}
    \toprule
    Model & Total & \% Using = & Avg AST & Valid \% \\
    \midrule
    % Bold marks the highest per-model value in the "% Using =" and "Valid %" columns.
    Grok4    & 252 & 6.7\%           & 52.4 & 52.9\% \\
    GPT-5.2  & 375 & \textbf{26.7\%} & 95.2 & \textbf{55.0\%} \\
    Grok4.1f & 369 & 5.4\%           & 19.2 & 15.0\% \\
    Gemini 3 & 375 & 9.9\%           & 32.0 & 13.5\% \\
    DSR      & 374 & 2.9\%           & 24.3 & 18.2\% \\
    Opus 4.5 & 370 & 5.4\%           & 18.1 & 5.0\% \\
    Hermes4  & 373 & 0.8\%           & 9.0  & 0.0\% \\
    GPT-4o   & 375 & 0.5\%           & 13.0 & 0.0\% \\
    \midrule
    % Aggregate row over all models.
    \emph{Total} & 2863 & 7.3\% & 60.3 & 35.7\% \\
    \bottomrule
  \end{tabular}
\end{table}

% Table: equality usage in CI predictions, one row per model followed by an
% aggregate row (the Total column of the per-model rows sums to 1560).
% Placement is [htbp] instead of a bare [h]: LaTeX rewrites a lone [h] to [ht]
% with a warning, and the broader specifier lets the float algorithm place the
% table without stalling the float queue.
\begin{table}[htbp]
  \centering
  \small
  % In the caption and header, the "=" that ends "\% Using =" is the equality
  % symbol being counted, not a definition separator.
  \caption{\textbf{Equality usage in CI predictions.} \emph{Total} = number of responses returned by model; \emph{\% Using =} = fraction of predictions containing equality; \emph{Avg AST} = mean AST size of equality-containing formulas; \emph{Valid \%} = validity rate among equality-containing predictions.}
  \label{tab:eq_ci}
  \begin{tabular}{@{}lrrrr@{}}
    \toprule
    Model & Total & \% Using = & Avg AST & Valid \% \\
    \midrule
    % Bold marks the highest per-model value in the "% Using =" and "Valid %" columns.
    Grok4    & 170 & 17.6\%          & 25.6 & \textbf{93.3\%} \\
    GPT-5.2  & 200 & \textbf{28.0\%} & 49.6 & 89.3\% \\
    Grok4.1f & 195 & 17.4\%          & 19.9 & 85.3\% \\
    Gemini 3 & 200 & 12.5\%          & 21.9 & 76.0\% \\
    DSR      & 198 & 6.6\%           & 23.7 & 69.2\% \\
    Opus 4.5 & 200 & 9.0\%           & 18.6 & 50.0\% \\
    Hermes4  & 199 & 2.0\%           & 18.0 & 0.0\% \\
    GPT-4o   & 198 & 1.0\%           & 15.0 & 0.0\% \\
    \midrule
    % Aggregate row over all models.
    \emph{Total} & 1560 & 11.7\% & 30.3 & 79.1\% \\
    \bottomrule
  \end{tabular}
\end{table}

% Table: equality usage in EC predictions, one row per model followed by an
% aggregate row (the Total column of the per-model rows sums to 1587).
% Placement is [htbp] instead of a bare [h]: LaTeX rewrites a lone [h] to [ht]
% with a warning, and the broader specifier lets the float algorithm place the
% table without stalling the float queue.
\begin{table}[htbp]
  \centering
  \small
  % In the caption and header, the "=" that ends "\% Using =" is the equality
  % symbol being counted, not a definition separator.
  \caption{\textbf{Equality usage in EC predictions.} \emph{Total} = number of responses returned by model; \emph{\% Using =} = fraction of predictions containing equality; \emph{Avg AST} = mean AST size of equality-containing formulas; \emph{Valid \%} = validity rate among equality-containing predictions.}
  \label{tab:eq_ec}
  \begin{tabular}{@{}lrrrr@{}}
    \toprule
    Model & Total & \% Using = & Avg AST & Valid \% \\
    \midrule
    % Bold marks the highest per-model value in the "% Using =" and "Valid %" columns.
    Grok4    & 199 & 6.5\%           & 18.2 & 38.5\% \\
    GPT-5.2  & 200 & \textbf{12.5\%} & 48.9 & \textbf{76.0\%} \\
    Grok4.1f & 190 & 6.3\%           & 16.6 & 58.3\% \\
    Gemini 3 & 200 & 7.0\%           & 23.7 & 35.7\% \\
    DSR      & 200 & 4.0\%           & 16.0 & 37.5\% \\
    Opus 4.5 & 198 & 11.6\%          & 14.6 & 13.0\% \\
    Hermes4  & 200 & 0.5\%           & 13.0 & 0.0\% \\
    % GPT-4o has 0.0% equality usage here, so the two statistics that are
    % computed over equality-containing predictions are shown as "---".
    GPT-4o   & 200 & 0.0\%           & ---  & --- \\
    \midrule
    % Aggregate row over all models.
    \emph{Total} & 1587 & 6.0\% & 25.7 & 43.8\% \\
    \bottomrule
  \end{tabular}
\end{table}
