% FullObs v1 band-wise accuracy (auto-generated)
\begin{table}[t]
  % Top-placed float: caption above the tabular, label right after the caption
  % so that references to tab:fo_bands resolve to this table's number.
  \centering
  \caption{\textbf{FullObs v1 band-wise accuracy}
    (Acc\_all, denominator = total problems per band).
    Extremes are aggregated.}
  \label{tab:fo_bands}
  % Font size switch for the tabular that follows.
  \small
  % Columns: model name (left-aligned) + five difficulty bands (right-aligned);
  % @{} removes the padding at the outer edges of the table.
  \begin{tabular}{@{}lrrrrr@{}}
    \toprule
    Model    & simple           & easy            & medium          & hard            & extreme         \\
    \midrule
    % One row per model; bold entries are the highest value in their column
    % (both models tied at 100.0\% in "simple" are bold).
    Grok4    & \textbf{100.0\%} & 75.0\%          & \textbf{43.0\%} & \textbf{39.0\%} & \textbf{16.0\%} \\
    GPT-5.2  & \textbf{100.0\%} & \textbf{89.0\%} & 29.0\%          & 21.0\%          & 0.0\%           \\
    Grok4.1f & 92.0\%           & 26.0\%          & 8.0\%           & 8.0\%           & 4.0\%           \\
    Gemini 3 & 96.0\%           & 26.0\%          & 5.0\%           & 4.0\%           & 2.0\%           \\
    DSR      & 68.0\%           & 13.0\%          & 7.0\%           & 2.0\%           & 0.0\%           \\
    Opus 4.5 & 80.0\%           & 10.0\%          & 1.0\%           & 1.0\%           & 0.0\%           \\
    Hermes4  & 40.0\%           & 0.0\%           & 0.0\%           & 0.0\%           & 0.0\%           \\
    GPT-4o   & 0.0\%            & 0.0\%           & 0.0\%           & 0.0\%           & 0.0\%           \\
    \bottomrule
  \end{tabular}
\end{table}