% Results table: in-distribution comparison on Qwen2.5-32B.
% Layout: three metric groups (ACC, ECE, NLL), one row per method, one column
% per dataset. Cells are typeset as mean_{\pm std}.
% Marker convention (see caption): \textbf = best mean, \underline = second-best
% mean within a metric/dataset column; tied means share the same marker.
% ACC is higher-is-better; ECE and NLL are lower-is-better.
% Requires the booktabs and multirow packages.
\begin{table*}
\centering
\caption{In-distribution experiment using \texttt{Qwen2.5-32B}.
We report the mean and standard deviation of test set performance using 3 training seeds.
\textbf{Bold} and \underline{underlined} results denote the best and second-best mean performance on each metric/dataset.
%\textcolor{RK}{Here also a line on Ensemble achieves SOTA results but at the cost of much higher (maybe a no., if we know) resource usage than ScalaBL, which achieves competitive results.}
}
\label{tab:qwen32B_main}
\begin{tabular}{@{}ccc|cccccc@{}}
\toprule
\multirow{2}{*}{\textbf{Metric}} & \multirow{2}{*}{\textbf{Method}} & \multirow{2}{*}{\textbf{Params (M)}} & \multicolumn{6}{c}{\textbf{Datasets}} \\
  &  & & \textbf{WG-S} & \textbf{ARC-C} & \textbf{ARC-E} & \textbf{WG-M} & \textbf{OBQA} & \textbf{BoolQ} \\
\midrule
% NOTE: each metric group has 5 active rows because the ScalaBL (SVD) row is
% commented out; the \multirow span must be raised to 6 (and the best /
% second-best markers re-derived) if that row is restored.
\multirow{5}{*}{\textbf{ACC ($\uparrow$)}}
& MLE & $9.646$ & $86.45_{\pm 0.6}$ & $93.90_{\pm 0.9}$ & $\underline{98.80}_{\pm 0.3}$ & $90.90_{\pm 0.3}$ & $\textbf{96.93}_{\pm 0.6}$ & $\underline{91.42}_{\pm 0.1}$ \\
& MAP & $9.646$ & $86.73_{\pm 0.8}$ & $93.85_{\pm 1.0}$ & $\textbf{98.83}_{\pm 0.4}$ & $\underline{91.00}_{\pm 0.1}$ & $\textbf{96.93}_{\pm 0.7}$ & $\textbf{91.47}_{\pm 0.1}$ \\
& MC-Dropout & $9.646$ & $\underline{86.81}_{\pm 0.5}$ & $\underline{94.18}_{\pm 0.4}$ & $98.71_{\pm 0.5}$ & $90.63_{\pm 0.7}$ & $\underline{96.60}_{\pm 0.7}$ & $\underline{91.42}_{\pm 0.0}$ \\
& Ensemble & $28.938$ & $\textbf{86.99}_{\pm 0.4}$ & $\textbf{94.97}_{\pm 0.3}$ & $98.65_{\pm 0.3}$ & $\textbf{91.42}_{\pm 0.2}$ & $\textbf{96.93}_{\pm 0.6}$ & $91.11_{\pm 0.1}$ \\
& BLoB & $14.930$ & $84.92_{\pm 0.4}$ & $94.07_{\pm 0.4}$ & $98.65_{\pm 0.5}$ & $90.71_{\pm 0.3}$ & $96.53_{\pm 0.3}$ & $90.57_{\pm 0.2}$ \\
%& ScalaBL (SVD) & $9.656$ & $84.48_{\pm 0.3}$ & $92.95_{\pm 0.0}$ & $98.65_{\pm 0.1}$ & $90.42_{\pm 0.3}$ & $\underline{96.60}_{\pm 0.4}$ & $91.03_{\pm 0.0}$ \\
& ScalaBL (ours) & $9.648$ & $84.73_{\pm 0.4}$ & $93.74_{\pm 0.7}$ & $98.65_{\pm 0.2}$ & $90.07_{\pm 0.1}$ & $96.33_{\pm 0.2}$ & $90.99_{\pm 0.1}$ \\
\midrule
\multirow{5}{*}{\textbf{ECE ($\downarrow$)}}
& MLE & $9.646$ & $12.85_{\pm 0.7}$ & $5.88_{\pm 0.8}$ & $1.04_{\pm 0.3}$ & $7.11_{\pm 0.4}$ & $2.18_{\pm 0.4}$ & $1.66_{\pm 0.1}$ \\
& MAP & $9.646$ & $12.48_{\pm 0.9}$ & $5.97_{\pm 0.7}$ & $\textbf{0.96}_{\pm 0.4}$ & $6.83_{\pm 0.4}$ & $\underline{2.03}_{\pm 0.6}$ & $1.66_{\pm 0.2}$ \\
& MC-Dropout & $9.646$ & $12.22_{\pm 0.5}$ & $5.38_{\pm 0.3}$ & $1.22_{\pm 0.2}$ & $7.50_{\pm 0.2}$ & $2.50_{\pm 0.3}$ & $1.50_{\pm 0.1}$ \\
& Ensemble & $28.938$ & $11.20_{\pm 0.6}$ & $\textbf{4.89}_{\pm 0.2}$ & $\underline{0.98}_{\pm 0.4}$ & $\textbf{5.02}_{\pm 0.1}$ & $\textbf{1.85}_{\pm 0.3}$ & $\textbf{0.74}_{\pm 0.1}$ \\
& BLoB & $14.930$ & $\textbf{7.49}_{\pm 0.3}$ & $5.07_{\pm 0.3}$ & $1.11_{\pm 0.3}$ & $6.18_{\pm 0.5}$ & $2.51_{\pm 0.5}$ & $\underline{1.39}_{\pm 0.1}$ \\
%& ScalaBL (SVD) & $9.656$ & $\underline{9.86}_{\pm 0.7}$ & $5.63_{\pm 0.4}$ & $1.12_{\pm 0.2}$ & $\underline{5.44}_{\pm 0.3}$ & $2.44_{\pm 0.4}$ & $1.47_{\pm 0.1}$ \\
& ScalaBL (ours) & $9.648$ & $\underline{10.92}_{\pm 0.3}$ & $\underline{5.03}_{\pm 0.6}$ & $1.06_{\pm 0.1}$ & $\underline{5.91}_{\pm 0.2}$ & $2.32_{\pm 0.6}$ & $1.40_{\pm 0.2}$ \\
\midrule
\multirow{5}{*}{\textbf{NLL ($\downarrow$)}}
& MLE & $9.646$ & $1.08_{\pm 0.1}$ & $0.49_{\pm 0.1}$ & $0.06_{\pm 0.0}$ & $0.35_{\pm 0.0}$ & $0.13_{\pm 0.0}$ & $\underline{0.18}_{\pm 0.0}$ \\
& MAP & $9.646$ & $1.05_{\pm 0.0}$ & $0.53_{\pm 0.0}$ & $0.06_{\pm 0.0}$ & $0.33_{\pm 0.0}$ & $0.13_{\pm 0.0}$ & $\underline{0.18}_{\pm 0.0}$ \\
& MC-Dropout & $9.646$ & $0.99_{\pm 0.0}$ & $0.50_{\pm 0.0}$ & $0.07_{\pm 0.0}$ & $0.36_{\pm 0.0}$ & $0.14_{\pm 0.0}$ & $\underline{0.18}_{\pm 0.0}$ \\
& Ensemble & $28.938$ & $0.67_{\pm 0.0}$ & $\textbf{0.30}_{\pm 0.0}$ & $\textbf{0.04}_{\pm 0.0}$ & $\textbf{0.25}_{\pm 0.0}$ & $\textbf{0.11}_{\pm 0.0}$ & $\underline{0.18}_{\pm 0.0}$ \\
& BLoB & $14.930$ & $\textbf{0.44}_{\pm 0.0}$ & $0.40_{\pm 0.0}$ & $0.06_{\pm 0.0}$ & $\underline{0.30}_{\pm 0.0}$ & $\underline{0.12}_{\pm 0.0}$ & $\textbf{0.17}_{\pm 0.0}$ \\
%& ScalaBL (SVD) & $9.656$ & $\underline{0.56}_{\pm 0.0}$ & $\underline{0.32}_{\pm 0.0}$ & $\underline{0.05}_{\pm 0.0}$ & $\underline{0.28}_{\pm 0.0}$ & $\underline{0.12}_{\pm 0.0}$ & $\underline{0.18}_{\pm 0.0}$ \\
& ScalaBL (ours) & $9.648$ & $\underline{0.65}_{\pm 0.0}$ & $\underline{0.32}_{\pm 0.0}$ & $\underline{0.05}_{\pm 0.0}$ & $\underline{0.30}_{\pm 0.0}$ & $\underline{0.12}_{\pm 0.0}$ & $\underline{0.18}_{\pm 0.0}$ \\
\bottomrule
\end{tabular}
\end{table*}