\begin{table*}[!htbp]
\centering
\caption{Out-of-distribution experiment using \texttt{Qwen2.5-7B}. 
We report the mean and standard deviation of test set performance using 8 training seeds.
\textbf{Bold} and \underline{underlined} results denote the best and second-best mean performance on each metric/dataset.
}\begin{tabular}{@{}cccccccc@{}}
\toprule
\textbf{Metric} & \textbf{Method} & \textbf{Params (M)} & \textbf{OBQA} & \textbf{ARC-C} & \textbf{ARC-E} & \textbf{Chemistry} & \textbf{Physics} \\
\midrule
\multirow{7}{*}{\textbf{ACC ($\uparrow$)}}
& MLE & $3.768$ & $\underline{92.25}_{\pm 0.9}$ & $90.88_{\pm 0.7}$ & $95.64_{\pm 0.5}$ & $53.00_{\pm 1.3}$ & $53.00_{\pm 1.5}$ \\
& MAP & $3.768$ & $91.72_{\pm 0.7}$ & $90.20_{\pm 0.9}$ & $95.53_{\pm 0.6}$ & $\underline{53.50}_{\pm 0.9}$ & $53.25_{\pm 3.1}$ \\
& MC-Dropout & $3.768$ & $91.80_{\pm 0.6}$ & $90.37_{\pm 0.5}$ & $95.51_{\pm 0.4}$ & $52.75_{\pm 1.3}$ & $51.00_{\pm 2.1}$ \\
& Ensemble & $11.305$ & $\textbf{92.70}_{\pm 0.6}$ & $90.84_{\pm 0.6}$ & $95.71_{\pm 0.5}$ & $53.25_{\pm 1.0}$ & $\textbf{53.88}_{\pm 1.2}$ \\
& Laplace & $3.768$ & $91.68_{\pm 0.4}$ & $90.51_{\pm 0.7}$ & $95.61_{\pm 0.4}$ & $48.75_{\pm 1.8}$ & $50.74_{\pm 2.3}$ \\
& BLoB & $5.403$ & $91.72_{\pm 0.7}$ & $\textbf{92.49}_{\pm 0.5}$ & $\textbf{96.07}_{\pm 0.5}$ & $\textbf{54.69}_{\pm 1.4}$ & $\underline{53.65}_{\pm 2.8}$ \\
& ScalaBL (ours) & $3.769$ & $90.90_{\pm 0.5}$ & $\underline{91.06}_{\pm 1.1}$ & $\underline{95.74}_{\pm 0.5}$ & $52.60_{\pm 1.8}$ & $53.13_{\pm 1.5}$ \\
\midrule
\multirow{7}{*}{\textbf{ECE ($\downarrow$)}}
& MLE & $3.768$ & $6.40_{\pm 0.8}$ & $7.72_{\pm 0.6}$ & $3.48_{\pm 0.4}$ & $23.29_{\pm 2.2}$ & $23.22_{\pm 3.2}$ \\
& MAP & $3.768$ & $6.61_{\pm 0.6}$ & $7.89_{\pm 0.9}$ & $3.31_{\pm 0.2}$ & $22.90_{\pm 1.9}$ & $21.52_{\pm 4.4}$ \\
& MC-Dropout & $3.768$ & $6.60_{\pm 0.4}$ & $7.63_{\pm 0.8}$ & $3.38_{\pm 0.2}$ & $23.74_{\pm 1.6}$ & $21.61_{\pm 2.1}$ \\
& Ensemble & $11.305$ & $4.96_{\pm 0.6}$ & $6.18_{\pm 0.6}$ & $2.63_{\pm 0.4}$ & $19.49_{\pm 1.4}$ & $17.33_{\pm 2.0}$ \\
& Laplace & $3.768$ & $\textbf{1.90}_{\pm 0.4}$ & $4.75_{\pm 0.7}$ & $\underline{1.99}_{\pm 0.4}$ & $\textbf{14.31}_{\pm 2.1}$ & $\textbf{11.94}_{\pm 4.5}$ \\
& BLoB & $5.403$ & $\underline{2.16}_{\pm 0.5}$ & $\underline{4.46}_{\pm 0.5}$ & $2.35_{\pm 0.4}$ & $\underline{16.21}_{\pm 2.2}$ & $16.93_{\pm 2.4}$ \\
& ScalaBL (ours) & $3.769$ & $2.43_{\pm 0.7}$ & $\textbf{4.41}_{\pm 0.7}$ & $\textbf{1.92}_{\pm 0.4}$ & $16.94_{\pm 1.8}$ & $\underline{16.29}_{\pm 1.8}$ \\
\midrule
\multirow{7}{*}{\textbf{NLL ($\downarrow$)}}
& MLE & $3.768$ & $0.38_{\pm 0.1}$ & $0.44_{\pm 0.0}$ & $0.23_{\pm 0.0}$ & $1.53_{\pm 0.1}$ & $1.18_{\pm 0.1}$ \\
& MAP & $3.768$ & $0.39_{\pm 0.0}$ & $0.46_{\pm 0.0}$ & $0.22_{\pm 0.0}$ & $1.52_{\pm 0.1}$ & $1.19_{\pm 0.1}$ \\
& MC-Dropout & $3.768$ & $0.36_{\pm 0.0}$ & $0.43_{\pm 0.0}$ & $0.21_{\pm 0.0}$ & $1.50_{\pm 0.1}$ & $1.19_{\pm 0.0}$ \\
& Ensemble & $11.305$ & $0.27_{\pm 0.0}$ & $0.33_{\pm 0.0}$ & $0.17_{\pm 0.0}$ & $1.29_{\pm 0.0}$ & $1.07_{\pm 0.0}$ \\
& Laplace & $3.768$ & $\underline{0.23}_{\pm 0.0}$ & $0.32_{\pm 0.0}$ & $\underline{0.15}_{\pm 0.0}$ & $\textbf{1.11}_{\pm 0.0}$ & $1.03_{\pm 0.0}$ \\
& BLoB & $5.403$ & $\textbf{0.21}_{\pm 0.0}$ & $\underline{0.28}_{\pm 0.0}$ & $\underline{0.15}_{\pm 0.0}$ & $1.32_{\pm 0.1}$ & $\underline{0.99}_{\pm 0.0}$ \\
& ScalaBL (ours) & $3.769$ & $\underline{0.23}_{\pm 0.0}$ & $\textbf{0.27}_{\pm 0.0}$ & $\textbf{0.14}_{\pm 0.0}$ & $\underline{1.26}_{\pm 0.0}$ & $\textbf{0.96}_{\pm 0.0}$ \\
\bottomrule
\end{tabular}
\label{tab:qwen7B_ood}
\end{table*}