\begin{table*}
\centering
\caption{Out-of-distribution experiment using \texttt{Llama-2-7b}. 
We report the mean and standard deviation of test-set performance over three training seeds.
\textbf{Bold} and \underline{underlined} results denote the best and second-best mean performance on each metric/dataset.}
\begin{tabular}{@{}ccc|c|cc|cc@{}}
\toprule
\multirow{3}{*}{\textbf{Metric}} & \multirow{3}{*}{\textbf{Method}}  & \multirow{3}{*}{\textbf{Params (M)}} & \multicolumn{5}{c}{\textbf{Datasets}} \\
& & &  \multicolumn{1}{c}{\textbf{In Dist.}} & \multicolumn{2}{c}{\textbf{Smaller Dist.\ Shift}} & \multicolumn{2}{c}{\textbf{Larger Dist.\ Shift}} \\
  \cline{4-8}
  &  & & \textbf{OBQA} & \textbf{ARC-C} & \textbf{ARC-E} & \textbf{Chemistry} & \textbf{Physics} \\
\midrule
\multirow{8}{*}{\textbf{ACC ($\uparrow$)}}
& MLE & $4.483$ & $82.53_{\pm 0.4}$ & $\underline{69.48}_{\pm 0.5}$ & $75.59_{\pm 1.2}$ & $39.33_{\pm 1.5}$ & $29.00_{\pm 2.6}$ \\
& MAP & $4.483$ & $82.80_{\pm 0.2}$ & $68.92_{\pm 1.2}$ & $76.29_{\pm 0.7}$ & $36.00_{\pm 1.0}$ & $\underline{31.00}_{\pm 1.0}$ \\
& MC-Dropout & $4.483$ & $\underline{83.07}_{\pm 1.2}$ & $69.14_{\pm 0.5}$ & $76.17_{\pm 0.9}$ & $37.67_{\pm 2.1}$ & $28.00_{\pm 4.4}$ \\
& Ensemble & $13.449$ & $\textbf{83.53}_{\pm 0.2}$ & $69.37_{\pm 0.5}$ & $76.12_{\pm 1.0}$ & $38.33_{\pm 1.5}$ & $29.00_{\pm 2.6}$ \\
& BBB & $6.613$ & $82.06_{\pm 0.6}$ & $67.25_{\pm 1.2}$ & $75.83_{\pm 0.8}$ & $42.36_{\pm 0.5}$ & $30.21_{\pm 2.3}$ \\
& Laplace & $4.483$ & $82.12_{\pm 0.7}$ & $69.14_{\pm 1.2}$ & $74.94_{\pm 1.0}$ & $\textbf{44.10}_{\pm 1.3}$ & $\textbf{31.60}_{\pm 0.5}$ \\
& BLoB & $6.613$ & $82.47_{\pm 0.4}$ & $\textbf{69.56}_{\pm 1.1}$ & $\underline{76.55}_{\pm 0.3}$ & $\underline{43.40}_{\pm 0.6}$ & $30.56_{\pm 1.2}$ \\
%& ScalaBL (SVD) & $4.488$ & $81.60_{\pm 0.9}$ & $67.94_{\pm 1.1}$ & $76.13_{\pm 1.4}$ & $\underline{43.40}_{\pm 1.2}$ & $28.82_{\pm 0.6}$ \\
& ScalaBL (ours) & $4.484$ & $82.13_{\pm 0.2}$ & $\underline{69.48}_{\pm 0.5}$ & $\textbf{77.46}_{\pm 0.3}$ & $42.00_{\pm 2.6}$ & $30.33_{\pm 0.6}$ \\
\midrule
\multirow{8}{*}{\textbf{ECE ($\downarrow$)}}
& MLE & $4.483$ & $13.86_{\pm 0.5}$ & $23.07_{\pm 0.9}$ & $17.41_{\pm 0.9}$ & $22.56_{\pm 2.5}$ & $29.36_{\pm 2.3}$ \\
& MAP & $4.483$ & $13.91_{\pm 0.3}$ & $24.10_{\pm 0.9}$ & $16.93_{\pm 1.0}$ & $25.96_{\pm 2.0}$ & $28.30_{\pm 2.5}$ \\
& MC-Dropout & $4.483$ & $12.94_{\pm 1.2}$ & $23.44_{\pm 0.7}$ & $16.84_{\pm 0.7}$ & $23.78_{\pm 3.0}$ & $32.71_{\pm 4.0}$ \\
& Ensemble & $13.449$ & $10.81_{\pm 0.2}$ & $19.12_{\pm 1.1}$ & $13.66_{\pm 0.9}$ & $15.94_{\pm 1.5}$ & $\underline{20.86}_{\pm 2.5}$ \\
& BBB & $6.613$ & $11.38_{\pm 1.1}$ & $19.90_{\pm 0.7}$ & $13.41_{\pm 0.9}$ & $15.67_{\pm 1.2}$ & $26.10_{\pm 4.8}$ \\
& Laplace & $4.483$ & $8.70_{\pm 1.8}$ & $\textbf{5.84}_{\pm 0.6}$ & $\underline{8.51}_{\pm 1.1}$ & $\textbf{10.76}_{\pm 3.4}$ & $\textbf{13.91}_{\pm 0.9}$ \\
& BLoB & $6.613$ & $\textbf{2.80}_{\pm 0.5}$ & $13.82_{\pm 0.5}$ & $9.65_{\pm 0.7}$ & $\underline{15.39}_{\pm 3.4}$ & $22.66_{\pm 0.7}$ \\
%& ScalaBL (SVD) & $4.488$ & $\underline{4.02}_{\pm 0.4}$ & $\underline{11.69}_{\pm 0.8}$ & $\textbf{7.96}_{\pm 1.1}$ & $\underline{15.31}_{\pm 1.1}$ & $23.90_{\pm 0.8}$ \\
& ScalaBL (ours) & $4.484$ & $\underline{3.62}_{\pm 0.9}$ & $\underline{11.85}_{\pm 0.6}$ & $\textbf{7.89}_{\pm 0.8}$ & $15.99_{\pm 3.3}$ & $21.98_{\pm 1.1}$ \\
\midrule
\multirow{8}{*}{\textbf{NLL ($\downarrow$)}}
& MLE & $4.483$ & $0.91_{\pm 0.1}$ & $1.42_{\pm 0.1}$ & $1.11_{\pm 0.1}$ & $1.62_{\pm 0.0}$ & $1.69_{\pm 0.1}$ \\
& MAP & $4.483$ & $0.89_{\pm 0.0}$ & $1.46_{\pm 0.1}$ & $1.12_{\pm 0.0}$ & $1.67_{\pm 0.1}$ & $1.70_{\pm 0.1}$ \\
& MC-Dropout & $4.483$ & $0.86_{\pm 0.1}$ & $1.39_{\pm 0.1}$ & $1.12_{\pm 0.1}$ & $1.64_{\pm 0.1}$ & $1.76_{\pm 0.0}$ \\
& Ensemble & $13.449$ & $0.64_{\pm 0.0}$ & $1.03_{\pm 0.0}$ & $0.82_{\pm 0.0}$ & $1.42_{\pm 0.0}$ & $1.49_{\pm 0.0}$ \\
& BBB & $6.613$ & $0.66_{\pm 0.1}$ & $1.06_{\pm 0.0}$ & $0.79_{\pm 0.0}$ & $1.49_{\pm 0.0}$ & $1.62_{\pm 0.1}$ \\
& Laplace & $4.483$ & $0.52_{\pm 0.0}$ & $\textbf{0.81}_{\pm 0.0}$ & $\underline{0.70}_{\pm 0.0}$ & $\textbf{1.35}_{\pm 0.0}$ & $\textbf{1.36}_{\pm 0.0}$ \\
& BLoB & $6.613$ & $\textbf{0.47}_{\pm 0.0}$ & $0.88_{\pm 0.0}$ & $\underline{0.70}_{\pm 0.0}$ & $\underline{1.38}_{\pm 0.0}$ & $\underline{1.43}_{\pm 0.0}$ \\
%& ScalaBL (SVD)& $4.488$ & $\underline{0.51}_{\pm 0.0}$ & $\underline{0.88}_{\pm 0.0}$ & $\textbf{0.65}_{\pm 0.0}$ & $1.39_{\pm 0.0}$ & $1.50_{\pm 0.0}$ \\
& ScalaBL (ours) & $4.484$ & $\underline{0.51}_{\pm 0.0}$ & $\underline{0.85}_{\pm 0.0}$ & $\textbf{0.63}_{\pm 0.0}$ & $1.40_{\pm 0.0}$ & $1.48_{\pm 0.0}$ \\
\bottomrule
\end{tabular}
\label{tab:llama2_ood}
\end{table*}