
\usepackage{amsthm}
\usepackage[T1]{fontenc}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{amsfonts}
\usepackage{multirow}
\usepackage{mathtools}
\usepackage{breqn}
\usepackage{bbm}
\usepackage{dsfont}
\usepackage{graphicx}
\usepackage{natbib}
\usepackage{cases}
\usepackage[colorinlistoftodos]{todonotes}

% Upright operator name for the Hessian; prints as "Hess".
\DeclareMathOperator\Hessian{Hess}

% --- Method name ---------------------------------------------------------
% \Calens is the sentence-initial (capitalised) variant. Both currently expand
% to the same text; keeping two macros means a later rename only touches here.
\newcommand\calens{ID-calibrated ensembles}
\newcommand\Calens{ID-calibrated ensembles}

% --- Shift terminology, lower-case forms ---------------------------------
\newcommand\nat{natural}
\newcommand\natshift{natural shift}
\newcommand\natshifts{natural shifts}
\newcommand\adv{anticorrelated spurious}
\newcommand\advshift{anticorrelated spurious shift}
\newcommand\advshifts{anticorrelated spurious shifts}

% --- Shift terminology, sentence-initial forms ---------------------------
\newcommand\Adv{Anticorrelated spurious}
\newcommand\Advshift{Anticorrelated spurious shift}
\newcommand\Advshifts{Anticorrelated spurious shifts}

% --- Fixed counts referenced by name in the text -------------------------
\newcommand\numidnat{9}
\newcommand\numnat{11}
\newcommand\numadv{3}
\newcommand\numtotal{14}

% These results are all auto-generated from the data!
% NOTE(review): macro names appear to follow <method><metric><split>[natural|Seven],
% with methods std / rob / cal / tuned / naive, metrics acc / ece, and splits
% id / ood -- TODO confirm against the generating script.
% Each macro expands to a bare number (no percent sign or unit is included).
\newcommand{\stdaccid}{88.6}
\newcommand{\stdaccood}{64.3}
\newcommand{\stdaccidnatural}{88.7}
\newcommand{\stdaccoodnatural}{65.2}
\newcommand{\robaccid}{86.9}
\newcommand{\robaccood}{74.6}
\newcommand{\robaccidnatural}{86.8}
\newcommand{\robaccoodnatural}{72.3}
\newcommand{\calaccid}{90.0}
\newcommand{\calaccood}{74.7}
\newcommand{\calaccidnatural}{90.3}
\newcommand{\calaccoodnatural}{74.5}
% For tuning, logits did better than probs, so we report logits.
% This is to make the baseline more competitive.
% All numbers reported in Appendix.
\newcommand{\tunedaccid}{90.3}
\newcommand{\tunedaccood}{72.1}
\newcommand{\tunedaccidnatural}{90.5}
\newcommand{\tunedaccoodnatural}{72.1}
% For naive, adding probabilities worked better.
% So we report these in headline results.
% All numbers reported in Appendix.
\newcommand{\naiveaccid}{89.4}
\newcommand{\naiveaccood}{73.1}
\newcommand{\naiveaccidnatural}{90.0}
\newcommand{\naiveaccoodnatural}{73.4}
% ECE of standard and robust models (post-calibration)
\newcommand{\stdeceid}{1.6}
\newcommand{\stdeceood}{11.3}
\newcommand{\robeceid}{2.3}
\newcommand{\robeceood}{6.8}
% "Seven"-suffixed numbers: presumably restricted to the 7 datasets that have
% multiple std and rob checkpoints (see the commented-out note further down in
% this file) -- TODO confirm. robrob / stdstd look like same-type ensembles.
\newcommand{\robrobaccidSeven}{89.7}
\newcommand{\robrobaccoodSeven}{76.2}
\newcommand{\stdstdaccidSeven}{90.7}
\newcommand{\stdstdaccoodSeven}{68.8}
\newcommand{\calaccidSeven}{91.8}
\newcommand{\calaccoodSeven}{76.5}

% % Results (before adding iwildcam, and removing cropland from std-std, rob-rob).
% \newcommand{\stdaccid}{89.1}
% \newcommand{\stdaccood}{64.5}
% \newcommand{\stdaccidnatural}{89.5}
% \newcommand{\stdaccoodnatural}{65.6}
% \newcommand{\robaccid}{87.3}
% \newcommand{\robaccood}{75.5}
% \newcommand{\robaccidnatural}{87.5}
% \newcommand{\robaccoodnatural}{73.2}
% \newcommand{\calaccid}{90.5}
% \newcommand{\calaccood}{75.3}
% \newcommand{\calaccidnatural}{91.1}
% \newcommand{\calaccoodnatural}{75.3}
% % For tuning, logits did better than probs, so we report logits.
% % This is to make the baseline more competitive.
% % All numbers reported in Appendix.
% \newcommand{\tunedaccid}{90.8}
% \newcommand{\tunedaccood}{72.6}
% \newcommand{\tunedaccidnatural}{91.3}
% \newcommand{\tunedaccoodnatural}{72.7}
% % For naive, adding probabilities worked better.
% % So we report these in headline results.
% % All numbers reported in Appendix.
% \newcommand{\naiveaccid}{89.9}
% \newcommand{\naiveaccood}{73.7}
% \newcommand{\naiveaccidnatural}{90.7}
% \newcommand{\naiveaccoodnatural}{74.3}

% \newcommand{\robrobaccidSeven}{89.6}
% \newcommand{\robrobaccoodSeven}{78.2}
% \newcommand{\stdstdaccidSeven}{91.4}
% \newcommand{\stdstdaccoodSeven}{70.8}
% \newcommand{\calaccidSeven}{92.0}
% \newcommand{\calaccoodSeven}{78.7}

% % ECE of standard and robust models (post-calibration)
% \newcommand{\stdeceid}{1.4}
% \newcommand{\stdeceood}{11.9}
% \newcommand{\robeceid}{2.4}
% \newcommand{\robeceood}{7.1}

% These numbers are if we use the ensemble numbers for the 7 datasets where we have multiple std and rob checkpoints.
% And use the single model numbers for the other datasets.
% That doesn't seem fair though... I computed these just so we can get some sense of what things might look like for all
% the datasets (not just 7).
% \newcommand{\robrobaccid}{87.5}
% \newcommand{\robrobaccood}{75.7}
% \newcommand{\robrobaccidnatural}{87.7}
% \newcommand{\robrobaccoodnatural}{73.5}
% \newcommand{\stdstdaccid}{89.6}
% \newcommand{\stdstdaccood}{65.0}
% \newcommand{\stdstdaccidnatural}{90.2}
% \newcommand{\stdstdaccoodnatural}{66.3}

% The symbol j tagged with a sans-serif label: std, rob, ens.
\newcommand\jstd{j_\mathsf{std}}
\newcommand\jrob{j_\mathsf{rob}}
\newcommand\jens{j_\mathsf{ens}}


% Upright names for the prediction and error quantities.
% \textup is used instead of \mbox: an \mbox in math mode inherits the
% surrounding text font (so the name turns italic inside theorem bodies) and
% keeps full text size inside sub/superscripts. With amsmath loaded, \textup
% stays upright and scales with the math style. This matches \poly and \supp.
\newcommand{\pred}{\textup{pred}}
\newcommand{\Err}{\textup{Err}}
% Error variants tagged with a sans-serif label (P, id, ood).
\newcommand{\Errp}{\textup{Err}_\mathsf{P}}
\newcommand{\Errid}{\textup{Err}_\mathsf{id}}
\newcommand{\Errood}{\textup{Err}_\mathsf{ood}}

% Inputs x and labels y, tagged by a sans-serif superscript naming the split.
% Grouped per split: train, val, test, ood.
\newcommand\xtrain{x^\mathsf{train}}
\newcommand\ytrain{y^\mathsf{train}}

\newcommand\xval{x^\mathsf{val}}
\newcommand\yval{y^\mathsf{val}}

\newcommand\xtest{x^\mathsf{test}}
\newcommand\ytest{y^\mathsf{test}}

\newcommand\xood{x^\mathsf{ood}}
\newcommand\yood{y^\mathsf{ood}}

% Symbols c and f with std / rob / ens labels as sans-serif subscripts.
\newcommand\cstd{c_\mathsf{std}}
\newcommand\crob{c_\mathsf{rob}}

\newcommand\fstd{f_\mathsf{std}}
\newcommand\frob{f_\mathsf{rob}}
\newcommand\fens{f_\mathsf{ens}}

% Overlined variants of f.
\newcommand\fstdbar{\overline{f}_\mathsf{std}}
\newcommand\frobbar{\overline{f}_\mathsf{rob}}

% Symbols p and T with std / rob labels.
\newcommand\pstd{p_\mathsf{std}}
\newcommand\prob{p_\mathsf{rob}}
\newcommand\Tstd{T_\mathsf{std}}
\newcommand\Trob{T_\mathsf{rob}}

% Symbol n with a split label: train, val, test, ood.
\newcommand\ntrain{n_\mathsf{train}}
\newcommand\nval{n_\mathsf{val}}
\newcommand\ntest{n_\mathsf{test}}
\newcommand\nood{n_\mathsf{ood}}

\newcommand{\mse}{MSE}
% Hatted P and the Sigma subscripted by it.
\newcommand{\ptrain}{\hat{P}}
\newcommand{\covtrain}{\Sigma_{\hat{P}}}
% L with an upright label (lp, ft, ftreg) as subscript.
% \textup replaces \mbox here: an \mbox inside a subscript is set at full text
% size instead of script size. With amsmath loaded, \textup scales correctly
% (same construction as \opnorm and \sigmamax elsewhere in this file).
% The outer braces are kept so that e.g. \llp^2 attaches to the whole symbol.
\newcommand{\llp}{{L_{\textup{lp}}}}
\newcommand{\lft}{{L_{\textup{ft}}}}
\newcommand{\lftreg}{{L_{\textup{ftreg}}}}


% Upright multi-word name; \textup (not \mbox) so it does not turn italic
% inside theorem bodies.
\newcommand{\headerror}{\textup{Head-Error}}
\newcommand{\dataspan}{S}
\newcommand{\nullf}{T}

% \newcommand{\xid}{x_{\mathsf{id}}}
% \newcommand{\xood}{x_{\mathsf{ood}}}

% P with id / ood / adv labels, plus the hatted P.
\newcommand\Pid{P_\mathsf{id}}
\newcommand\Pood{P_\mathsf{ood}}
\newcommand\Padv{P_\mathsf{adv}}
\newcommand\Ptrain{\hat{P}}

% L with id / ood labels.
\newcommand\Lid{L_{\mathsf{id}}}
\newcommand\Lood{L_{\mathsf{ood}}}

% w with a label subscript (lp, ft, sc). Outer braces keep the symbol atomic.
\newcommand{\wlp}{{w_{\mathsf{lp}}}}
\newcommand{\wft}{{w_{\mathsf{ft}}}}
% \textup instead of \mbox: an \mbox in a subscript is not reduced to script
% size, so the "sc" label would print as large as the base letter.
\newcommand{\wscratch}{{w_{\textup{sc}}}}
\newcommand{\what}{\hat{w}}
% Starred (subscript \star) symbols.
\newcommand{\wstar}{w_\star}
\newcommand{\Bstar}{B_\star}
\newcommand{\bstar}{b_\star}
\newcommand{\binit}{b_0}
\newcommand{\vstar}{v_\star}
% Symbols with subscript 0.
\newcommand{\winit}{w_0}
\newcommand{\vinit}{v_0}
\newcommand{\Binit}{B_0}

\newcommand{\cangle}{\cos \theta_k}

% v and B with lp / ft labels (sans-serif).
\newcommand{\vlp}{{v_\mathsf{lp}}}
\newcommand{\Blp}{{B_\mathsf{lp}}}
\newcommand{\vft}{{v_\mathsf{ft}}}
\newcommand{\Bft}{{B_\mathsf{ft}}}
% Same symbols with a superscript t. The lp/ft labels are set in \mathsf so
% they match \vft and \Bft above; a bare "ft" in math mode is typeset as the
% product of the italic variables f and t.
\newcommand{\vftt}{{v_{\mathsf{ft}}^{t}}}
\newcommand{\Bftt}{{B_{\mathsf{ft}}^{t}}}

% Same symbols with a superscript \infty, with the same label convention.
\newcommand{\vinflp}{{v_{\mathsf{lp}}^{\infty}}}
\newcommand{\vinfft}{{v_{\mathsf{ft}}^{\infty}}}
\newcommand{\Binfft}{{B_{\mathsf{ft}}^{\infty}}}
\newcommand{\binfft}{{b_{\mathsf{ft}}^{\infty}}}

% Make every \hat a wide hat. Macros expand at use time, so this also applies
% to macros defined earlier in this file whose bodies contain \hat.
\renewcommand\hat{\widehat}

% ---- Theorem-like environments (amsthm) ----------------------------------
% Each environment in this first group has its own counter, reset per section.
%\newtheorem{condition}{Condition}[section]
\newtheorem{theorem}{Theorem}[section]
\newtheorem{lemma}{Lemma}[section]
\newtheorem{proposition}{Proposition}[section]
\newtheorem{corollary}{Corollary}[section]
\newtheorem{claim}{Claim}[section]
\newtheorem{conjecture}{Conjecture}[section]
\newtheorem{assumption}{Assumption}[section]
\newtheorem{definition}{Definition}[section]
\newtheorem{example}{Example}[section]
\newtheorem{remark}{Remark}[section]

% Observations are numbered on the theorem counter, so this declaration has
% to come after that of `theorem`.
\newtheorem{observation}[theorem]{Observation}

% Unnumbered variants.
\newtheorem*{remark*}{Remark}
\newtheorem*{observation*}{Observation}

% Equation numbers are prefixed with the section number.
\numberwithin{equation}{section}

% Two expectation symbols, both built from a blackboard-bold E:
%   \E   wraps it in \mathop (with an empty group in front of the symbol);
%   \Exp is declared through amsmath as a starred (limits-style) operator.
\newcommand\E{\mathop{{}\mathbb{E}}}
\DeclareMathOperator*\Exp{\mathbb{E}}

% Blackboard-bold R.
\newcommand\R{\mathbb{R}}

% Calligraphic capitals: \cX expands to \mathcal{X}. Listed alphabetically.
\newcommand\cB{\mathcal{B}}
\newcommand\cC{\mathcal{C}}
\newcommand\cD{\mathcal{D}}
\newcommand\cF{\mathcal{F}}
\newcommand\cG{\mathcal{G}}
\newcommand\cH{\mathcal{H}}
\newcommand\cI{\mathcal{I}}
\newcommand\cL{\mathcal{L}}
\newcommand\cM{\mathcal{M}}
\newcommand\cN{\mathcal{N}}
\newcommand\cP{\mathcal{P}}
\newcommand\cS{\mathcal{S}}
\newcommand\cU{\mathcal{U}}
\newcommand\cV{\mathcal{V}}
\newcommand\cX{\mathcal{X}}
\newcommand\cY{\mathcal{Y}}
\newcommand\cZ{\mathcal{Z}}


% Upright "poly".
\newcommand{\poly}{\textup{poly}}
% arg min / arg max as single limits-style operators (amsmath). Declared this
% way, a subscript in display style is centred under the whole "arg min"
% rather than under "min" alone, as happens with a plain \arg \min.
\DeclareMathOperator*{\argmin}{arg\,min}
\DeclareMathOperator*{\argmax}{arg\,max}
  

% Triple-bar norm with auto-sized bars: three nested \left\vert ... \right\vert
% pairs, pulled together by negative kerns.
\newcommand{\Gnorm}[1]{{%
  \left\vert\kern-0.25ex
  \left\vert\kern-0.25ex
  \left\vert #1
  \right\vert\kern-0.25ex
  \right\vert\kern-0.25ex
  \right\vert
}}
% Fixed-size version of the triple-bar norm.
\newcommand{\gnorm}[1]{{%
  \vert\kern-0.25ex
  \vert\kern-0.25ex
  \vert #1
  \vert\kern-0.25ex
  \vert\kern-0.25ex
  \vert
}}
% Double-bar norms with an upright "op" / "fro" subscript.
\newcommand{\opnorm}[1]{\|#1 \|_{\textup{op}}}
\newcommand{\fronorm}[1]{\|#1\|_{\textup{fro}}}

% Calligraphic N.
\newcommand\cover{\mathcal{N}}
%\newcommand{\comp}{\cC}
% Upright "supp".
\newcommand\supp{\textup{supp}}

% Double-struck 1 (bbm). \one takes an optional argument that becomes the
% subscript (empty by default); \1 is the bare symbol.
\newcommand\one[1][]{\mathbbm{1}_{#1}}
\newcommand\1{\mathbbm{1}}

% Left-pointing arrow.
\newcommand\ot{\leftarrow}

% Named aliases for single-letter symbols, so the letter can be changed here.
\newcommand\size{s}
\newcommand\psize{t}
\newcommand\lip{\kappa}
\newcommand\plip{\tau}
\newcommand\tlip{\tilde{\kappa}}

\newcommand\nn{\textup{NN}}
\newcommand\pmarg{m}
\newcommand\simpm{\pmarg}    % alias: expands to \pmarg

\newcommand\ellzo{\ell_{\textup{0-1}}}
\newcommand\correct{\cS_n}   % uses the \cS shorthand defined in this file
\newcommand\ravg{R_n}

\newcommand\empd{L_2(P_n)}
% Upright "Tr" operator.
\DeclareMathOperator\Tr{Tr}

% \ell with an upright label subscript. The labels use \textup, matching
% \ellzo (\ell_{\textup{0-1}}); a bare "ent" or "exp" in math mode would be
% typeset as a product of italic variables.
\newcommand{\ent}{\ell_{\textup{ent}}}
\newcommand{\lexp}{\ell_{\textup{exp}}}
\newcommand{\expent}{g}
%\newcommand{\E}{\mathbb{E}}
%\newcommand{\R}{\mathbb{R}}
% Named aliases for single symbols.
\newcommand\density{p}
\newcommand\dratio{s}
%\newcommand{\cN}{\mathcal{N}}
\newcommand\sigopt{\gamma^\star_{\sigma}}
\newcommand\concave{\nu}
\newcommand\smooth{\rho}
\newcommand\comp{K}
\newcommand\mass{\tau}

% Upright names and a numeric constant.
\newcommand\erf{\textup{erf}}
\newcommand\constl{0.25}
\newcommand\optwone{\textup{opt}_{w_1}}

% w with an upright superscript "s"; the outer braces keep it atomic.
\newcommand\ws{{w^{\textup{s}}}}
% Alias for calligraphic B.
\newcommand\ball{\cB}

% Symbols carrying an upright "tg" label.
\newcommand\Dt{\cD_{\textup{tg}}}
\newcommand\Dtone{\cD_{\textup{tg},1}}
\newcommand\ctarg{c_{\textup{tg}}}

% sigma with an upright "max" label.
\newcommand\sigmamax{\sigma_{\textup{max}}}

% Operators printing "sgn" and "erfc".
\DeclareMathOperator\sign{\textup{sgn}}
\DeclareMathOperator\erfc{\textup{erfc}}
% Reference one or two equations whose labels start with "eqn:".
% Uses amsmath's \eqref rather than hand-written parentheses around \ref, so
% the parentheses stay upright (matching the equation tags) in italic text.
%   #1 (and #2): the label without the "eqn:" prefix.
\newcommand\refeqn[1]{\eqref{eqn:#1}}
\newcommand\refeqns[2]{\eqref{eqn:#1} and \eqref{eqn:#2}}

% Upright "softmax". \textup rather than \mbox: an \mbox in math mode turns
% italic inside theorem bodies and does not shrink in sub/superscripts; with
% amsmath loaded \textup does neither. Same construction as \poly and \supp.
\newcommand{\softmax}{\textup{softmax}}
