% \documentclass[11pt,letterpaper]{article}
% \usepackage[in]{fullpage}
\documentclass[accepted]{uai2022} % for initial submission
% \documentclass[accepted]{uai2022} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2022} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2022} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}

% \usepackage[authoryear,round]{natbib}
\usepackage{natbib} % has a nice set of citation styles and commands
\bibliographystyle{plainnat}
\renewcommand{\bibsection}{\subsubsection*{References}}
    
    
    
% Recommended, but optional, packages for figures and better typesetting:
\usepackage{microtype}
\usepackage{graphicx}
% \usepackage{subfigure}
\usepackage{booktabs} % for professional tables
\usepackage{amsmath,amssymb} % define this before the line numbering.
\usepackage{subcaption}
% \usepackage[font=small]{caption}
\usepackage{caption}

% These are recommended to typeset algorithms but not required. See the subsubsection on algorithms. Remove them if you don't have algorithms in your paper.
\usepackage{algorithm}
\usepackage{algorithmic}


% Title

% Your title must be in mixed case, not sentence case.
% That means all verbs (including short verbs like be, is, using,and go),
% nouns, adverbs, adjectives should be capitalized, including both words in hyphenated terms, while
% articles, conjunctions, and prepositions are lower case unless they
% directly follow a colon or long dash
\title{On the Effectiveness of Adversarial Training Against Common Corruptions}


% \usepackage[nonatbib]{neurips_2021}
% \usepackage[authoryear,round]{natbib}
% \usepackage[numbers]{natbib}

% Recommended, but optional, packages for figures and better typesetting:
\usepackage{microtype}
\usepackage{graphicx}
% \usepackage{subfigure}
\usepackage{booktabs} % for professional tables
\usepackage{amsmath,amssymb} % define this before the line numbering.
\usepackage{subcaption}


% \usepackage[links=true,citecolor=blue,urlcolor=red]{hyperref}  
\usepackage{nicefrac}
\usepackage[table]{xcolor}
\usepackage{arydshln}
\usepackage{multirow}
\usepackage{wrapfig} 


% Inline review-note macros: \<Name>note{<initials>}{<text>} prints
% "[<initials>: <text>]" in small sans-serif type, one colour per author.
\newcommand{\Authornote}[2]{{\sffamily\small\color{red}{[#1: #2]}}}
\newcommand{\Maksymnote}[2]{{\sffamily\small\color{orange}{[#1: #2]}}}
\newcommand{\Klimnote}[2]{{\sffamily\small\color{olive}{[#1: #2]}}}
% \comment{...} discards its argument, so it can be used to hide a passage.
\newcommand{\comment}[1]{}
% Per-author shortcuts with the initials pre-filled; each takes the note text
% as its single remaining argument, e.g. \mnote{check this number}.
\newcommand{\mnote}{\Maksymnote{MA}}
\newcommand{\knote}{\Klimnote{KK}}
\newcommand{\nf}{\Authornote{NF}}

% Attempt to make hyperref and algorithmic work together better:
% \newcommand{\theHalgorithm}{\arabic{algorithm}}

\usepackage[linesnumbered,algoruled,boxed,lined,noend,algo2e]{algorithm2e}  % algo2e is needed to prevent clash with standard algorithm package



% Indicator symbol.
% NOTE(review): \mathbbm is provided by the bbm package, and no
% \usepackage{bbm} is visible in this part of the preamble -- confirm it is
% loaded elsewhere (or by the class) before \Id is used.
\newcommand{\Id}{\mathbbm{1}}
% Upright operator names built with \mathop. \mathrm replaces the obsolete
% plain-TeX font switch \rm (see l2tabu).
% Plain variants: sub/superscripts follow TeX's default \mathop placement.
\def\argmax{\mathop{\mathrm{arg\,max}}}
\def\argmin{\mathop{\mathrm{arg\,min}}}
% "...op" variants (and \sign): \limits forces sub/superscripts to be set
% directly below/above the operator, also in inline math.
\def\argmaxop{\mathop{\mathrm{arg\,max}}\limits}
\def\argminop{\mathop{\mathrm{arg\,min}}\limits}
\def\minop{\mathop{\mathrm{min}}\limits}
\def\maxop{\mathop{\mathrm{max}}\limits}
\def\sign{\mathop{\mathrm{sign}}\limits}
% Blackboard-bold / calligraphic shorthands.
\def\R{\mathbb{R}}
\def\N{\mathbb{N}}
\def\U{\mathcal{U}}

% "Assigned to" relation.
% NOTE(review): \coloneqq comes from mathtools (or a font package that
% defines it); no such package is visibly loaded here -- confirm before use.
\def\assign{\coloneqq}


% Run-in paragraph heading: \myparagraph{Title.} expands to \textbf{Title.}.
% Swap in the commented-out definition below to use \paragraph instead.
\newcommand{\myparagraph}{\textbf}
% \newcommand{\myparagraph}{\paragraph}

% Auto-sized paired delimiters: \norm{x} -> ||x||, \abs{x} -> |x|,
% \inner{x, y} -> <x, y>. They use \left...\right, so the delimiters grow
% with their contents.
\newcommand{\norm}[1]{\left\|#1\right\|}
\newcommand{\abs}[1]{\left|#1\right|}
\newcommand{\inner}[1]{\left\langle#1\right\rangle}
% "Equal by definition": an equals sign with "def" stacked on top
% (\text requires amsmath).
\newcommand*{\defeq}{\stackrel{\text{def}}{=}}
% Colour helpers for highlighting text (\textcolor is provided by xcolor).
\newcommand{\red}[1]{\textcolor{red}{#1}}
\newcommand{\blue}[1]{\textcolor{blue}{#1}}

% Symbols declared as amsmath operators, i.e. typeset with operator spacing.
% \eps: perturbation radius; \lpips: the LPIPS distance d_LPIPS.
% NOTE(review): with the default math fonts \mathbf typically has no visible
% effect on lowercase Greek or on \ell -- confirm the intended appearance of
% \eps and \l.
\DeclareMathOperator{\eps}{\mathbf{\varepsilon}}
\DeclareMathOperator{\lpips}{d_{\text{LPIPS}}}

% Expectation symbol.
\DeclareMathOperator{\E}{\mathbb{E}}
% \DeclareMathOperator refuses to overwrite an existing command, so the
% previous meaning of \l is cleared first.
% NOTE(review): this replaces the text-mode \l (the letter l-with-stroke);
% confirm that no bibliography entry or text relies on it.
\let\l\relax
\DeclareMathOperator{\l}{\mathbf{\ell}}
% Clear the built-in \vec accent so it can be redefined below.
\let\vec\relax
% \DeclareMathOperator{\vec}{vec}
\DeclareMathOperator{\diag}{\mathbf{\text{diag}}}

% \vec{x} now typesets its argument in bold via \boldsymbol (amsmath).
\newcommand{\vec}[1]{\boldsymbol{#1}}
% Equal by definition; \ensuremath makes it usable in text and math mode.
% Uses \mathrm instead of the obsolete plain-TeX font switch \rm.
\newcommand\eqdef{\ensuremath{\stackrel{\mathrm{def}}{=}}} % Equal by definition

% \blfootnote{<text>}: footnote without a marker. It sets the current
% footnote mark to empty and hands <text> to the kernel's \@footnotetext, so
% nothing is printed in the running text and the footnote counter is untouched.
\makeatletter
\def\blfootnote{\xdef\@thefnmark{}\@footnotetext}
\makeatother

% Custom heading styles built on the kernel's \@startsection, whose arguments
% are {name}{level}{indent}{beforeskip}{afterskip}{style}. A negative
% beforeskip suppresses the indentation of the paragraph after the heading.
\makeatletter
% Sections: large, bold, upper-cased (\MakeUppercase takes the title as its
% argument because it is the last token of the style).
\renewcommand{\section}{\@startsection {section}{1}{\z@}%
	{-3.5ex \@plus -1ex \@minus -.2ex}%
	{2.3ex \@plus .2ex}%
	{\large\bfseries\MakeUppercase}}

% Subsections: same style as sections, with slightly smaller vertical skips.
\renewcommand{\subsection}{\@startsection{subsection}{2}{\z@}%
	{-3.25ex\@plus -1ex \@minus -.2ex}%
	{1.5ex \@plus .2ex}%
	{\large\bfseries\MakeUppercase}}

% Subsubsections: normal-size bold small caps.
% NOTE(review): the level argument below is 2, the same as \subsection; the
% standard classes use 3 for \subsubsection. The level is compared against
% secnumdepth to decide whether the heading is numbered -- confirm whether 2
% is intentional or a copy-paste leftover.
\renewcommand{\subsubsection}{\@startsection{subsubsection}{2}{\z@}%
	{-3.25ex\@plus -1ex \@minus -.2ex}%
	{1.5ex \@plus .2ex}%
	{\normalfont\normalsize\scshape\bfseries}}
\makeatother


% To fix list things: 
\usepackage{enumitem}

% \author{
%   Klim Kireev\textsuperscript{*}\\
%   EPFL\\
% %   \texttt{\footnotesize klim.kireev@epfl.ch}
%   \and
%   Maksym Andriushchenko\textsuperscript{*}\\
%   EPFL\\
% %   \texttt{\footnotesize maksym.andriushchenko@epfl.ch}
%   \and
%   Nicolas Flammarion \\
%   EPFL\\
% %   \texttt{\footnotesize nicolas.flammarion@epfl.ch}
% }
\author[1]{Klim~Kireev\textsuperscript{*}}
\author[1]{Maksym~Andriushchenko\textsuperscript{*}}
\author[1]{Nicolas~Flammarion}
% Add affiliations after the authors
\affil[1]{%
    EPFL\\
    Lausanne, Switzerland
}
% \affil[2]{%
%     EPFL
% }
% \affil[3]{%
%     EPFL
% }

% \date{\vspace{-5ex}}



\begin{document}

\maketitle
\blfootnote{\vspace{1em} \textsuperscript{*}Equal contribution.}


\begin{abstract}
\vspace{-7mm}
The literature on robustness towards common corruptions shows no consensus on whether adversarial training can improve the performance in this setting. First, we show that, when used with an appropriately selected perturbation radius, $\l_p$ adversarial training can serve as a strong baseline against common corruptions, improving both accuracy and calibration. Then we explain why adversarial training performs better than data augmentation with simple Gaussian noise, which has been observed to be a meaningful baseline on common corruptions. Related to this, we identify the \textit{$\sigma$-overfitting} phenomenon when Gaussian augmentation overfits to a particular standard deviation used for training, which has a significant detrimental effect on common corruption accuracy. We discuss how to alleviate this problem and then how to further enhance $\l_p$ adversarial training by introducing an \textit{efficient relaxation} of adversarial training with \textit{learned perceptual image patch similarity} as the distance metric. Through experiments on CIFAR-10 and ImageNet-100, we show that our approach not only improves the $\l_p$ adversarial training baseline but also has cumulative gains with data augmentation methods such as AugMix, DeepAugment, ANT, and SIN, leading to state-of-the-art performance on common corruptions.
The code of our experiments is publicly available at \url{https://github.com/tml-epfl/adv-training-corruptions}.
\end{abstract}


\section{Introduction}

% \begin{wrapfigure}{r}{0.445\textwidth}
% 	\vspace{-10mm}
% 	\begin{center}
% 		% \centerline{\includegraphics[width=\columnwidth]{figures/optimal_eps_old.png}}
% 		\includegraphics[width=0.44\columnwidth]{figures/at_acc_vs_eps.pdf}
% 		\vskip -0.1in
% 		\caption{
% 			Accuracy on common corruptions from CIFAR-10-C for ResNet-18 models adversarially trained using different $\l_\infty$ radii. We observe that the performance with $\eps=\nicefrac{1}{255}$ is significantly higher than with the standardly used $\eps=\nicefrac{8}{255}$.
% 		} 
% 		\label{fig:linf_eps_c10c_acc}
% 	\end{center}
% 	\vspace{-12mm}
% \end{wrapfigure}
\begin{figure}[t]
    \begin{center}
    % \centerline{\includegraphics[width=\columnwidth]{figures/optimal_eps_old.png}}
    \includegraphics[width=0.86\columnwidth]{figures/at_acc_vs_eps.pdf}
    \caption{
        Accuracy on common corruptions from CIFAR-10-C for ResNet-18 models adversarially trained using different $\l_\infty$ radii. We observe that the performance with $\eps=\nicefrac{1}{255}$ is significantly higher than with the standardly used $\eps=\nicefrac{8}{255}$.
    } 
    \label{fig:linf_eps_c10c_acc}
    \end{center}
\end{figure}
Despite achieving human-level performance on many computer vision tasks, deep neural networks are still not as robust as humans towards various distribution shifts \citep{szegedy2013intriguing, taori2020measuring} including common image corruptions \citep{hendrycks2019benchmarking}. 
%
Attempts to understand the vulnerability towards such shifts include analysis of the network architecture \citep{azulay2019deep},
the features contained in the data \citep{ilyas2019adversarial}, and frequency analysis of neural networks \citep{yin2019fourier, ortiz2020hold}.
%
Many approaches have been suggested to improve their robustness to these shifts including approaches based on data augmentations \citep{cubuk2018autoaugment, hendrycks2019augmix}, adversarial training \citep{madry2018towards, laidlaw2021perceptual}, and pretraining \citep{hendrycks2019using}.

Although data augmentation methods tend to improve the performance under common synthetic corruptions \citep{hendrycks2019augmix}, these augmentations are often ad hoc and may have substantial overlap with the corruptions evaluated at test time. 
% 
At the same time, there is a large amount of literature on adversarial training with $\l_p$-bounded perturbations \citep{goodfellow2014explaining,madry2018towards}.
%
Adversarial training emerged as a principled approach to improve the worst-case performance of the model against \textit{small} $\l_p$ perturbations.
%
However, common image corruptions have a very high $\l_p$ distance from clean samples, so the utility of using $\l_p$ adversarial training for them is not obvious.
This leads us to explore the following question:
\begin{center}
    \textit{How can we improve the performance on common image corruptions using adversarial training?}
\end{center}
%\begin{figure}[t]
%    \begin{center}
%    % \centerline{\includegraphics[width=\columnwidth]{figures/optimal_eps_old.png}}
%    \includegraphics[width=0.85\columnwidth]{figures/at_acc_vs_eps.pdf}
%    \caption{
%        Accuracy on common corruptions from CIFAR-10-C for ResNet-18 models adversarially trained using different $\l_\infty$ radii. We observe that the performance with $\eps=\nicefrac{1}{255}$ is significantly higher than with the standardly used $\eps=\nicefrac{8}{255}$.
%    } 
%    \label{fig:linf_eps_c10c_acc}
%    \end{center}
%\end{figure}

% \myparagraph{Contributions.}
We make the following contributions in our paper:
\begin{itemize}
	\vspace{-.1cm}
    \item We show that $\l_p$ adversarial training with an \textit{appropriately selected} perturbation radius can serve as a strong baseline against common image corruptions improving both accuracy and calibration on corrupted images.
    \vspace{-.1cm}
    \item We analyze the success of $\l_p$ adversarial training via a comparison to other natural baselines such as Gaussian data augmentation. 
    % and analyze why the latter can lead to suboptimal performance on common corruptions.
    We observe that Gaussian augmentation can overfit to the perturbation size it has been trained on which, however, does not happen for adversarial training.
    \vspace{-.1cm}
    \item We introduce an efficient relaxation of adversarial training with \textit{learned perceptual image patch similarity} (LPIPS) \citep{zhang2018unreasonable} based on layerwise adversarial perturbations. This new relaxation is at least as effective as previous approaches \citep{laidlaw2021perceptual} but significantly faster to train. 
    \vspace{-.1cm}
    \item We show that our relaxation approach has cumulative gains with existing data augmentation methods such as AugMix, DeepAugment, ANT, and SIN leading to state-of-the-art performance on common corruptions from CIFAR-10-C and ImageNet-100-C.
\end{itemize}




\section{Related work}
We provide here an overview of relevant works on common image corruptions, different data augmentation methods proposed to improve the performance on corruptions, and then we discuss papers on adversarial robustness with respect to both $\l_p$ and non-$\l_p$ perturbations.

\myparagraph{Common image corruptions.}
%
\cite{dodge2017study} first find that despite being on par with human vision on standard images, deep networks perform suboptimally on common corruptions such as noise and blur.
%
\cite{geirhos2018generalisation} measure the performance of deep networks on $12$ different image corruption types but find that data augmentation on one type of corruption does not tend to improve the performance on others. 
%
However, these findings are reconsidered in \cite{rusak2020simple} where Gaussian data augmentation is shown to help for a wide range of image corruptions.
%
In a standardization effort, \cite{hendrycks2019benchmarking} introduce a few image classification datasets---in particular, CIFAR-10-C and ImageNet-C---with 15 different common corruptions from four categories: noise, blur, weather, and digital corruptions.
%
\cite{ovadia2019can} show that not only accuracy but also calibration deteriorates under these common corruptions.
%and a common approach of temperature scaling \citep{guo2017calibration} cannot mitigate the miscalibration completely.
%
\citet{schneider2020improving, nandy2021adversarially} show that robustness to common corruptions can be improved by using test-time adaptation, e.g., via recomputing the batch normalization statistics.
%
\cite{radford2021learning} show that contrastive pretraining on a very large set of image-caption pairs can substantially improve robustness on various distribution shifts including common corruptions.


\myparagraph{Data augmentations.}
%
Data augmentation is a widely used technique to improve the generalization. 
%
Besides classical image transformations like random flipping or cropping, many other approaches have been proposed such as linearly interpolating between images and their labels \citep{zhang2018mixup}, replacing a part of the image with either a black-colored patch \citep{devries2017improved} or a part of another image \citep{yun2019cutmix}.
%
% If the augmentation set used to be manually crafted, the selection process can be now automatized with an automatic augmentation search \citep{cubuk2018autoaugment}.
%
One of the best-performing methods in terms of accuracy and calibration on common corruptions is AugMix \citep{hendrycks2019augmix}, which combines carefully selected augmentations with a regularization term based on the Jensen-Shannon divergence. 
%
% \cite{laermann2019achieving} combine stability training with various input perturbations such as Gaussian noise and show an improvement on different image corruptions. 
%
\cite{taori2020measuring} observe that improvements on synthetic distribution shifts (such as common corruptions) do not necessarily transfer to real distribution shifts. 
%
However, \cite{hendrycks2020many} show an example when improving robustness against synthetic blurs also helps against naturally obtained blurred images.


\myparagraph{$\l_p$ adversarial robustness.}
%
Adversarial training in deep learning was first considered by \cite{goodfellow2014explaining} and later framed as a robust optimization problem by \cite{madry2018towards}.
%
% There have been many attempts in the literature to improve standard adversarial training, and we refer to an exhaustive list of the best-performing approaches to the leaderboard from \cite{croce2020robustbench}. 
% The state-of-the-art approach of \cite{rebuffi2021Fixing} is still based on adversarial training enhanced by additional training data, larger models, and weight averaging.
%
The view that adversarial training damages or at least does not improve the performance on \textit{common corruptions} has been prevalent in the literature \citep{hendrycks2019augmix, rusak2020simple, hendrycks2020many}.
%
However, previous works directly use publicly available robust models without adjusting the perturbation radius used for adversarial training. 
% 
For example, \citet{rusak2020simple} show that adversarially trained ImageNet models from \cite{xie2019feature}, \cite{shafahi2019adversarial}, and \cite{shafahi2020universal} do not help on ImageNet-C compared to standardly trained models.
%
However, \cite{ford2019adversarial} report that $\l_\infty$ adversarially trained models on CIFAR-10 from \cite{madry2018towards} do lead to an improvement on CIFAR-10-C compared to a standard model.
%
The approach of \cite{xie2020adversarial}, AdvProp, relies on $\l_\infty$ adversarial training to improve standard and corruption accuracy but they advocate the use of \textit{auxiliary} batch normalization layers for standard and adversarial training examples. We find that similar performance can be achieved on common corruptions using vanilla adversarial training without a customized use of BatchNorm layers.
%
\cite{kang2019transfer} study the robustness transfer between $\l_p$-robust models and \textit{adversarially optimized} elastic and JPEG corruptions. They show that $\l_p$ adversarial training can increase robustness against these two types of adversarial perturbations, but robustness does not transfer in all the cases and sometimes may even hurt robustness against other perturbation types. 
%
% \cite{hendrycks2019benchmarking} report that the Adversarial Logit Pairing method \citep{kannan2018adversarial} (whose $\l_p$-robustness is questionable, see \cite{engstrom2018evaluating} and \cite{mosbach2018logit}) enables improvements on corruptions from Tiny ImageNet-P dataset which was introduced in the same paper. Similarly, \cite{wong2021learning} report that $\l_2$ and $\l_\infty$ adversarial training improve on 3 held-out corruption types which are, however not standard to report in the literature on common corruptions \citep{hendrycks2019benchmarking}.
% 
% Concurrently with our work, \cite{yu2021understanding} analyze the bias-variance tradeoff in adversarial training and observe that it can reduce both bias and variance on CIFAR-10-C for small perturbation radii.




\myparagraph{Non-$\l_p$ adversarial robustness.}
%
\cite{volpi2018generalizing} propose Lagrangian-style adversarial training in the input space and in the last layer of the network. 
%
\cite{stutz2019disentangling} propose \textit{on-manifold} adversarial training which is performed in the latent space of a VAE-GAN generative model. However, its success crucially depends on the quality of the generative model which could not be scaled beyond simple image recognition datasets.
%
% \cite{wong2021learning} propose to train a variational auto-encoder to generate CIFAR-10-C corruptions and use it for data augmentation and adversarial training. We note that their technique assumes knowledge of some subset of corruptions from CIFAR-10-C which differs from the setting we consider in our paper.
%
% \cite{robey2020model} also use adversarial training in the latent space of a deep generative model to learn models robust to some realistic non-$\l_p$ bounded perturbations.
%
\cite{wei2020improved} derive generalization bounds that motivate adversarial training with respect to all network layers which they use to improve $\l_p$ robustness. 
%
Recently, \cite{laidlaw2021perceptual} provided algorithms for approximate \textit{perceptual adversarial training} based on the LPIPS distance \citep{zhang2018unreasonable} which is defined via activations of a neural network. They aim at improving robustness against new types of adversarial perturbations that were unseen during training.



\section{$\ell_p$ adversarial training improves the performance on common corruptions}
Here we formally introduce adversarial training and show that it can lead to non-trivial improvements in accuracy and calibration on common corruptions.
% for a small enough perturbation radius.

\myparagraph{Background on adversarial training.}
Let $\l(x,y;\theta)$ denote the loss of a classifier parametrized by $\theta \in \R^m $ on the sample $(x,y)\sim D$ where $D$ is the data distribution.
Previous works \citep{shaham2015understanding,madry2018towards} formalized the goal of training adversarially robust models as the following optimization problem: 
\begin{align}
\minop_{\theta} \E_{(x, y) \sim D} \big[ \maxop_{\delta\in\Delta} \l(x+\delta, y; \theta) \big].
\label{eq:rob_opt_general}
\end{align}
In this section, we focus on the $\ell_p$ threat model, i.e., $\Delta = \{\delta\in\R^d: \norm{\delta}_p \leq \varepsilon, \ x + \delta \in [0, 1]^d\}$, where the adversary can change each input $x$ in an $\eps$-ball around it while making sure that the input $x+\delta$ does not exceed its natural range.
A common way to solve the inner maximization problem is the \textit{projected gradient descent} method (PGD) defined by the following recursion initialized at $\delta^{(0)}$:
\begin{align} \label{eq:pgd-def}
    \delta^{(t+1)} \defeq \Pi_{\Delta} \left[\delta^{(t)} + \alpha \nabla_{\delta^{(t)}} \l(x+\delta^{(t)}, y; \theta)\right],
\end{align}
where $\Pi_{\Delta}$ is the projection operator onto the set $\Delta$, and $\alpha$ is the step size of PGD. 
%
Instead of the gradient, one often uses the gradient sign update for $\l_\infty$ perturbations or the $\l_2$ normalized update for $\l_2$ perturbations.
%
The starting point $\delta^{(0)}$ can be initialized as any point inside $\Delta$, e.g., as zero,
or randomly \citep{madry2018towards}.

The one-iteration variant of PGD  is known as the \textit{fast gradient method} (FGM) when the normalized $\l_2$ update is used and as the \textit{fast gradient sign method} (FGSM) when the $\l_\infty$ sign update is used \citep{goodfellow2014explaining}. 
%
Note that in both cases the step size is $\alpha=\eps$ which leads to perturbations located on the boundary of the set $\Delta$.
%
These methods are fast but sometimes prone to \textit{catastrophic overfitting} when the model overfits to FGM/FGSM but is not robust to iterative PGD attacks \citep{tramer2018ensemble, wong2020fast}.
%
This problem can be alleviated by specific regularization methods like CURE \citep{moosavi2019robustness,huang2020bridging} or GradAlign \citep{andriushchenko2020understanding}. 
%
However, for small enough $\eps$, adversarial training with FGM/FGSM works as well as multi-step PGD \citep{andriushchenko2020understanding}. 



\myparagraph{Experimental details.}
We do experiments on two common image classification datasets: CIFAR-10 \citep{krizhevsky2009learning} which has $32\times32$ images, and ImageNet-100 \citep{russakovsky2015imagenet} with $224\times224$ images where we take every tenth class following \cite{laidlaw2021perceptual}. We choose ImageNet-100 since we always perform a grid search over the main hyperparameters such as the perturbation radius for adversarial training 
% or standard deviation of Gaussian noise 
which would be too expensive to do on the full ImageNet. Unless mentioned otherwise, we use the PreAct ResNet-18 architecture \citep{he2016identity}. We specify the exact hyperparameters in App.~A. We evaluate the accuracy on common corruptions using CIFAR-10-C and ImageNet-C datasets from \citet{hendrycks2019benchmarking} which contain 15 different synthetic corruptions in 4 categories: blur, noise, digital, and weather corruptions. We report the accuracy by averaging over all 5 severity levels. 
% The code of our experiments is publicly available\footnote{\url{https://github.com/tml-epfl/adv-training-corruptions}}.



\myparagraph{Adversarial training improves accuracy and calibration.} 
% \begin{table*}[t]
%     \centering
%     % \setlength{\tabcolsep}{2.7pt}
%     \small
%     \begin{tabular}{@{}lcccccc@{}}
%     % \toprule
%                       & \multicolumn{3}{c}{\textbf{CIFAR-10}} & \multicolumn{3}{c}{\textbf{ImageNet-100}}  \\
%                         \cmidrule(lr){2-4} \cmidrule(lr){5-7}
%     % \textbf{Training}  & Accuracy & C-10-C acc. & C-10-C ECE & Accuracy & IN-100-C acc. & IN-100-C ECE \\
%                       & Standard & Corruption & Corruption & Standard & Corruption & Corruption \\
%     \textbf{Training} & accuracy & accuracy   & calibration error        & accuracy & accuracy & calibration error \\
%     \midrule
%     Standard & 95.1\% & 74.6\% & 16.6\% &  86.6\% & 47.5\% & 10.0\% \\
%     $\l_\infty$ adversarial  & 93.3\% & 82.7\% & 10.8\% &   86.5\% & 47.7\% & 12.4\% \\
%     $\l_2$ adversarial & 93.6\% & \textbf{83.4\%} & \textbf{10.5\%} &   86.3\% & \textbf{48.4\%} & \textbf{9.4\%}\\
%     % \bottomrule
%     \end{tabular}
%     \caption{Accuracy and calibration of ResNet-18 models trained on CIFAR-10 and ImageNet-100. $\l_\infty$ and $\l_2$ adversarial training substantially improves accuracy and calibration error (ECE) on corrupted samples.
%     }
%     \label{tab:at_helps}
% \end{table*}
\begin{table}[t]
    \centering
    % \setlength{\tabcolsep}{2.7pt}
    \small
    \caption{Accuracy and calibration of ResNet-18 models trained on CIFAR-10 and ImageNet-100. $\l_\infty$ and $\l_2$ adversarial training substantially improves accuracy and calibration error (ECE) on corrupted samples.}
    \begin{tabular}{@{}lccc}
    % \toprule
    % \textbf{Training}  & Accuracy & C-10-C acc. & C-10-C ECE & Accuracy & IN-100-C acc. & IN-100-C ECE \\
                      & \textbf{Standard} & \textbf{Corruption} & \textbf{Corruption} \\
    \textbf{Training} & \textbf{accuracy} & \textbf{accuracy}   & \textbf{calibration error}  \\
    \midrule
                      & \multicolumn{3}{c}{\textbf{CIFAR-10}} \\
                        \cmidrule(lr){2-4}
    Standard & 95.1\% & 74.6\% & 16.6\% \\
    $\l_\infty$ adversarial  & 93.3\% & 82.7\% & 10.8\% \\
    $\l_2$ adversarial & 93.6\% & \textbf{83.4\%} & \textbf{10.5\%} \\
    \midrule
                      & \multicolumn{3}{c}{\textbf{ImageNet-100}} \\
                        \cmidrule(lr){2-4}
    Standard & 86.6\% & 47.5\% & 10.0\% \\
    $\l_\infty$ adversarial  & 86.5\% & 47.7\% & 12.4\% \\
    $\l_2$ adversarial & 86.3\% & \textbf{48.4\%} & \textbf{9.4\%}\\
    % \bottomrule
    \end{tabular}
    \label{tab:at_helps}
\end{table}
%\begin{figure}[t]
%    \begin{center}
%        \includegraphics[width=0.8\columnwidth]{figures/calibration_linf.pdf}
%        \caption{Expected calibration error on CIFAR-10-C for $\l_\infty$ adversarially trained models.}
%        \label{fig:calibration_linf}
%    \end{center}
%\end{figure}
% While larger $\l_p$-perturbations can be interesting for security-driven applications \citep{tramer2019adversarial, saadatpanah2019adversarial},  it is not a realistic threat model for many computer vision tasks \citep{gilmer2018motivating}.
%
% We emphasize that the selection of the perturbation radius $\epsilon$ crucially impacts the performance on common corruptions. 
%
We start by showing in Fig.~\ref{fig:linf_eps_c10c_acc} the common corruption accuracy of $\l_\infty$ adversarially trained models as it is the most widely studied setting \citep{madry2018towards} and has been reported multiple times in common corruption literature \citep{hendrycks2019augmix, ford2019adversarial, rusak2020simple}.
%
Since we are interested primarily in small-$\eps$ adversarial training, we rely throughout the paper on FGM/FGSM for $\l_2$/$\l_\infty$ norms respectively to solve the inner maximization problem~\eqref{eq:rob_opt_general} which only leads to a $2\times$ computational overhead. 
%
Note however that we exceptionally use PGD with $10$ steps for $\eps \in \{\nicefrac{8}{255}, \nicefrac{10}{255}\}$ to prevent catastrophic overfitting 
%for large $\eps$ 
and allow a direct comparison with previous works.
%
We observe that \textit{for the small-$\eps$ regime} around $\eps=\nicefrac{1}{255}$, we get a significant improvement in corruption accuracy: 74.5\% accuracy is achieved with standard training, 82.7\% with adversarial training using $\eps=\nicefrac{1}{255}$, and 73.8\% using the standardly reported threshold $\eps_\infty=\nicefrac{8}{255}$.\footnote{The exact numbers differ from \citet{ford2019adversarial} since we use ResNet-18 instead of WRN-28-10 and different hyperparameters.}
%
The reason is that the tradeoff between robustness and accuracy \citep{tsipras2018robustness} has to be carefully balanced---if the standard accuracy drops for higher $\eps$, the corruption accuracy also deteriorates. Thus, selecting the most robust $\l_p$-model does not lead to the optimal performance on common corruptions.
%
Alternatively, one can also balance this tradeoff by mixing clean and adversarial samples, but it overall leads to similar results (see App.~C for details), so we focus on adversarial training with 100\% adversarial samples for the rest of the paper.

% \begin{wrapfigure}{r}{0.43\textwidth}
% 	\vspace{-9.5mm}
% 	\begin{center}
% 		\includegraphics[width=0.435\columnwidth]{figures/calibration_linf.pdf}
% 		\vspace{-6.5mm}
% 		\caption{Expected calibration error on CIFAR-10-C for $\l_\infty$ adversarially trained models.}
% 		\label{fig:calibration_linf}
% 	\end{center}
% 	\vspace{-7mm}
% \end{wrapfigure}
\begin{figure}[t]
    \begin{center}
        \includegraphics[width=0.85\columnwidth]{figures/calibration_linf.pdf}
        \caption{Expected calibration error on CIFAR-10-C for $\l_\infty$ adversarially trained models.}
        \label{fig:calibration_linf}
    \end{center}
\end{figure}
Additionally, we show that predicted probabilities of adversarially trained models are significantly better \textit{calibrated on common corruptions}. 
We believe that calibration is another important aspect of the model's trustworthiness, which is particularly important in the presence of out-of-distribution data such as corrupted images.
In Fig.~\ref{fig:calibration_linf}, we plot the expected calibration error (ECE) \citep{guo2017calibration} on CIFAR-10-C for models trained with different $\l_\infty$-radii.
We observe that the ECE---both with and without temperature rescaling (see App.~B for details)---follows a decreasing trend over $\l_\infty$-radii which is expected since a classifier that predicts uniform probabilities over classes is perfectly calibrated.
In particular, the most accurate model trained with $\eps_\infty=\nicefrac{1}{255}$ has a much lower ECE than the standard model: 10.8\% instead of 16.6\%, and with temperature rescaling 6.7\% instead of 11.3\%. 
% We note that although a classifier that predicts a uniform probability over classes is perfectly calibrated, Fig.~\ref{fig:calibration_linf} and Fig.~\ref{fig:linf_eps_c10c_acc} together show that there exists a sweet spot where \textit{both} accuracy and calibration are significantly improved.

We further compare the performance in the $\l_2$ perturbation model. In Table~\ref{tab:at_helps}, we report results of standard, $\l_\infty$, and $\l_2$ adversarial training on CIFAR-10 and ImageNet-100 where we perform a detailed grid search for each model over the perturbation radius $\eps$. To the best of our knowledge, we show for the first time that adversarial training improves calibration (see also App.~B) while increasing the accuracy and that it helps on ImageNet-C, and not only on CIFAR-10-C. We generally observe that $\l_2$ adversarial training performs better than $\l_\infty$, thus we focus on it in the next section.




\section{Understanding the effect of adversarial training on image corruptions}
Here we compare $\l_2$ adversarial training to other natural baselines 
% including Gaussian data augmentation---which has been observed to be a meaningful baseline against common corruptions---
and discuss the main conceptual differences.

\label{sec:at_vs_gauss}
\begin{figure}[t]
    \begin{center}
        \includegraphics[width=0.83\columnwidth]{figures/barplot_corruption_types.pdf}
        \caption{Accuracy for different corruption types on CIFAR-10-C. Unlike other methods, adversarial training improves the performance on each corruption.}
        \label{fig:barplot_corruption_types}
    \end{center}
\end{figure}
% \begin{wrapfigure}{r}{0.43\textwidth}
% 	\vspace{-10mm}
% 	\begin{center}
% 		\includegraphics[width=0.43\columnwidth]{figures/barplot_corruption_types.pdf}
% 		\vspace{-7mm}
% 		\caption{Accuracy for different corruption types on CIFAR-10-C. Unlike other methods, adversarial training improves the performance on each corruption type compared to standard training.}
% 		\label{fig:barplot_corruption_types}
% 	\end{center}
% 	\vspace{-8mm}
% \end{wrapfigure}

\myparagraph{Comparing natural baselines across corruption types.}
%
We compare $\ell_2$ adversarial training with a few simple baselines: standard training, gradient regularization \citep{drucker1992improving}, and standard Gaussian data augmentation. To ensure a fair comparison, we perform a grid search for each method over the perturbation radius $\eps$, regularization parameter $\lambda$, and noise standard deviation $\sigma$ respectively.
%
We choose to compare to gradient regularization since it is an established regularization method that may have a similar effect to adversarial training with small perturbations \citep{simon2019first}.
%
We aggregate the corruptions over each type (blurs, digital, noise, weather) and plot the results in Fig.~\ref{fig:barplot_corruption_types} and report results over each corruption in Fig.~12 in the Appendix.

First, we observe that adversarial training is the best performing method
%($83.4\%$ corruption accuracy) followed by Gaussian augmentation ($80.5\%$), gradient regularization ($78.3\%$), and standard training ($74.6\%$). 
%
and that unlike other methods, $\l_2$ adversarial training helps for \textit{each} corruption type. 
%
At the same time, Gaussian augmentation \textit{degrades} the performance on digital and weather corruptions while very significantly improving the performance for noise corruptions which is expected as the Gaussian noise used for training is also contained in the noise corruptions. 
%
Interestingly, for the fog and contrast corruptions, the performance degrades for \textit{all} methods (see Table~10 in App.~H), consistent with the observation made in \cite{ford2019adversarial}. 
%
Our results also suggest that the impact of gradient regularization is limited and it cannot explain the accuracy gains of both adversarial training and Gaussian augmentation as one could expect from the fact that these methods are equivalent to gradient regularization when used with \textit{sufficiently} small parameters $\sigma$ and $\eps$ \citep{bishop1995training}. 



\myparagraph{Worst-case vs average-case behavior.}
%
\cite{ford2019adversarial} show that robustness to Gaussian noise and robustness to adversarial perturbations are closely related. More precisely, they show using concentration of measure arguments that a non-zero error rate under Gaussian perturbation implies the existence of small adversarial perturbations and consequently that improving adversarial robustness leads to an improvement in robustness against Gaussian perturbations. This finding is consistent with what we observe here.
What remains to be understood is why adversarial training performs \textit{better} than Gaussian augmentation on common corruptions. The main difference between both methods appears when analyzing the objectives that both methods minimize. For a single sample $x$, the loss function considered in Gaussian augmentation is:
\begin{align*}
    \mathbb{E}_{\eta \sim N(0, I\sigma^2)} \left[ \l(\theta, x + \eta) \right]  \  \sim  \  \mathbb{E}_{\rho: ||\rho||_2 = \sigma\sqrt{d}} \left[ \l(\theta, x + \rho) \right],
\end{align*}
where $d$ denotes the input dimension, since Gaussian vectors with covariance $\sigma^2 I$ are highly concentrated on the sphere of radius $\sigma \sqrt{d}$ in high dimensions. Therefore Gaussian augmentation amounts to minimizing an \emph{averaged} objective where perturbations are averaged over the \textit{sphere}. However, the objective behind adversarial training defined in Eq.~\eqref{eq:rob_opt_general} amounts to minimizing a \emph{worst-case} loss based on the worst-case perturbation in the \textit{ball}. The key difference is that minimization of the expected value of the loss \emph{does not guarantee} any behavior inside the sphere.

% \myparagraph{$\sigma$-overfitting.}  %%% having a separate paragraph may imply sigma-overfitting as a standalone thing while it's rather a part of the "Worst-case vs average-case behavior" story 
To investigate this behavior, we perform the following experiment in Fig.~\ref{fig:sigma_overfitting}. For $1000$ random test set images from CIFAR-10, we evaluate the loss with additive Gaussian noise of $\sigma \in [0, 0.1]$ 
and average the loss function over both images and perturbations for (1) a standard model, (2) a model trained with Gaussian augmentation with $\sigma=0.05$ where 100\% of the training samples are augmented, (3) a model trained with Gaussian augmentation with $\sigma=0.1$ where only 50\% of the training samples are augmented, and (4) an $\l_2$ adversarially trained model with $\varepsilon=0.1$. We notice that the loss function for 100\% Gaussian augmentation is minimal at a value of $\sigma$ that is only slightly less than the $\sigma=0.05$ used for its training. \textit{Hence, the model has overfitted not only to the type of noise but also to its magnitude.} The loss function outside \emph{and inside} of the sphere is bigger than on its surface. However, there is a simple fix if we train with 50\% Gaussian noise in each batch, as suggested, e.g., in \cite{rusak2020simple} in contrast to \cite{ford2019adversarial}. This scheme alleviates the $\sigma$-overfitting behavior, achieves better accuracy on clean samples (93.2\% instead of 92.5\%) and, most importantly, \textit{significantly} improves the accuracy on common corruptions (85.0\% instead of 80.5\%). At the same time, $\l_2$ adversarial training does not suffer from this problem and both 100\% and 50\% schemes work nearly equally well (details can be found in App.~C). We provide a further discussion on $\sigma$-overfitting in App.~D together with additional experiments on ImageNet-100 where $\sigma$-overfitting is even more noticeable. %which is likely due to the higher input dimensionality of ImageNet.

% \begin{figure}[t]
%     \centering
%     \begin{minipage}{.48\textwidth}
%         \centering
%         \includegraphics[width=0.9\columnwidth]{figures/sigma_overfitting_cifar10.pdf}
%         \vspace{-1mm}
%         \caption{Average cross-entropy loss under Gaussian noise for different training methods. 
%         }
%         \label{fig:sigma_overfitting}
%     \end{minipage}
%     \hspace{2mm}
%     \begin{minipage}{.48\textwidth}
%         \centering
%         \includegraphics[width=0.485\columnwidth]{figures/heatmap_l2.pdf}
%         \includegraphics[width=0.50\columnwidth]{figures/heatmap_lpips.pdf}
%         \caption{Average $\l_2$ and LPIPS distance for different common corruptions from CIFAR-10-C.}
%         \label{fig:heatmap_distances_corruptions}
%     \end{minipage}
% \end{figure}
\begin{figure}[t]
    \centering
    \begin{minipage}{.47\textwidth}
        \centering
        \includegraphics[width=0.87\columnwidth]{figures/sigma_overfitting_cifar10.pdf}
        \caption{Average cross-entropy loss under Gaussian noise for different training methods. 
        }
        \label{fig:sigma_overfitting}
    \end{minipage}
    \hspace{2mm}
    \begin{minipage}{.49\textwidth}
        \centering
        \includegraphics[width=0.48\columnwidth]{figures/heatmap_l2.pdf}
        \includegraphics[width=0.50\columnwidth]{figures/heatmap_lpips.pdf}
        \caption{Average $\l_2$ and LPIPS distance for different common corruptions from CIFAR-10-C.}
        \label{fig:heatmap_distances_corruptions}
    \end{minipage}
\end{figure}


\myparagraph{Local vs global $\l_p$ behavior.}
Interestingly, adversarial training with worst-case perturbations bounded within a \textit{tiny} $\l_2$ ball leads to robustness significantly beyond this radius. Fig.~\ref{fig:heatmap_distances_corruptions} illustrates that common corruptions have an $\l_2$ norm an \textit{order of magnitude larger} than $\eps=0.1$ used for $\l_2$ adversarial training.
%
This is in contrast with adversarial robustness that does not significantly extend beyond the radius used for training \citep{madry2018towards}. 
%
Related to this, \cite{ford2019adversarial} argue that \textit{for Gaussian noise} improving the minimum distance to the decision boundary (e.g. via adversarial training) also leads to an improvement of the average distance. We have a similar mechanism at play for adversarial $\l_2$ perturbations and common corruptions which may explain the generalization of adversarial training to large average-case perturbations. % such as those from CIFAR-10-C.
However, our setting is more complex compared to \cite{ford2019adversarial} since at the training and test time we deal with \textit{different} and \textit{diverse} types of noise.
% Next we discuss how we can improve adversarial training by using a non-$\l_p$ distance.
%




\section{Improving adversarial training by relaxing a perceptual distance}
\label{sec:rlat}
As shown above, $\l_p$ adversarial training already leads to encouraging results on common corruptions. 
Moreover, the $\l_2$ distance appears to be more suitable for adversarial training than $\l_\infty$ on both datasets as implied by Table~\ref{tab:at_helps}. 
This observation suggests that using more advanced distances such as perceptual ones can further improve corruption robustness.
% We explore next how much we can improve it by using a distance which better captures the perceptual similarity between images.


\myparagraph{From $\l_p$ distances to LPIPS.}
One of the main disadvantages of $\l_p$-norms is that they are very sensitive under simple transformations such as rotations or translations \citep{sharif2018suitability}. One possible solution is to consider \textit{perceptual distances}\footnote{Not necessarily distances in a strict mathematical sense that assumes a certain set of axioms to hold.} which capture these invariances better such as the \textit{learned perceptual image patch similarity} (LPIPS) distance introduced in \cite{zhang2018unreasonable} and which is based on the activations of a convolutional network. The LPIPS distance is formally defined as %$\lpips(x,x')^2= \sum_{l=1}^L \alpha_l\|\phi_l(x)-\phi_l(x')\|_2^2, $
\begin{align}
\label{eq:lpips_def}
\lpips(x,x')^2= \sum_{l=1}^L \alpha_l\|\phi_l(x)-\phi_l(x')\|_2^2, 
\end{align}
where $L$ is the depth of the network, $\phi_l$ is its feature map up to the $l$-th layer, and $\{\alpha_l\}_{l=1}^L$ are some constants that weigh the contributions of the $\l_2$ distances between activations. There are two crucial elements in LPIPS: the learned network and learned coefficients $\{\alpha_l\}_{l=1}^L$. \cite{zhang2018unreasonable} propose to take a network pre-trained on ImageNet and learn coefficients on their collected dataset of human judgments about which images are closer to each other.
%
Both \cite{zhang2018unreasonable} and \cite{laidlaw2021perceptual} argue that LPIPS is better suited than $\l_p$ distances for measuring image similarity. In App.~E we analyze the suitability of LPIPS over $\l_2$ specifically on the images from CIFAR-10-C with a detailed breakdown over corruption types. In particular, we show that the LPIPS distance is better correlated with the error rate of the network, and the increase over severity levels is more monotonic compared to $\l_2$ as can be also seen in Fig.~\ref{fig:heatmap_distances_corruptions}.



\myparagraph{LPIPS adversarial training.}
In view of the positive features of LPIPS, adversarial training using LPIPS appears to be a promising approach to improve the performance on common corruptions.
The worst-case loss problem considered in~\eqref{eq:rob_opt_general} using the LPIPS distance can be formulated as:
%
\begin{align}
\label{eq:lpips_at_obj}    
 \max_{\delta} \l(x+\delta, y; \theta) \quad \text{s.t.} \quad \lpips(x,x+\delta)\leq \varepsilon.
\end{align}
%
However, this optimization problem is challenging since $\lpips$ is itself defined by a neural network, and the projection onto the LPIPS-ball---as required when using PGD to solve~\eqref{eq:lpips_at_obj}---does not admit a closed-form expression. This problem was considered in \cite{laidlaw2021perceptual} who propose two approximate attacks: the Perceptual Projected Gradient Descent (PPGD) and the Lagrangian Perceptual Attack (LPA). We discuss their approach in more detail in App.~F but emphasize that they either need to perform an approximate projection which is computationally expensive or come up with some scheme for tuning the Lagrange multiplier $\lambda$ in the Lagrangian formulation. Furthermore, they suggest in both cases to use 10-step iterative attacks for approximate LPIPS adversarial training which limits the scalability of the method to large datasets such as ImageNet. 


\myparagraph{Relaxed LPIPS adversarial training.}
We propose here a relaxation of the LPIPS adversarial objective~\eqref{eq:lpips_at_obj}. For simplicity of presentation, let us start by assuming that the LPIPS distance is defined using a \textit{single} intermediate layer of the network, i.e., $\lpips(x,x') = \| \phi(x) -\phi(x')\|_2$. Then we can write a neural network $f$ as the composition of the feature map $\phi$ and the remaining part of the network $f(x)=h(\phi(x))$. 
The LPIPS adversarial objective~\eqref{eq:lpips_at_obj} in this notation becomes 
% $ \max_{\delta} \l(h(\phi(x+\delta)))$ such that $\| \phi(x)-\phi(x+\delta) \|_2\leq \varepsilon$.
\begin{align*}
    \max_{\delta} \l(h(\phi(x+\delta))) \quad \text{s.t.} \quad \| \phi(x+\delta)-\phi(x) \|_2\leq \varepsilon.
\end{align*}
We first introduce the slack variable $\tilde \delta=\phi(x+\delta)-\phi(x)$ which allows us to rewrite the objective as 
% $  \max_{\delta, \tilde \delta} \l(h(\phi(x)+\tilde \delta))$ under the constraints $ \| \tilde \delta \|_2\leq \varepsilon$ and $ \tilde \delta=\phi(x)-\phi(x+\delta)$. 
\begin{align*}
    \max_{\delta, \tilde \delta} \l(h(\phi(x)+\tilde \delta)) \ \ \text{s.t.} \ \ \| \tilde \delta \|_2\leq \varepsilon,  \ \  \tilde \delta=\phi(x+\delta)-\phi(x).
\end{align*}
Then we perform the key step: we omit the constraint on the slack variable and obtain the following relaxation
%
\begin{align}
\label{eq:lpips_relax}
    \max_{\tilde \delta} \l(h(\phi(x)+\tilde \delta)) \quad \text{s.t.} \quad \| \tilde \delta \|_2\leq \varepsilon,
\end{align}
i.e. we lift the requirement that there should exist a $\delta$ in the \textit{input} space that corresponds to the layerwise perturbation $\tilde \delta$. 

A similar relaxation can be derived when the LPIPS distance is defined using multiple layers (see App.~F):
%
\begin{align}
    \label{eq:lpips_relax_multilayer}
    &\max_{\tilde \delta^{(1)}, \dots, \tilde \delta^{(L)}} \quad \l(g_L(\dots g_1(x+\tilde\delta^{(1)}) \dots + \tilde\delta^{(L)})) \\ 
                                             &\text{ s.t. } \ \ \ \| \tilde \delta^{(l)} \|_2\leq \varepsilon_l \ \ \forall l \in \mathcal{L}_{LPIPS}, \ \ \ \tilde \delta^{(l)} = 0 \ \ \forall l \not\in \mathcal{L}_{LPIPS}, \nonumber
\end{align}
where the network is written under its compositional form $f = g_L \circ \cdots \circ g_1$, $\mathcal{L}_{LPIPS}$ is the set of layer indices used in LPIPS and $\eps_l$ denotes the $\l_2$ bound imposed at the $l$-th layer. 
We denote this relaxation 
as \textit{relaxed LPIPS adversarial training} (RLAT) and solve it efficiently using a single-iteration adversarial attack similar to FGM. %\citep{goodfellow2014explaining}. 
We emphasize that the projection of each $\tilde \delta^{(l)}$ onto the corresponding $\l_2$ balls is computationally cheap to perform, unlike the LPIPS projection.% \citep{laidlaw2021perceptual}. 

Since we perform relaxation and \textit{train} the network which is also used to compute LPIPS, the exact layerwise coefficients $\alpha_l$ from the original LPIPS \citep{zhang2018unreasonable} are no longer applicable and cannot be used to set the layerwise bounds $\eps_l$. Therefore, we set our own values of $\eps_l$ which we specify in App.~F together with detailed derivations of RLAT, its precise algorithm and other implementation details.
% In all our experiments we opt to use the \textit{same} network as the one used for classification although an external network can also possibly be used as discussed in \cite{laidlaw2021perceptual}.
Finally, we remark that related layerwise adversarial training methods have been proposed before \citep{stutz2019disentangling,volpi2018generalizing,wei2020improved}.
However, viewing layerwise adversarial training as an efficient relaxation of LPIPS adversarial training is novel, as well as applying these methods for general robustness such as common corruptions. 






\section{Empirical evaluation of RLAT}
\label{sec:main_exps}
% Here we evaluate the performance of RLAT compared to other established methods and show that (1) RLAT also substantially improves LPIPS robustness, (2) RLAT can be also successfully combined with different data augmentation methods. %leading to improved results on common corruptions.
Here we first show that RLAT indeed substantially improves the LPIPS robustness. Second, we compare RLAT to other established methods and show that it consistently leads to improved accuracy and calibration on common corruptions.

% \begin{wrapfigure}{r}{0.43\textwidth}
% 	\vspace{-5mm}
% 	\begin{center}
% 		\includegraphics[width=0.43\columnwidth]{figures/lpips_robustness.pdf}
% 		\vspace{-5mm}
% 		\caption{LPIPS adversarial robustness of different training schemes on CIFAR-10.}
% 		\label{fig:lpips_robustness}
% 	\end{center}
% 	\vspace{-7mm}
% \end{wrapfigure}
\myparagraph{LPIPS robustness of RLAT.}
%Since our proposed method is only a relaxation of LPIPS adversarial training, it is natural to ask whether it improves the LPIPS robustness. 
%For this, 
We use the Lagrangian Perceptual Attack developed in \cite{laidlaw2021perceptual} to estimate the LPIPS adversarial accuracy under different LPIPS radii and plot results in Fig.~\ref{fig:lpips_robustness} on CIFAR-10. We use standard, $\l_2$ adversarial training (AT), Fast PAT, and RLAT models with their main hyperparameters selected to perform best on common corruptions.\footnote{We note that \cite{laidlaw2021perceptual} focus on robustness to unseen adversarial examples that involve a \textit{worst-case} optimization process, while we focus on unseen \textit{average-case} common corruptions. This is the reason why the optimal perturbation radii that we consider are noticeably smaller than in their paper.} We observe that %our relaxed LPIPS adversarial training 
RLAT indeed substantially improves LPIPS robustness, even more than other approaches such as $\l_2$ AT and Fast PAT. This gives further evidence that both $\l_2$ and RLAT training \textit{do not suffer from catastrophic overfitting}, even though trained with one-step perturbations similar to FGSM. We provide a similar evaluation for $\l_2$ robustness in App.~F (Fig.~10).


\myparagraph{Main experimental setup.}
We compare the results for RLAT with additional baselines: $\l_2$~and $\l_\infty$~adversarial training (with $100\%$ adversarial samples per batch), Gaussian augmentation (with both $50\%$ and $100\%$ augmentations per batch), AdvProp \citep{xie2020adversarial}, Fast PAT~\citep{laidlaw2021perceptual}, and also four data augmentation approaches: DeepAugment \citep{hendrycks2020many}, AugMix \citep{hendrycks2019augmix}, adversarial noise training (ANT) \citep{rusak2020simple}, and Stylized ImageNet (SIN) \citep{geirhos2018imagenettrained}. 
We additionally evaluate AugMix with the Jensen-Shannon regularization term as proposed in \cite{hendrycks2019augmix}.
We train all methods from random initialization except ANT where we follow the scheme of \cite{rusak2020simple}. % that involves fine-tuning a standard pretrained model.
%
All comparisons between methods are performed with a grid search over their main hyperparameters (reported in App.~A) such as $\sigma$ in Gaussian augmentation or $\epsilon$ in adversarial training which we perform on the main 15 corruptions from CIFAR-10-C / ImageNet-C. In App.~H we further verify that selecting the main hyperparameters on validation corruptions leads to the same results. 
For Fast PAT on CIFAR-10, we do a grid search over their parameter $\eps$, but on ImageNet-100 we report the results based on the models provided by the authors due to limited computational resources.
%due to our limited computational resources, it is not feasible to run a proper grid search over $\eps$ (each model takes 5 days of training on 4 Nvidia RTX 2080 Ti GPUs). 
To assess calibration, we report the expected calibration error (ECE) (see App.~H for ECE with temperature rescaling \citep{guo2017calibration}).
More details can be found in our repository \url{https://github.com/tml-epfl/adv-training-corruptions}.


Since the main goal of the common corruption benchmark \citep{hendrycks2019benchmarking} is to show the model's behavior on \textit{unseen} corruptions, we do not use overlapping augmentations in training (see App.~A). 
The only exception is Gaussian augmentation which we mark in gray in Table~\ref{tab:main_cifar10_imagenet100} following \citet{rusak2020simple} since it belongs to common corruptions. 
We note that removing only Gaussian noise from evaluation is not sufficient, because other noises can be affected as well by training with Gaussian augmentation. 
Thus, the results of 100\% and 50\% Gaussian augmentation are shown only for illustrative purposes suggesting that adversarial training with no prior knowledge about the corruptions can obtain almost the same results as direct augmentation.



\begin{table}[t!]
    \centering
    \small
    \caption{Accuracy and calibration of ResNet-18 models trained on CIFAR-10 and ImageNet-100. Gray-colored numbers correspond to methods partially trained with the corruptions from CIFAR-10-C and ImageNet-100-C.}
    \setlength{\tabcolsep}{4.0pt}
    \begin{tabular}{@{}lccc}
        % \toprule
                          & \textbf{Standard} & \textbf{Corruption} & \textbf{Corruption} \\
        \textbf{Training} & \textbf{accuracy} & \textbf{accuracy}   & \textbf{calibr. error}  \\
        \midrule
        \\
                          & \multicolumn{3}{c}{\textbf{CIFAR-10}}  \\
                            \cmidrule(lr){2-4}
        Standard & 95.1\% & 74.6\% & 16.6\% \\
        100\% Gaussian & 92.5\% & \color{gray} 80.5\% & \color{gray} 13.2\%  \\ 
        50\%  Gaussian & 93.2\% & \color{gray} 85.0\% & \color{gray} 9.1\%   \\ 
        Fast PAT & 93.4\% & 80.6\% & 12.0\%   \\
        AdvProp  & 94.7\% & 82.9\% & 10.1\%     \\
        $\l_\infty$ adversarial  & 93.3\% & 82.7\% & 10.8\% \\
        $\l_2$ adversarial & 93.6\% & 83.4\% & 10.5\% \\
        RLAT & 93.1\% & \textbf{84.1\%} & \textbf{9.9\%} \\
        \midrule
        DeepAugment & 94.1\% & 85.3\% & 8.7\% \\
        DeepAugment + RLAT & 93.6\% & \textbf{87.8\%} & \textbf{6.1\%} \\
        \midrule
        AugMix & 95.0\% & 86.6\% & 6.9\%  \\ 
        AugMix + RLAT & 94.8\% & \textbf{88.5\%} & \textbf{4.5\%}  \\ 
        \midrule
        AugMix + JSD & 95.0\% & 88.6\% & 6.5\%  \\
        % AugMix + JSD + Linf & & 89.0\% &  \\
        % AugMix + JSD + L2 & & 89.0\% &  \\
        AugMix + JSD + RLAT & 94.8\% & \textbf{89.6\%}  & \textbf{5.4\%}  \\  % best: augmix-l2-scaling-descending-1.0-1.0-12.75_last.csv
        \\
                          & \multicolumn{3}{c}{\textbf{ImageNet-100}}  \\
                            \cmidrule(lr){2-4}
        Standard & 86.6\% & 47.5\% & 10.0\% \\
        100\% Gaussian & 86.4\% & \color{gray} 46.7\% & \color{gray} 11.7\% \\ 
        50\%  Gaussian & 83.8\% & \color{gray} 55.2\% & \color{gray} 6.1\% \\ 
        Fast PAT  & 71.5\% & 45.2\% & 8.0\% \\
        $\l_\infty$ adversarial &  86.5\% & 47.7\% & 12.4\% \\
        $\l_2$ adversarial &  86.3\% & 48.4\% & 9.4\% \\
        RLAT & 86.5\% & \textbf{48.8\%} & \textbf{9.1\%} \\
        \midrule
        AugMix & 86.7\% & 52.3\% & 7.5\% \\ 
        AugMix + RLAT & 86.8\% & \textbf{54.8\%} & \textbf{4.7\%} \\ 
        \midrule
        AugMix + JSD &   88.4\% & 59.3\% & 1.9\% \\
        % AugMix + JSD + Linf & & 89.0\% &  \\
        % AugMix + JSD + L2 & & 89.0\% &  \\
        AugMix + JSD + RLAT & 87.1\% & \textbf{61.1\%} & \textbf{1.8\%} \\  % best: augmix-l2-scaling-descending-1.0-1.0-12.75_last.csv

        \midrule
        SIN & 86.6\% & 53.7\% & 6.7\% \\  % bs=28
        SIN + RLAT & 86.5\% & \textbf{54.3\%} & \textbf{6.0\%} \\   % 2.55
        \midrule
        ANT\textsuperscript{3x3}  & 85.9\% & 57.7\% & 5.1\% \\ 
        ANT\textsuperscript{3x3} + RLAT  & 85.3\% & \textbf{58.3\%} & \textbf{4.4\%} \\
    \end{tabular}
    \label{tab:main_cifar10_imagenet100}
\end{table}


\begin{figure}[t]
    \begin{center}
        \includegraphics[width=0.8\columnwidth]{figures/lpips_robustness.pdf}
        \caption{LPIPS adversarial robustness of different training schemes on CIFAR-10.}
        \label{fig:lpips_robustness}
    \end{center}
\end{figure}
\myparagraph{Main experimental results.}
We show the main experimental results on CIFAR-10-C and ImageNet-100-C in Table~\ref{tab:main_cifar10_imagenet100}. 
First of all, we observe that $\l_p$ adversarial training is a strong baseline on common corruptions on both datasets with a larger gain on CIFAR-10-C. 
Using our proposed relaxed LPIPS adversarial training further improves the corruption accuracy on both datasets: from 74.6\% to 84.1\% on CIFAR-10-C and from 47.5\% to 48.8\% on ImageNet-100-C compared to standard models. Moreover, RLAT also improves calibration compared to the standard model: from 16.6\% to 9.9\% ECE on CIFAR-10-C and from 10.0\% to 9.1\% ECE on ImageNet-100-C.
We also observe that 100\% Gaussian augmentation even deteriorates the performance on ImageNet-100-C
while 50\% Gaussian augmentation significantly improves the average accuracy which is consistent with \citet{rusak2020simple}.

We observe that RLAT can be successfully combined with existing data augmentations, leading to better accuracy and calibration. E.g.,
adding RLAT on top of DeepAugment helps to improve the CIFAR-10-C accuracy from 85.3\% to 87.8\%.
Combining RLAT with the AugMix augmentation improves the corruption accuracy from 86.6\% to 88.5\% on CIFAR-10-C and from 52.3\% to 54.8\% on ImageNet-100-C. Combining RLAT with SIN and ANT\textsuperscript{3x3} improves the accuracy on ImageNet-100-C from 53.7\% to 54.3\% and from 57.7\% to 58.3\%, respectively. 
Moreover, we see that RLAT consistently improves ECE in all settings, and we refer to App.~H for ECE with temperature rescaling %\citep{guo2017calibration}
which qualitatively shows the same behavior.

Additionally, we added our models to the RobustBench leaderboard\footnote{\url{https://robustbench.github.io/}} where our method has the best performance among the architectures of comparable sizes (i.e., ResNet-18). The models which perform better have larger architectures and some of them additionally rely on ensembles. 
% This further confirms the utility of RLAT.
% Thus, RLAT is not only a helpful technique on its own but can also benefit from advanced data augmentations.


\myparagraph{Runtime of RLAT.}
We report a full runtime comparison between standard training, $\l_2$ / $\l_\infty$ adversarial training, RLAT, and Fast PAT in Table~\ref{tab:execution_time}. 
The main observation is that RLAT is significantly faster than Fast PAT (e.g., 1.8 hours vs. 9.4 hours on CIFAR-10) and leads only to a slight overhead compared to $\l_2$ / $\l_\infty$ adversarial training (1.8 hours vs 1.3 hours on CIFAR-10). %and standard training (0.8 hours on CIFAR-10).
% On ImageNet-100, RLAT takes 6.2 hours on a single V100 GPU which can be compared to 120 hours on 4 Nvidia RTX 2080 Ti GPUs for Fast PAT (although Fast PAT uses a larger network, ResNet-50 instead of ResNet-18).
% With the same ResNet-18 architecture, RLAT achieves a significant speed-up over Fast PAT: 
% The runtime of RLAT is not much higher than the runtime of $\l_2/\l_\infty$ adversarial training 
%
%
These runtimes show further the advantage of the single-step adversarial training procedure of RLAT compared to the multi-step approach of Fast PAT. 
It would be interesting in future work to develop a single-step version of Fast-LPA which is, however, not straightforward because of their Lagrangian formulation and the need to tune the parameter $\lambda$ over the iterations of Fast-LPA.
% We report a full runtime comparison in App.~\ref{sec:app_exp_details}. %Table~\ref{tab:execution_time} in the Appendix.
\begin{table}[t]
    \centering
    \small
    \caption{Wall-clock time in hours for ResNet-18 trained with different methods on CIFAR-10 and ImageNet-100 using one Nvidia V100 GPU. \textsuperscript{*} denotes the time reported by \cite{laidlaw2021perceptual} for a larger model (ResNet-50) using different hardware (4 Nvidia RTX 2080 Ti GPUs). %We report this time in order to show the order of training time in this case. 
    }
    \begin{tabular}{@{}lrr@{}}
        % \toprule
                    & \multicolumn{2}{c}{\textbf{Dataset}} \\
                      \cmidrule(lr){2-3}
        \textbf{Training} & CIFAR-10 & ImageNet-100 \\
        \midrule
        Standard & 0.8h & 3.9h \\
        $\l_2$/$\l_\infty$ adversarial & 1.3h & 5.8h\\
        RLAT & 1.8h & 6.2h\\
        Fast PAT & 9.4h & \textsuperscript{*}120h\\
        % \bottomrule
    \end{tabular}
    \label{tab:execution_time}
\end{table}


\myparagraph{Additional experiments.}
We refer to the Appendix for further experimental results. 
In App.~G, we evaluate the performance of the models from Table~\ref{tab:main_cifar10_imagenet100} on ImageNet-A, ImageNet-R, and Stylized ImageNet to better understand how well the improvements on common corruptions transfer to other distribution shifts. In App.~H, we provide more detailed results such as those presented in Table~\ref{tab:main_cifar10_imagenet100} but with breakdowns over different corruptions and severities. We also present results for larger network architectures and for AugMix combined with $\l_p$ adversarial training in App.~H, as well as results of RLAT over multiple random seeds.



\section{Conclusions and future work}
Our findings suggest that adversarial training can be successfully used to improve accuracy and calibration on common image corruptions. 
Even simple $\l_p$ adversarial training can serve as a strong baseline if the optimal perturbation radius is chosen for the given problem.
More advanced adversarial training schemes involve perceptual distances, such as LPIPS, and we provide a relaxation of LPIPS adversarial training with an efficient single-step procedure. We observe that the developed relaxation (RLAT) substantially improves the LPIPS robustness and can be successfully combined with existing data augmentations.
We hope that RLAT will also be of interest for other domains such as natural language processing where robustness to commonly occurring corruptions (e.g., typos) is an important task. 



\small
\bibliography{literature}
% \bibliographystyle{icml2021}
% \bibliographystyle{abbrvnat}
\clearpage
\normalsize



\end{document}
