\documentclass[accepted]{uai2023}

% If your paper is accepted, change the options for the package
% aistats2023 as follows:
%
%\usepackage[accepted]{aistats2023}
%
% This option will print headings for the title of your paper and
% headings for the authors names, plus a copyright note at the end of
% the first column of the first page.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}

% If you set papersize explicitly, activate the following three lines:
%\special{papersize = 8.5in, 11in}
%\setlength{\pdfpageheight}{11in}
%\setlength{\pdfpagewidth}{8.5in}

% If you use natbib package, activate the following three lines:
%\renewcommand{\bibname}{References}
%\renewcommand{\bibsection}{\subsubsection*{\bibname}}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
\bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

\usepackage{enumitem}

\usepackage[title]{appendix}
\usepackage{amsthm}
\usepackage[utf8]{inputenc} % allow utf-8 input
\usepackage[T1]{fontenc}    % use 8-bit T1 fonts
%\usepackage{hyperref}       % hyperlinks

%\usepackage{url}            % simple URL typesetting
\usepackage{booktabs}       % professional-quality tables
\usepackage{amsfonts}       % blackboard math symbols
\usepackage{nicefrac}       % compact symbols for 1/2, etc.
\usepackage{microtype}      % microtypography

\usepackage{algorithm}
%\usepackage{algorithmic}  
\usepackage{longtable}
% Theorem-like environments used in the paper.
% None of the declarations below passes a shared-counter argument, so each
% environment is numbered independently (Definition 1, Theorem 1, Lemma 1, ...).
\newtheorem{ex}{Example}
\newtheorem{mydef}{Definition}
%\newtheorem{thm}{Theorem}
\newtheorem{theorem}{Theorem}
\newtheorem{remark}{Remark}
\newtheorem{lemma}{Lemma}
\newtheorem{cor}{Corollary}
\newtheorem{obs}{Observation}
\newtheorem{prop}{Proposition}
\newtheorem{assumption}{Assumption}
\usepackage{amsmath}
\allowdisplaybreaks
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%% Amir added (begin) %%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ---- Base symbols ---------------------------------------------------------
% Bold symbols via \bm (bm package) and blackboard-bold E (amsfonts).
\newcommand*{\w}{\bm{w}}
\newcommand*{\wbar}{\overline{\w}}
\newcommand*{\bpsi}{\bm{\psi}}
\newcommand*{\bPsi}{\bm{\Psi}}
\newcommand*{\E}{\mathbb{E}}
% \sigma^2 shorthands with a subscript (presumably variance terms -- confirm).
\newcommand*{\sigmaw}{\sigma^2_{\w}}
\newcommand*{\sigmapsi}{\sigma^2_{\psi}}
% ---- Gradients: \gr<var> is \nabla with the variable as subscript ---------
\newcommand*{\gr}{\nabla}
\newcommand*{\grw}{\gr_{\w}}
\newcommand*{\grd}{\gr_{\delta}}
\newcommand*{\grL}{\gr_{\Lambda}}
\newcommand*{\grpsi}{\gr_{\bpsi}}
\newcommand*{\grPsi}{\gr_{\Psi}}
% ---- Tilde gradients: \sgr<var> is \tilde{\nabla} with a subscript --------
% (presumably stochastic gradient estimates -- confirm against the text).
% Note: \sgrpsi uses the plain \psi, whereas \sgrbpsi uses the bold \bpsi.
\newcommand*{\sgr}{\tilde{\nabla}}
\newcommand*{\sgrw}{\sgr_{\w}}
\newcommand*{\sgrd}{\sgr_{\delta}}
\newcommand*{\sgrL}{\sgr_{\Lambda}}
\newcommand*{\sgrpsi}{\sgr_{\psi}}
\newcommand*{\sgrbpsi}{\sgr_{\bpsi}}
\newcommand*{\sgrPsi}{\sgr_{\Psi}}

%\interdisplaylinepenalty=2500


%\usepackage[bookmarks=false]{hyperref}
\usepackage[T1]{fontenc}
%\usepackage[cmex10]{amsmath}
%\usepackage{cite}
\usepackage[utf8]{inputenc}
\usepackage{pgfplots}
\usepackage{graphicx}
\usepackage{subcaption}
\usepackage{pstricks}
%\usepackage{appendix}
\usepackage{color}
\usepackage{amsmath}
\usepackage{bbm}
\usepackage{tcolorbox}
\usepackage{tikz}
\usepackage{pgfplots}
\usepackage{wrapfig}
\usepackage{lipsum}  
\usetikzlibrary{positioning}
\usepackage{tcolorbox}
\usepackage{amsfonts,amssymb}
%\usepackage{amsfonts,amssymb,amsthm}
%\usepackage{natbib}
\usepackage{mathtools}
\usepackage{commath}
\usepackage{relsize}
\usepackage{bbm}
\usepackage{bm}
\usepackage[font={small}]{caption}
\usepackage{comment,color,soul}
% \usepackage{enumerate} 
\usepackage{amsfonts}
\usepackage{url}
\usepackage{lipsum}
\usepackage[thinlines]{easytable}
\usepackage{nicefrac}
%\usepackage{authblk}
%\usepackage{titling}
\usepackage{algorithm}
\usepackage{algpseudocode}

\usepackage{dblfloatfix}
%\usepackage{mysymbol}
\usepackage{float}
\usepackage{multirow}
% \usepackage{color}
\usepackage{colortbl}
% \usepackage{enumitem}


 



%\def\changenumberingoff{\def\Let@{\def\\{\notag\math@cr}}

% \floor{x} typesets x between \lfloor and \rfloor (mathtools paired delimiter);
% \floor*{x} auto-sizes the delimiters and \floor[\big]{x} picks a manual size.
\DeclarePairedDelimiter\floor{\lfloor}{\rfloor}




    

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%% Amir added (end) %%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \numberthis: advances the equation counter by one and tags the current
% line with the resulting equation number (via amsmath's \tag).
\newcommand\numberthis{\addtocounter{equation}{1}\tag{\theequation}}

\usepackage{longtable}

\usepackage{amsmath}
\allowdisplaybreaks
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%% Amir added (begin) %%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%\interdisplaylinepenalty=2500


%\usepackage[bookmarks=false]{hyperref}
\usepackage[T1]{fontenc}
%\usepackage[cmex10]{amsmath}
%\usepackage{cite}
\usepackage[utf8]{inputenc}
\usepackage{pgfplots}
\usepackage{graphicx}
\usepackage{subcaption}
\usepackage{pstricks}
%\usepackage{appendix}
\usepackage{color}
\usepackage{amsmath}
\usepackage{tcolorbox}
\usepackage{tikz}
\usepackage{pgfplots}
\usepackage{wrapfig}
\usepackage{lipsum}  
\usetikzlibrary{positioning}
\usepackage{tcolorbox}
\usepackage{amsfonts,amssymb}
%\usepackage{amsfonts,amssymb,amsthm}
%\usepackage{natbib}
\usepackage{mathtools}
\usepackage{commath}
\usepackage{relsize}
\usepackage[font={small}]{caption}
\usepackage{comment}
% \usepackage{enumerate} 
\usetikzlibrary{arrows,automata}
\usepackage{amsfonts}
\usepackage{url}
\usepackage{lipsum}
\usepackage[thinlines]{easytable}
\usepackage{nicefrac}
%\usepackage{authblk}
%\usepackage{titling}
\usepackage{algorithm}
\usepackage{algpseudocode}
% \usepackage{mysymbol}
\usepackage{float}
\usepackage{multirow}
% \usepackage{color}
\usepackage{colortbl}

    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
% \swap[sep]{a}{b} outputs "b sep a": the two mandatory arguments in reverse
% order, joined by the optional separator (default "-"). Template example macro.
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{On the Role of Generalization in Transferability of Adversarial Examples}

% The standard author block has changed for UAI 2023 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is authomatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:<wangyilin210210@link.cuhk.edu.hk>?Subject=Your UAI 2023 paper}{Yilin Wang}}
\author[1]{\href{mailto:<farnia@cse.cuhk.edu.hk>?Subject=Your UAI 2023 paper}{Farzan Farnia}}
% Add affiliations after the authors
\affil[1]{
    Department of Computer Science and Engineering, \linebreak
    The Chinese University of Hong Kong, \linebreak
    Hong Kong SAR
}

  %% Choose your variant of English; be consistent
% \usepackage[british]{babel}

%% Some suggested packages, as needed:

  
  \begin{document}
\maketitle

\begin{abstract}
Black-box adversarial attacks designing adversarial examples for unseen deep neural networks (DNNs) have received great attention over the past years. However, the underlying factors driving the transferability of black-box adversarial examples still lack a thorough understanding. In this paper, we aim to demonstrate the role of the generalization behavior of the substitute classifier used for generating adversarial examples in the transferability of the attack scheme to unobserved DNN classifiers. To do this, we apply the max-min adversarial example game framework and show the importance of the generalization properties of the substitute DNN from training to test data in the success of the black-box attack scheme in application to different DNN classifiers. We prove theoretical generalization bounds on the difference between the attack transferability rates on training and test samples. Our bounds suggest that operator norm-based regularization methods could improve the transferability of the designed adversarial examples. We support our theoretical results by performing several numerical experiments showing the role of the substitute network's generalization in generating transferable adversarial examples. Our empirical results indicate the power of Lipschitz regularization and early stopping methods in improving the transferability of designed adversarial examples.       
\end{abstract}

\section{Introduction}
Deep neural networks (DNNs) have attained impressive results in many machine learning problems, ranging from image recognition \citep{krizhevsky2009learning} and speech processing \citep{deng2013recent} to bioinformatics \citep{alipanahi2015predicting}. The standard evaluation of a trained DNN machine is typically performed over test samples drawn from the same underlying distribution that has generated the empirical training data. The numerous successful applications of deep learning models reported in the literature demonstrate DNNs' surprising generalization power from training samples to unseen test data. Such promising results on unobserved data despite DNNs' enormous capacity for memorizing training examples have attracted a lot of attention in the machine learning community.

While DNNs usually achieve satisfactory generalization performance, they have been frequently observed to lack robustness against minor adversarial perturbations to their input data \citep{szegedy2013intriguing,biggio2013evasion,goodfellow2014explaining}, widely known as adversarial attacks. According to these observations, an adversarial attack scheme can generate imperceptible perturbations that fool the DNN classifier into predicting wrong labels with high confidence scores. Such adversarial perturbations are usually created through maximizing a target DNN's prediction loss over a small neighborhood around an input sample. While DNNs often show successful generalization behavior to test samples drawn from the underlying distribution of training data, the minor perturbations designed by adversarial attack schemes can completely undermine their prediction results.

Specifically, adversarial examples have been commonly reported to be capable of transferring to unseen DNN classifiers \citep{tramer2017ensemble,ilyas2018black,cheng2018query,zhou2018transferable}. Based on these reports, an adversarial example designed for a specific classifier could further alter the prediction of another DNN machine with a different architecture and training set. Such observations have inspired the development of several \emph{black-box adversarial attack schemes} in which the adversarial examples are designed for a substitute classifier and then are evaluated on a different target DNN.  

Several recent papers have attempted to theoretically study the transferability of black-box adversarial attacks. These works have mostly focused on the effects of non-robust features \citep{tramer2017space,ilyas2019adversarial,inkawhich2019feature}, causality \citep{zhang2021causaladv}, and equilibrium \citep{bose2020adversarial,meunier2021mixed} in adversarial training problems on transferable adversarial examples. The mentioned studies reveal the dependency of adversarial examples on non-robust features that can be easily perturbed through minor adversarial noise, and also how the transferability of adversarial examples depends on the equilibrium in the game between the adversary and classifier players. On the other hand, the connection between the train-to-test generalization performance of the substitute network and the transferability of the designed examples has not been explored in the literature. Hence, it remains unclear whether a substitute DNN with a smaller generalization gap  results in more transferable adversarial examples.

In this work, we attempt to understand the interconnections between the train-to-test generalization error and the attack transferability rate of DNNs in black-box adversarial attacks. We aim to show that a smaller generalization gap not only improves the classification accuracy on unseen test data, but further could result in higher transferability rates for the designed adversarial examples. To this end, we analyze the transferability of adversarial examples through the lens of the max-min framework of \emph{Adversarial Example Game (AEG)} introduced by \cite{bose2020adversarial}. According to this approach, the adversary player searches for the most transferable attack strategy that reaches the maximum prediction error under the most robust DNN classifier. We focus on the generalization performance of the AEG learner from training samples to test data, and demonstrate its importance in the transferability power of the generated adversarial perturbations.   

Specifically, we focus on the standard class of norm-bounded adversarial attacks and define the train-to-test generalization error of a function class's minimum risk under norm-bounded adversarial perturbations. Subsequently, we prove theoretical bounds on the defined generalization error for multi-layer DNNs with spectrally-normalized weight matrices,  
which enables us to bound the generalization gap between the training and test transferability rates of norm-bounded attack schemes. Also, the shown generalization bound suggests the application of Lipschitz regularization methods in training a substitute DNN with improved transferability of generated adversarial examples.

Finally, we numerically evaluate our theoretical results on multiple standard image recognition datasets and DNN architectures. Our empirical results further support the existing connections between the generalization and transferability properties of black-box adversarial attacks. The numerical findings demonstrate that a better generalization score for the substitute DNN could significantly boost the transferability rate of designed adversarial examples. In addition, we empirically demonstrate that both explicit and implicit regularization techniques can help generate more transferable examples. We validate this result for explicit Lipschitz regularization and implicit early-stopping schemes. We can summarize the main contributions of our work as follows:
\begin{itemize}[leftmargin=1mm]
    \item Drawing connections between the generalization properties of the substitute DNN classifier and the transferability rate of designed adversarial examples
    \item Proving generalization error bounds on the difference between the transferability rates of DNN-based adversarial examples designed for training and test data
    \item Demonstrating the power of Lipschitz regularization and early stopping methods in generating more transferable adversarial examples
    \item Conducting numerical experiments on the generalization and transferability aspects of black-box adversarial attacks
\end{itemize}

\section{Related Work}
Transferability of adversarial examples has been extensively studied in the deep learning literature. The related literature includes a large body of papers \citep{ilyas2018black,cheng2018query,bhagoji2018practical,alzantot2019genattack,cheng2019improving,moon2019parsimonious,guo2019simple,mohaghegh2020advflow,wang2020amora} proposing black-box adversarial attack schemes aiming to transfer from a source DNN to an unseen target DNN classifier and several related works \citep{levine2020robustness,salman2020denoised,singla2020second,li2020blacklight} on developing robust training mechanisms against black-box adversarial attacks. Regarding the relationship between accuracy and transferability, \cite{wu2018understanding} observe a positive correlation between the clean accuracy and transferability of adversarial examples following the neural net. On the other hand, \cite{gubri2022lgv} report that the best clean test accuracy does not provide the highest transferability rate. \cite{qin2022boosting,gubri2022lgv} also study the relationship between transferability rate and the loss function's sharpness.

In addition, several game theoretic frameworks have been proposed to analyze the transferability of adversarial examples. The related works \citep{bose2020adversarial,meunier2021mixed} study the adversarial example game between the classifier and adversary players. However, these works mostly focus on the equilibrium and convergence behavior in adversarial example games and do not discuss the generalization aspect of the game. In another related work, \cite{pal2020game} study the adversarial learning task through the lens of game theory. Unlike our work, the generalization analysis in \citep{pal2020game} focuses only on the generalization behavior of the robust classification rule and not on the generalization properties of the transferable adversary player.    

Furthermore, the generalization properties of adversarially-learned models have been the topic of several related papers.
References \citep{schmidt2018adversarially,raghunathan2019adversarial} discuss numerical and theoretical results showing that the generalization of adversarially-trained neural nets is inferior to that of standard ERM-learned models with the same number of training samples. The related work by \cite{rice2020overfitting} empirically studies the overfitting phenomenon in adversarial training problems and reveals the different generalization properties of standard and adversarial training schemes.
In another study, \cite{wu2020adversarial} show the connection between the generalization of adversarially-learned models and the flatness of the weight loss landscape. \cite{yin2019rademacher,awasthi2020adversarial} develop Rademacher-complexity-based generalization bounds for adversarially-trained models which suggest the application of norm-based regularization techniques for improving the generalization behavior of adversarial training methods. \cite{farnia2018generalizable} prove PAC-Bayes generalization bounds for adversarially-learned DNNs with bounded spectral norms for their weight matrices. Also, \cite{attias2019improved} perform VC-based generalization analysis for adversarial training schemes and derive upper-bounds on their sample complexity. However, we note that all these papers focus on the generalization of adversarially-trained models and do not study the connection between generalization and transferability of black-box attacks.




\section{Preliminaries: Adversarial Attacks and Training}
In this section, we give a brief review of standard norm-bounded adversarial attack and training schemes. Consider a supervised learning problem where the learner seeks a prediction rule $f$ from  function space $\mathcal{F}$ to predict a label variable $Y\in\mathcal{Y}$ from the observation of a $d$-dimensional feature vector $\mathbf{X}\in\mathcal{X}$. In this work, we focus on the following set of $L$-layer neural network functions with activation function $\psi$:
\begin{equation}
    \begin{aligned}
    &\mathcal{F_V} = \left\{ f_\mathbf{v}: f_\mathbf{v}(\mathbf{x})=V_L \psi\bigl(\cdots \psi(V_{0}\mathbf{x}) \cdots\bigr),\, \mathbf{v}\in\mathcal{V}  \right\}
    \label{neural_network_functions}
    \end{aligned}
\end{equation}
In the above, we use vector $\mathbf{v}$ belonging to  feasible set $\mathcal{V}$ to parameterize the $L$-layer neural net $f_{\mathbf{v}}$. According to this notation,  $\mathbf{v}$ concatenates all the entries of the neural net's weight matrices $V_0,\ldots,V_L$.

Given a loss function $\ell$ and $n$ training samples in dataset $S=\{(\mathbf{x}_i,y_i)\}_{i=1}^n$, the standard risk minimization approach aims to find the prediction rule $f^*\in \mathcal{F_V}$ minimizing the expected loss (risk) $\mathbb{E}[\ell(f(\mathbf{X}),Y)]$ where the expectation is taken according to the underlying distribution of data $P_{\mathbf{X},Y}$. Since the supervised learner only observes the training samples and lacks any further knowledge of the underlying $P_{\mathbf{X},Y}$, the empirical risk minimization (ERM) framework sets out to minimize the empirical risk function estimated using the training examples:
\begin{equation}\label{Eq: ERM Problem}
    \min_{\mathbf{v}\in\mathcal{V}}\: \frac{1}{n}\sum_{i=1}^n \ell\bigl(f_\mathbf{v}(\mathbf{x}_i),y_i\bigr).
\end{equation}
However, the ERM learner typically lacks robustness to norm-bounded adversarial perturbations. A standard approach to generate a norm-bounded adversarial perturbation is through maximizing the loss function over a norm ball around a given data point $(\mathbf{x},y)$:
\begin{equation}\label{Eq: Standard Perturbation}
    \max_{\boldsymbol{\delta}:\: \Vert \boldsymbol{\delta}\Vert\le \epsilon}\; \ell\bigl(f(\mathbf{x}+\boldsymbol{\delta}),y\bigr).
\end{equation}
Here $\boldsymbol{\delta}\in\mathbb{R}^d$ represents the $d$-dimensional perturbation vector added to the feature vector $\mathbf{x}$, and $\Vert\cdot \Vert$ denotes a norm function used to measure the attack power that is bounded by parameter $\epsilon\ge 0$. 

In order to gain robustness against norm-bounded perturbations, the adversarial training (AT) scheme \citep{madry2017towards} alters the ERM objective function to the expected worst-case loss function over norm-bounded adversarial perturbations and solves the following min-max optimization problem:
\begin{equation}\label{Eq: AT Empirical distribution}
\begin{aligned}
     &\min_{\mathbf{v}\in\mathcal{V}}\: \frac{1}{n}\sum_{i=1}^n\biggl[ \max_{\boldsymbol{\delta}_i:\: \Vert \boldsymbol{\delta}_i\Vert\le \epsilon}\: \ell\bigl(f_\mathbf{v}(\mathbf{x}_i+\boldsymbol{\delta}_i),y_i\bigr)\biggr]  \\
     \equiv \: & \min_{\mathbf{v}\in\mathcal{V}}\: \max_{\substack{\boldsymbol{\delta}_1,\ldots,\boldsymbol{\delta}_n: \\
   \forall i, \: \Vert \boldsymbol{\delta}_i \Vert\le \epsilon}
   }\: \frac{1}{n}\sum_{i=1}^n\bigl[ \ell\bigl(f_\mathbf{v}(\mathbf{x}_i+\boldsymbol{\delta}_i),y_i\bigr)\bigr]
\end{aligned}
\end{equation}
Note that the above minimax problem indeed estimates the solution to the following learning problem formulated over the true distribution of data $P_{\mathbf{X},Y}$:
\begin{equation}\label{Eq: AT True distribution}
     \min_{\mathbf{v}\in\mathcal{V}}\; \mathbb{E}_{(\mathbf{X},Y)\sim P}\biggl[\, \max_{\boldsymbol{\delta}:\: \Vert \boldsymbol{\delta}\Vert\le \epsilon}\: \ell\bigl(f_\mathbf{v}(\mathbf{X}+\boldsymbol{\delta}),Y\bigr)\biggr].
\end{equation}
It can be seen that the above optimization problem is indeed equivalent to the following min-max problem where the maximization is performed over $\Delta_\epsilon $ containing all mappings $\delta:\mathcal{X}\times \mathcal{Y} \rightarrow \mathbb{R}^d$ whose output is $\epsilon$-norm-bounded, i.e. $\forall \mathbf{x},y: \: \Vert \delta(\mathbf{x},y)\Vert\le \epsilon$:
\begin{equation}\label{Eq: AT True distribution_MinMax}
     \min_{\mathbf{v}\in\mathcal{V}}\; \max_{\delta \in \Delta_\epsilon}\; \mathbb{E}_{(\mathbf{X},Y)\sim P}\bigl[\, \ell\bigl(f_\mathbf{v}(\mathbf{X}+\delta(\mathbf{X},Y)),Y\bigr)\bigr].
\end{equation}
In the next sections, we will discuss the association between the above min-max problem and the adversarial example game for generating transferable adversarial examples.


\section{A Max-Min Approach to Transferable Adversarial Examples}
The transferability of adversarial examples has been extensively studied in the literature. A useful framework to theoretically study transferable examples is the max-min framework of \emph{adversarial example game (AEG)} proposed by \cite{bose2020adversarial}. According to this approach, the adversary searches for the most transferable attack scheme $\delta\in\Delta$ from a set of attack strategies $\Delta$ that achieves the maximum expected loss under the most robust classifier $f_{\mathbf{v}}\in\mathcal{F_V}$ from DNN function space $\mathcal{F_V}$. Therefore, the AEG approach reduces the transferable adversary's task to solving the following max-min optimization problem:
\begin{equation}\label{Eq: AEG empirical distribution}
     \max_{\delta \in \Delta}\; \min_{\mathbf{v}\in\mathcal{V}}\; \frac{1}{n}\sum_{i=1}^n\biggl[ \ell\bigl(f_\mathbf{v}(\mathbf{x}_i+\delta(\mathbf{x}_i,y_i)),y_i\bigr) \biggr] %\; \approx \; \max_{\delta \in \Delta_\epsilon}\; \min_{\mathbf{w}\in\mathcal{W}}\;  \mathbb{E}_{\mathbf{X},Y\sim P}\bigl[\, \ell\bigl(f_\mathbf{w}(\mathbf{X}+\delta(\mathbf{X},Y)),Y\bigr)\bigl].
\end{equation}
The above bi-level optimization problem indeed swaps the maximization and minimization order of the AT optimization problem, and focuses on the max-min version of the min-max AT optimization task. Note that as shown by \cite{meunier2021mixed}, the adversarial example game is in general not guaranteed to have a pure Nash equilibrium where each player's deterministic strategy is optimal when fixing the other player's strategy. Due to the lack of pure Nash equilibria, the AEG max-min and AT min-max optimization problems may not share any common solutions. 

Note that the AEG framework introduces the following metric for evaluating the transferability of an attack scheme $\delta: \mathcal{X}\times\mathcal{Y}\rightarrow \mathbb{R}^d$:
 \begin{equation}\label{Eq: Transferability def empirical distribution}
    \widehat{\mathcal{L}}_{\text{\rm transfer}}(\delta) \, :=\, \min_{\mathbf{v}\in\mathcal{V}}\; \frac{1}{n}\sum_{i=1}^n\bigl[ \ell\bigl(f_\mathbf{v}(\mathbf{x}_i+\delta(\mathbf{x}_i,y_i)),y_i\bigr) \bigr] %\; \approx \; \max_{\delta \in \Delta_\epsilon}\; \min_{\mathbf{w}\in\mathcal{W}}\;  \mathbb{E}_{\mathbf{X},Y\sim P}\bigl[\, \ell\bigl(f_\mathbf{w}(\mathbf{X}+\delta(\mathbf{X},Y)),Y\bigr)\bigl].
\end{equation}
The above transferability score indeed estimates the following score measuring transferability under the underlying distribution $P_{\mathbf{X},Y}$:  
\begin{equation}\label{Eq: Transferability def true distribution}
\begin{aligned}
    &\mathcal{L}_{\text{\rm transfer}}(\delta) := 
    \min_{\mathbf{v}\in\mathcal{V}}\, \mathbb{E}_{(\mathbf{X},Y)\sim P}\biggl[ \ell\left(f_\mathbf{v}(\mathbf{X}+\delta(\mathbf{X},Y)),Y\right)\biggr].
\end{aligned}
\end{equation}
Based on this discussion, the AEG optimization problem in \eqref{Eq: AEG empirical distribution} similarly estimates the solution to the following max-min AEG problem formed around the underlying distribution $P_{\mathbf{X},Y}$:
\begin{equation}\label{Eq: AEG true distribution}
    \begin{aligned}
      &\max_{\delta \in \Delta}\:\mathcal{L}_{\text{\rm transfer}}(\delta) \; \equiv \;\\
      &\max_{\delta \in \Delta}\; \min_{\mathbf{v}\in\mathcal{V}}\;  \mathbb{E}_{(\mathbf{X},Y)\sim P}\biggl[ \ell\left(f_\mathbf{v}(\mathbf{X}+\delta(\mathbf{X},Y)),Y\right)\biggr].
    \end{aligned}
\end{equation}
Therefore, the primary goal of the transferable adversary is to solve the above problem targeting the distribution of test data instead of training examples. However, since the true distribution is unknown to the adversary, the AEG framework switches to the empirical max-min problem \eqref{Eq: AEG empirical distribution}. This discussion motivates the following definition of the generalization error for adversarial examples' transferability performance:
\begin{mydef}\label{Def :Generalization Error black-box}
We define the generalization error of an attack scheme $\delta: \mathcal{X}\times\mathcal{Y}\rightarrow \mathbb{R}^d$ over DNN classifier space $\mathcal{F_V}$ as follows:
\begin{align}\label{Eq: Gen erro black-box attack}
    \epsilon_{\text{\rm gen}}(\delta) :=& \, \widehat{\mathcal{L}}_{\text{\rm transfer}}(\delta) -  \mathcal{L}_{\text{\rm transfer}}(\delta) \\
    =& \,  \min_{\mathbf{v}\in\mathcal{V}}\biggl\{ \frac{1}{n}\sum_{i=1}^n\left[ \ell\bigl(f_\mathbf{v}(\mathbf{x}_i+\delta(\mathbf{x}_i,y_i)),y_i\bigr) \right]\biggr\}\nonumber \\
    &-  \min_{\mathbf{v}\in\mathcal{V}}\biggl\{ \mathbb{E}\left[ \ell\bigl(f_\mathbf{v}(\mathbf{X}+\delta(\mathbf{X},Y)),Y\bigr)\right] \biggr\}. \nonumber
\end{align}
\end{mydef}
Note that the above definition is consistent with the standard definition of generalization error in minimax learning frameworks such as generative adversarial network (GAN) and adversarial training approaches in the literature  \citep{arora2017generalization,yin2019rademacher,farnia2020gans,xing2021generalization,farnia2021train,lei2021stability} where the generalization error of the min (or max) player is defined as the difference between the worst-case empirical and population objectives under the other player's optimal action. Therefore, in order for a black-box adversarial attack to be effective, we need the attack scheme to generalize well from training samples to test data, and based on the max-min AEG framework the generalization error is defined in the  sense of Definition \ref{Def :Generalization Error black-box}. 


\section{A Generalization Bound for Adversarial Example Games}
In this section, we aim to analyze the generalization error of a black-box adversarial attack scheme based on the substitute classifier of an $L$-layer DNN $\mathcal{H_W}$. To characterize a one-to-one correspondence between the choice of the DNN weights and the assigned attack scheme, we consider the following definition of an optimal attack scheme for a substitute neural net $h_{\mathbf{w}}\in \mathcal{H_W}$, which revisits the distributionally robust optimization approach to the adversarial training problem \citep{sinha2017certifying}.
\begin{mydef}
Given a classifier $h_{\mathbf{w}}$, we call the attack scheme $\delta^*_\mathbf{w}:\mathcal{X}\times \mathcal{Y}\rightarrow \mathbb{R}^d$ $\lambda$-optimal if it solves the following optimization problem:
\begin{equation*}
    \max_{\delta:\mathcal{X}\times \mathcal{Y}\rightarrow \mathbb{R}^d}\; \mathbb{E}\bigl[\ell\bigl(h_\mathbf{w}(\mathbf{X}+\delta(\mathbf{X},Y)),Y\bigr)\bigr] - \frac{\lambda}{2} \mathbb{E}\bigl[\Vert  \delta(\mathbf{X},Y) \Vert^2\bigr].
\end{equation*}
\end{mydef}
The above definition of a $\lambda$-optimal attack revisits the notion of Wasserstein-based distributional adversarial attacks in the distributionally robust optimization literature \citep{sinha2017certifying}, where the attack norm bound parameterized by $\epsilon$ implicitly depends on coefficient $\lambda$.  
Here, the definition of $\lambda$-optimal attacks employs a regularization term to  penalize the averaged norm-squared of perturbations. As shown in Proposition \ref{Prop: correspondence}, this definition allows us to establish a one-to-one correspondence between $\lambda$-optimal attack schemes and $\lambda$-smooth DNN classifiers. The one-to-one correspondence property addresses the intractable nature of the analysis of an optimal $\epsilon$-norm bounded adversarial attack scheme which could be non-unique for non-convex neural nets. %We defer the proofs to the Appendix.
\begin{prop}\label{Prop: correspondence}
Consider the $L_2$-norm function $\Vert\cdot\Vert_2$ for measuring the attack power. Suppose that the composition $\ell \circ h_{\mathbf{w}}$ is a $\lambda$-smooth differentiable function of $\mathbf{x}$, i.e. for every $\mathbf{x},\mathbf{x}',y$ we have $\Vert\nabla_{\mathbf{x}}\ell(h_\mathbf{w}(\mathbf{x}),y)- \nabla_{\mathbf{x}}\ell(h_\mathbf{w}(\mathbf{x}'),y)\Vert_2\le \lambda\Vert\mathbf{x}-\mathbf{x}'\Vert_2$. Then, there exists a unique $\lambda$-optimal attack scheme $ \delta^*(\mathbf{x},y)$ for $h_{\mathbf{w}}$  given by:
\begin{equation*}
    \delta^*(\mathbf{x},y) \, =\,   \left(\text{\rm Id}_{\mathbf{x}} - \frac{1}{\lambda}\nabla_{\mathbf{x}}\ell\circ h_\mathbf{w}\right)^{-1}(\mathbf{x},y) \, - \, \mathbf{x}.
\end{equation*}
In the above equation $\text{\rm Id}_{\mathbf{x}}$ represents the identity function on feature vector $\mathbf{x}$, and $(\cdot)^{-1}$ denotes the inverse of an invertible transformation.
\end{prop}
\begin{proof}
    We defer the proof to the Appendix.
\end{proof}
The above proposition reveals a bijection between smooth DNN classifiers and optimal attack schemes. Therefore, in our generalization analysis, we focus on bounding the generalization error for the resulting $\lambda$-optimal attack schemes corresponding to $\lambda$-smooth DNN substitute classifiers.

In the following theorem, we show a generalization error bound for the class of $\lambda$-optimal black-box attack schemes coming from spectrally-regularized DNN functions. This theorem extends the uniform convergence generalization bounds \citep{bartlett2017spectrally,neyshabur2017pac} from standard deep supervised learning problems to the max-min adversarial example game learning framework. In the theorem, we use the following set of assumptions on the loss function $\ell$ and the target and substitute classes of neural networks. Also, note that $\Vert\cdot\Vert_2$ denotes the $L_2$-operator (spectral) norm in application to a matrix, i.e. the matrix's maximum singular value, and $\Vert\cdot\Vert_{2,1}$ denotes the $(2,1)$-norm of a matrix which is the summation of the $L_2$-norms of the matrix's rows. 
\begin{assumption}\label{Assumption: loss}
Loss function $\ell(y,y')$ is a $c$-bounded, $1$-Lipschitz, and $1$-smooth function of the input $y$, i.e. for every $y_1,y_2,y'\in\mathcal{Y}$ we have $\vert \ell(y_1,y')\vert\le c$, $|\ell(y_1,y')-\ell(y_2,y')| \le \Vert y_1-y_2 \Vert_2 $, and $\Vert\nabla_y\ell(y_1,y')-\nabla_y\ell(y_2,y')\Vert_2 \le \Vert y_1-y_2 \Vert_2 $.
\end{assumption}
\begin{assumption}\label{Assumption: substitute neural network}
The set of substitute DNNs in the black-box attack scheme $\mathcal{H_W}=\{h_\mathbf{w}:\, \mathbf{w}\in\mathcal{W} \}$ contains $L$-layer neural networks $h_\mathbf{w}(\mathbf{x})=W_L \phi_{L}\bigl(W_{L-1}\phi_{L-1}(\cdots W_1\phi_{1}(W_{0}\mathbf{x}) \cdots)\bigr)$. We suppose that the dimensions of matrices $W_0,\ldots,W_L$ are bounded by $D$, and assume every activation $\phi_i$ satisfies $\phi_i(0)=0$ and is $\gamma_i$-Lipschitz and $\gamma_i$-smooth, i.e., $\max\{|\phi_i'(z)|,|\phi_i''(z)|\}\le \gamma_i$ holds for every $z\in\mathbb{R}$.
\end{assumption}
\begin{assumption}\label{Assumption: target neural network}
The class of target classifiers $\mathcal{F_V}=\{f_\mathbf{v}:\, \mathbf{v}\in\mathcal{V} \}$ consists of $K$-layer neural network functions $f_\mathbf{v}(\mathbf{x})=V_K \psi_{K}\bigl(V_{K-1}\psi_{K-1}(\cdots V_1\psi_{1}(V_{0}\mathbf{x}) \cdots)\bigr)$ with activation functions $\psi_i$. We suppose that the dimensions of matrices $V_0,\ldots,V_K$ are bounded by $D$. Also, we assume every $\psi_i$ satisfies $\psi_i(0)=0$ and is $\xi_i$-Lipschitz, i.e., $\max_z\: |\psi_i'(z)|\le \xi_i$. Finally, we define the capacity $R_\mathcal{V}$ as
\begin{equation*}
    R_{\mathcal{V}} := \sup_{\mathbf{v}\in\mathcal{V}}\,\left\{  \bigl(\prod_{i=0}^K \xi_i \Vert V_i \Vert_2 \bigr)\biggl(\sum_{i=0}^K \frac{\Vert V^\top_i \Vert^{2/3}_{2,1}}{\Vert V_i \Vert^{2/3}_{2}} \biggr)^{3/2}\right\}.
\end{equation*}
\end{assumption}


\begin{theorem}\label{Thm: generalization bound}
Suppose that the loss function, substitute DNN, and target DNN in a black-box adversarial attack satisfy Assumptions~\ref{Assumption: loss}, \ref{Assumption: substitute neural network}, and~\ref{Assumption: target neural network}. Assume that $\Vert \mathbf{X}\Vert_2\le B$ holds for the $n\times d$ data matrix $\mathbf{X}$ and that $\lambda(1-\tau) \ge \bigl(\prod_{i=0}^L \gamma_i \Vert W_i \Vert_2\bigr)\sum_{i=0}^L\prod_{j=0}^i \gamma_j \Vert W_j \Vert_2 $ holds for a constant $\tau > 0$ and every $\mathbf{w}\in\mathcal{W}$. Then, for every $\omega>0$, with probability at least $1-\omega$ the following bound will hold for every $\mathbf{w}\in\mathcal{W}$:
\begin{align}
    \epsilon_{\text{\rm gen}}(\delta^*_{\mathbf{w}})  \,\le &\: 
    \mathcal{O}\biggl( c\sqrt{\frac{\log(1/\omega)}{n}} \\
    +&\, \frac{(B+\frac{L_{\mathbf{w}}}{\lambda}) \bigl(R_{\mathcal{V}}+\frac{1}{\tau^2}L_{\mathbf{w}}R_{\mathbf{w}}\bigr) \log(n)\log(D) }{n} \biggr)  \nonumber
\end{align}
where the Lipschitz and capacity terms $L_{\mathbf{w}},\, R_{\mathbf{w}}$ are defined as:
\begin{equation}
\begin{aligned}
    &L_{\mathbf{w}} :=  \prod_{i=0}^L \gamma_i \Vert W_i \Vert_2 ,\quad \\
    &R_{\mathbf{w}} :=  \left(\sum_{i=0}^L\prod_{j=0}^i \gamma_j \Vert W_j \Vert_2 \right)\biggl(\sum_{i=0}^L \frac{\Vert W^\top_i \Vert^{2/3}_{2,1}}{\Vert W_i \Vert^{2/3}_{2}} \biggr)^{3/2}.
\end{aligned}
\end{equation}
\end{theorem}
\begin{proof}
    We defer the proof to the Appendix.
\end{proof}
The above theorem bounds the generalization error of the attack scheme $\delta^*_{\mathbf{w}}$ corresponding to the substitute DNN $h_{\mathbf{w}}$ in terms of the spectral capacity of the substitute network. As a result, this bound motivates norm-based spectral regularization \citep{yoshida2017spectral,miyato2018spectral,farnia2018generalizable} for improving the generalization performance of black-box attack schemes. 


\section{Numerical Results}
\begin{figure*}[h]
    \centering
    \vspace{-0.1cm}
    % \hspace{-0.8cm}\includegraphics[height=4cm]{ Figures/Experiment_Fig/tran_L2_FGM_cifar10.png}
    \includegraphics[width=0.875\linewidth]{ Figures/trans_3_datasets_only_test_witherrorbar.png}
    \caption{Generalization errors of substitute DNNs (the lower the better), and transferability rates of adversarial examples generated from the substitute model (the higher the better) for CIFAR-10 (rows 1--2), CIFAR-100 (rows 3--4) and SVHN (rows 5--6) datasets. ResNet-18 and VGG-16 architectures were used as the target DNNs.}
    \vspace{-0.2cm}
    
    % The top row shows the results for AlexNet and Inception trained by FGM adversarial training on CIFAR-10 data. The bottom row shows the results for the Inception-Net trained by $L_\infty$-bounded PGD on SVHN.}
    \label{fig:tran_L2_FGM}
\end{figure*}

\begin{table*}[t]
\centering
\resizebox{0.8\linewidth}{!}
{
\begin{tabular}{|ccccccc|}
\hline
Dataset & Model & Method & $\beta$ & \begin{tabular}[c]{@{}c@{}}Generalization \\ Error\end{tabular} & \begin{tabular}[c]{@{}c@{}}Transferability Rate\\ (VGG-16)\end{tabular} & \begin{tabular}[c]{@{}c@{}}Transferability Rate\\ (ResNet-18)\end{tabular} \\ \hline
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.545$\pm$0.031 & 0.105$\pm$0.011 & 0.087$\pm$0.009 \\ \cline{4-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGD}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.342$\pm$0.022 & \cellcolor[HTML]{EFEFEF}0.162$\pm$0.012 & \cellcolor[HTML]{EFEFEF}0.139$\pm$0.01 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.512$\pm$0.022 & 0.093$\pm$0.014 & 0.077$\pm$0.031 \\ \cline{4-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{I-FGM}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.414$\pm$0.018 & \cellcolor[HTML]{EFEFEF}0.149$\pm$0.022 & \cellcolor[HTML]{EFEFEF}0.123$\pm$0.009 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.505$\pm$0.028 & 0.089$\pm$0.007 & 0.070$\pm$0.007 \\ \cline{4-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-6}{*}{AlexNet}} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.451$\pm$0.022 & \cellcolor[HTML]{EFEFEF}0.147$\pm$0.014 & \cellcolor[HTML]{EFEFEF}0.122$\pm$0.011 \\ \cline{2-7} 
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.508$\pm$0.020 & 0.104$\pm$0.009 & 0.084$\pm$0.010 \\ \cline{4-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGD}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.258$\pm$0.015 & \cellcolor[HTML]{EFEFEF}0.150$\pm$0.008 & \cellcolor[HTML]{EFEFEF}0.134$\pm$0.007 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.487$\pm$0.011 & 0.093$\pm$0.010 & 0.081$\pm$0.010 \\ \cline{4-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{I-FGM}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.288$\pm$0.017 & \cellcolor[HTML]{EFEFEF}0.113$\pm$0.011 & \cellcolor[HTML]{EFEFEF}0.122$\pm$0.011 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.466$\pm$0.019 & 0.092$\pm$0.012 & 0.078$\pm$0.011 \\ \cline{4-4}
\multicolumn{1}{|c|}{\multirow{-12}{*}{Cifar10}} & \multicolumn{1}{c|}{\multirow{-6}{*}{Inception}} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.320$\pm$0.031 & \cellcolor[HTML]{EFEFEF}0.136$\pm$0.018 & \cellcolor[HTML]{EFEFEF}0.113$\pm$0.015 \\ \hline
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.789$\pm$0.055 & 0.229$\pm$0.032 & 0.260$\pm$0.031 \\ \cline{4-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGD}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.601$\pm$0.043 & \cellcolor[HTML]{EFEFEF}0.323$\pm$0.023 & \cellcolor[HTML]{EFEFEF}0.353$\pm$0.025 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.777$\pm$0.033 & 0.265$\pm$0.028 & 0.277$\pm$0.021 \\ \cline{4-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{I-FGM}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.655$\pm$0.030 & \cellcolor[HTML]{EFEFEF}0.313$\pm$0.021 & \cellcolor[HTML]{EFEFEF}0.321$\pm$0.026 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.758$\pm$0.041 & 0.258$\pm$0.019 & 0.232$\pm$0.023 \\ \cline{4-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-6}{*}{AlexNet}} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.611$\pm$0.037 & \cellcolor[HTML]{EFEFEF}0.342$\pm$0.022 & \cellcolor[HTML]{EFEFEF}0.310$\pm$0.020 \\ \cline{2-7} 
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.602$\pm$0.03 & 0.303$\pm$0.017 & 0.270$\pm$0.021 \\ \cline{4-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGD}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.494$\pm$0.028 & \cellcolor[HTML]{EFEFEF}0.330$\pm$0.017 & \cellcolor[HTML]{EFEFEF}0.301$\pm$0.020 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.700$\pm$0.040 & 0.288$\pm$0.054 & 0.255$\pm$0.019 \\ \cline{4-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{I-FGM}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.565$\pm$0.031 & \cellcolor[HTML]{EFEFEF}0.331$\pm$0.037 & \cellcolor[HTML]{EFEFEF}0.288$\pm$0.043 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.717$\pm$0.033 & 0.268$\pm$0.017 & 0.236$\pm$0.017 \\ \cline{4-4}
\multicolumn{1}{|c|}{\multirow{-12}{*}{Cifar100}} & \multicolumn{1}{c|}{\multirow{-6}{*}{Inception}} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.558$\pm$0.030 & \cellcolor[HTML]{EFEFEF}0.313$\pm$0.020 & \cellcolor[HTML]{EFEFEF}0.275$\pm$0.019 \\ \hline
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.298$\pm$0.020 & 0.211$\pm$0.018 & 0.225$\pm$0.017 \\ \cline{4-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGD}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.199$\pm$0.012 & \cellcolor[HTML]{EFEFEF}0.276$\pm$0.008 & \cellcolor[HTML]{EFEFEF}0.292$\pm$0.011 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.334$\pm$0.015 & 0.187$\pm$0.018 & 0.199$\pm$0.018 \\ \cline{4-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{I-FGM}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.211$\pm$0.015 & \cellcolor[HTML]{EFEFEF}0.279$\pm$0.013 & \cellcolor[HTML]{EFEFEF}0.287$\pm$0.014 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.373$\pm$0.021 & 0.134$\pm$0.013 & 0.126$\pm$0.013 \\ \cline{4-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-6}{*}{AlexNet}} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.203$\pm$0.013 & \cellcolor[HTML]{EFEFEF}0.277$\pm$0.014 & \cellcolor[HTML]{EFEFEF}0.257$\pm$0.016 \\ \cline{2-7} 
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.342$\pm$0.021 & 0.193$\pm$0.017 & 0.177$\pm$0.018 \\ \cline{4-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGD}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.115$\pm$0.010 & \cellcolor[HTML]{EFEFEF}0.339$\pm$0.021 & \cellcolor[HTML]{EFEFEF}0.313$\pm$0.019 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.366$\pm$0.011 & 0.156$\pm$0.011 & 0.166$\pm$0.015 \\ \cline{4-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{I-FGM}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.187$\pm$0.012 & \cellcolor[HTML]{EFEFEF}0.301$\pm$0.009 & \cellcolor[HTML]{EFEFEF}0.288$\pm$0.011 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.373$\pm$0.022 & 0.134$\pm$0.015 & 0.126$\pm$0.016 \\ \cline{4-4}
\multicolumn{1}{|c|}{\multirow{-12}{*}{SVHN}} & \multicolumn{1}{c|}{\multirow{-6}{*}{Inception}} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.203$\pm$0.014 & \cellcolor[HTML]{EFEFEF}0.277$\pm$0.018 & \cellcolor[HTML]{EFEFEF}0.257$\pm$0.016 \\ \hline
\end{tabular}
% \begin{tabular}{|ccccccc|}
% \hline
% Dataset & Model & Method & $\beta$ &\begin{tabular}[c]{@{}c@{}}Generalization \\Error\end{tabular} & \begin{tabular}[c]{@{}c@{}}Transferability Rate\\ (VGG-16)\end{tabular} & \begin{tabular}[c]{@{}c@{}}Transferability Rate\\ (ResNet18)\end{tabular} \\ \hline
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.545 $\pm$ 0.031 & 0.105 $\pm$ 0.011 & 0.087 $\pm$ 0.009 \\ \cline{4-4}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGD}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.342 $\pm$ 0.022 & \cellcolor[HTML]{EFEFEF}0.162 $\pm$ 0.012 & \cellcolor[HTML]{EFEFEF}0.139 $\pm$ 0.013\\ \cline{3-4}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.505 $\pm$ 0.028 & 0.089 $\pm$ 0.007 & 0.070 $\pm$ 0.007 \\ \cline{4-4}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-4}{*}{AlexNet}} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.451 $\pm$ 0.022 & \cellcolor[HTML]{EFEFEF}0.147 $\pm$ 0.014 & \cellcolor[HTML]{EFEFEF}0.122 $\pm$ 0.011 \\ \cline{2-7} 
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.508 $\pm$ 0.020 & 0.104 $\pm$ 0.009 & 0.084 $\pm$ 0.010 \\ \cline{4-4}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGD}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.258 $\pm$ 0.015 & \cellcolor[HTML]{EFEFEF}0.150 $\pm$ 0.008& \cellcolor[HTML]{EFEFEF}0.134 $\pm$ 0.007 \\ \cline{3-4}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.466 $\pm$ 0.019 & 0.092 $\pm$ 0.012 & 0.078 $\pm$ 0.011 \\ \cline{4-4}
% \multicolumn{1}{|c|}{\multirow{-8}{*}{Cifar10}} & \multicolumn{1}{c|}{\multirow{-4}{*}{Inception}} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.320 $\pm$ 0.031 & \cellcolor[HTML]{EFEFEF}0.136 $\pm$ 0.018 & \cellcolor[HTML]{EFEFEF}0.113 $\pm$ 0.015\\ \hline
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.789 $\pm$ 0.055 & 0.229 $\pm$ 0.032 & 0.260 $\pm$ 0.031 \\ \cline{4-4}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGD}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.601$\pm$ 0.043 & \cellcolor[HTML]{EFEFEF}0.323 $\pm$ 0.023 & \cellcolor[HTML]{EFEFEF}0.353 $\pm$ 0.025\\ \cline{3-4}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.758 $\pm$ 0.041 & 0.258 $\pm$ 0.019 & 0.232 $\pm$ 0.023 \\ \cline{4-4}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-4}{*}{AlexNet}} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.611 $\pm$ 0.037 & \cellcolor[HTML]{EFEFEF}0.342 $\pm$ 0.022 & \cellcolor[HTML]{EFEFEF}0.310 $\pm$ 0.020\\ \cline{2-7} 
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.602 $\pm$ 0.033 & 0.303 $\pm$ 0.017 & 0.270 $\pm$ 0.021 \\ \cline{4-4}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGD}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.3} & \cellcolor[HTML]{EFEFEF}0.494 $\pm$ 0.028 & \cellcolor[HTML]{EFEFEF}0.330 $\pm$ 0.017 & \cellcolor[HTML]{EFEFEF}0.301 $\pm$ 0.020\\ \cline{3-4}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.717 $\pm$ 0.033 & 0.268 $\pm$ 0.017 & 0.236 $\pm$ 0.017 \\ \cline{4-4}
% \multicolumn{1}{|c|}{\multirow{-8}{*}{Cifar100}} & \multicolumn{1}{c|}{\multirow{-4}{*}{Inception}} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.3} & \cellcolor[HTML]{EFEFEF}0.558 $\pm$ 0.030 & \cellcolor[HTML]{EFEFEF}0.313 $\pm$ 0.020 & \cellcolor[HTML]{EFEFEF}0.275 $\pm$ 0.019\\ \hline
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.298 $\pm$ 0.020 & 0.211 $\pm$ 0.018 & 0.225 $\pm$ 0.017 \\ \cline{4-4}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGD}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.199 $\pm$ 0.012 & \cellcolor[HTML]{EFEFEF}0.276 $\pm$ 0.008 & \cellcolor[HTML]{EFEFEF}0.292 $\pm$ 0.011\\ \cline{3-4}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.373 $\pm$ 0.021 & 0.134 $\pm$ 0.013 & 0.126 $\pm$ 0.013 \\ \cline{4-4}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-4}{*}{AlexNet}} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.203 $\pm$ 0.013 & \cellcolor[HTML]{EFEFEF}0.277 $\pm$ 0.014& \cellcolor[HTML]{EFEFEF}0.257 $\pm$ 0.016\\ \cline{2-7} 
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.342 $\pm$ 0.021 & 0.193 $\pm$ 0.017 & 0.177 $\pm$ 0.018 \\ \cline{4-4}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGD}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.115 $\pm$ 0.010& \cellcolor[HTML]{EFEFEF}0.339 $\pm$ 0.021& \cellcolor[HTML]{EFEFEF}0.313 $\pm$ 0.019\\ \cline{3-4}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.373 $\pm$ 0.022 & 0.134 $\pm$ 0.015 & 0.126 $\pm$ 0.016 \\ \cline{4-4}
% \multicolumn{1}{|c|}{\multirow{-8}{*}{SVHN}} & \multicolumn{1}{c|}{\multirow{-4}{*}{Inception}} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.0} & \cellcolor[HTML]{EFEFEF}0.203 $\pm$ 0.014 & \cellcolor[HTML]{EFEFEF}0.277 $\pm$ 0.018 & \cellcolor[HTML]{EFEFEF}0.257 $\pm$ 0.016\\ \hline
% \end{tabular}



}
\caption{Generalization error (Gen. Err.) and $L_2$-norm-based adversarial examples' transferability rates on three image datasets, with and without spectral regularization ($\beta=\infty$ means no spectral regularization). %Train($\mathcal{F}$) and Test($\mathcal{F}$) represents the perturbations' transferability, which are generated from training and test samples, respectively. 
%VGG16 and RN18 represent setting target model as VGG16 and ResNet18.
}
\label{tab:generalization_transferability_SN}
\end{table*}



In this section, we provide the results of our numerical experiments for validating the connection between the generalization and transferability properties of black-box adversarial attacks. The numerical discussion focuses on the question of whether achieving a better generalization score for the substitute DNN can improve the success of the designed perturbations in application to a different DNN classifier. To answer this question, we tested an explicit norm-based regularization method, spectral normalization \citep{yoshida2017spectral,tsuzuku2018lipschitz,farnia2018generalizable}, as well as an implicit regularization technique, early stopping \citep{yao2007early,rice2020overfitting}, to evaluate the power of these regularization methods in attaining more transferable black-box attacks.

\begin{table*}[t]
\centering
\resizebox{0.82\linewidth}{!}{
    \begin{tabular}{|cccccc|}
    \hline
    Dataset & Model & Method & \begin{tabular}[c]{@{}c@{}}Generalization \\ Error\end{tabular} & \begin{tabular}[c]{@{}c@{}}Transferability Rate\\ (VGG-16)\end{tabular} & \begin{tabular}[c]{@{}c@{}}Transferability Rate\\ (ResNet-18)\end{tabular} \\ \hline
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGD} & 0.517$\pm$0.027 & 0.127$\pm$0.013 & 0.104$\pm$0.012 \\
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGD-ES} & \cellcolor[HTML]{EFEFEF}0.073$\pm$0.018 & \cellcolor[HTML]{EFEFEF}0.198$\pm$0.014 & \cellcolor[HTML]{EFEFEF}0.172$\pm$0.011 \\ \cline{3-3}
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{I-FGM} & 0.488$\pm$0.017 & 0.114$\pm$0.014 & 0.108$\pm$0.012 \\
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{I-FGM-ES} & \cellcolor[HTML]{EFEFEF}0.112$\pm$0.016 & \cellcolor[HTML]{EFEFEF}0.181$\pm$0.015 & \cellcolor[HTML]{EFEFEF}0.156$\pm$0.014 \\ \cline{3-3}
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.467$\pm$0.017 & 0.100$\pm$0.012 & 0.089$\pm$0.012 \\
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-6}{*}{Inception}} & \multicolumn{1}{c|}{FGM-ES} & \cellcolor[HTML]{EFEFEF}0.126$\pm$0.031 & \cellcolor[HTML]{EFEFEF}0.170$\pm$0.010 & \cellcolor[HTML]{EFEFEF}0.147$\pm$0.014 \\ \cline{2-6} 
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGD} & 0.579$\pm$0.037 & 0.098$\pm$0.009 & 0.077$\pm$0.007 \\
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGD-ES} & \cellcolor[HTML]{EFEFEF}0.061$\pm$0.041 & \cellcolor[HTML]{EFEFEF}0.154$\pm$0.017 & \cellcolor[HTML]{EFEFEF}0.136$\pm$0.017 \\ \cline{3-3}
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{I-FGM} & 0.533$\pm$0.054 & 0.102$\pm$0.023 & 0.098$\pm$0.016 \\
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{I-FGM-ES} & \cellcolor[HTML]{EFEFEF}0.077$\pm$0.054 & \cellcolor[HTML]{EFEFEF}0.149$\pm$0.031 & \cellcolor[HTML]{EFEFEF}0.132$\pm$0.014 \\ \cline{3-3}
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.520$\pm$0.039 & 0.100$\pm$0.007 & 0.087$\pm$0.005 \\
    \multicolumn{1}{|c|}{\multirow{-12}{*}{Cifar10}} & \multicolumn{1}{c|}{\multirow{-6}{*}{AlexNet}} & \multicolumn{1}{c|}{FGM-ES} & \cellcolor[HTML]{EFEFEF}0.092$\pm$0.007 & \cellcolor[HTML]{EFEFEF}0.152$\pm$0.010 & \cellcolor[HTML]{EFEFEF}0.127$\pm$0.011 \\ \hline
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGD} & 0.646$\pm$0.017 & 0.283$\pm$0.016 & 0.258$\pm$0.009 \\
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGD-ES} & \cellcolor[HTML]{EFEFEF}0.137$\pm$0.014 & \cellcolor[HTML]{EFEFEF}0.330$\pm$0.011 & \cellcolor[HTML]{EFEFEF}0.286$\pm$0.012 \\ \cline{3-3}
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{I-FGM} & 0.688$\pm$0.016 & 0.284$\pm$0.022 & 0.254$\pm$0.021 \\
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{I-FGM-ES} & \cellcolor[HTML]{EFEFEF}0.165$\pm$0.010 & \cellcolor[HTML]{EFEFEF}0.333$\pm$0.024 & \cellcolor[HTML]{EFEFEF}0.289$\pm$0.019 \\ \cline{3-3}
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.711$\pm$0.013 & 0.270$\pm$0.017 & 0.239$\pm$0.013 \\
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-6}{*}{Inception}} & \multicolumn{1}{c|}{FGM-ES} & \cellcolor[HTML]{EFEFEF}0.146$\pm$0.013 & \cellcolor[HTML]{EFEFEF}0.327$\pm$0.014 & \cellcolor[HTML]{EFEFEF}0.289$\pm$0.008 \\ \cline{2-6} 
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGD} & 0.764$\pm$0.012 & 0.252$\pm$0.011 & 0.227$\pm$0.016 \\
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGD-ES} & \cellcolor[HTML]{EFEFEF}0.091$\pm$0.010 & \cellcolor[HTML]{EFEFEF}0.294$\pm$0.015 & \cellcolor[HTML]{EFEFEF}0.266$\pm$0.017 \\ \cline{3-3}
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{I-FGM} & 0.744$\pm$0.010 & 0.252$\pm$0.021 & 0.221$\pm$0.017 \\
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{I-FGM-ES} & \cellcolor[HTML]{EFEFEF}0.097$\pm$0.015 & \cellcolor[HTML]{EFEFEF}0.303$\pm$0.018 & \cellcolor[HTML]{EFEFEF}0.256$\pm$0.021 \\ \cline{3-3}
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.756$\pm$0.022 & 0.261$\pm$0.025 & 0.232$\pm$0.017 \\
    \multicolumn{1}{|c|}{\multirow{-12}{*}{Cifar100}} & \multicolumn{1}{c|}{\multirow{-6}{*}{AlexNet}} & \multicolumn{1}{c|}{FGM-ES} & \cellcolor[HTML]{EFEFEF}0.122$\pm$0.019 & \cellcolor[HTML]{EFEFEF}0.291$\pm$0.017 & \cellcolor[HTML]{EFEFEF}0.259$\pm$0.032 \\ \hline
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGD} & 0.341$\pm$0.028 & 0.207$\pm$0.015 & 0.220$\pm$0.014 \\
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGD-ES} & \cellcolor[HTML]{EFEFEF}0.057$\pm$0.006 & \cellcolor[HTML]{EFEFEF}0.298$\pm$0.017 & \cellcolor[HTML]{EFEFEF}0.322$\pm$0.021 \\ \cline{3-3}
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{I-FGM} & 0.336$\pm$0.041 & 0.188$\pm$0.026 & 0.176$\pm$0.036 \\
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{I-FGM-ES} & \cellcolor[HTML]{EFEFEF}0.066$\pm$0.040 & \cellcolor[HTML]{EFEFEF}0.233$\pm$0.035 & \cellcolor[HTML]{EFEFEF}0.220$\pm$0.028 \\ \cline{3-3}
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.380$\pm$0.039 & 0.136$\pm$0.017 & 0.129$\pm$0.019 \\
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-6}{*}{Inception}} & \multicolumn{1}{c|}{FGM-ES} & \cellcolor[HTML]{EFEFEF}0.180$\pm$0.022 & \cellcolor[HTML]{EFEFEF}0.213$\pm$0.019 & \cellcolor[HTML]{EFEFEF}0.219$\pm$0.020 \\ \cline{2-6} 
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGD} & 0.307$\pm$0.041 & 0.211$\pm$0.011 & 0.228$\pm$0.012 \\
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGD-ES} & \cellcolor[HTML]{EFEFEF}0.030$\pm$0.004 & \cellcolor[HTML]{EFEFEF}0.256$\pm$0.011 & \cellcolor[HTML]{EFEFEF}0.278$\pm$0.013 \\ \cline{3-3}
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{I-FGM} & 0.337$\pm$0.012 & 0.187$\pm$0.012 & 0.200$\pm$0.023 \\
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{I-FGM-ES} & \cellcolor[HTML]{EFEFEF}0.067$\pm$0.022 & \cellcolor[HTML]{EFEFEF}0.255$\pm$0.023 & \cellcolor[HTML]{EFEFEF}0.267$\pm$0.018 \\ \cline{3-3}
    \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.373$\pm$0.029 & 0.157$\pm$0.021 & 0.170$\pm$0.019 \\
    \multicolumn{1}{|c|}{\multirow{-12}{*}{SVHN}} & \multicolumn{1}{c|}{\multirow{-6}{*}{AlexNet}} & \multicolumn{1}{c|}{FGM-ES} & \cellcolor[HTML]{EFEFEF}0.064$\pm$0.011 & \cellcolor[HTML]{EFEFEF}0.241$\pm$0.015 & \cellcolor[HTML]{EFEFEF}0.260$\pm$0.014 \\ \hline
    \end{tabular}




% \begin{tabular}{|cccccc|}
% \hline
% Dataset & Model & Method & \begin{tabular}[c]{@{}c@{}}Generalization \\ Error \end{tabular} & \begin{tabular}[c]{@{}c@{}}Transferability Rate\\ (VGG-16)\end{tabular} & \begin{tabular}[c]{@{}c@{}}Transferability Rate\\ (ResNet18)\end{tabular} \\ \hline
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGD} & 0.517 $\pm$ 0.027 & 0.127 $\pm$ 0.013 & 0.104 $\pm$ 0.012 \\
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}PGD-ES} & \cellcolor[HTML]{EFEFEF}0.073 $\pm$ 0.018 & \cellcolor[HTML]{EFEFEF}0.198 $\pm$ 0.014 & \cellcolor[HTML]{EFEFEF}0.172 $\pm$ 0.011\\ \cline{3-3}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.467 $\pm$ 0.017 & 0.100 $\pm$ 0.012 & 0.089 $\pm$ 0.012 \\
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-4}{*}{Inception}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}FGM-ES} & \cellcolor[HTML]{EFEFEF}0.126 $\pm$ 0.031 & \cellcolor[HTML]{EFEFEF}0.170 $\pm$ 0.010 & \cellcolor[HTML]{EFEFEF}0.147 $\pm$ 0.014 \\ \cline{2-6} 
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGD} & 0.579 $\pm$ 0.037 & 0.098 $\pm$ 0.009 & 0.077 $\pm$ 0.007 \\
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}PGD-ES} & \cellcolor[HTML]{EFEFEF}0.061 $\pm$ 0.041 & \cellcolor[HTML]{EFEFEF}0.154 $\pm$ 0.017 & \cellcolor[HTML]{EFEFEF}0.136 $\pm$ 0.017\\ \cline{3-3}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.520 $\pm$ 0.039  & 0.100 $\pm$ 0.007 & 0.087 $\pm$ 0.005 \\
% \multicolumn{1}{|c|}{\multirow{-8}{*}{Cifar10}} & \multicolumn{1}{c|}{\multirow{-4}{*}{AlexNet}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}FGM-ES} & \cellcolor[HTML]{EFEFEF}0.092 $\pm$ 0.007 & \cellcolor[HTML]{EFEFEF}0.152 $\pm$ 0.010 & \cellcolor[HTML]{EFEFEF}0.127 $\pm$ 0.011\\ \hline
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGD} & 0.341 $\pm$ 0.028 & 0.207 $\pm$ 0.015 & 0.220 $\pm$ 0.014 \\
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}PGD-ES} & \cellcolor[HTML]{EFEFEF}0.057 $\pm$ 0.006 & \cellcolor[HTML]{EFEFEF}0.298 $\pm$ 0.017 & \cellcolor[HTML]{EFEFEF}0.322 $\pm$ 0.021 \\ \cline{3-3}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.380 $\pm$ 0.039 & 0.136 $\pm$ 0.017 & 0.129 $\pm$ 0.019 \\
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-4}{*}{Inception}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}FGM-ES} & \cellcolor[HTML]{EFEFEF}0.180 $\pm$ 0.022 & \cellcolor[HTML]{EFEFEF}0.213 $\pm$ 0.019 & \cellcolor[HTML]{EFEFEF}0.219 $\pm$ 0.020 \\ \cline{2-6} 
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGD} & 0.307 $\pm$ 0.041 & 0.211 $\pm$ 0.011 & 0.228 $\pm$ 0.012 \\
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}PGD-ES} & \cellcolor[HTML]{EFEFEF}0.030 $\pm$ 0.004 & \cellcolor[HTML]{EFEFEF}0.256 $\pm$ 0.011 & \cellcolor[HTML]{EFEFEF}0.278 $\pm$ 0.013\\ \cline{3-3}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.373 $\pm$ 0.029 & 0.157 $\pm$ 0.021 & 0.170 $\pm$ 0.019 \\
% \multicolumn{1}{|c|}{\multirow{-8}{*}{SVHN}} & \multicolumn{1}{c|}{\multirow{-4}{*}{AlexNet}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}FGM-ES} & \cellcolor[HTML]{EFEFEF}0.064 $\pm$ 0.011 & \cellcolor[HTML]{EFEFEF}0.241 $\pm$ 0.015 & \cellcolor[HTML]{EFEFEF}0.260 $\pm$ 0.014 \\ \hline
% \end{tabular}


}

\caption{\label{tab:early_stopping} Generalization error and adversarial examples' transferability with and without early stopping (ES)%\vspace{-2mm}
}
\end{table*}



To generate norm-bounded perturbations, we used the standard projected gradient descent (PGD) and fast gradient method (FGM) \citep{goodfellow2014explaining} attacks. We implemented the PGD and FGM algorithms by projecting the perturbations according to both the standard $L_2$-norm and the $L_\infty$-norm, where the latter results in the widely used fast gradient sign method (FGSM) attack scheme \citep{goodfellow2014explaining} in the FGM case. For simulating $L_2$-norm-bounded perturbations, we chose the maximum $L_2$-norm (attack power) as $\epsilon=\gamma\mathbb{E}_{\hat{P}}[\Vert X\Vert_2]$ with $\gamma=0.05$ unless stated otherwise. For $L_\infty$-norm-bounded attacks, we chose $\epsilon=8/255$ for the normalized samples. For optimizing PGD perturbations, we applied $r=15$ PGD steps, where we used the standard rule $\alpha=1.5\epsilon/r$ to choose the stepsize parameter $\alpha$. We trained every DNN model for 100 epochs using the Adam optimizer \citep{kingma2014adam} with a batch size of $128$. %To avoid label leaking pruth to generaroblem described in \textcolor{red}{PAPER}, we used the prediction of the training model instead of the ground tte adversarial samples. 
The numerical experiments were implemented using the PyTorch platform and were run on one standard RTX-3090 GPU. 
\iffalse
so less $\phi(\mathcal{F})$ indicates better generalization for model $\mathcal{F}$. $\phi(\mathcal{F})$ is formulated as: 
\begin{equation}
    \phi(\mathcal{F})=\mathop{E}_{x\in \mathcal{X}_{Train}}[P(\mathcal{F}(x+p)=y_{gt})]-\mathop{E}_{x\in \mathcal{X}_{test}}[P(\mathcal{F}(x+p)=y_{gt})].
\end{equation}
\fi



In our experiments, we used three standard image recognition datasets: 1) CIFAR-10, 2) CIFAR-100 \citep{cifar}, 3) SVHN \citep{SVHN}, and the following four neural network architectures: 1) AlexNet \citep{krizhevsky2012imagenet}, 2) Inception-Net \citep{inceptionv1}, 3) VGG-16 \citep{VGG}, 4) ResNet-18 \citep{he2016deep}. In the reported results, we evaluate a prediction model's generalization performance using the accuracy gap between the training and test sets. For evaluating the transferability performance, we used the generated black-box adversarial examples and measured the transferability rate as the target network's averaged classification error over the designed adversarial examples on the test set. Therefore, a higher transferability rate implies more transferable adversarial examples. This in turn implies that, under a worse transferability score on the training data, which is the case under a stronger norm-based regularization, the generalization of the attack scheme has improved.

In the transferability evaluation of the generated adversarial examples, we considered only the samples whose clean versions had been labeled correctly by the target network, because we expect the clean version of an adversarial example to be labeled correctly by the target network. %Therefore, we note that the better transferability scores reported for the regularized DNNs is not a mere consequence of their better performance on clean test samples. 
Also, we used different training sets for the substitute and target classifiers to separate the generalization effects of the substitute and target DNNs. To do this, we split the training set in half and used each half for training one of the classifiers. Finally, consistent with our theoretical analysis, we used PGD adversarial training for training the substitute DNN and applied standard ERM training for training the target DNNs. %\vspace{-5mm}


\iffalse
For model $\mathcal{F}$, define the expected transferability of all perturbations generated from $\mathcal{F}$ as $\Omega(\mathcal{F})$, and less $\Omega(\mathcal{F})$ represents better transferability for perturbations generated from $\mathcal{F}$. $\Omega(\mathcal{F})$ can be formulated as:
 \begin{equation}
     \Omega(\mathcal{F})=\mathop{E}_{x\in \mathcal{X}}[P(\mathcal{F}'(x+p)=y_{gt})]
 \end{equation}
 
 It's worth noting that when evaluating $\Omega(\mathcal{F})$, we only generate adversarial perturbations from clean samples when these clean samples can be labeled by $\mathcal{F}$ correctly. This is to make sure that the generated perturbations are representative to model $\mathcal{F}$, and only in this case the transferability of perturbations makes sense.
 \fi

\begin{figure*}[t]
    \centering
    \includegraphics[width=0.95\linewidth]{Figures/visualization_only_perturbations.png}
    \caption{Visualization of adversarial perturbations. Each set of three pictures shows the original sample, the untransferable perturbation from the unregularized DNN, and the transferable perturbation generated by the regularized model (left to right). The perturbations are re-scaled to 0--255 for visualization. $A\rightarrow B$ indicates the ground-truth label $A$ and the transferable example's predicted label $B$. %\vspace{-2mm}%. When the intransferable perturbation can not fool the target model, the transferable perturbation can fool the target model to predict it as $B$.
    }
    \label{fig:visualization}
\end{figure*}

 

%\vspace{-2mm}
\subsection{Transferability under Spectral Regularization}
We evaluated the generalization and transferability performance of the discussed black-box attack schemes for Lipschitz-regularized neural nets. To apply spectral regularization, we used the spectral normalization method \citep{miyato2018spectral,farnia2018generalizable} constraining the $L_2$-operator norm of the substitute DNN's weight matrices. %As a result, the spectrally-regularized DNN is guaranteed to have a bounded Lipschitz constant scaling with the product of the operator norms. 
We define the hyperparameter $\beta$ as the maximum allowed $L_2$-operator norm. Then, the standard spectral normalization method modifies each weight matrix $W_i$ in \eqref{neural_network_functions} to $\widetilde{W}_i$:%\vspace{-1mm}
\begin{equation*}
    \widetilde{W}_i \, := \, \frac{W_i}{\max\{1,\frac{\Vert W_i\Vert_2}{\beta}\}} \, =\, \begin{cases}
    W_i \quad &\text{\rm if}\; \Vert W_i\Vert_2\le \beta, \\
    \frac{\beta}{\Vert W_i\Vert_2}W_i \quad &\text{\rm otherwise.}
    \end{cases}
\end{equation*}
The above operation will regularize the matrix's operator norm to be upper-bounded by $\beta$. %Assuming the spectral norm of all activation functions are upper-bounded by $1$, e.g. the ReLU and ELU activation functions, then the Lipschitz coefficient of the regularized $L$-layer DNN will be bounded by $\beta^L$.



Figure~\ref{fig:tran_L2_FGM} shows the generalization error of the model and the attack transferability rates of the generated perturbations using the substitute classifiers AlexNet and Inception-Net under different values of the spectral-norm hyperparameter $\beta$. The numerical results show that, in all cases, by applying the stronger regularization coefficients $\beta=1.0,1.3$, the AlexNet and Inception classifiers achieve the highest generalization performance and attack transferability rates to the target ResNet18 and VGG16. Therefore, spectral regularization not only helped the DNN classifier gain a better generalization score, which is an expected outcome, but further improved the transferability of the perturbations to unseen DNNs with different architectures. These numerical results suggest that the substitute DNN's generalization impacts the transferability of the adversarial examples.


%\textcolor{red}{Surprisingly, there shows a strong correlation between generalization and migration, which proves that better generalization will lead to better transferability for generated perturbations. We will report more results in supplementary material.(or show another table for more results.)}


% \begin{table}[htbp]
% \centering
% \caption{Generalization performance (Gen. Err.) of different DNN classifiers on three standard image recognition datasets, with and without Lipschitz regularization, and the generated perturbations' transferability. $\beta=\infty$ implies no spectral regularization. %Train($\mathcal{F}$) and Test($\mathcal{F}$) represents the perturbations' transferability, which are generated from training and test samples, respectively. 
% %VGG16 and RN18 represent setting target model as VGG16 and ResNet18.
% }

% \begin{tabular}{ccccccccc}
% \hline
% Dataset & Model & Method & $\beta$ & Gen. Err. & \begin{tabular}[c]{@{}c@{}}Train\\ (V16)\end{tabular} & \begin{tabular}[c]{@{}c@{}}Test\\ (V16)\end{tabular} & \begin{tabular}[c]{@{}c@{}}Train\\ (R18)\end{tabular} & \begin{tabular}[c]{@{}c@{}}Test\\ (R18)\end{tabular} \\ \hline
% \multicolumn{1}{c|}{\multirow{14}{*}{Cifar10}} & \multicolumn{1}{c|}{\multirow{8}{*}{Inception}} & \multicolumn{1}{c|}{\multirow{2}{*}{PGD}} & \multicolumn{1}{c|}{$\infty$} & 0.508 & 0.821 & 0.896 & 0.855 & 0.916 \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1} & \textbf{0.258} & \textbf{0.768} & \textbf{0.850} & \textbf{0.781} & \textbf{0.866} \\ \cline{3-4}
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{2}{*}{FGM}} & \multicolumn{1}{c|}{$\infty$} & 0.466 & 0.871 & 0.908 & 0.889 & 0.922 \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1} & \textbf{0.320} & \textbf{0.773} & \textbf{0.864} & \textbf{0.806} & \textbf{0.887} \\ \cline{3-4}
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{2}{*}{PGD($L_\infty$)}} & \multicolumn{1}{c|}{$\infty$} & 0.406 & 0.769 & 0.853 & 0.795 & 0.876 \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1} & \textbf{0.239} & \textbf{0.714} & \textbf{0.809} & \textbf{0.753} & \textbf{0.843} \\ \cline{3-4}
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{2}{*}{FGM($L_\infty$)}} & \multicolumn{1}{c|}{$\infty$} & 0.504 & 0.809 & 0.876 & 0.829 & 0.899 \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1} & \textbf{0.243} & \textbf{0.715} & \textbf{0.809} & \textbf{0.750} & \textbf{0.847} \\ \cline{2-9} 
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{6}{*}{AlexNet}} & \multicolumn{1}{c|}{\multirow{2}{*}{PGD}} & \multicolumn{1}{c|}{$\infty$} & 0.545 & 0.838 & 0.895 & 0.858 & 0.913 \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1} & \textbf{0.342} & \textbf{0.740} & \textbf{0.838} & \textbf{0.759} & \textbf{0.861} \\ \cline{3-4}
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{2}{*}{FGM}} & \multicolumn{1}{c|}{$\infty$} & 0.505 & 0.874 & 0.911 & 0.892 & 0.930 \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1} & \textbf{0.451} & \textbf{0.742} & \textbf{0.853} & \textbf{0.772} & \textbf{0.878} \\ \cline{3-4}
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{2}{*}{FGM($L_\infty$)}} & \multicolumn{1}{c|}{$\infty$} & 0.596 & 0.833 & 0.898 & 0.849 & 0.911 \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1} & \textbf{0.487} & \textbf{0.678} & \textbf{0.801} & \textbf{0.703} & \textbf{0.831} \\ \hline
% \multicolumn{1}{c|}{\multirow{6}{*}{Cifar100}} & \multicolumn{1}{c|}{\multirow{4}{*}{Inception}} & \multicolumn{1}{c|}{\multirow{2}{*}{PGD}} & \multicolumn{1}{c|}{$\infty$} & 0.602 & 0.500 & 0.697 & 0.541 & 0.730 \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.3} & \textbf{0.494} & \textbf{0.486} & \textbf{0.670} & \textbf{0.511} & \textbf{0.699} \\ \cline{3-4}
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{2}{*}{FGM}} & \multicolumn{1}{c|}{$\infty$} & 0.717 & 0.526 & 0.732 & 0.554 & 0.764 \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.3} & \textbf{0.558} & \textbf{0.488} & \textbf{0.687} & \textbf{0.511} & \textbf{0.725} \\ \cline{2-9} 
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{2}{*}{AlexNet}} & \multicolumn{1}{c|}{\multirow{2}{*}{FGM}} & \multicolumn{1}{c|}{$\infty$} & 0.758 & 0.521 & 0.742 & 0.546 & 0.768 \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1} & \textbf{0.611} & \textbf{0.450} & \textbf{0.658} & \textbf{0.470} & \textbf{0.690} \\ \hline
% \multicolumn{1}{c|}{\multirow{6}{*}{SVHN}} & \multicolumn{1}{c|}{\multirow{4}{*}{AlexNet}} & \multicolumn{1}{c|}{\multirow{2}{*}{PGD}} & \multicolumn{1}{c|}{$\infty$} & 0.298 & 0.765 & 0.789 & 0.764 & 0.775 \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1} & \textbf{0.199} & \textbf{0.701} & \textbf{0.724} & \textbf{0.691} & \textbf{0.708} \\ \cline{3-4}
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{2}{*}{FGM}} & \multicolumn{1}{c|}{$\infty$} & 0.374 & 0.841 & 0.852 & 0.839 & 0.844 \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1} & \textbf{0.199} & \textbf{0.718} & \textbf{0.743} & \textbf{0.710} & \textbf{0.727} \\ \cline{2-9} 
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{2}{*}{Inception}} & \multicolumn{1}{c|}{\multirow{2}{*}{PGD($L_\infty$)}} & \multicolumn{1}{c|}{$\infty$} & 0.184 & 0.636 & 0.607 & 0.630 & 0.582 \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1} & \textbf{0.082} & \textbf{0.558} & \textbf{0.526} & \textbf{0.551} & \textbf{0.503} \\ \hline
% \end{tabular}
% \label{tab:generalization_transferability_SN}
% \end{table}


% \begin{table}[]










Table~\ref{tab:generalization_transferability_SN} shows our numerical results validating the connection between the substitute DNN's generalization and the transferability of adversarial examples designed under an $L_2$-norm constraint. In this table, we report the performance of spectral regularization under the value of the hyperparameter $\beta$ that performed best on the validation samples. As can be seen in this table, spectral regularization manages to consistently improve the transferability rates of the adversarial examples, which confirms our hypothesis that better generalization will lead to more transferable adversarial examples. The numerical results for $L_\infty$-norm-based adversarial examples can be found in the Appendix.%\vspace{-3mm}

%Surprisingly, in all these experiments, \textcolor{red}{There is a strict positive correlation between generalization and migration}.







\subsection{Transferability via Early Stopping}
%\vspace{-3mm}
Next, we used the implicit regularization mechanism of early stopping \citep{yao2007early} to validate that better generalization achieved under early stopping can help to generate more transferable adversarial examples. To perform early stopping, we used 30\% of the original test set as the validation set, and used the remaining 70\% to measure the test accuracy. We stopped the DNN training when the trained model achieved its best performance on the validation samples. 

We present the CIFAR-10 and SVHN numerical results in Table~\ref{tab:early_stopping}, and the complete set of obtained numerical results is in the Appendix. Our numerical results suggest that both the generalization and transferability scores considerably improve under early stopping regularization. The observation is consistent with the results reported in the literature \citep{benz2021batch} and our hypothesis on the impact of the generalization of the substitute network on the transferability of adversarial examples.




% \begin{table}[htbp]
% \centering
% \caption{Generalization and transferability for multiple architectures and datasets. $^*$ represents applying early stopping during training as regularization.}
% \begin{tabular}{|cccccccc|}
% \hline
% Dataset & Model & Method & Gen. Err. & \begin{tabular}[c]{@{}c@{}}Train\\ (VGG16)\end{tabular} & \begin{tabular}[c]{@{}c@{}}Test\\ (VGG16)\end{tabular} & \begin{tabular}[c]{@{}c@{}}Train\\ (ResNet18)\end{tabular} & \begin{tabular}[c]{@{}c@{}}Test\\ (ResNet18)\end{tabular} \\ \hline
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGD} & 0.517 & 0.837 & 0.874 & 0.859 & 0.896 \\
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}PGD$^*$} & \cellcolor[HTML]{EFEFEF}0.073 & \cellcolor[HTML]{EFEFEF}0.778 & \cellcolor[HTML]{EFEFEF}0.802 & \cellcolor[HTML]{EFEFEF}0.813 & \cellcolor[HTML]{EFEFEF}0.828 \\ \cline{3-3}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.467 & 0.868 & 0.900 & 0.887 & 0.911 \\
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-4}{*}{Inception}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}FGM$^*$} & \cellcolor[HTML]{EFEFEF}0.126 & \cellcolor[HTML]{EFEFEF}0.784 & \cellcolor[HTML]{EFEFEF}0.830 & \cellcolor[HTML]{EFEFEF}0.816 & \cellcolor[HTML]{EFEFEF}0.853 \\ \cline{2-8} 
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGD} & 0.579 & 0.868 & 0.903 & 0.888 & 0.923 \\
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}PGD$^*$} & \cellcolor[HTML]{EFEFEF}0.061 & \cellcolor[HTML]{EFEFEF}0.832 & \cellcolor[HTML]{EFEFEF}0.846 & \cellcolor[HTML]{EFEFEF}0.854 & \cellcolor[HTML]{EFEFEF}0.864 \\ \cline{3-3}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.520 & 0.873 & 0.900 & 0.893 & 0.913 \\
% \multicolumn{1}{|c|}{\multirow{-8}{*}{Cifar10}} & \multicolumn{1}{c|}{\multirow{-4}{*}{AlexNet}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}FGM$^*$} & \cellcolor[HTML]{EFEFEF}0.092 & \cellcolor[HTML]{EFEFEF}0.827 & \cellcolor[HTML]{EFEFEF}0.848 & \cellcolor[HTML]{EFEFEF}0.849 & \cellcolor[HTML]{EFEFEF}0.873 \\ \hline
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGD} & 0.341 & 0.790 & 0.793 & 0.789 & 0.780 \\
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{Inception}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}PGD$^*$} & \cellcolor[HTML]{EFEFEF}0.057 & \cellcolor[HTML]{EFEFEF}0.699 & \cellcolor[HTML]{EFEFEF}0.702 & \cellcolor[HTML]{EFEFEF}0.692 & \cellcolor[HTML]{EFEFEF}0.678 \\ \cline{2-8} 
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGD} & 0.307 & 0.765 & 0.789 & 0.762 & 0.772 \\
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}PGD$^*$} & \cellcolor[HTML]{EFEFEF}0.030 & \cellcolor[HTML]{EFEFEF}0.735 & \cellcolor[HTML]{EFEFEF}0.744 & \cellcolor[HTML]{EFEFEF}0.727 & \cellcolor[HTML]{EFEFEF}0.723 \\ \cline{3-3}
% \multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.373 & 0.827 & 0.843 & 0.823 & 0.830 \\
% \multicolumn{1}{|c|}{\multirow{-6}{*}{SVHN}} & \multicolumn{1}{c|}{\multirow{-4}{*}{AlexNet}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}FGM$^*$} & \cellcolor[HTML]{EFEFEF}0.064 & \cellcolor[HTML]{EFEFEF}0.758 & \cellcolor[HTML]{EFEFEF}0.759 & \cellcolor[HTML]{EFEFEF}0.754 & \cellcolor[HTML]{EFEFEF}0.740 \\ \hline
% \end{tabular}
% \label{tab:early_stopping}
% \end{table}

% \subsection{Validation of transferable adversarial samples' gradient}
%\subsection{Visualization of transferable perturbations}



Finally, Figure~\ref{fig:visualization} illustrates 12 uniformly sampled transferable adversarial examples under spectral regularization and early stopping. We note that the adversarial examples designed by the unregularized DNN for these test samples failed to transfer to the target DNNs. We also observed that the transferable perturbations generated from a regularized DNN had sharper edges and less noise power in the background, and concentrated the power on the central part. 


\section{Conclusion}
In this paper, we provided theoretical and numerical evidence on how the generalization properties of a substitute neural network can influence the transferability of the generated adversarial examples to other classifiers. While the transferability of black-box adversarial attacks and generalization power of the substitute classifier may seem to be two orthogonal factors, our results indicate existing interconnections between the two aspects. However, our bounds were based on uniform convergence analysis which cannot directly capture the interconnections between the generalization and optimization properties. An interesting future direction is to extend the generalization analysis to overparameterized function spaces in order to understand the role of benign overfitting in the transferability of adversarial examples. Also, our experimental results motivate further studies of how other popular regularization methods in deep learning, such as batch normalization and dropout, can affect the transferability of adversarial perturbations. 


\subsection*{Acknowledgments}
This work was partially
supported by a grant from the Research Grants Council of the Hong
Kong Special Administrative Region, China, Project 14209920, and was partially supported by a CUHK Direct Research Grant. Also, the authors would like to thank the anonymous reviewers for their constructive feedback and suggestions.

{\small{
\bibliography{wang_695}
}}

\end{document}
