\documentclass[accepted]{uai2023} % for initial submission
% \documentclass[accepted]{uai2023} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like

%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2023} % ptmx math instead of Computer
% Modern (has noticeable issues)
% \documentclass[mathfont=newtx]{uai2023} % newtx fonts (improves upon
 % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
% nice language for creating drawings and diagrams



% If your paper is accepted, change the options for the package
% aistats2023 as follows:
%
%\usepackage[accepted]{aistats2023}
%
% This option will print headings for the title of your paper and
% headings for the authors names, plus a copyright note at the end of
% the first column of the first page.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}

% If you set papersize explicitly, activate the following three lines:
%\special{papersize = 8.5in, 11in}
%\setlength{\pdfpageheight}{11in}
%\setlength{\pdfpagewidth}{8.5in}

% If you use natbib package, activate the following three lines:
%\renewcommand{\bibname}{References}
%\renewcommand{\bibsection}{\subsubsection*{\bibname}}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
\bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

\usepackage{enumitem}

\usepackage[title]{appendix}
\usepackage{amsthm}
\usepackage[utf8]{inputenc} % allow utf-8 input
\usepackage[T1]{fontenc}    % use 8-bit T1 fonts
%\usepackage{hyperref}       % hyperlinks

%\usepackage{url}            % simple URL typesetting
\usepackage{booktabs}       % professional-quality tables
\usepackage{amsfonts}       % blackboard math symbols
\usepackage{nicefrac}       % compact symbols for 1/2, etc.
\usepackage{microtype}      % microtypography

\usepackage{algorithm}
%\usepackage{algorithmic}  
\usepackage{longtable}
\newtheorem{ex}{Example}
\newtheorem{mydef}{Definition}
%\newtheorem{thm}{Theorem}
\newtheorem{theorem}{Theorem}
\newtheorem{remark}{Remark}
\newtheorem{lemma}{Lemma}
\newtheorem{cor}{Corollary}
\newtheorem{obs}{Observation}
\newtheorem{prop}{Proposition}
\newtheorem{assumption}{Assumption}
\usepackage{amsmath}
\allowdisplaybreaks
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%% Amir added (begin) %%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\gr}{\nabla}
\newcommand{\grw}{\nabla_{\w}} 
\newcommand{\grd}{\nabla_{\delta}}
\newcommand{\grL}{\nabla_{\Lambda}}
\newcommand{\grpsi}{\nabla_{\bpsi}}
\newcommand{\grPsi}{\nabla_{\Psi}}
\newcommand{\sgr}{\tilde{\nabla}}
\newcommand{\sgrw}{\sgr_{\w}} 
\newcommand{\sgrd}{\sgr_{\delta}}
\newcommand{\sgrL}{\sgr_{\Lambda}}
\newcommand{\sgrpsi}{\sgr_{\psi}}
\newcommand{\sgrbpsi}{\sgr_{\bpsi}}
\newcommand{\sgrPsi}{\sgr_{\Psi}}
\newcommand{\w}{\bm{w}}
\newcommand{\bpsi}{\bm{\psi}}
\newcommand{\bPsi}{\bm{\Psi}}
\newcommand{\E}{\mathbb{E}}
\newcommand{\wbar}{\overline{\w}}
\newcommand{\sigmaw}{\sigma^2_{\w}}
\newcommand{\sigmapsi}{\sigma^2_{\psi}}

%\interdisplaylinepenalty=2500


%\usepackage[bookmarks=false]{hyperref}
\usepackage[T1]{fontenc}
%\usepackage[cmex10]{amsmath}
%\usepackage{cite}
\usepackage[utf8]{inputenc}
\usepackage{pgfplots}
\usepackage{graphicx}
\usepackage{subcaption}
\usepackage{pstricks}
%\usepackage{appendix}
\usepackage{color}
\usepackage{amsmath}
\usepackage{bbm}
\usepackage{tcolorbox}
\usepackage{tikz}
\usepackage{pgfplots}
\usepackage{wrapfig}
\usepackage{lipsum}  
\usetikzlibrary{positioning}
\usepackage{tcolorbox}
\usepackage{amsfonts,amssymb}
%\usepackage{amsfonts,amssymb,amsthm}
%\usepackage{natbib}
\usepackage{mathtools}
\usepackage{commath}
\usepackage{relsize}
\usepackage{bbm}
\usepackage{bm}
\usepackage[font={small}]{caption}
\usepackage{comment,color,soul}
% \usepackage{enumerate} 
\usepackage{amsfonts}
\usepackage{url}
\usepackage{lipsum}
\usepackage[thinlines]{easytable}
\usepackage{nicefrac}
%\usepackage{authblk}
%\usepackage{titling}
\usepackage{algorithm}
\usepackage{algpseudocode}

\usepackage{dblfloatfix}
%\usepackage{mysymbol}
\usepackage{float}
\usepackage{multirow}
% \usepackage{color}
\usepackage{colortbl}
% \usepackage{enumitem}


 



%\def\changenumberingoff{\def\Let@{\def\\{\notag\math@cr}}

\DeclarePairedDelimiter\floor{\lfloor}{\rfloor}




    

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%% Amir added (end) %%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand\numberthis{\addtocounter{equation}{1}\tag{\theequation}}

\usepackage{longtable}

\usepackage{amsmath}
\allowdisplaybreaks
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%% Amir added (begin) %%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%\interdisplaylinepenalty=2500


%\usepackage[bookmarks=false]{hyperref}
\usepackage[T1]{fontenc}
%\usepackage[cmex10]{amsmath}
%\usepackage{cite}
\usepackage[utf8]{inputenc}
\usepackage{pgfplots}
\usepackage{graphicx}
\usepackage{subcaption}
\usepackage{pstricks}
%\usepackage{appendix}
\usepackage{color}
\usepackage{amsmath}
\usepackage{tcolorbox}
\usepackage{tikz}
\usepackage{pgfplots}
\usepackage{wrapfig}
\usepackage{lipsum}  
\usetikzlibrary{positioning}
\usepackage{tcolorbox}
\usepackage{amsfonts,amssymb}
%\usepackage{amsfonts,amssymb,amsthm}
%\usepackage{natbib}
\usepackage{mathtools}
\usepackage{commath}
\usepackage{relsize}
\usepackage[font={small}]{caption}
\usepackage{comment}
% \usepackage{enumerate} 
\usetikzlibrary{arrows,automata}
\usepackage{amsfonts}
\usepackage{url}
\usepackage{lipsum}
\usepackage[thinlines]{easytable}
\usepackage{nicefrac}
%\usepackage{authblk}
%\usepackage{titling}
\usepackage{algorithm}
\usepackage{algpseudocode}
% \usepackage{mysymbol}
\usepackage{float}
\usepackage{multirow}
% \usepackage{color}
\usepackage{colortbl}

    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz}

% for cross referencing the main text
% PLEASE ONLY USE xr IN THE SUPPLEMENTARY MATERIAL. 
% In the main paper, hard code any cross-reference to the supplementary material. 
\usepackage{xr} 
\externaldocument{uai2023-template}

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{On the Role of Generalization in Transferability of Adversarial Examples\\(Supplementary Material)}

% The standard author block has changed for UAI 2023 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:<wangyilin210210@link.cuhk.edu.hk>?Subject=Your UAI 2023 paper}{Yilin Wang}}
\author[1]{\href{mailto:<farnia@cse.cuhk.edu.hk>?Subject=Your UAI 2023 paper}{Farzan Farnia}}
% Add affiliations after the authors
\affil[1]{
    Department of Computer Science and Engineering, \linebreak
    The Chinese University of Hong Kong, \linebreak
    Hong Kong SAR
}

  %% Choose your variant of English; be consistent
% \usepackage[british]{babel}

%% Some suggested packages, as needed:

  
  
  \begin{document}
  
\onecolumn %% Remove this line if a two-column layout is desired for the supplement
\maketitle


\appendix
\section{Proofs}
\subsection{Proof of Proposition 1}
To prove the proposition, note that the optimization problem for $\lambda$-optimal adversarial attack scheme can be written as
\begin{equation*}
    \max_{\delta:\mathcal{X}\times \mathcal{Y}\rightarrow \mathbb{R}^d}\; \mathbb{E}\biggl[\ell\bigl(h_\mathbf{w}(\mathbf{X}+\delta(\mathbf{X},Y)),Y\bigr) - \frac{\lambda}{2}\Vert  \delta(\mathbf{X},Y) \Vert^2\biggr].
\end{equation*}
We observe that the above optimization problem decouples into separate problems for every $(\mathbf{x},y)$, and hence $\delta^*(\mathbf{x},y)$ is the optimal solution to the following optimization problem
\begin{equation*}
    \max_{\boldsymbol{\delta}\in\mathbb{R}^d}\; \ell\bigl(h_\mathbf{w}(\mathbf{x}+\boldsymbol{\delta}),y\bigr) - \frac{\lambda}{2}\Vert  \boldsymbol{\delta} \Vert^2.
\end{equation*}
Since $\ell \circ h_{\mathbf{w}}$ is assumed to be $\lambda$-smooth in $\mathbf{x}$, the objective function in the above optimization problem is a concave function of $\boldsymbol{\delta}$. This is because the Hessian of the above objective function will be negative semi-definite as
\begin{equation}
     \nabla_{\boldsymbol{\delta}}^2 \biggl[\ell\bigl(h_\mathbf{w}(\mathbf{x}+\boldsymbol{\delta}),y\bigr) - \frac{\lambda}{2}\Vert  \boldsymbol{\delta} \Vert^2 \biggr] =  \nabla_{\boldsymbol{\delta}}^2 \ell\bigl(h_\mathbf{w}(\mathbf{x}+\boldsymbol{\delta}),y\bigr) -\lambda I_{d\times d} \preceq \mathbf{0}.
\end{equation}
Therefore, the first-order optimality condition, which is both necessary and sufficient for maximizing a concave function, implies that a globally optimal solution $\delta^*(\mathbf{x},y)$ to the above concave maximization problem is a solution to
\begin{equation*}
    \nabla_{\mathbf{x}}\ell\bigl(h_\mathbf{w}(\mathbf{x}+\delta^*(\mathbf{x},y)),y\bigr) - \lambda \delta^*(\mathbf{x},y) = \mathbf{0}. 
\end{equation*}
The above necessary and sufficient condition for $\delta^*(\mathbf{x},y)$ can be rewritten as:
\begin{equation*}
    \bigl( \mathbf{x} + \delta^*(\mathbf{x},y)\bigr) -\frac{1}{\lambda}\nabla_{\mathbf{x}}\ell\bigl(h_\mathbf{w}(\mathbf{x}+\delta^*(\mathbf{x},y)),y\bigr) = \mathbf{x}. 
\end{equation*}
Note that this condition is equivalent to the following equation which completes the proof:
\begin{equation*}
    \delta^*(\mathbf{x},y) \, =\,   \left(\text{\rm Id}_{\mathbf{x}} - \frac{1}{\lambda}\nabla_{\mathbf{x}}\ell\circ h_\mathbf{w}\right)^{-1}(\mathbf{x}) \, - \, \mathbf{x}.
\end{equation*}


\subsection{Proof of Theorem 1}
\begin{assumption}\label{Assumption: loss}
Loss function $\ell(y,y')$ is a $c$-bounded, $1$-Lipschitz, and $1$-smooth function of the input $y$, i.e. for every $y_1,y_2,y'\in\mathcal{Y}$ we have $\vert \ell(y_1,y')\vert\le c$, $|\ell(y_1,y')-\ell(y_2,y')| \le \Vert y_1-y_2 \Vert_2 $, and $\Vert\nabla_y\ell(y_1,y')-\nabla_y\ell(y_2,y')\Vert_2 \le \Vert y_1-y_2 \Vert_2 $.
\end{assumption}
\begin{assumption}\label{Assumption: substitute neural network}
The set of substitute DNNs in the black-box attack scheme $\mathcal{H_W}=\{h_\mathbf{w}:\, \mathbf{w}\in\mathcal{W} \}$ contains $L$-layer neural networks $h_\mathbf{w}(\mathbf{x})=W_L \phi_{L}\bigl(W_{L-1}\phi_{L-1}(\cdots W_1\phi_{1}(W_{0}\mathbf{x}) \cdots)\bigr)$. We suppose that the dimensions of matrices $W_0,\ldots,W_L$ are bounded by $D$, and assume every activation $\phi_i$ satisfies $\phi_i(0)=0$ and is $\gamma_i$-Lipschitz and $\gamma_i$-smooth, i.e., $\max\{|\phi_i'(z)|,|\phi_i''(z)|\}\le \gamma_i$ holds for every $z\in\mathbb{R}$.
\end{assumption}
\begin{assumption}\label{Assumption: target neural network}
The class of target classifiers $\mathcal{F_V}=\{f_\mathbf{v}:\, \mathbf{v}\in\mathcal{V} \}$ consists of $K$-layer neural network functions $f_\mathbf{v}(\mathbf{x})=V_K \psi_{K}\bigl(V_{K-1}\psi_{K-1}(\cdots V_1\psi_{1}(V_{0}\mathbf{x}) \cdots)\bigr)$ with activation functions $\psi_i$. We suppose that the dimensions of matrices $V_0,\ldots,V_K$ are bounded by $D$. Also, we assume every $\psi_i$ satisfies $\psi_i(0)=0$ and is $\xi_i$-Lipschitz, i.e., $\max_z\: |\psi_i'(z)|\le \xi_i$. Also, we define the capacity $R_\mathcal{V}$ as
\begin{equation*}
    R_{\mathcal{V}} := \sup_{\mathbf{v}\in\mathcal{V}}\,\left\{  \bigl(\prod_{i=0}^K \xi_i \Vert V_i \Vert_2 \bigr)\biggl(\sum_{i=0}^K \frac{\Vert V^\top_i \Vert^{2/3}_{2,1}}{\Vert V_i \Vert^{2/3}_{2}} \biggr)^{3/2}\right\}.
\end{equation*}
\end{assumption}

To show Theorem 1, we first present the following lemmas.
\begin{lemma}[\citep{farnia2018generalizable}, Lemma 7]\label{Lemma: smooth DNN}
Under Assumptions \ref{Assumption: loss} and \ref{Assumption: substitute neural network}, the substitute neural network's loss function $\ell(h_{\mathbf{w}}(\mathbf{x}),y)$ is $\kappa$-smooth in input vector $\mathbf{x}$, i.e., its gradient with respect to $\mathbf{x}$ is $\kappa$-Lipschitz and satisfies
\begin{equation*}
    \forall \mathbf{x},\mathbf{x}'\in\mathcal{X},\, y\in\mathcal{Y}:\quad \big\Vert \nabla_{\mathbf{x}}\ell\bigl(h_{\mathbf{w}}(\mathbf{x}),y\bigr) - \nabla_{\mathbf{x}}\ell\bigl(h_{\mathbf{w}}(\mathbf{x}'),y\bigr)\big\Vert \le \kappa \Vert \mathbf{x} - \mathbf{x}'\Vert,
\end{equation*}
where $\kappa = \left(\sum_{i=0}^L \prod_{j=0}^i \gamma_j\Vert W_j\Vert\right)\left(\prod_{i=0}^L \gamma_i\Vert W_i\Vert\right)$.
\end{lemma}
\iffalse
\begin{proof}

We show this lemma through induction on the number of layers $L$. Note that based on Assumption \ref{Assumption: loss}, we can view the loss function as a $1$-Lipschitz and $1$-smooth non-linear function added to the neural network. For the base case of a $0$-layer neural network $h_{\mathbf{w}}(\mathbf{x})=W_0\mathbf{x}$, the Lipschitz constant of $\nabla_{\mathbf{x}}\ell(h_{\mathbf{w}}(\mathbf{x}),y)=W_0^\top  (\nabla\ell)(W_0\mathbf{x},y)$ is bounded by $\Vert W_0\Vert_2^2$. 

Therefore, we suppose that the bound holds for a $L$-layer neural network $h_{\mathbf{w_L}}$, and prove the bound will further apply to the $L+1$-layer neural net $h_{\mathbf{w_{L+1}}}$. Note that according to the chain rule
\begin{align*}
    J_{\mathbf{x}}h_{\mathbf{w_{0:L+1}}}(\mathbf{x}) = J_{\mathbf{x}}(h_{\mathbf{w_{0:L}}}(\mathbf{x}))J_{\mathbf{x}'_L}h_{\mathbf{w_{L+1}}}(\mathbf{x}).
\end{align*}
In the above, $J_{\mathbf{x}}h_{\mathbf{w_{0:L}}}(\mathbf{x})$ denotes the Jacobian matrix of the neural net's layer-$L$'s output with respect to input $\mathbf{x}$, with $\mathbf{x}'_L$ denoting the variable representing the neural net's zeroth-layer output. Therefore, the Lipschitz constant of $J_{\mathbf{x}}h_{\mathbf{w_{0:L+1}}}(\mathbf{x})$ will be bounded by 
\begin{align*}
    &\gamma_{L}\Vert W_L \Vert_2^2\prod_{i=0}^L\bigl(\gamma_i\Vert W_{i+1} \Vert_2\bigr) + \gamma_L\Vert W_L \Vert_2\left(\sum_{i=0}^{L+1} \prod_{j=0}^i \gamma_j\Vert W_j\Vert\right)\left(\prod_{i=0}^{L+1} \gamma_i\Vert W_i\Vert\right) \\ 
    \le \, & 2\bigl(\prod_{i=0}^L\gamma_i\Vert W_i \Vert_2\bigr)\left(\gamma_0\Vert W_0 \Vert_2 + \sum_{j=0}^L\prod_{i=0}^j \right) 
\end{align*}
\end{proof}
\fi

\begin{lemma}
Under Theorem 1's assumptions, the $\lambda$-optimal attack scheme $\delta^*_\mathbf{w}:\mathcal{X}\times\mathcal{Y}\rightarrow \mathbb{R}^d$ satisfies the following output norm constraint for every $(\mathbf{x},y)\in \mathcal{X}\times\mathcal{Y}$:
\begin{equation*}
    \big\Vert \delta^*_\mathbf{w}(\mathbf{x},y)\big\Vert \le \frac{\prod_{i=0}^L \gamma_i\Vert W_i\Vert_2}{\lambda} = \frac{L_{\mathbf{w}}}{\lambda}.
\end{equation*}
\end{lemma}
\begin{proof}
Note that since $\lambda > \left(\sum_{i=0}^L \prod_{j=0}^i \gamma_j\Vert W_j\Vert_2\right)\prod_{i=0}^L \gamma_i\Vert W_i\Vert_2$ holds according to Theorem 1's assumptions, the smoothness condition of Proposition \ref{Prop: correspondence} will hold according to Lemma \ref{Lemma: smooth DNN}. As a result, we have
\begin{equation*}
    {\delta}^*_{\mathbf{w}}(\mathbf{x},y) = \frac{1}{\lambda}\nabla_{\mathbf{x}}\ell\bigl(h_{\mathbf{w}}(\mathbf{x}+{\delta}^*_{\mathbf{w}}(\mathbf{x},y)),y\bigr).
\end{equation*}
Therefore,
\begin{equation*}
    \big\Vert{\delta}^*_{\mathbf{w}}(\mathbf{x},y)\big\Vert \le \frac{\Vert \nabla_{\mathbf{x}}\ell\bigl(h_{\mathbf{w}}(\mathbf{x}+{\delta}^*_{\mathbf{w}}(\mathbf{x},y)),y\bigr)\Vert}{\lambda} \le \frac{\prod_{i=0}^L \gamma_i\Vert W_i\Vert_2}{\lambda}.
\end{equation*}
The final inequality follows from the Lipschitz coefficient of DNN function $h_{\mathbf{w}}$ which is the composition of linear transformation $W_i$'s (with Lipschitz constant $\Vert W_i\Vert_2$) and activation non-linearity $\phi_i$'s  (with Lipschitz constant $\gamma_i$). Therefore, the proof is complete.
\end{proof}

\begin{lemma}\label{Lemma: Perturbation FGM}
Under Assumption \ref{Assumption: substitute neural network}, the substitute neural network $h_{\mathbf{w}}$'s gradient satisfies the following error bound under a perturbation matrix $\Delta_k$ with $L_2$-operator norm $\Vert\Delta_k\Vert_2\le t$ to weight matrix $W_k$, where we define $\widetilde{\mathbf{w}}=\operatorname{vec}(W_0,\ldots,W_{k-1},W_k+\Delta_k,W_{k+1},\ldots,W_L)$:
\begin{equation*}
    \big\Vert \nabla_{\mathbf{x}}h_{\mathbf{w}}(\mathbf{x}) - \nabla_{\mathbf{x}}h_{\widetilde{\mathbf{w}}}(\mathbf{x})\big\Vert \le \frac{L_{\mathbf{w}}\sum_{i=k}^L \prod_{j=0}^i \gamma_j\Vert W_j\Vert}{\Vert W_k\Vert_2}{\Vert\Delta_k\Vert_2}
\end{equation*}
\end{lemma}
\begin{proof}
Note that the neural network's Jacobian with respect to the input follows from:
\begin{align*}
    \mathrm{J}_{h_{\mathbf{w}}}(\mathbf{x}) = \prod_{i=0}^L W_i^\top \operatorname{diag}\bigl( \phi'_i(h_{\mathbf{w_{0:i}}}(\mathbf{x})) \bigr).
\end{align*}
In the above, $h_{\mathbf{w_{0:i}}}(\mathbf{x})$ denotes the neural net's output at layer $i$. Therefore, for $\widetilde{\mathbf{w}}$ which is different from $\mathbf{w}$ only at layer $k$ we will have:
\begin{align*}
    \big\Vert \mathrm{J}_{h_{\mathbf{w}}}(\mathbf{x}) - \mathrm{J}_{h_{\widetilde{\mathbf{w}}}}(\mathbf{x}) \big\Vert_2 \, &\le \, \sum_{i=k}^L\biggl[ \bigl(\prod_{j=0}^L \gamma_j\Vert W_j \Vert_2\bigr) \bigl(\prod_{j=0}^i \gamma_j\Vert W_j \Vert_2\bigr)\biggr] \frac{\Vert\Delta_k\Vert_2}{\Vert W_k\Vert_2} \\
    &= \, \biggl(\prod_{j=0}^L \gamma_j\Vert W_j \Vert_2\biggr)\sum_{i=k}^L\biggl[  \prod_{j=0}^i \gamma_j\Vert W_j \Vert_2\biggr] \frac{\Vert\Delta_k\Vert_2}{\Vert W_k\Vert_2} \\
    &= \, \frac{L_{\mathbf{w}}\sum_{i=k}^L \prod_{j=0}^i \gamma_j\Vert W_j\Vert}{\Vert W_k\Vert_2}{\Vert\Delta_k\Vert_2}.
\end{align*}
The proof is hence finished.
\end{proof}

\begin{lemma}\label{Lemma: Perturbation WRM}
Under Theorem 1's assumptions, the $\lambda$-optimal attack scheme $\delta^*_\mathbf{w}:\mathcal{X}\times\mathcal{Y}\rightarrow \mathbb{R}^d$ satisfies the following error bound under a norm-bounded perturbation $\Delta_k$ with $\Vert\Delta_k\Vert_2\le t$ to weight matrix $W_k$, where we define $\widetilde{\mathbf{w}}=\operatorname{vec}(W_0,\ldots,W_{k-1},W_k+\Delta_k,W_{k+1},\ldots,W_L)$:
\begin{equation*}
    \big\Vert \delta^*_{\mathbf{w}}(\mathbf{x},y) - \delta^*_{\widetilde{\mathbf{w}}}(\mathbf{x},y)\big\Vert \le \frac{L_{\mathbf{w}}\sum_{i=k}^L \prod_{j=0}^i \gamma_j\Vert W_j\Vert}{\tau\lambda \Vert W_k\Vert_2 }\Vert\Delta_k\Vert_2
\end{equation*}
\end{lemma}
\begin{proof}
Since $\lambda > \left(\sum_{i=0}^L \prod_{j=0}^i \gamma_j\Vert W_j\Vert_2\right)\prod_{i=0}^L \gamma_i\Vert W_i\Vert_2$ follows from Theorem 1's assumption, Proposition \ref{Prop: correspondence} will hold according to Lemma \ref{Lemma: smooth DNN} and implies that
\begin{equation*}
    {\delta}^*_{\mathbf{w}}(\mathbf{x},y) = \frac{1}{\lambda}\nabla_{\mathbf{x}}\ell\bigl(h_{\mathbf{w}}(\mathbf{x}+{\delta}^*_{\mathbf{w}}(\mathbf{x},y)),y\bigr).
\end{equation*}
As a result,
\begin{align*}
    &\big\Vert {\delta}^*_{\mathbf{w}}(\mathbf{x},y) - {\delta}^*_{\widetilde{\mathbf{w}}}(\mathbf{x},y)\big\Vert \\
    = \, & \frac{1}{\lambda}\biggl\Vert \nabla_{\mathbf{x}}\ell\bigl(h_{\mathbf{w}}(\mathbf{x}+{\delta}^*_{\mathbf{w}}(\mathbf{x},y)),y\bigr) - \nabla_{\mathbf{x}}\ell\bigl(h_{\widetilde{\mathbf{w}}}(\mathbf{x}+{\delta}^*_{\widetilde{\mathbf{w}}}(\mathbf{x},y)),y\bigr)\biggr\Vert \\
    {\le} \, & \frac{1}{\lambda}\biggl\Vert \nabla_{\mathbf{x}}\ell\bigl(h_{\mathbf{w}}(\mathbf{x}+{\delta}^*_{\mathbf{w}}(\mathbf{x},y)),y\bigr) - \nabla_{\mathbf{x}}\ell\bigl(h_{\mathbf{w}}(\mathbf{x}+{\delta}^*_{\widetilde{\mathbf{w}}}(\mathbf{x},y)),y\bigr)\biggr\Vert \, \\
    &\quad + \, \frac{1}{\lambda}\biggl\Vert \nabla_{\mathbf{x}}\ell\bigl(h_{\mathbf{w}}(\mathbf{x}+{\delta}^*_{\widetilde{\mathbf{w}}}(\mathbf{x},y)),y\bigr) - \nabla_{\mathbf{x}}\ell\bigl(h_{\widetilde{\mathbf{w}}}(\mathbf{x}+{\delta}^*_{\widetilde{\mathbf{w}}}(\mathbf{x},y)),y\bigr)\biggr\Vert \\
     \stackrel{(a)}{\le}  \, & \frac{\operatorname{Lip}(\nabla_{\mathbf{x}}\ell \circ h_{\mathbf{w}} )}{\lambda} \big\Vert {\delta}^*_{\mathbf{w}}(\mathbf{x},y) - {\delta}^*_{\widetilde{\mathbf{w}}}(\mathbf{x},y)\big\Vert \, + \, \frac{L_{\mathbf{w}}\sum_{i=k}^L \prod_{j=0}^i \gamma_j\Vert W_j\Vert}{\lambda\Vert W_k\Vert_2 }{\Vert\Delta_k\Vert_2} \\
     \stackrel{(b)}{\le}  \, & \frac{L_{\mathbf{w}}\sum_{i={0}}^L \prod_{j=0}^i \gamma_j\Vert W_j\Vert}{\lambda} \big\Vert {\delta}^*_{\mathbf{w}}(\mathbf{x},y) - {\delta}^*_{\widetilde{\mathbf{w}}}(\mathbf{x},y)\big\Vert \, + \, \frac{L_{\mathbf{w}}\sum_{i={k}}^L \prod_{j=0}^i \gamma_j\Vert W_j\Vert}{\lambda\Vert W_k\Vert_2}{\Vert\Delta_k\Vert_2} \\
     \stackrel{(c)}{\le}  \, & (1-\tau) \big\Vert {\delta}^*_{\mathbf{w}}(\mathbf{x},y) - {\delta}^*_{\widetilde{\mathbf{w}}}(\mathbf{x},y)\big\Vert \, + \, \frac{L_{\mathbf{w}}\sum_{i=k}^L \prod_{j=0}^i \gamma_j\Vert W_j\Vert}{\lambda\Vert W_k\Vert_2}{\Vert\Delta_k\Vert_2}
\end{align*}
Here, $(a)$ follows from the definition of Lipschitz constant and Lemma \ref{Lemma: Perturbation FGM}'s weight perturbation bound. $(b)$ comes from a direct application of Lemma \ref{Lemma: smooth DNN}, and $(c)$ follows from Theorem 1's assumption. Therefore, the above inequalities collectively lead to the following bound, which completes the proof:
\begin{equation*}
   \tau\,\big\Vert {\delta}^*_{\mathbf{w}}(\mathbf{x},y) - {\delta}^*_{\widetilde{\mathbf{w}}}(\mathbf{x},y)\big\Vert \; \le \; \frac{L_{\mathbf{w}}\sum_{i=k}^L \prod_{j=0}^i \gamma_j\Vert W_j\Vert}{\lambda\Vert W_k\Vert_2}{\Vert\Delta_k\Vert_2}.
\end{equation*}
\end{proof}

To prove Theorem 1, we follow a covering-number-based approach similar to the generalization analysis in \citep{bartlett2017spectrally} for the standard non-adversarial deep supervised learning problem. To do this, we consider the norm constraints $\Vert W_i\Vert_2\le a'_i,\, \Vert W_i\Vert_{2,1}\le b'_i$ for every $i=0,\ldots,L$, and $\Vert V_j\Vert_2\le a_j,\, \Vert V_j\Vert_{2,1}\le b_j$ for every $j=0,\ldots,K$. Now, we define the following covering resolution parameters for the classifier and substitute DNNs' different layers:
\begin{align*}
    \epsilon'_k = \frac{{\tau\lambda a'_k }\alpha'_k \epsilon}{2(\prod_{i=0}^K \xi_i a_i)(\prod_{i=0}^L \gamma_i a'_i)\bigl(\sum_{i=k}^L \prod_{j=0}^i \gamma_j a'_j\bigr)}, \; &\text{\rm where }\; \alpha'_k =\frac{1}{A'}\frac{{b'_k}^{2/3}}{{a'_k}^{2/3}} ,\; A'=\sum_{i=0}^L \frac{{b'_i}^{2/3}}{{a'_i}^{2/3}} \\
    \epsilon_j = \frac{{a_j }\alpha_j \epsilon}{2\prod_{i=j}^K \xi_i a_i}, \; &\text{\rm where }\; \alpha_j =\frac{1}{A}\frac{b_j^{2/3}}{a_j^{2/3}} ,\; A=\sum_{i=0}^K \frac{b_i^{2/3}}{a_i^{2/3}}.
\end{align*}
Note that Lemma \ref{Lemma: Perturbation WRM} implies that by finding an $\epsilon'_k$-covering for each $W_k$ and $\epsilon_j$-covering for each $V_j$, the covering resolution for $\mathcal{F}\circ\Delta_H|_S$ will be upper-bounded by
\begin{equation*}
    \sum_{k=0}^L\biggl[ \frac{L_{\mathbf{w}}L_{\mathbf{v}}\sum_{i=k}^L \prod_{j=0}^i \gamma_j\Vert W_j\Vert}{{\tau\lambda \Vert W_k\Vert_2 }}\epsilon'_k\biggr] + \sum_{k=0}^K\biggl[\frac{\prod_{i=k}^K \xi_i\Vert V_i\Vert_2}{{\Vert V_k\Vert_2 }}\epsilon_k\biggr] = \epsilon.
\end{equation*}
Therefore, by applying Lemma A.7 from \citep{bartlett2017spectrally} we will have the following bound on the $\epsilon$-covering-number for the set $\mathcal{F}\circ\Delta_H|_S = \{\ell\bigl(f_{\mathbf{v}}(\mathbf{X}+\delta^*_{\mathbf{w}}(\mathbf{X},Y)),Y\bigr) :\; \forall 0\le i\le K: \Vert V_i\Vert_2\le a_i,\, \Vert V_i\Vert_{2,1}\le b_i,\; \forall 0\le j\le L:\: \Vert W_j\Vert_2\le a'_j,\, \Vert W_j\Vert_{2,1}\le b'_j   \}$:

\begin{align*}
    &\log \mathcal{N}\bigl(\mathcal{F}\circ\Delta_H|_S , \Vert\cdot\Vert_2,\epsilon \bigr) \, \\
    \le \, &\sum_{i=0}^L \sup_{{\mathbf{w}}_{-i}, \mathbf{v}\in\mathcal{W,V}}\biggl[ \log \mathcal{N}\bigl(\bigl\{\delta^*_{\mathbf{w}}(\mathbf{X},Y):\: \Vert \mathbf{W}_i\Vert_2\le a'_i ,\, \Vert \mathbf{W}_i\Vert_{2,1}\le b'_i \bigr\}, \Vert\cdot\Vert_2,\epsilon'_i \bigr) \biggr] \\
    &\; + \sum_{i=0}^K \sup_{{\mathbf{w}}, \mathbf{v}_{-i}\in\mathcal{W,V}}\biggl[ \log \mathcal{N}\bigl(\bigl\{ h_{\mathbf{v}_{0:i}}(\mathbf{X}),Y):\: \Vert \mathbf{V}_i\Vert_2\le a_i ,\, \Vert \mathbf{V}_i\Vert_{2,1}\le b_i \bigr\}, \Vert\cdot\Vert_2,\epsilon_i \bigr) \biggr] \\
    \le \, &\sum_{i=0}^L \sup_{{\mathbf{w}}_{-i}, \mathbf{v}\in\mathcal{W,V}}\biggl[ \log \mathcal{N}\bigl(\bigl\{\delta^*_{\mathbf{w}}(\mathbf{X},Y):\:  \Vert \mathbf{W}_i\Vert_{2,1}\le b'_i \bigr\}, \Vert\cdot\Vert_2,\epsilon'_i \bigr) \biggr] \\
    &\; + \sum_{i=0}^K \sup_{{\mathbf{w}}, \mathbf{v}_{-i}\in\mathcal{W,V}}\biggl[ \log \mathcal{N}\bigl(\bigl\{ h_{\mathbf{v}_{0:i}}(\mathbf{X}),Y):\:  \Vert \mathbf{V}_i\Vert_{2,1}\le b_i \bigr\}, \Vert\cdot\Vert_2,\epsilon_i \bigr) \biggr] \\
    \le \, &\sum_{i=0}^L \biggl[ \sup_{{\mathbf{w}}_{-i}, \mathbf{v}\in\mathcal{W,V}} \frac{{b'_i}^2\Vert \delta^*_{\mathbf{w}}(\mathbf{X},Y)\Vert_2^2}{{\epsilon'_i}^2} \log(2W^2) \biggr] \\
    &\; + \sum_{i=0}^K \biggl[ \sup_{{\mathbf{w}}, \mathbf{v}_{-i}\in\mathcal{W,V}} \frac{b_i^2\Vert h_{\mathbf{v}_{0:i}}(\mathbf{X})\Vert_2^2}{\epsilon_i^2} \log(2W^2) \biggr] \\
    \le \, &\sum_{i=0}^L \bigl[ \sup_{{\mathbf{w}}_{-i}, \mathbf{v}\in\mathcal{W,V}} \frac{L_{\mathbf{w}}^2\log(2W^2)}{\lambda^2\epsilon^2} \frac{{b'_i}^2}{{\epsilon'_i}^2}  \bigr] \\
    &\; +   \sum_{i=0}^L \biggl[ \frac{4b_i^2(B+\frac{\prod_{i=0}^L \gamma_i a'_i}{\lambda})^2\prod_{i=0}^K \xi_i^2 a_i^2}{\epsilon^2}\sum_{i=0}^K\frac{b_i^2}{\alpha_i^2 a_i^2} \biggr] \\
    \le \, &\frac{\log(2W^2)\prod_{i=0}^L \gamma^2_i {a'_i}^2}{\lambda^2\epsilon^2} \sum_{k=0}^K \bigl[   \frac{{b'_i}^2(\sum_{i=k}^L\prod_{j=0}^i \gamma_i a_i)^2}{{\alpha'_i}^2{a'_i}^2}  \bigr] \\
    &\; +   \sum_{i=0}^L \biggl[ \frac{4b_i^2(B+\frac{\prod_{i=0}^L \gamma_i a'_i}{\lambda})^2\prod_{i=0}^K \xi_i^2 a_i^2}{\epsilon^2}\sum_{i=0}^K\frac{b_i^2}{\alpha_i^2 a_i^2} \biggr] \\
    \le \, &\frac{\log(2W^2)\prod_{i=0}^L \gamma^2_i {a'_i}^2 (\sum_{i=0}^L\prod_{j=0}^i \gamma_i a_i)^2}{\lambda^2\epsilon^2} \sum_{k=0}^K \bigl[   \frac{{b'_k}^2}{{\alpha'_k}^2 {a'_k}^2}  \bigr] \\
    &\; +   \sum_{i=0}^L \biggl[ \frac{4 b_i^2 (B+\frac{\prod_{i=0}^L \gamma_i a'_i}{\lambda})^2\prod_{j=0}^K \xi_j^2 a_j^2}{\epsilon^2}\sum_{k=0}^K\frac{b_k^2}{\alpha_k^2 a_k^2} \biggr] 
    \\
    \le \, &\frac{4\log(2W^2)\prod_{i=0}^L {\gamma}^2_i {a'_i}^2 \bigl(\sum_{i=0}^L\prod_{j=0}^i \gamma_j a_j\bigr)^2}{\lambda^2 \epsilon^2} \sum_{i=0}^L \bigl[   \frac{{b'}_i^2}{{{\alpha}'_i}^2{a'_i}^2}  \bigr] \\
    &\; +  \frac{4\log(2W^2)(B+\frac{\prod_{i=0}^L \gamma_i a'_i}{\lambda})^2\prod_{i=0}^K \xi_i^2 a_i^2}{\epsilon^2} \sum_{i=0}^K\bigl[\frac{b_i^2}{{\alpha}_i^2 a_i^2} \bigr] \\
    = \, & \frac{C}{\epsilon^2}
\end{align*}
where we define
\begin{align*}
    C \, := \, 4\log(2W^2) \biggl[ \, &\frac{\prod_{i=0}^L \gamma^2_i {a'_i}^2 \bigl(\sum_{i=0}^L\prod_{j=0}^i \gamma_j a_j\bigr)^2}{\lambda^2} \left[\sum_{i=0}^L\frac{{b'_i}^{2/3}}{{a'_i}^{2/3}} \right]^{3} \\
    &+ \bigl(B+\frac{\prod_{i=0}^L \gamma_i a'_i}{\lambda}\bigr)^2\prod_{i=0}^K \xi_i^2 a_i^2\left[\sum_{i=0}^K\frac{b_i^{2/3}}{a_i^{2/3}} \right]^{3}\biggr].
\end{align*}

Now, based on the above covering-number bound, we use the Dudley entropy integral bound \citep{bartlett2017spectrally} which bounds the empirical Rademacher complexity of $\mathcal{F}\circ \Delta_{\mathcal{H}}|_S$ as 
\begin{align*}
    \mathcal{R}(\mathcal{F}\circ \Delta_{\mathcal{H}}|_S) \, &\le \, \inf_{\alpha\ge 0}\; \biggl\{\frac{4\alpha}{\sqrt{n}} + \frac{12}{n}\int_{\alpha}^{\sqrt{n}}\sqrt{\log \mathcal{N}\bigl(\mathcal{F}\circ\Delta_H|_S , \Vert\cdot\Vert_2,\epsilon \bigr)} d\epsilon \biggr\} \\
    & \le \, \inf_{\alpha\ge 0}\;\biggl\{\frac{4\alpha}{\sqrt{n}} + {\frac{12\sqrt{C}}{n}\log(\frac{\sqrt{n}}{\alpha} ) \biggr\} }\\
    & \le \frac{4}{n^{3/2}} + \frac{18\log(n)\sqrt{C}}{n}
\end{align*}
where the last line follows from choosing $\alpha = 1/n$. Also, note that since for every non-negative constants $a,b\ge 0$ we have $\sqrt{a+b}\le \sqrt{a}+\sqrt{b}$, then
\begin{align*}
    \sqrt{C} \, \le \, 2\sqrt{\log(2D^2)} \biggl[ \, &\frac{\prod_{i=0}^L \gamma_i {a'_i} \bigl(\sum_{i=0}^L\prod_{j=0}^i \gamma_j a_j\bigr)}{\lambda} \left[\sum_{i=0}^L\frac{{b'_i}^{2/3}}{{a'_i}^{2/3}} \right]^{3/2} \\
    &+ \bigl(B+\frac{\prod_{i=0}^L \gamma_i a'_i}{\lambda}\bigr)\prod_{i=0}^K \xi_i a_i\left[\sum_{i=0}^K\frac{b_i^{2/3}}{a_i^{2/3}} \right]^{3/2}\biggr].
\end{align*}
Consequently, we have  the following bound where $R_{\mathcal{V}}$ and $R_{\mathcal{W}}$ are defined as in Theorem 1:
\begin{equation*}
     \mathcal{R}(\mathcal{F}\circ \Delta_{\mathcal{H}}|_S) \le \, \mathcal{O}\biggl(\frac{(B+\frac{L_{\mathbf{w}}}{\lambda}) \bigl(R_{\mathcal{V}}+\frac{1}{\tau^2}L_{\mathbf{w}}R_{\mathcal{W}}\bigr) \log(n) }{n}\log(D) \biggr) 
\end{equation*}
Therefore, according to standard Rademacher complexity-based generalization bounds \citep{bartlett2002rademacher}, for every $\omega>0$ with probability at least $1-\omega$  we have for every $\mathbf{v,w}\in\mathcal{V,W}$:
\begin{align*}
     &\frac{1}{n}\sum_{i=1}^n\left[ \ell\bigl(f_\mathbf{v}(\mathbf{x}_i+\delta^*_{\mathbf{w}}(\mathbf{x}_i,y_i)),y_i\bigr) \right] -  \mathbb{E}\left[ \ell\bigl(f_\mathbf{v}(\mathbf{X}+\delta^*_{\mathbf{w}}(\mathbf{X},Y)),Y\bigr)\right]  \\
     \le\, & \mathcal{O}\biggl(c\sqrt{\frac{\log(1/\omega)}{n}} + \frac{(B+\frac{L_{\mathbf{w}}}{\lambda}) \bigl(R_{\mathcal{V}}+\frac{1}{\tau^2}L_{\mathbf{w}}R_{\mathcal{W}}\bigr) \log(n) }{n}\log(D) \biggr),
\end{align*}
which implies that 
\begin{align*}
     \epsilon_{\mathrm{gen}}(\delta^*_{\mathbf{w}}) \, &= \,
     \min_{\mathbf{v}\in\mathcal{V}}\biggl\{\frac{1}{n}\sum_{i=1}^n\left[ \ell\bigl(f_\mathbf{v}(\mathbf{x}_i+\delta^*_{\mathbf{w}}(\mathbf{x}_i,y_i)),y_i\bigr) \right]\biggr\} -  \min_{\mathbf{v}\in\mathcal{V}}\biggl\{\mathbb{E}\left[ \ell\bigl(f_\mathbf{v}(\mathbf{X}+\delta^*_{\mathbf{w}}(\mathbf{X},Y)),Y\bigr)\right]\biggr\} \\
     &\le \, \max_{\mathbf{v}\in\mathcal{V}}\biggl\{\frac{1}{n}\sum_{i=1}^n\left[ \ell\bigl(f_\mathbf{v}(\mathbf{x}_i+\delta^*_{\mathbf{w}}(\mathbf{x}_i,y_i)),y_i\bigr) \right] -  \mathbb{E}\left[ \ell\bigl(f_\mathbf{v}(\mathbf{X}+\delta^*_{\mathbf{w}}(\mathbf{X},Y)),Y\bigr)\right] \biggr\} \\
     &\le \, \mathcal{O}\biggl(c\sqrt{\frac{\log(1/\omega)}{n}} + \frac{(B+\frac{L_{\mathbf{w}}}{\lambda}) \bigl(R_{\mathcal{V}}+\frac{1}{\tau^2}L_{\mathbf{w}}R_{\mathcal{W}}\bigr) \log(n) }{n}\log(D) \biggr).
\end{align*}
Therefore, the theorem's proof is complete.

\section{Additional Numerical Experiments}

In this section, we report the complete set of our numerical results.
Table~\ref{tab:full_eraly_stopping} shows the complete results for DNNs regularized by early stopping, i.e. the complete version of Table~\ref{tab:early_stopping}. Moreover, Tables~\ref{tab:more_gen_trans1},\ref{tab:more_gen_trans2},\ref{tab:more_gen_trans3} are the full versions of Table~\ref{tab:generalization_transferability_SN}, demonstrating the relationship between generalization errors and transferability rates of the discussed datasets and DNN architectures. In addition to our earlier results, the accuracies for adversarially-perturbed training and test samples are also included in the tables.

%It might be concerned that the improvement of test adversarial example's transferability doesn't benefit from better generalization but from higher test accuracy. To eliminate this concern, we used the intersection of test samples which can be labeled correctly by the substitute models with and without regularization to evaluate transferability. 

In addition to the previous results, we also present the transferability rates averaged over test samples whose adversarial examples are predicted correctly by both the regularized and unregularized substitute DNNs. The transferability rates over this intersection of test samples, i.e., those correctly predicted by both the regularized and unregularized substitute DNNs, are shown in Tables~\ref{tab:spectral_normalization_intersection} and~\ref{tab:early_stopping_intersection} under the title Transferability Rate-Int. Our numerical results suggest that over the test samples correctly predicted by both regularized and unregularized DNNs, a better generalization score again results in higher transferability rates for the designed adversarial examples.
% Last, Figure \ref{fig:transferability_with_error_bar} presents the plots of the main text's Figure \ref{fig:tran_L2_FGM} with error bars.

Finally, Tables~\ref{tab:same_training_set_spectral} and~\ref{tab:same_training_set_es} show the numerical results when the substitute and target models are trained on the same training set.



\begin{table}[htbp]

\caption{Generalization and transferability rates for different DNN architectures and image datasets with and without early stopping (ES).}
\centering
\resizebox{0.8\linewidth}{!}{\begin{tabular}{|cccccccc|}
\hline
Dataset & Model & Method & Train Acc & Test Acc & Gen.Err. & \begin{tabular}[c]{@{}c@{}}Transferability\\ Rate(VGG16)\end{tabular} & \begin{tabular}[c]{@{}c@{}}Transferability\\ Rate(ResNet18)\end{tabular} \\ \hline
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGM} & 0.970 & 0.453 & 0.517 & 0.127 & 0.104 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}PGM-ES} & \cellcolor[HTML]{EFEFEF}0.591 & \cellcolor[HTML]{EFEFEF}0.518 & \cellcolor[HTML]{EFEFEF}0.073 & \cellcolor[HTML]{EFEFEF}0.198 & \cellcolor[HTML]{EFEFEF}0.172 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.997 & 0.529 & 0.467 & 0.100 & 0.089 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-4}{*}{Inception}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}FGM-ES} & \cellcolor[HTML]{EFEFEF}0.657 & \cellcolor[HTML]{EFEFEF}0.530 & \cellcolor[HTML]{EFEFEF}0.126 & \cellcolor[HTML]{EFEFEF}0.170 & \cellcolor[HTML]{EFEFEF}0.147 \\ \cline{2-8} 
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGM} & 1.000 & 0.421 & 0.579 & 0.098 & 0.077 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}PGM-ES} & \cellcolor[HTML]{EFEFEF}0.548 & \cellcolor[HTML]{EFEFEF}0.487 & \cellcolor[HTML]{EFEFEF}0.061 & \cellcolor[HTML]{EFEFEF}0.154 & \cellcolor[HTML]{EFEFEF}0.136 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 1.000 & 0.480 & 0.520 & 0.100 & 0.087 \\
\multicolumn{1}{|c|}{\multirow{-8}{*}{Cifar10}} & \multicolumn{1}{c|}{\multirow{-4}{*}{Alexnet}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}FGM-ES} & \cellcolor[HTML]{EFEFEF}0.594 & \cellcolor[HTML]{EFEFEF}0.501 & \cellcolor[HTML]{EFEFEF}0.092 & \cellcolor[HTML]{EFEFEF}0.152 & \cellcolor[HTML]{EFEFEF}0.127 \\ \hline
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGM} & 0.877 & 0.231 & 0.646 & 0.283 & 0.258 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}PGM-ES} & \cellcolor[HTML]{EFEFEF}0.408 & \cellcolor[HTML]{EFEFEF}0.271 & \cellcolor[HTML]{EFEFEF}0.137 & \cellcolor[HTML]{EFEFEF}0.330 & \cellcolor[HTML]{EFEFEF}0.286 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.984 & 0.272 & 0.711 & 0.270 & 0.239 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-4}{*}{Inception}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}FGM-ES} & \cellcolor[HTML]{EFEFEF}0.457 & \cellcolor[HTML]{EFEFEF}0.312 & \cellcolor[HTML]{EFEFEF}0.146 & \cellcolor[HTML]{EFEFEF}0.327 & \cellcolor[HTML]{EFEFEF}0.289 \\ \cline{2-8} 
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGM} & 0.966 & 0.202 & 0.764 & 0.252 & 0.227 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}PGM-ES} & \cellcolor[HTML]{EFEFEF}0.338 & \cellcolor[HTML]{EFEFEF}0.248 & \cellcolor[HTML]{EFEFEF}0.091 & \cellcolor[HTML]{EFEFEF}0.294 & \cellcolor[HTML]{EFEFEF}0.266 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.990 & 0.234 & 0.756 & 0.261 & 0.232 \\
\multicolumn{1}{|c|}{\multirow{-8}{*}{Cifar100}} & \multicolumn{1}{c|}{\multirow{-4}{*}{Alexnet}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}FGM-ES} & \cellcolor[HTML]{EFEFEF}0.399 & \cellcolor[HTML]{EFEFEF}0.278 & \cellcolor[HTML]{EFEFEF}0.122 & \cellcolor[HTML]{EFEFEF}0.291 & \cellcolor[HTML]{EFEFEF}0.259 \\ \hline
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGM} & 0.925 & 0.585 & 0.341 & 0.207 & 0.220 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}PGM-ES} & \cellcolor[HTML]{EFEFEF}0.711 & \cellcolor[HTML]{EFEFEF}0.654 & \cellcolor[HTML]{EFEFEF}0.057 & \cellcolor[HTML]{EFEFEF}0.298 & \cellcolor[HTML]{EFEFEF}0.322 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.998 & 0.619 & 0.380 & 0.136 & 0.129 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-4}{*}{Inception}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}FGM-ES} & \cellcolor[HTML]{EFEFEF}0.898 & \cellcolor[HTML]{EFEFEF}0.718 & \cellcolor[HTML]{EFEFEF}0.180 & \cellcolor[HTML]{EFEFEF}0.213 & \cellcolor[HTML]{EFEFEF}0.219 \\ \cline{2-8} 
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGM} & 0.848 & 0.541 & 0.307 & 0.211 & 0.228 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}PGM-ES} & \cellcolor[HTML]{EFEFEF}0.624 & \cellcolor[HTML]{EFEFEF}0.594 & \cellcolor[HTML]{EFEFEF}0.030 & \cellcolor[HTML]{EFEFEF}0.256 & \cellcolor[HTML]{EFEFEF}0.278 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.949 & 0.576 & 0.373 & 0.157 & 0.170 \\
\multicolumn{1}{|c|}{\multirow{-8}{*}{SVHN}} & \multicolumn{1}{c|}{\multirow{-4}{*}{Alexnet}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}FGM-ES} & \cellcolor[HTML]{EFEFEF}0.691 & \cellcolor[HTML]{EFEFEF}0.627 & \cellcolor[HTML]{EFEFEF}0.064 & \cellcolor[HTML]{EFEFEF}0.241 & \cellcolor[HTML]{EFEFEF}0.260 \\ \hline
\end{tabular}



}
\label{tab:full_eraly_stopping}
\end{table}


\begin{table}[htbp]
\centering
% \vspace{-10mm}
\caption{Generalization and transferability rates on CIFAR-10 dataset, with and without spectral regularization.}
\resizebox{\linewidth}{!}{

\begin{tabular}{|ccccccccc|}
\hline
Dataset & Model & Method & $\beta$ & Train Acc & Test Acc & Gen.Err. & \begin{tabular}[c]{@{}c@{}}Transferability\\ Rate(VGG16)\end{tabular} & \begin{tabular}[c]{@{}c@{}}Transferability\\ Rate(ResNet18)\end{tabular} \\ \hline
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.998 & 0.532 & 0.466 & 0.092 & 0.078 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.830 & \cellcolor[HTML]{EFEFEF}0.511 & \cellcolor[HTML]{EFEFEF}0.320 & \cellcolor[HTML]{EFEFEF}0.136 & \cellcolor[HTML]{EFEFEF}0.113 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.3} & 0.936 & 0.479 & 0.456 & 0.124 & 0.097 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.6} & 0.997 & 0.509 & 0.488 & 0.099 & 0.084 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{2} & 0.999 & 0.541 & 0.458 & 0.087 & 0.073 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-6}{*}{FGM($L_2$)}} & \multicolumn{1}{c|}{3} & 1.000 & 0.554 & 0.445 & 0.090 & 0.077 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.951 & 0.443 & 0.508 & 0.104 & 0.084 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.759 & \cellcolor[HTML]{EFEFEF}0.502 & \cellcolor[HTML]{EFEFEF}0.258 & \cellcolor[HTML]{EFEFEF}0.150 & \cellcolor[HTML]{EFEFEF}0.134 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.3} & 0.913 & 0.459 & 0.454 & 0.125 & 0.099 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.6} & 0.945 & 0.447 & 0.499 & 0.115 & 0.095 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{2} & 0.979 & 0.451 & 0.529 & 0.110 & 0.092 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-6}{*}{PGM($L_2$)}} & \multicolumn{1}{c|}{3} & 0.988 & 0.468 & 0.520 & 0.103 & 0.082 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.946 & 0.442 & 0.504 & 0.124 & 0.101 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.694 & \cellcolor[HTML]{EFEFEF}0.451 & \cellcolor[HTML]{EFEFEF}0.243 & \cellcolor[HTML]{EFEFEF}0.191 & \cellcolor[HTML]{EFEFEF}0.153 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.3} & 0.842 & 0.421 & 0.421 & 0.165 & 0.129 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.6} & 0.951 & 0.419 & 0.531 & 0.129 & 0.106 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{2} & 0.980 & 0.453 & 0.527 & 0.121 & 0.103 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-6}{*}{FGM($L_\infty$)}} & \multicolumn{1}{c|}{3} & 0.988 & 0.470 & 0.517 & 0.112 & 0.091 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.708 & 0.525 & 0.184 & 0.419 & 0.393 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.627 & \cellcolor[HTML]{EFEFEF}0.545 & \cellcolor[HTML]{EFEFEF}0.082 & \cellcolor[HTML]{EFEFEF}0.497 & \cellcolor[HTML]{EFEFEF}0.474 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.3} & 0.674 & 0.534 & 0.140 & 0.466 & 0.443 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.6} & 0.710 & 0.520 & 0.190 & 0.442 & 0.422 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{2} & 0.739 & 0.505 & 0.234 & 0.408 & 0.389 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-24}{*}{Inception}} & \multicolumn{1}{c|}{\multirow{-6}{*}{PGM($L_\infty$)}} & \multicolumn{1}{c|}{3} & 0.745 & 0.504 & 0.241 & 0.398 & 0.380 \\ \cline{2-9} 
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 1.000 & 0.495 & 0.505 & 0.089 & 0.070 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.977 & \cellcolor[HTML]{EFEFEF}0.526 & \cellcolor[HTML]{EFEFEF}0.451 & \cellcolor[HTML]{EFEFEF}0.147 & \cellcolor[HTML]{EFEFEF}0.122 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.3} & 1.000 & 0.496 & 0.504 & 0.132 & 0.105 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.6} & 1.000 & 0.474 & 0.526 & 0.115 & 0.090 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{2} & 1.000 & 0.477 & 0.523 & 0.108 & 0.088 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-6}{*}{FGM($L_2$)}} & \multicolumn{1}{c|}{3} & 1.000 & 0.496 & 0.504 & 0.098 & 0.079 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.999 & 0.454 & 0.545 & 0.105 & 0.087 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGM($L_2$)}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.861 & \cellcolor[HTML]{EFEFEF}0.519 & \cellcolor[HTML]{EFEFEF}0.342 & \cellcolor[HTML]{EFEFEF}0.162 & \cellcolor[HTML]{EFEFEF}0.139 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.996 & 0.400 & 0.596 & 0.102 & 0.089 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.914 & \cellcolor[HTML]{EFEFEF}0.426 & \cellcolor[HTML]{EFEFEF}0.487 & \cellcolor[HTML]{EFEFEF}0.199 & \cellcolor[HTML]{EFEFEF}0.169 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.3} & 0.998 & 0.387 & 0.610 & 0.169 & 0.142 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.6} & 1.000 & 0.368 & 0.632 & 0.146 & 0.120 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{2} & 1.000 & 0.420 & 0.580 & 0.128 & 0.106 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-6}{*}{FGM($L_\infty$)}} & \multicolumn{1}{c|}{3} & 1.000 & 0.435 & 0.565 & 0.115 & 0.096 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.685 & 0.474 & 0.211 & 0.446 & 0.423 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.628 & \cellcolor[HTML]{EFEFEF}0.473 & \cellcolor[HTML]{EFEFEF}0.156 & \cellcolor[HTML]{EFEFEF}0.472 & \cellcolor[HTML]{EFEFEF}0.457 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.3} & 0.680 & 0.436 & 0.245 & 0.441 & 0.423 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.6} & 0.697 & 0.420 & 0.277 & 0.421 & 0.403 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{2} & 0.679 & 0.408 & 0.271 & 0.404 & 0.388 \\
\multicolumn{1}{|c|}{\multirow{-44}{*}{Cifar10}} & \multicolumn{1}{c|}{\multirow{-20}{*}{Alexnet}} & \multicolumn{1}{c|}{\multirow{-6}{*}{PGM($L_\infty$)}} & \multicolumn{1}{c|}{3} & 0.628 & 0.439 & 0.189 & 0.426 & 0.405 \\ \hline
\end{tabular}

}
\label{tab:more_gen_trans1}
\end{table}


















\begin{table}[htbp]
\centering
% \vspace{-10mm}
\caption{Generalization and transferability rates on CIFAR-100 dataset, with and without spectral regularization.}
\resizebox{\linewidth}{!}{
\begin{tabular}{|ccccccccc|}
\hline
Dataset & Model & Method & $\beta$ & Train Acc & Test Acc & Gen.Err. & \begin{tabular}[c]{@{}c@{}}Transferability\\ Rate(VGG16)\end{tabular} & \begin{tabular}[c]{@{}c@{}}Transferability\\ Rate(ResNet18)\end{tabular} \\ \hline
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.996 & 0.279 & 0.717 & 0.268 & 0.236 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1} & 0.761 & 0.277 & 0.484 & 0.250 & 0.290 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.3} & \cellcolor[HTML]{EFEFEF}0.853 & \cellcolor[HTML]{EFEFEF}0.294 & \cellcolor[HTML]{EFEFEF}0.558 & \cellcolor[HTML]{EFEFEF}0.313 & \cellcolor[HTML]{EFEFEF}0.275 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.6} & 0.998 & 0.260 & 0.738 & 0.236 & 0.263 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{2} & 1.000 & 0.284 & 0.715 & 0.238 & 0.272 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-6}{*}{FGM($L_2$)}} & \multicolumn{1}{c|}{3} & 0.999 & 0.262 & 0.736 & 0.221 & 0.257 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.857 & 0.255 & 0.602 & 0.303 & 0.270 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-8}{*}{Inception}} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGM($L_2$)}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.3} & \cellcolor[HTML]{EFEFEF}0.750 & \cellcolor[HTML]{EFEFEF}0.256 & \cellcolor[HTML]{EFEFEF}0.494 & \cellcolor[HTML]{EFEFEF}0.330 & \cellcolor[HTML]{EFEFEF}0.301 \\ \cline{2-9} 
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 1.000 & 0.242 & 0.758 & 0.258 & 0.232 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.925 & \cellcolor[HTML]{EFEFEF}0.315 & \cellcolor[HTML]{EFEFEF}0.611 & \cellcolor[HTML]{EFEFEF}0.342 & \cellcolor[HTML]{EFEFEF}0.310 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.3} & 1.000 & 0.289 & 0.710 & 0.304 & 0.266 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.6} & 1.000 & 0.285 & 0.715 & 0.291 & 0.248 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{2} & 1.000 & 0.284 & 0.716 & 0.288 & 0.253 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-6}{*}{FGM($L_2$)}} & \multicolumn{1}{c|}{3} & 1.000 & 0.269 & 0.730 & 0.265 & 0.240 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 1.000 & 0.210 & 0.789 & 0.229 & 0.260 \\
\multicolumn{1}{|c|}{\multirow{-16}{*}{Cifar100}} & \multicolumn{1}{c|}{\multirow{-8}{*}{Alexnet}} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGM($L_2$)}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.889 & \cellcolor[HTML]{EFEFEF}0.288 & \cellcolor[HTML]{EFEFEF}0.601 & \cellcolor[HTML]{EFEFEF}0.323 & \cellcolor[HTML]{EFEFEF}0.353 \\ \hline
\end{tabular}
}
\label{tab:more_gen_trans2}
\end{table}

\begin{table}[htbp]
\centering
% \vspace{-10mm}
\caption{Generalization and transferability rates on SVHN dataset, with and without spectral regularization.}
\resizebox{\linewidth}{!}{
\begin{tabular}{|ccccccccc|}
\hline
Dataset & Model & Method & $\beta$ & Train Acc & Test Acc & Gen.Err. & \begin{tabular}[c]{@{}c@{}}Transferability\\ Rate(VGG16)\end{tabular} & \begin{tabular}[c]{@{}c@{}}Transferability\\ Rate(ResNet18)\end{tabular} \\ \hline
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.991 & 0.618 & 0.373 & 0.134 & 0.126 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.848 & \cellcolor[HTML]{EFEFEF}0.645 & \cellcolor[HTML]{EFEFEF}0.203 & \cellcolor[HTML]{EFEFEF}0.277 & \cellcolor[HTML]{EFEFEF}0.257 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.3} & 0.967 & 0.605 & 0.362 & 0.223 & 0.209 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.6} & 0.998 & 0.573 & 0.426 & 0.202 & 0.187 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{2} & 1.000 & 0.571 & 0.429 & 0.158 & 0.149 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-6}{*}{FGM($L_2$)}} & \multicolumn{1}{c|}{3} & 1.000 & 0.642 & 0.358 & 0.121 & 0.118 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.934 & 0.592 & 0.342 & 0.193 & 0.177 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-8}{*}{Inception}} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGM($L_2$)}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.767 & \cellcolor[HTML]{EFEFEF}0.652 & \cellcolor[HTML]{EFEFEF}0.115 & \cellcolor[HTML]{EFEFEF}0.339 & \cellcolor[HTML]{EFEFEF}0.313 \\ \cline{2-9} 
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.991 & 0.618 & 0.373 & 0.134 & 0.126 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.848 & \cellcolor[HTML]{EFEFEF}0.645 & \cellcolor[HTML]{EFEFEF}0.203 & \cellcolor[HTML]{EFEFEF}0.277 & \cellcolor[HTML]{EFEFEF}0.257 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.3} & 0.967 & 0.605 & 0.362 & 0.223 & 0.209 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{1.6} & 0.998 & 0.573 & 0.426 & 0.202 & 0.187 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{2} & 1.000 & 0.571 & 0.429 & 0.158 & 0.149 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-6}{*}{FGM($L_2$)}} & \multicolumn{1}{c|}{3} & 0.999 & 0.581 & 0.418 & 0.145 & 0.133 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.844 & 0.546 & 0.298 & 0.211 & 0.225 \\
\multicolumn{1}{|c|}{\multirow{-16}{*}{SVHN}} & \multicolumn{1}{c|}{\multirow{-8}{*}{Alexnet}} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGM($L_2$)}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.817 & \cellcolor[HTML]{EFEFEF}0.618 & \cellcolor[HTML]{EFEFEF}0.199 & \cellcolor[HTML]{EFEFEF}0.276 & \cellcolor[HTML]{EFEFEF}0.292 \\ \hline
\end{tabular}}
\label{tab:more_gen_trans3}
\end{table}





\begin{table}[htbp]
\centering
% \vspace{-10mm}
\caption{Generalization and transferability rates for different DNN architectures and image datasets with and without spectral regularization. Transferability Rate-Int. means averaged transferability rate on adversarial examples correctly labeled by both the regularized and unregularized DNNs.}
\resizebox{0.8\linewidth}{!}{
\begin{tabular}{|ccccccc|}
\hline
Dataset & Model & Method & $\beta$ & Gen.Err. & \begin{tabular}[c]{@{}c@{}}Transferability\\ Rate-Int.(VGG16)\end{tabular} & \begin{tabular}[c]{@{}c@{}}Transferability\\ Rate-Int.(ResNet18)\end{tabular} \\ \hline
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.466 & 0.032 & 0.026 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM($L_2$)}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.320 & \cellcolor[HTML]{EFEFEF}0.077 & \cellcolor[HTML]{EFEFEF}0.058 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.508 & 0.030 & 0.026 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGM($L_2$)}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.258 & \cellcolor[HTML]{EFEFEF}0.063 & \cellcolor[HTML]{EFEFEF}0.052 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.504 & 0.029 & 0.028 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM($L_\infty$)}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.243 & \cellcolor[HTML]{EFEFEF}0.070 & \cellcolor[HTML]{EFEFEF}0.090 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.184 & 0.136 & 0.181 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-8}{*}{Inception}} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGM($L_\infty$)}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.082 & \cellcolor[HTML]{EFEFEF}0.182 & \cellcolor[HTML]{EFEFEF}0.162 \\ \cline{2-7} 
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.505 & 0.035 & 0.029 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM($L_2$)}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.451 & \cellcolor[HTML]{EFEFEF}0.076 & \cellcolor[HTML]{EFEFEF}0.068 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.545 & 0.037 & 0.030 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGM($L_2$)}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.342 & \cellcolor[HTML]{EFEFEF}0.077 & \cellcolor[HTML]{EFEFEF}0.063 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.596 & 0.039 & 0.019 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM($L_\infty$)}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.487 & \cellcolor[HTML]{EFEFEF}0.089 & \cellcolor[HTML]{EFEFEF}0.070 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.211 & 0.227 & 0.222 \\
\multicolumn{1}{|c|}{\multirow{-16}{*}{Cifar10}} & \multicolumn{1}{c|}{\multirow{-8}{*}{Alexnet}} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGM($L_\infty$)}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.156 & \cellcolor[HTML]{EFEFEF}0.271 & \cellcolor[HTML]{EFEFEF}0.248 \\ \hline
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.717 & 0.126 & 0.112 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM($L_2$)}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.3} & \cellcolor[HTML]{EFEFEF}0.558 & \cellcolor[HTML]{EFEFEF}0.154 & \cellcolor[HTML]{EFEFEF}0.131 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.602 & 0.141 & 0.123 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-4}{*}{Inception}} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGM($L_2$)}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1.3} & \cellcolor[HTML]{EFEFEF}0.494 & \cellcolor[HTML]{EFEFEF}0.160 & \cellcolor[HTML]{EFEFEF}0.141 \\ \cline{2-7} 
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.758 & 0.127 & 0.101 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM($L_2$)}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.611 & \cellcolor[HTML]{EFEFEF}0.180 & \cellcolor[HTML]{EFEFEF}0.159 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.789 & 0.114 & 0.108 \\
\multicolumn{1}{|c|}{\multirow{-8}{*}{Cifar100}} & \multicolumn{1}{c|}{\multirow{-4}{*}{Alexnet}} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGM($L_2$)}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.601 & \cellcolor[HTML]{EFEFEF}0.159 & \cellcolor[HTML]{EFEFEF}0.151 \\ \hline
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.373 & 0.010 & 0.013 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM($L_2$)}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.203 & \cellcolor[HTML]{EFEFEF}0.103 & \cellcolor[HTML]{EFEFEF}0.125 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.342 & 0.024 & 0.031 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-4}{*}{Inception}} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGM($L_2$)}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.115 & \cellcolor[HTML]{EFEFEF}0.102 & \cellcolor[HTML]{EFEFEF}0.125 \\ \cline{2-7} 
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.373 & 0.013 & 0.014 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM($L_2$)}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.203 & \cellcolor[HTML]{EFEFEF}0.092 & \cellcolor[HTML]{EFEFEF}0.112 \\ \cline{3-4}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{$\infty$} & 0.298 & 0.044 & 0.053 \\
\multicolumn{1}{|c|}{\multirow{-8}{*}{SVHN}} & \multicolumn{1}{c|}{\multirow{-4}{*}{Alexnet}} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGM($L_2$)}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}1} & \cellcolor[HTML]{EFEFEF}0.199 & \cellcolor[HTML]{EFEFEF}0.080 & \cellcolor[HTML]{EFEFEF}0.101 \\ \hline
\end{tabular}
}
\label{tab:spectral_normalization_intersection}
\end{table}

\begin{table}[htbp]
\caption{Generalization and transferability rates for different DNN architectures and image datasets with and without early stopping (ES). Transferability Rate-Int. means averaged transferability rate on adversarial examples correctly labeled by both the regularized and unregularized DNNs.}
\centering
\resizebox{0.8\linewidth}{!}{
\begin{tabular}{|cccccc|}
\hline
Dataset & Model & Method & Gen.Err. & \begin{tabular}[c]{@{}c@{}}Transferability\\ Rate-Int.(VGG16)\end{tabular} & \begin{tabular}[c]{@{}c@{}}Transferability\\ Rate-Int.(ResNet18)\end{tabular} \\ \hline
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGM} & 0.517 & 0.030 & 0.127 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}PGM-ES} & \cellcolor[HTML]{EFEFEF}0.073 & \cellcolor[HTML]{EFEFEF}0.063 & \cellcolor[HTML]{EFEFEF}0.198 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.467 & 0.032 & 0.100 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-4}{*}{Inception}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}FGM-ES} & \cellcolor[HTML]{EFEFEF}0.126 & \cellcolor[HTML]{EFEFEF}0.077 & \cellcolor[HTML]{EFEFEF}0.170 \\ \cline{2-6} 
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGM} & 0.579 & 0.037 & 0.098 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}PGM-ES} & \cellcolor[HTML]{EFEFEF}0.061 & \cellcolor[HTML]{EFEFEF}0.077 & \cellcolor[HTML]{EFEFEF}0.154 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.520 & 0.035 & 0.100 \\
\multicolumn{1}{|c|}{\multirow{-8}{*}{Cifar10}} & \multicolumn{1}{c|}{\multirow{-4}{*}{Alexnet}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}FGM-ES} & \cellcolor[HTML]{EFEFEF}0.092 & \cellcolor[HTML]{EFEFEF}0.076 & \cellcolor[HTML]{EFEFEF}0.152 \\ \hline
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGM} & 0.646 & 0.141 & 0.283 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}PGM-ES} & \cellcolor[HTML]{EFEFEF}0.137 & \cellcolor[HTML]{EFEFEF}0.160 & \cellcolor[HTML]{EFEFEF}0.330 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.711 & 0.126 & 0.270 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-4}{*}{Inception}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}FGM-ES} & \cellcolor[HTML]{EFEFEF}0.146 & \cellcolor[HTML]{EFEFEF}0.154 & \cellcolor[HTML]{EFEFEF}0.327 \\ \cline{2-6}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGM} & 0.764 & 0.114 & 0.252 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}PGM-ES} & \cellcolor[HTML]{EFEFEF}0.091 & \cellcolor[HTML]{EFEFEF}0.159 & \cellcolor[HTML]{EFEFEF}0.294 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.756 & 0.127 & 0.261 \\
\multicolumn{1}{|c|}{\multirow{-8}{*}{Cifar100}} & \multicolumn{1}{c|}{\multirow{-4}{*}{Alexnet}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}FGM-ES} & \cellcolor[HTML]{EFEFEF}0.122 & \cellcolor[HTML]{EFEFEF}0.180 & \cellcolor[HTML]{EFEFEF}0.291 \\ \hline
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGM} & 0.341 & 0.024 & 0.207 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}PGM-ES} & \cellcolor[HTML]{EFEFEF}0.057 & \cellcolor[HTML]{EFEFEF}0.102 & \cellcolor[HTML]{EFEFEF}0.298 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.380 & 0.010 & 0.136 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-4}{*}{Inception}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}FGM-ES} & \cellcolor[HTML]{EFEFEF}0.180 & \cellcolor[HTML]{EFEFEF}0.103 & \cellcolor[HTML]{EFEFEF}0.213 \\ \cline{2-6} 
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGM} & 0.307 & 0.044 & 0.211 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}PGM-ES} & \cellcolor[HTML]{EFEFEF}0.030 & \cellcolor[HTML]{EFEFEF}0.080 & \cellcolor[HTML]{EFEFEF}0.256 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.373 & 0.013 & 0.157 \\
\multicolumn{1}{|c|}{\multirow{-8}{*}{SVHN}} & \multicolumn{1}{c|}{\multirow{-4}{*}{Alexnet}} & \multicolumn{1}{c|}{\cellcolor[HTML]{EFEFEF}FGM-ES} & \cellcolor[HTML]{EFEFEF}0.064 & \cellcolor[HTML]{EFEFEF}0.092 & \cellcolor[HTML]{EFEFEF}0.241 \\ \hline
\end{tabular}}
\label{tab:early_stopping_intersection}
\end{table}



% \subsection{Valid the connection between generalization and transferability by gradient}

% In this section, we validate the connection between generalization and transferability by analyzing the norm of the gradient when generating adversarial samples. By the definition of generalization, better generalization indicates milder overfitting during training.

% In adversarial training, a model might overfit to the adversarial training samples, which leads to poor generalization. In the extreme case, the model renders the attack strategy ineffective on the training samples, meaning that the adversarial samples have the same or nearly the same loss value as the clean training samples. In this case, the gradient of the loss with respect to the input samples will be close to zero, since the loss values are almost the same in the neighborhood of each input sample. This indicates that when a model suffers from poor generalization, the gradient of the loss with respect to an input training sample will be close to zero.

% As for transferability, one factor impeding the transferability of perturbations is that the generated perturbations overfit to the parameters of the substitute model. When such overfitting happens, the gradient is usually larger than when it does not.

% In this section, we compare the $L_2$-norm of the gradient when generating adversarial samples to validate that applying regularization to a model in adversarial training can improve both generalization and transferability. Because the values of the $L_2$-norm of the gradient on different datasets and architectures are not comparable, we use the relative $L_2$-norm. We define model $\mathcal{F}_n$'s relative $L_2$-norm of the gradient at epoch $n$ as $R(\mathcal{F}_n)$, where:
% \begin{equation}
%     g(\mathcal{F}_n)=\mathbb{E}_{x\in \mathcal{X}}\left( \frac{\partial Loss(\mathcal{F}_n(x),y,\mathcal{W})}{\partial x}\right)
% \end{equation}

% \begin{equation}
%     R(\mathcal{F}_n)=\frac{||g(\mathcal{F}_n)||_2}{||g(\mathcal{F}_1)||_2}
% \end{equation}

% By this definition, higher $R(\mathcal{F}_n)$ indicates larger expected gradient norm when generating perturbations. Fig.~\ref{fig:gradient_validation} shows the relative $L_2$-norm of the gradient during training by different adversarial training strategies on different models and datasets with and without Lipschitz regularization.
% Tab.~\ref{tab:gradient_early_stopping} shows the relative $L_2$-norm of the gradient on models with and without early stopping.



% Surprisingly, the trend of the relative $L_2$-norm of the gradient is consistent with the connection between generalization and transferability: applying regularization to a model leads to a larger relative $L_2$-norm of the gradient on the training set, which avoids overfitting in adversarial training, so the model's generalization improves. Meanwhile, the relative $L_2$-norm of the gradient on the test set decreases, which prevents the perturbations from overfitting to the parameters of the model and thus improves the perturbations' transferability.

% \begin{figure}
%     \centering
%     \hspace{-0.8cm}\includegraphics[height=4cm]{NeurIPS2021/submission/Figures/Experiment_Fig/gradient_validation.png}
%     \caption{Relative $L_2$-norm of the gradient during training for models with and without Lipschitz regularization. Train and test represents the relative $L_2$-norm of the gradient on training and test sets, respectively.}
%     \label{fig:gradient_validation}
% \end{figure}

% % Please add the following required packages to your document preamble:
% % \usepackage{multirow}
% \begin{table}[htbp]
% \centering
% \begin{tabular}{ccccc}
% \hline
% Dataset & Model & Method & Train & Test \\ \hline
% \multicolumn{1}{c|}{\multirow{8}{*}{Cifar10}} & \multicolumn{1}{c|}{\multirow{4}{*}{Inc}} & \multicolumn{1}{c|}{PGM} & 121.80\% & 3241.00\% \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGM$^*$} & \textbf{307.00\%} & \textbf{494.40\%} \\ \cline{3-3}
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 6.39\% & 631.10\% \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM$^*$} & \textbf{204.80\%} & \textbf{404.90\%} \\ \cline{2-5} 
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{4}{*}{Alex}} & \multicolumn{1}{c|}{PGM} & 20.21\% & 2455.20\% \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGM$^*$} & \textbf{160.40\%} & \textbf{254.71\%} \\ \cline{3-3}
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 0.23\% & 321.30\% \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM$^*$} & \textbf{120.40\%} & \textbf{199.50\%} \\ \hline
% \multicolumn{1}{c|}{\multirow{8}{*}{Cifar100}} & \multicolumn{1}{c|}{\multirow{4}{*}{Inc}} & \multicolumn{1}{c|}{PGM} & 276.13\% & 2895.49\% \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGM$^*$} & \textbf{520.68\%} & \textbf{838.74\%} \\ \cline{3-5} 
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 15.29\% & 1128.37\% \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM$^*$} & \textbf{387.93\%} & \textbf{687.18\%} \\ \cline{2-3}
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{4}{*}{Alex}} & \multicolumn{1}{c|}{PGM} & 94.39\% & 2586.88\% \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGM$^*$} & \textbf{344.57\%} & \textbf{529.25\%} \\ \cline{3-5} 
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 12.35\% & 847.94\% \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM$^*$} & \textbf{313.89\%} & \textbf{505.57\%} \\ \hline
% \multicolumn{1}{c|}{\multirow{8}{*}{SVHN}} & \multicolumn{1}{c|}{\multirow{4}{*}{Inc}} & \multicolumn{1}{c|}{PGM} & 1127.70\% & 18177.00\% \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGM$^*$} & \textbf{2015.90\%} & \textbf{6221.90\%} \\ \cline{3-3}
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 53.57\% & 4320.00\% \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM$^*$} & \textbf{67.00\%} & \textbf{4319.40\%} \\ \cline{2-5} 
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{4}{*}{Alex}} & \multicolumn{1}{c|}{PGM} & 75.60\% & 672.00\% \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{PGM$^*$} & \textbf{109.50\%} & \textbf{294.70\%} \\ \cline{3-3}
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM} & 15.00\% & 282.30\% \\
% \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{FGM$^*$} & \textbf{77.00\%} & \textbf{223.70\%} \\ \hline
% \end{tabular}
% \caption{Relative $L_2$-norm of the gradient for models with and without early stopping.$^*$ represents applying early stopping to the model.}
% \label{tab:gradient_early_stopping}
% \end{table}

% \begin{figure}[t]

%     \centering
%     \includegraphics[width=\linewidth]{NeurIPS2021/submission/Figures/Experiment_Fig/trans_3_datasets_only_test_witherrorbar.png}
%     \caption{Figure 1 including error bars}
%     \label{fig:transferability_with_error_bar}
% \end{figure}

% \iffalse
% \subsection{Numerical Results for ERM-trained substitute DNNs}
% \begin{figure}[t]
%     \centering
%     \includegraphics[width=\linewidth]{NeurIPS2021/submission/Figures/Experiment_Fig/trans_ERM.png}
%     \caption{Transferability for perturbations on ERM-trained models}
%     \label{fig:ERM-trained_models}
    
%     \centering
%     \includegraphics[width=\linewidth]{NeurIPS2021/submission/Figures/Experiment_Fig/trans_3_datasets_only_test_witherrorbar.png}
%     \caption{Figure with error bar}
%     \label{fig:transferability_with_error_bar}
% \end{figure}
% \textcolor{red}{
% Fig.~\ref{fig:ERM-trained_models} shows a draft of the attack performance of $L_2$-FGM on standard trained models. Attack error is the accuracy of the perturbations, and transferability rate follows the same definition as above. I'm not sure if this result helps because in some cases, the transferability rate gap between adversarial samples generated from training and test samples might still decrease with the increase of $\beta$. I also evaluated this by $L_2$-PGD, $L_\infty$-FGM and $L_\infty$-PGD, but this problems still exists.
% }

% \textcolor{red}{
% Fig.~\ref{fig:transferability_with_error_bar} shows the same results as in Fig.~\ref{fig:tran_L2_FGM} with error bars.}
% \fi











% Please add the following required packages to your document preamble:
% \usepackage{multirow}
% \usepackage[table,xcdraw]{xcolor}
% If you use beamer only pass "xcolor=table" option, i.e. \documentclass[xcolor=table]{beamer}

\begin{table}[htpb]
\centering
\caption{{Generalization error and transferability rates when the target and substitute models are trained using the same training set, with and without spectral normalization.}}
\resizebox{\linewidth}{!}{%
{%
\begin{tabular}{|ccccccccc|}
\hline
Dataset & Model & Method & $\beta$ & Train Acc & Test Acc & Gen. Err. & \begin{tabular}[c]{@{}c@{}}Transferability\\ Rate (VGG16)\end{tabular} & \begin{tabular}[c]{@{}c@{}}Transferability \\ Rate (R18)\end{tabular} \\ \hline
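% NOTE(review): the VGG16 rate of 0.995 in the next row is far from the R18 rate on the same row (0.108) and from every other VGG16 entry in this table; please confirm against the raw results (possible typo).
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & $\infty$ & 0.983 & 0.563 & 0.420 & 0.995 & 0.108 \\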
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM($L_2$)}} & \cellcolor[HTML]{EFEFEF}1 & \cellcolor[HTML]{EFEFEF}0.801 & \cellcolor[HTML]{EFEFEF}0.565 & \cellcolor[HTML]{EFEFEF}0.236 & \cellcolor[HTML]{EFEFEF}0.176 & \cellcolor[HTML]{EFEFEF}0.174 \\ \cline{3-3}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & $\infty$ & 0.896 & 0.498 & 0.398 & 0.129 & 0.134 \\
\multicolumn{1}{|c|}{\multirow{-4}{*}{Cifar10}} & \multicolumn{1}{c|}{\multirow{-4}{*}{Inception}} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGM($L_2$)}} & \cellcolor[HTML]{EFEFEF}1 & \cellcolor[HTML]{EFEFEF}0.757 & \cellcolor[HTML]{EFEFEF}0.544 & \cellcolor[HTML]{EFEFEF}0.213 & \cellcolor[HTML]{EFEFEF}0.183 & \cellcolor[HTML]{EFEFEF}0.183 \\ \hline
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & $\infty$ & 0.822 & 0.281 & 0.541 & 0.211 & 0.213 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM($L_2$)}} & \cellcolor[HTML]{EFEFEF}1.3 & \cellcolor[HTML]{EFEFEF}0.619 & \cellcolor[HTML]{EFEFEF}0.309 & \cellcolor[HTML]{EFEFEF}0.310 & \cellcolor[HTML]{EFEFEF}0.253 & \cellcolor[HTML]{EFEFEF}0.254 \\ \cline{3-3}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & $\infty$ & 0.669 & 0.292 & 0.377 & 0.253 & 0.254 \\
\multicolumn{1}{|c|}{\multirow{-4}{*}{Cifar100}} & \multicolumn{1}{c|}{\multirow{-4}{*}{Inception}} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGM($L_2$)}} & \cellcolor[HTML]{EFEFEF}1.3 & \cellcolor[HTML]{EFEFEF}0.559 & \cellcolor[HTML]{EFEFEF}0.310 & \cellcolor[HTML]{EFEFEF}0.249 & \cellcolor[HTML]{EFEFEF}0.285 & \cellcolor[HTML]{EFEFEF}0.286 \\ \hline
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & $\infty$ & 0.901 & 0.629 & 0.272 & 0.160 & 0.164 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{FGM($L_2$)}} & \cellcolor[HTML]{EFEFEF}1 & \cellcolor[HTML]{EFEFEF}0.813 & \cellcolor[HTML]{EFEFEF}0.667 & \cellcolor[HTML]{EFEFEF}0.146 & \cellcolor[HTML]{EFEFEF}0.262 & \cellcolor[HTML]{EFEFEF}0.267 \\ \cline{3-3}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & \multicolumn{1}{c|}{} & $\infty$ & 0.792 & 0.578 & 0.214 & 0.234 & 0.235 \\
\multicolumn{1}{|c|}{\multirow{-4}{*}{SVHN}} & \multicolumn{1}{c|}{\multirow{-4}{*}{Alexnet}} & \multicolumn{1}{c|}{\multirow{-2}{*}{PGM($L_2$)}} & \cellcolor[HTML]{EFEFEF}1 & \cellcolor[HTML]{EFEFEF}0.775 & \cellcolor[HTML]{EFEFEF}0.637 & \cellcolor[HTML]{EFEFEF}0.139 & \cellcolor[HTML]{EFEFEF}0.281 & \cellcolor[HTML]{EFEFEF}0.286 \\ \hline
\end{tabular}}}
\label{tab:same_training_set_spectral}
\end{table} 






% Please add the following required packages to your document preamble:
% \usepackage{multirow}
% \usepackage[table,xcdraw]{xcolor}
% If you use beamer only pass "xcolor=table" option, i.e. \documentclass[xcolor=table]{beamer}
\begin{table}[htbp]
\centering
\caption{{Generalization error and transferability rates when the target and substitute models are trained using the same training set, with and without early stopping.}}
\resizebox{\linewidth}{!}{%
{%
\begin{tabular}{|cccccccc|}
\hline
Dataset & Model & Attack & Train Acc & Test Acc & Gen. Err. & \begin{tabular}[c]{@{}c@{}}Transferability\\ Rate (VGG16)\end{tabular} & \begin{tabular}[c]{@{}c@{}}Transferability\\ Rate (R18)\end{tabular} \\ \hline
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & FGM($L_2$) & 0.984 & 0.553 & 0.431 & 0.010 & 0.108 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{Inception}} & \cellcolor[HTML]{EFEFEF}FGM($L_2$)-ES & \cellcolor[HTML]{EFEFEF}0.622 & \cellcolor[HTML]{EFEFEF}0.583 & \cellcolor[HTML]{EFEFEF}0.039 & \cellcolor[HTML]{EFEFEF}0.139 & \cellcolor[HTML]{EFEFEF}0.143 \\ \cline{2-2}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & FGM($L_2$) & 1.000 & 0.525 & 0.475 & 0.084 & 0.094 \\
\multicolumn{1}{|c|}{\multirow{-4}{*}{Cifar10}} & \multicolumn{1}{c|}{\multirow{-2}{*}{Alexnet}} & \cellcolor[HTML]{EFEFEF}FGM($L_2$)-ES & \cellcolor[HTML]{EFEFEF}0.667 & \cellcolor[HTML]{EFEFEF}0.548 & \cellcolor[HTML]{EFEFEF}0.119 & \cellcolor[HTML]{EFEFEF}0.132 & \cellcolor[HTML]{EFEFEF}0.137 \\ \hline
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & FGM($L_2$) & 0.939 & 0.290 & 0.649 & 0.224 & 0.263 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{Inception}} & \cellcolor[HTML]{EFEFEF}FGM($L_2$)-ES & \cellcolor[HTML]{EFEFEF}0.429 & \cellcolor[HTML]{EFEFEF}0.340 & \cellcolor[HTML]{EFEFEF}0.088 & \cellcolor[HTML]{EFEFEF}0.261 & \cellcolor[HTML]{EFEFEF}0.315 \\ \cline{2-2}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & FGM($L_2$) & 0.912 & 0.248 & 0.664 & 0.246 & 0.271 \\
\multicolumn{1}{|c|}{\multirow{-4}{*}{Cifar100}} & \multicolumn{1}{c|}{\multirow{-2}{*}{Alexnet}} & \cellcolor[HTML]{EFEFEF}FGM($L_2$)-ES & \cellcolor[HTML]{EFEFEF}0.338 & \cellcolor[HTML]{EFEFEF}0.307 & \cellcolor[HTML]{EFEFEF}0.032 & \cellcolor[HTML]{EFEFEF}0.283 & \cellcolor[HTML]{EFEFEF}0.306 \\ \hline
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & FGM($L_2$) & 0.997 & 0.621 & 0.376 & 0.130 & 0.136 \\
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{\multirow{-2}{*}{Inception}} & \cellcolor[HTML]{EFEFEF}FGM($L_2$)-ES & \cellcolor[HTML]{EFEFEF}0.883 & \cellcolor[HTML]{EFEFEF}0.667 & \cellcolor[HTML]{EFEFEF}0.216 & \cellcolor[HTML]{EFEFEF}0.226 & \cellcolor[HTML]{EFEFEF}0.224 \\ \cline{2-2}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{} & FGM($L_2$) & 0.948 & 0.578 & 0.370 & 0.171 & 0.160 \\
\multicolumn{1}{|c|}{\multirow{-4}{*}{SVHN}} & \multicolumn{1}{c|}{\multirow{-2}{*}{Alexnet}} & \cellcolor[HTML]{EFEFEF}FGM($L_2$)-ES & \cellcolor[HTML]{EFEFEF}0.685 & \cellcolor[HTML]{EFEFEF}0.628 & \cellcolor[HTML]{EFEFEF}0.057 & \cellcolor[HTML]{EFEFEF}0.250 & \cellcolor[HTML]{EFEFEF}0.253 \\ \hline
\end{tabular}}}
\label{tab:same_training_set_es}
\end{table}





\begin{table}[htbp]
\centering
\caption{{Transferability rates when adversarial examples are generated by different methods and from different substitute models.}}
\resizebox{\linewidth}{!}{%
\begin{tabular}{|cccccc|}
\hline
Substitute DNN & Regularization Method of DNN & Target DNN & \begin{tabular}[c]{@{}c@{}}Transferability \\ Rate of FGM\end{tabular} & \begin{tabular}[c]{@{}c@{}}Transferability \\ Rate of I-FGSM\end{tabular} & \begin{tabular}[c]{@{}c@{}}Transferability \\ Rate of PGD\end{tabular} \\ \hline
\multicolumn{1}{|c|}{\multirow{3}{*}{\begin{tabular}[c]{@{}c@{}}PGD-trained \\ Inception\end{tabular}}} & \multicolumn{1}{c|}{spectral normalization} & \multicolumn{1}{c|}{\multirow{3}{*}{ERM-trained VGG16}} & 0.134 & 0.148 & 0.151 \\ \cline{2-2}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{early stopping} & \multicolumn{1}{c|}{} & 0.172 & 0.177 & 0.198 \\ \cline{2-2}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{None} & \multicolumn{1}{c|}{} & 0.092 & 0.097 & 0.106 \\ \hline
\multicolumn{1}{|c|}{\multirow{3}{*}{\begin{tabular}[c]{@{}c@{}}ERM-trained \\ Inception\end{tabular}}} & \multicolumn{1}{c|}{spectral normalization} & \multicolumn{1}{c|}{\multirow{3}{*}{ERM-trained VGG16}} & 0.071 & 0.070 & 0.062 \\ \cline{2-2}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{early stopping} & \multicolumn{1}{c|}{} & 0.088 & 0.099 & 0.113 \\ \cline{2-2}
\multicolumn{1}{|c|}{} & \multicolumn{1}{c|}{None} & \multicolumn{1}{c|}{} & 0.030 & 0.044 & 0.024 \\ \hline
\end{tabular}}
\label{tab:transferability_attack_methods}
\end{table}

{\small{
\bibliography{wang_695}
}}

\end{document}
