%\documentclass[twoside]{article}

\documentclass[accepted]{uai2023}

% \documentclass[11pt]{article}


% % Fonts
% \usepackage{palatino}
% \linespread{1.05}			% Palatino needs more leading (space between lines)
% \usepackage{times}
% %\usepackage{mathpazo}		% use [sc] option for more line spacing
% %\usepackage[T1]{fontenc}
% % End Fonts

% \usepackage[letterpaper,margin=1in]{geometry}
% %\usepackage[margin=1.0in]{geometry}

% \setlength{\parskip}{4pt}
% \setlength{\parindent}{0pt}

\usepackage[utf8]{inputenc} % allow utf-8 input
\usepackage[T1]{fontenc}    % use 8-bit T1 fonts
\usepackage{hyperref}       % hyperlinks
\usepackage{amsfonts,amsmath,amssymb,amsthm}       % blackboard math symbols
\usepackage{array}
%\usepackage{subcaption}
\usepackage{url}            % simple URL typesetting
\usepackage{booktabs}       % professional-quality tables
\usepackage{nicefrac}       % compact symbols for 1/2, etc.
\usepackage{microtype}      % microtypography
\usepackage[dvipsnames]{xcolor}         % colors
\usepackage{bm}

%\usepackage{natbib}
\usepackage[round]{natbib}
\renewcommand{\bibname}{References}
\renewcommand{\bibsection}{\subsubsection*{\bibname}}

\usepackage{graphicx}
\usepackage{subfigure}
%\usepackage{booktabs}
\usepackage{hyperref}
% Attempt to make hyperref and algorithmic work together better:
%\newcommand{\theHalgorithm}{\arabic{algorithm}}

\usepackage{bigints}
\usepackage{amssymb,amsopn,algorithm,algorithmic,float,bbm,bm,enumerate,color,multirow,gensymb}
%\usepackage{algpseudocode} %
%\renewcommand{\algorithmicrequire}{\textbf{Input:}}
%\renewcommand{\algorithmicensure}{\textbf{Output:}}

\usepackage{epsfig,subfigure,graphicx}

\usepackage{comment}
%\usepackage[dvipsnames]{xcolor}
%\usepackage{authblk}
\usepackage{afterpage}
\usepackage{thmtools,thm-restate}

\input{notation}

\usepackage[dvipsnames]{xcolor}
%\usepackage[]{color-edits}
\usepackage[suppress]{color-edits}
% Per-author edit colours, registered as (initials, colour) pairs via
% \addauthor. The abstract uses \abedit and \pcedit, which presumably come
% from these registrations -- confirm against the color-edits package.
\addauthor{ab}{violet}
\addauthor{abg}{green}
\addauthor{pc}{red}
\addauthor{mb}{magenta}
\addauthor{lz}{teal}

% Pedro's highlights: \pc{<text>} typesets <text> in red.
\newcommand{\pc}[1]{\textcolor{red}{#1}}
%\newcommand\ab[1]{\textcolor{blue}{#1}} %Arindam's highlights


% \title{Deep Learning Optimization with Smooth Activations:\\Linear Width Suffices for the NTK condition}


% The \author macro works with any number of authors. There are two commands
% used to separate the names and addresses of multiple authors: \And and \AND.
%
% Using \And between authors leaves it to LaTeX to determine where to break the
% lines. Using \AND forces a line break at that point. So, if LaTeX puts 3 of 4
% authors names on the first line, and the last on the second line, try using
% \AND instead of \And before the third author name.
%\bibliographystyle{plainnat}
% Typeset the bibliography heading as an unnumbered \subsubsection titled
% "References".
% NOTE(review): the preamble already overrides \bibsection once (right after
% natbib is loaded, via \bibname); this second override looks redundant.
\renewcommand{\bibsection}{\subsubsection*{References}}


%\title{Deep Learning Optimization with Smooth Activations:\\Linear Width Suffices}
% Paper title.
\title{Neural Tangent Kernel at Initialization: Linear Width Suffices}

%\aistatstitle{Deep Learning Optimization with Smooth Activations:\\Linear Width Suffices for the NTK condition}

% Authors and affiliations. Each author name is a mailto link with a
% pre-filled subject line. The bracketed number presumably ties an author to
% the \affil entry carrying the same number -- confirm against the uai2023 class.
\author[1]{\href{mailto:<arindamb@illinois.edu>?Subject=Your UAI 2023 paper}
{Arindam Banerjee}{}}
\author[1]{\href{mailto:<pacisne@gmail.com>?Subject=Your UAI 2023 paper}{Pedro Cisneros-Velarde}{}}
\author[2]{\href{mailto:<l5zhu@ucsd.edu>?Subject=Your UAI 2023 paper}{Libin Zhu}{}}
\author[2]{\href{mailto:<mbelkin@ucsd.edu>?Subject=Your UAI 2023 paper}{Mikhail Belkin}{}}
\affil[1]{University of Illinois at Urbana-Champaign}
\affil[2]{University of California, San Diego}
% Department of Computer Science\\
% Halicioğlu Data Science Institute\\
%University of California, San Diego\\



% \author{%
% %\hspace*{-10mm}
%   Arindam Banerjee \\ 
%   Department of Computer Science\\
%   University of Illinois  Urbana-Champaign\\
%   \texttt{arindamb@illinois.edu} \\
%   \and
%   Pedro Cisneros-Velarde\\
%     Department of Computer Science\\
%     \hspace*{-5mm} University of Illinois Urbana-Champaign\\
%     \texttt{pacisne@gmail.com}
%   \and
%     Libin Zhu\\
%     Department of Computer Science\\
%     University of California, San Diego\\
%     \texttt{l5zhu@ucsd.edu}
%     \and
%     Mikhail Belkin\\ 
%     Halicioglu Data Science Institute\\
%     University of California, San Diego\\
%     \texttt{mbelkin@ucsd.edu}
% } 

% \maketitle

\begin{document}
\maketitle

\begin{abstract}
In this paper we study the problem of lower bounding the minimum eigenvalue of the neural tangent kernel (NTK) at initialization, an important quantity for the theoretical analysis of training in neural networks. We consider feedforward neural networks with smooth activation functions.
\abedit{Without any distributional assumptions on the input,} we present a novel result: we show that 
for suitable initialization variance, $\widetilde{\Omega}(n)$ width, where $n$ is the number of training samples, suffices to ensure that the NTK at initialization is positive definite, improving prior results for smooth activations under our setting. 
\pcedit{Prior to our work, the sufficiency of linear width had only been shown for networks with ReLU activation functions, while sublinear width had been shown to suffice for smooth networks only under additional conditions on the distribution of the data.} \abedit{The technical challenge in the analysis stems from the layerwise inhomogeneity of smooth activation functions, and we handle this challenge using a \emph{generalized} Hermite series expansion of such activations.}
\end{abstract}


% Main body: each section declares its heading and label here and pulls its
% text from a file under sec/.
\section{INTRODUCTION}
\label{sec:arXiv_intro}
\input{sec/arx1_intro}

\section{RELATED WORK}
\label{sec:arXiv_related}
\input{sec/arx1_related}

\section{PROBLEM SETUP: DEEP LEARNING WITH SMOOTH ACTIVATIONS}
\label{sec:arXiv_dlopt}
\input{sec/arx1_dlopt}



% \section{Optimization Guarantees with Restricted Strong Convexity}
% \label{sec:arXiv_rsc-opt}
% \input{arXiv1/sec/arx1_rsc-opt}

\section{NEURAL TANGENT KERNEL AT INITIALIZATION}
\label{sec:arXiv_ntk}
\input{sec/arx1_ntk}

%\section{GLOBAL CONVERGENCE BASED ON NEURAL TANGENT KERNEL}
%\label{sec:arXiv_conv}
%\input{sec/arxiv1_conv}

% \section{RSC Condition: Experimental Results}
% \label{sec:arXiv_expt}
% \input{arXiv1/sec/arx1_expt}
% NOTE(review): unlike the other inputs, this one has no \section/\label in
% this file; presumably sec/arx1_exp supplies its own heading -- confirm.
\input{sec/arx1_exp}

\section{CONCLUSIONS}
\label{sec:arXiv_conc}
\input{sec/arx1_conc}

%\noindent {\bf Acknowledgements.} The research was supported by NSF grants IIS 21-31335, OAC 21-30835, DBI 20-21898, and a C3.ai research award. 

%\bibliographystyle{plainurl}
% Bibliography: formatted with the plainnat style; entries are read from the
% "biblio" database.
\bibliographystyle{plainnat}
%\bibliographystyle{alpha}
%\bibliographystyle{apalike}
\bibliography{biblio}


%\title{NTK at Initialization: Linear Width Suffices}

%\onecolumn
%\appendix
%\maketitle



%\section{NEURAL TANGENT KERNEL AT INITIALIZATION}
%\label{app:arXiv_ntk}
%\input{app/arx1app_ntk}

\end{document}
