%\documentclass[twoside]{article}

% \usepackage[margin=1in]{geometry}
\documentclass[accepted]{uai2023}
\usepackage[utf8]{inputenc}

\usepackage{algorithm}
\usepackage{algorithmic}



% For theorems and such
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{mathtools}
\usepackage{amsthm}

%\PassOptionsToPackage{numbers, compress}{natbib}
\PassOptionsToPackage{}{natbib}
\usepackage[round]{natbib}
\renewcommand{\bibname}{References}
\renewcommand{\bibsection}{\subsubsection*{\bibname}}
%\usepackage{hyperref}


\usepackage{enumitem}

\usepackage[dvipsnames]{xcolor}
\usepackage[]{color-edits}

% ---------------------------------------------------------------
% Author annotation helpers: wrap text in a colour for review notes.
% ---------------------------------------------------------------
\newcommand{\pc}[1]{\textcolor{red}{#1}} %Pedro's highlights
\newcommand{\bc}[1]{\textcolor{blue}{#1}} %Shawn's highlights

% ---------------------------------------------------------------
% Calligraphic symbols.
% \S, \L and \P are overridden with \renewcommand because those names
% are already taken before this point.
% NOTE(review): after these overrides the original text-mode meanings of
% \S, \L and \P are no longer reachable -- confirm nothing (e.g. author
% names in the bibliography) relies on them.
% ---------------------------------------------------------------
\newcommand{\A}{\mathcal{A}} % defined exactly once (a duplicate \renewcommand with the same body was dropped)
\renewcommand{\S}{\mathcal{S}}
\newcommand{\G}{\mathcal{G}}
\newcommand{\F}{\mathcal{F}}
\newcommand{\V}{\mathcal{V}}
\renewcommand{\L}{\mathcal{L}}
\newcommand{\N}{\mathcal{N}}
\renewcommand{\P}{\mathcal{P}}
% \M prints the two calligraphic letters "MG" side by side; the
% single-letter variant is kept below for reference.
%\newcommand{\M}{\mathcal{M}}
\newcommand{\M}{\mathcal{M}\mathcal{G}}

% ---------------------------------------------------------------
% Blackboard-bold symbols. \Pe is the blackboard-bold P, distinct from
% the calligraphic \P above.
% ---------------------------------------------------------------
\newcommand{\R}{\mathbb{R}}
\newcommand{\E}{\mathbb{E}}
\newcommand{\Pe}{\mathbb{P}}
\newcommand{\I}{\mathbb{I}}
\newcommand{\Z}{\mathbb{Z}}

% ---------------------------------------------------------------
% Delimiters, upright labels and over/under-lined symbols.
% ---------------------------------------------------------------
% \norm{x}: double vertical bars that grow with their content.
\newcommand{\norm}[1]{\left\|#1\right\|}
% Upright "tr" and "br" labels.
\newcommand{\tr}{\mathrm{tr}}
\newcommand{\bre}{\mathrm{br}}
% Over-/under-lined w and V.
\newcommand{\wo}{\overline{w}}
\newcommand{\wu}{\underline{w}}
\newcommand{\Vo}{\overline{V}}
\newcommand{\Vu}{\underline{V}}

% ---------------------------------------------------------------
% Named operators (upright, with operator spacing).
% ---------------------------------------------------------------
\newcommand{\round}{\operatorname{round}}
\newcommand{\dist}{\operatorname{dist}}
% Starred form: in display style, sub/superscripts are set as limits.
\DeclareMathOperator*{\diag}{diag}


%
%\usepackage[title]{appendix}


% Theorem-like environments (amsthm).
% Each environment is declared with its own counter, reset at every
% \section, so results are numbered <section>.<n> independently per kind
% (a theorem and a lemma in the same section can carry the same number).
\newtheorem{theorem}{Theorem}[section]
\newtheorem{definition}{Definition}[section]
\newtheorem{corollary}{Corollary}[section]
\newtheorem{lemma}{Lemma}[section]
\newtheorem{proposition}{Proposition}[section]
\newtheorem{assumption}{Assumption}[section]
\newtheorem{remark}{Remark}[section]


% Shawn: adding some packages
% let me borrow some shorthands
\usepackage{smile}
% Upright text label "KL" used for the KL divergence
\newcommand{\KL}{\textrm{KL}}
% Bibliography style used for the reference list
\bibliographystyle{plainnat}

% Paper title, stored in a macro and passed to \title below
\newcommand{\titolo}{Finite-sample Guarantees for Nash Q-learning with Linear Function Approximation}
% Full name and abbreviation of the algorithm
\newcommand{\algname}{Nash Q-learning with optimistic value iteration}
\newcommand{\algbrev}{NQOVI}

% Typeset the bibliography heading as an unnumbered \subsubsection titled
% "References". A \bibsection override also appears earlier in the
% preamble; this later one is the definition in effect.
\renewcommand{\bibsection}{\subsubsection*{References}}


% Title text comes from the \titolo macro defined in the preamble.
\title{
\titolo
}
% A more catchy title perhaps:
%\title{
%Unearthing Nash Q-learning: Finite-sample Guarantees for an old problem
%}
% Each author name links to a mailto: address with a pre-filled subject;
% the bracketed number matches the \affil entry with the same number.
\author[1]{\href{mailto:<pacisne@gmail.com>?Subject=Your UAI 2023 paper}{Pedro Cisneros-Velarde}{}}
% Domain corrected from the misspelt "standord.edu" to "stanford.edu".
\author[2]{\href{mailto:<sanmi@cs.stanford.edu>?Subject=Your UAI 2023 paper}
{Sanmi Koyejo}{}}
\affil[1]{University of Illinois at Urbana-Champaign}
\affil[2]{Stanford University, Google Research}
%\date{January 2022}

% Add authors
%\author[1]{\href{mailto:<jj@example.edu>?Subject=Your UAI 2023 paper}{Jane~J.~von~O'L\'opez}{}}
%\author[1]{Harry~Q.~Bovik}
%\author[3,1]{Further~Coauthor}
%\affil[3]{%
%    Another Affiliation\\
%    Address\\
%    …
%  }



\begin{document}
\maketitle
% \externaldocument{supplement}
%\title{aifarms-project (name-tbd)}
%\title{Parallel Processing in Reinforcement Learning: \pc{Online and Reward-Free Cases} \pc{\emph{(khe name can be changed organically as we add more stuff to the project!)}} \bc{The Power of Parallel Exploration in Reinforcement Learning: a Regret Perspective?}}



\begin{abstract}
Nash Q-learning may be considered one of the first and best-known algorithms in multi-agent reinforcement learning (MARL) for learning policies that constitute a Nash equilibrium of an underlying general-sum Markov game. 
Its original proof provided asymptotic guarantees and was for the tabular case. Recently, finite-sample guarantees have been provided 
using more modern RL techniques for the tabular case.  
Our work analyzes Nash Q-learning 
using 
linear function approximation -- a representation regime introduced when the state space is large or continuous -- and provides finite-sample guarantees that indicate its sample efficiency. We find that the obtained performance nearly matches an existing efficient result for single-agent RL under the same representation and has a polynomial gap when compared to the best-known result for the tabular case. 
\end{abstract}

%================
% Opening sections, read from intro.tex and literature_review.tex
\input{intro}
\input{literature_review}
%
\subsection{Notation} 
Let $\|\cdot\|$ be the Euclidean norm, and let $\|v\|_A = \sqrt{v^TAv}$ for a positive semidefinite matrix $A$. %Let $\preceq, \succeq$ be the matrix Loewner order. 
Let $[k]=\{1, 2, \dots, k\}$ for a positive integer $k$. Let $I_m$ be the $m \times m$ identity matrix. Let $\Delta(\A)$ be the probability simplex defined on a given finite set $\A$. 
%
%We define the clipping operator as $\Pi_{[0,a]}[b]:= \min\{b, a\}^+ = \min\{\max\{b, 0\}, a\}$ for any $a>0$ and $b\in\R$. 
Given the big-O complexity notation $\cO$, we use $\tilde{\cO}$ to hide polylogarithmic terms in the quantities of interest.

%==================================================
%==================================================

% Remaining sections, each kept in its own .tex file and included in order.
\input{prelim}

\input{proposed}

\input{proving}

\input{conclusion}

% Reference list, built from the NashQ.bib database
\bibliography{NashQ}

%\input{supplement}

\end{document}
