% if you need to pass options to natbib, use, e.g.:
%     \PassOptionsToPackage{numbers, compress}{natbib}
% before loading neurips_2021

% ready for submission
% \usepackage[nonatbib]{neurips_2021}

% to compile a preprint version, e.g., for submission to arXiv, add the
% [preprint] option:
%     \usepackage[preprint]{neurips_2021}

% to compile a camera-ready version, add the [final] option, e.g.:
%     \usepackage[final]{neurips_2021}

% to avoid loading the natbib package, add option nonatbib:
%    \usepackage[nonatbib, preprint]{neurips_2021}

\usepackage[algo2e, linesnumbered, vlined,ruled]{algorithm2e}
\usepackage[utf8]{inputenc} % allow utf-8 input
\usepackage[T1]{fontenc}    % use 8-bit T1 fonts
\usepackage{hyperref}       % hyperlinks
\usepackage{url}            % simple URL typesetting
\usepackage{booktabs}       % professional-quality tables
\usepackage{amsfonts}       % blackboard math symbols
\usepackage{nicefrac}       % compact symbols for 1/2, etc.
\usepackage{microtype}      % microtypography
\usepackage{mathtools}
\usepackage{natbib}
\usepackage{xcolor}
\usepackage{color}

%\usepackage[ruled]{algorithm2e}
%\usepackage{algorithmic}

\usepackage{amsmath}
\usepackage{mathtools}
\usepackage{amsthm}
\usepackage{amssymb}

% Number lemmas by appendix section (Lemma A.1, ...).
% The \counterwithin call is wrapped in \AtAppendix, so it is registered here
% and not executed at this point; the `lemma` environment it refers to is
% declared further down in this file.
\usepackage{chngcntr}
\usepackage{apptools}
\AtAppendix{\counterwithin{lemma}{section}}

% Calligraphic capitals: \calX expands to {\mathcal{X}}.
% The outer brace pair makes each symbol a single group.
\newcommand*\calA{{\mathcal{A}}}
\newcommand*\calB{{\mathcal{B}}}
\newcommand*\calC{{\mathcal{C}}}
\newcommand*\calD{{\mathcal{D}}}
\newcommand*\calE{{\mathcal{E}}}
\newcommand*\calF{{\mathcal{F}}}
\newcommand*\calI{{\mathcal{I}}}
\newcommand*\calJ{{\mathcal{J}}}
\newcommand*\calK{{\mathcal{K}}}
\newcommand*\calL{{\mathcal{L}}}
\newcommand*\calM{{\mathcal{M}}}
\newcommand*\calN{{\mathcal{N}}}
\newcommand*\calP{{\mathcal{P}}}
\newcommand*\calQ{{\mathcal{Q}}}
\newcommand*\calR{{\mathcal{R}}}
\newcommand*\calS{{\mathcal{S}}}
\newcommand*\calT{{\mathcal{T}}}
\newcommand*\calU{{\mathcal{U}}}
\newcommand*\calX{{\mathcal{X}}}
\newcommand*\calZ{{\mathcal{Z}}}
% Estimators and named quantities.
% \wh and \E are defined further down in this file; they are only expanded
% when these macros are used, so the definition order does not matter.
% \mathcal{R} replaces \cal{R}: \cal is an obsolete font switch, not a command
% that takes an argument.
\newcommand{\avgR}{\wh{\mathcal{R}}}
\newcommand{\ips}{\wh{r}}
\newcommand{\whpi}{\wh{\pi}}
\newcommand{\whE}{\wh{\E}}
\newcommand{\whV}{\wh{V}}
% Upright text labels. \textnormal replaces the obsolete \text{\rm ...} form,
% which is not defined in every document class. It selects the document's
% default font family, which is the roman family unless \familydefault has
% been changed.
\newcommand{\Reg}{\textnormal{Reg}}
\newcommand{\whReg}{\wh{\textnormal{Reg}}}
\newcommand{\flg}{\textnormal{flag}}
\newcommand{\one}{\boldsymbol{1}}
\newcommand{\p}{\prime}
\newcommand{\evt}{\textsc{Event}}
\newcommand{\unif}{\textnormal{Unif}}
\newcommand{\KL}{\textnormal{KL}}
\newcommand{\V}{\textnormal{Var}}
\newcommand{\frakm}{\mathfrak{m}}
\newcommand{\frakn}{\mathfrak{n}}
% \tc{<color>}{<text>}: short alias for \textcolor.
\newcommand{\tc}[2]{\textcolor{#1}{#2}}

% Named operators. All are declared with the starred form, so sub/superscripts
% are set as limits in display style.
\DeclareMathOperator*{\diag}{diag}
\DeclareMathOperator*{\argmin}{argmin}
\DeclareMathOperator*{\argmax}{argmax}
\DeclareMathOperator*{\spn}{sp}
%\DeclareMathOperator*{\arginf}{arginf}
%\DeclareMathOperator*{\argsup}{argsup}
%\DeclareMathOperator*{\range}{range}
%\DeclareMathOperator*{\mydet}{det_{+}}
% \abs{x}: absolute value with fixed-size bars; \abs*{x} auto-sizes them.
\DeclarePairedDelimiter\abs{\lvert}{\rvert}
%\DeclarePairedDelimiter\bigabs{\big\lvert}{\big\rvert}
%\DeclarePairedDelimiter\ceil{\lceil}{\rceil}
%\DeclarePairedDelimiter\floor{\lfloor}{\rfloor}
% \bigceil{x} / \bigfloor{x}: auto-sized ceiling and floor.
% These are plain macros on purpose. \DeclarePairedDelimiter expects bare
% delimiters and wraps each one in its own \mathopen{...}/\mathclose{...}
% group, so passing \left\lceil and \right\rceil to it separates \left from
% its \right and every use fails to compile.
\newcommand{\bigceil}[1]{\left\lceil #1 \right\rceil}
\newcommand{\bigfloor}[1]{\left\lfloor #1 \right\rfloor}

% \eat{<anything>}: takes one argument and expands to nothing.
\newcommand\eat[1]{}


%%%%% colors
% \blue{<text>} and \red{<text>}: typeset the argument in the named color.
\newcommand\blue[1]{\textcolor{blue}{#1}}
\newcommand\red[1]{\textcolor{red}{#1}}

%%%%  brackets
% Delimiter helpers. Macros written with \left...\right scale the delimiters
% to their content; \inners uses fixed-size angle brackets.
\newcommand{\inner}[2]{\left\langle #1, #2 \right\rangle}
\newcommand{\inners}[2]{\langle #1, #2\rangle}
% NOTE(review): \llangle and \rrangle are not defined anywhere in the visible
% part of this file -- confirm that a package providing them is loaded before
% \minimax is used.
\newcommand{\minimax}[1]{\left\llangle #1 \right\rrangle}
% Auto-sized round, square, curly, double-bar (norm) and single-bar brackets.
\newcommand{\rbr}[1]{\left(#1\right)}
\newcommand{\sbr}[1]{\left[#1\right]}
\newcommand{\cbr}[1]{\left\{#1\right\}}
\newcommand{\nbr}[1]{\left\|#1\right\|}
\newcommand{\abr}[1]{\left|#1\right|}
% Asymptotic notation built on \order, \otil and \lorder, which are defined
% further down in this file. The capitalised variants (\bigO, \tilO, \lowO)
% auto-size the parentheses; the lowercase ones use plain parentheses.
\newcommand{\bigO}[1]{\order\left( #1 \right)}
\newcommand{\tilO}[1]{\otil\left( #1 \right)}
\newcommand{\lowO}[1]{\lorder\left( #1 \right)}
\newcommand{\bigo}[1]{\order( #1 )}
\newcommand{\tilo}[1]{\otil( #1 )}
\newcommand{\lowo}[1]{\lorder( #1 )}
% Ceiling and floor as mathtools paired delimiters (fixed size by default,
% starred form auto-sizes).
\DeclarePairedDelimiter\ceil{\lceil}{\rceil}
\DeclarePairedDelimiter\floor{\lfloor}{\rfloor}

%%% Notations
% Problem-specific symbols. Macros wrapped in \ensuremath can be used in both
% text and math mode; the others are math-mode only.
\newcommand{\Tmax}{\ensuremath{T_{\max}}}
\newcommand{\smax}{\ensuremath{s_{\max}}}
\newcommand{\T}{\ensuremath{T_\star}}
\newcommand{\B}{B_\star}
\newcommand{\cmin}{\ensuremath{c_{\min}}}
\newcommand{\cmininv}{\ensuremath{c^{-1}_{\min}}}
% \mathrm{...} replaces the obsolete {\rm ...} switch (same font in math mode,
% but also available in classes that no longer define \rm).
\newcommand{\propers}{\ensuremath{\Pi_{\mathrm{proper}}}}
% Small-caps names for terms of a decomposition.
\newcommand{\deviation}{\textsc{Deviation}}
\newcommand{\dev}{\textsc{Dev}}
\newcommand{\reg}{\textsc{Reg}}
\newcommand{\bias}{\textsc{Bias}}
\newcommand{\var}{\textsc{Var}}
\newcommand{\err}{\textsc{Err}}
\newcommand{\exreg}{\textsc{ExReg}}
\newcommand{\bernoulli}{\textrm{Bernoulli}}
\newcommand{\aux}{\chi}
\newcommand{\pich}{\Phi}
% \PSD is an alias of \propers.
\newcommand{\PSD}{\propers}
\newcommand{\SA}{\Gamma}
\newcommand{\qfeat}{\phi} % (1+\lambda h)q
\newcommand{\hatqf}{\widehat{\qfeat}}
\newcommand{\hatx}{\widehat{x}}
\newcommand{\regz}{\psi} % regularizer
\newcommand{\sfast}{s_{\mathrm{fast}}}
\newcommand{\afast}{a_{\mathrm{fast}}}

% Notations
% NOTE(review): \sp already has a meaning before this line (hence
% \renewcommand); in the LaTeX kernel it is an alias for the superscript
% character. The override is kept because the document uses \sp for the span
% of v^\star -- confirm that no loaded package relies on the original meaning.
\renewcommand{\sp}{\text{sp}(v^\star)}
\newcommand{\optJ}{J^{\star}}
\newcommand{\ssp}{\texttt{PSRL-SSP}}
\newcommand{\optV}{V^{\star}}
\newcommand{\optQ}{Q^{\star}}
\newcommand{\sumt}{\sum_{t=1}^T}
\newcommand{\hatQ}{\widehat{Q}}
\newcommand{\hatV}{\widehat{V}}
% \textnormal replaces the obsolete \text{\rm ...} form; it selects the
% document's default (normally roman) upright font.
\newcommand{\refV}{V^{\textnormal{ref}}}
\newcommand{\RefV}{V^{\textnormal{REF}}}
\newcommand{\tilV}{\widetilde{V}}
\newcommand{\tilmu}{\widetilde{\mu}}
\newcommand{\tiltheta}{\widetilde{\theta}}
\newcommand{\hatthetal}{\widehat{\theta}_\ell}
% The subscripts are braced explicitly: an unbraced `_\text{...}` only works
% because of how \text happens to expand.
\newcommand{\sinit}{s_{\text{init}}}
\newcommand{\Tnew}{T_{\text{new}}}

%%% abbreviation
% Sums over states and actions. The macros with an optional argument take the
% summation index as that argument (default shown in the second bracket).
\newcommand*\sums{\sum_{s\in\calS}}
\newcommand*\sumsp{\sum_{s'\in\calS^+}}
\newcommand*\sumsdp{\sum_{s''\in\calS^+}}
\newcommand*\suma{\sum_{a\in\calA_s}}
\newcommand*\sumap{\sum_{a'\in\calA}}
\newcommand\sumsaf[1][s, a]{\sum_{(#1)\in\SA}}
\newcommand\sumtilsaf[1][s, a]{\sum_{(#1)\in\tilSA}}
\newcommand\sumsa[1][s, a]{\sum_{(#1)}}
\newcommand*\sumsah{\sum_{(s, a), h}}
\newcommand\sumu[1][(s, a), s', h]{\sum_{#1\in\unk}}
% Sums over steps, episodes, epochs and intervals.
\newcommand*\sumh{\sum_{h=1}^H}
\newcommand*\sumhp{\sum_{h=1}^{H'}}
\newcommand*\sumk{\sum_{k=1}^K}
\newcommand*\suml{\sum_{\ell = 1}^{L_M}}
\newcommand*\summ{\sum_{m = 1}^{M}}
\newcommand*\sumtl{\sum_{t=t_\ell}^{t_{\ell+1}-1}}
\newcommand*\sumtm{\sum_{t=t_m}^{t_{m+1}-1}}
\newcommand*\sumtmplus{\sum_{t = t_m+1}^{t_{m+1}-1}}
% Indicator shorthands built on \one.
\newcommand*\onetnew{\one_{\cbr{t+1 \notin \Tnew}}}
\newcommand*\oneomega{\one_{\Omega^\ell_{s_t, a_t}}}
\newcommand*\onekm{\one_{\{k(m) \leq K\}}}
% Upright text labels.
\newcommand*\ERR{\textrm{ERR}}
\newcommand*\BIAS{\textrm{BIAS}}
\newcommand*\REG{\textrm{REG}}
\newcommand*\const{\textrm{Constant}}
% Hatted symbols.
\newcommand*\hatb{\widehat{b}}
\newcommand*\hatc{\widehat{c}}
\newcommand*\hatl{\widehat{\ell}}
\newcommand*\hatq{\widehat{q}}
\newcommand*\hatJ{\widehat{J}}
\newcommand*\hatT{\widehat{T}}
\newcommand*\hatP{\widehat{P}}
% Quantities indexed by \ell.
\newcommand*\thetalm{\theta_{\ell}}
\newcommand*\Alsa{A_\ell(s, a)}
\newcommand*\Alstat{A_\ell(s_t, a_t)}
\newcommand*\Aldef{\frac{\log (SAn_\ell^+(s, a)/\delta)}{n_\ell^+(s, a)}}
\newcommand*\Vlstat{\mathbb{V}_\ell(s_t, a_t)}
% Optimal policy and the q symbols subscripted by it (\tiloptpi is defined in
% the tilde group below).
\newcommand*\optpi{\pi^\star}
\newcommand*\optq{q_{\optpi}}
\newcommand*\tiloptq{q_{\tiloptpi}}
% Tilde symbols.
\newcommand*\tilJ{\widetilde{J}}
\newcommand*\tilT{\widetilde{T}}
\newcommand*\tilN{\widetilde{N}}
\newcommand*\tilR{\widetilde{R}}
\newcommand*\tils{\widetilde{s}}
\newcommand*\tila{\widetilde{a}}
\newcommand*\tilc{\widetilde{c}}
\newcommand*\tilf{\widetilde{f}}
\newcommand*\tilq{\widetilde{q}}
\newcommand*\tilM{\widetilde{M}}
\newcommand*\tilP{\widetilde{P}}
\newcommand*\tilA{\widetilde{\calA}}
\newcommand*\tilS{\widetilde{\calS}}
\newcommand*\tilSA{\widetilde{\SA}}
\newcommand*\tilD{\widetilde{D}}
\newcommand*\tilpi{{\widetilde{\pi}}}
\newcommand*\tiloptpi{{\widetilde{\pi}^\star}}
\newcommand*\tilDelta{{\widetilde{\Delta}}}
\newcommand*\tilPi{\widetilde{\Pi}}
% Bold counters \N and \M with their indexed variants.
\newcommand*\N{\mathbf{N}} % counter N
\newcommand*\M{\mathbf{M}}
\newcommand*\Nk{\N^+_{i_k}}
\newcommand*\Ni{\N^+_i}
\newcommand*\Nc{\N^c_k}
\newcommand*\Mk{\M^+_{i_k}}
\newcommand*\Mi{\M^+_i}
% \h{x}: \vec{h} composed (\circ) with the argument.
\newcommand\h[1]{\vec{h}\circ #1}
\newcommand*\unk{U} % unknown indices
\newcommand*\A{A}
\newcommand*\Ai{\A_i}
\newcommand*\Ak{\A_{i_k}}
\newcommand*\lfs{\calX} % loop-free state space
\newcommand*\tcs{\widetilde{\Omega}_P} % transition confidence set
% Symbols carrying an upright text superscript.
\newcommand*\Nin{N^{\text{in}}}
\newcommand*\tilNin{\widetilde{N}^{\text{in}}}
\newcommand*\Nout{N^{\text{out}}}
\newcommand*\refmu{\mu^{\text{ref}}}
\newcommand*\refsigma{\sigma^{\text{ref}}}
\newcommand*\refnu{\nu^{\text{ref}}}
% l and \check{l}; the optional argument of \lti/\clti is the subscript
% (default "t,i").
\newcommand*\cl{\check{l}}
\newcommand\lti[1][t,i]{l_{#1}}
\newcommand\clti[1][t,i]{\cl_{#1}}

% \field{X}: blackboard-bold letter; the shorthands below all go through it.
\newcommand\field[1]{\mathbb{#1}}
\newcommand*\fY{\field{Y}}
\newcommand*\fX{\field{X}}
\newcommand*\fH{\field{H}}
\newcommand*\fR{\field{R}}
\newcommand*\fQ{\field{Q}}
\newcommand*\fN{\field{N}}
\newcommand*\E{\field{E}}
\newcommand*\fV{\field{V}}
% Small-caps test names; each ends with \xspace.
\newcommand*\testblock{\textsc{EndofBlockTest}\xspace}
\newcommand*\testreplay{\textsc{EndofReplayTest}\xspace}

% \theset{x}{cond}: set-builder notation { x : cond } with auto-sized braces.
\newcommand\theset[2]{\left\{{#1}\,:\,{#2}\right\}}
% \Ind: blackboard-bold I (via \field).
\newcommand*\Ind{\field{I}}
% \eye{n}: bold I with subscript n.
\newcommand\eye[1]{{\boldsymbol{I}_{#1}}}
% \norm{x}: auto-sized double bars.
\newcommand\norm[1]{\left\|{#1}\right\|}
%\newcommand{\trace}[1]{\text{tr}\left({#1}\right)}
% \trace{A}: small-caps "tr" followed by the argument in plain parentheses.
\newcommand\trace[1]{\textsc{tr}({#1})}

% "Defined as" relation. \mathrm{def} replaces the obsolete {\rm def} switch,
% which selects the same font in math mode.
\newcommand{\defeq}{\stackrel{\mathrm{def}}{=}}
% Small-caps "sgn". \normalfont\scshape is what the obsolete \sc switch
% expands to in the standard classes, written out so the macro also works in
% classes that do not define \sc.
\newcommand{\sgn}{\mbox{\normalfont\scshape sgn}}
\newcommand{\scI}{\mathcal{I}}
\newcommand{\scO}{\mathcal{O}}
\newcommand{\scN}{\mathcal{N}}

\newcommand{\dt}{\displaystyle}
% NOTE(review): \ss already has a meaning before this line (hence
% \renewcommand); in the LaTeX kernel it is the text command for the sharp s.
% After this override, text that resolves to \ss (for instance a name
% containing that letter) would expand to \subseteq instead -- confirm that
% the document and bibliography never need the original.
\renewcommand{\ss}{\subseteq}
% Short aliases for wide accents and \varepsilon.
\newcommand{\wh}{\widehat}
\newcommand{\wt}{\widetilde}
\newcommand{\ve}{\varepsilon}
\newcommand{\hlambda}{\wh{\lambda}}
\newcommand{\yhat}{\wh{y}}

\newcommand{\hDelta}{\wh{\Delta}}
\newcommand{\hdelta}{\wh{\delta}}
% The two-element set {-1,+1}.
\newcommand{\spin}{\{-1,+1\}}

%\newcommand{\theHalgorithm}{\arabic{algorithm}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% THEOREMS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Theorem-like environments. Each \theoremstyle applies to the \newtheorem
% declarations that follow it, so the order of these lines matters.
% `theorem` is numbered within sections; proposition, corollary, definition,
% assumption and remark share the `theorem` counter.
% `lemma` has its own counter, also numbered within sections.
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{lemma}{Lemma}[section]
\newtheorem{corollary}[theorem]{Corollary}
\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{assumption}[theorem]{Assumption}
\theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}

%\newtheorem{lemma}{Lemma}
%\newtheorem{theorem}{Theorem}
%\newtheorem{cor}[theorem]{Corollary}
%\newtheorem{remark}{Remark}
%\newtheorem{prop}{Proposition}
%\newtheorem{definition}{Definition}
%\newtheorem{assumption}{Assumption}
%%\newtheorem*{main}{Main Result}
%\newtheorem{fact}[theorem]{Fact}

% Auto-sized round, square and curly brackets around the argument.
\newcommand\paren[1]{\left({#1}\right)}
\newcommand\brackets[1]{\left[{#1}\right]}
\newcommand\braces[1]{\left\{{#1}\right\}}

% \norm with subscript t, and with subscript "t,*".
\newcommand\normt[1]{\norm{#1}_{t}}
\newcommand\dualnormt[1]{\norm{#1}_{t,*}}

% Symbols used by the \bigO/\tilO/\lowO family; \ensuremath makes them usable
% in text as well as in math.
\newcommand*\order{\ensuremath{\mathcal{O}}}
\newcommand*\lorder{\ensuremath{\Omega}}
\newcommand*\otil{\ensuremath{\tilde{\mathcal{O}}}}

% \specialcell[<pos>]{<content>}: a one-column, centered tabular with no
% column padding, placed inside another cell; <pos> (default c) is passed to
% tabular as its vertical-position argument.
\newcommand\specialcell[2][c]{\begin{tabular}[#1]{@{}c@{}}#2\end{tabular}}

% Typed cross-references via prettyref: \pref{<prefix>:<name>} picks the
% format registered below for <prefix>.
\usepackage{prettyref}
\newcommand{\pref}[1]{\prettyref{#1}}
\newcommand{\pfref}[1]{Proof of \prettyref{#1}}
% \savehyperref{<label>}{<text>}: makes <text> a hyperlink to <label>, and
% falls back to the plain <text> where a PDF string is needed.
\newcommand{\savehyperref}[2]{\texorpdfstring{\hyperref[#1]{#2}}{#2}}
% One format per label prefix. \ref* prints the number without a nested link,
% since the whole phrase is already linked by \savehyperref.
% Each prefix is registered exactly once (a second, identical `fig` entry that
% used to close this list has been dropped).
\newrefformat{eq}{\savehyperref{#1}{Eq.~\textup{(\ref*{#1})}}}
\newrefformat{eqn}{\savehyperref{#1}{Equation~\ref*{#1}}}
\newrefformat{lem}{\savehyperref{#1}{Lemma~\ref*{#1}}}
\newrefformat{def}{\savehyperref{#1}{Definition~\ref*{#1}}}
\newrefformat{line}{\savehyperref{#1}{Line~\ref*{#1}}}
\newrefformat{thm}{\savehyperref{#1}{Theorem~\ref*{#1}}}
\newrefformat{corr}{\savehyperref{#1}{Corollary~\ref*{#1}}}
\newrefformat{cor}{\savehyperref{#1}{Corollary~\ref*{#1}}}
\newrefformat{sec}{\savehyperref{#1}{Section~\ref*{#1}}}
\newrefformat{subsec}{\savehyperref{#1}{Section~\ref*{#1}}}
\newrefformat{app}{\savehyperref{#1}{Appendix~\ref*{#1}}}
\newrefformat{assum}{\savehyperref{#1}{Assumption~\ref*{#1}}}
\newrefformat{ex}{\savehyperref{#1}{Example~\ref*{#1}}}
\newrefformat{fig}{\savehyperref{#1}{Figure~\ref*{#1}}}
\newrefformat{alg}{\savehyperref{#1}{Algorithm~\ref*{#1}}}
\newrefformat{rem}{\savehyperref{#1}{Remark~\ref*{#1}}}
\newrefformat{conj}{\savehyperref{#1}{Conjecture~\ref*{#1}}}
\newrefformat{prop}{\savehyperref{#1}{Proposition~\ref*{#1}}}
\newrefformat{proto}{\savehyperref{#1}{Protocol~\ref*{#1}}}
\newrefformat{prob}{\savehyperref{#1}{Problem~\ref*{#1}}}
\newrefformat{claim}{\savehyperref{#1}{Claim~\ref*{#1}}}
\newrefformat{que}{\savehyperref{#1}{Question~\ref*{#1}}}
\newrefformat{op}{\savehyperref{#1}{Open Problem~\ref*{#1}}}
\newrefformat{fn}{\savehyperref{#1}{Footnote~\ref*{#1}}}
\newrefformat{tab}{\savehyperref{#1}{Table~\ref*{#1}}}