
% End-of-proof mark. \blacksquare is not a kernel symbol; a package loaded
% elsewhere has to supply it (amssymb does).
\newcommand*{\QED}{\blacksquare}
% Calligraphic M with a lowercase-m / uppercase-M superscript.
\newcommand*{\micromodel}{\mathcal{M}^m}
\newcommand*{\macromodel}{\mathcal{M}^M}
% Calligraphic G subscripted with the corresponding model symbol above.
\newcommand*{\micrograph}{\mathcal{G}_{\micromodel}}
\newcommand*{\macrograph}{\mathcal{G}_{\macromodel}}
% Upright operator names "do", "lub" and "nn" (non-limits form, i.e. sub- and
% superscripts are set to the side). Requires amsmath to be loaded already.
\DeclareMathOperator{\dointv}{do}
\DeclareMathOperator{\lub}{lub}
\DeclareMathOperator{\nn}{nn}
% Distribution names in math typewriter followed by their parenthesised
% parameter(s): \bern{p} -> Bernoulli(p), \unif{a}{b} -> Unif(a,b).
\newcommand*{\bern}[1]{%
  \mathtt{Bernoulli}(#1)%
}
\newcommand*{\unif}[2]{%
  \mathtt{Unif}(#1,#2)%
}
% Limit-style operators (starred form: in display math the subscript is set
% underneath the operator name). A thin space separates "arg" from
% "min"/"max" so the name reads as two words rather than one run-together
% identifier.
\DeclareMathOperator*{\argmin}{arg\,min}
\DeclareMathOperator*{\argmax}{arg\,max}

% Upright operator "D"; the variants below attach an upright subscript to it.
\newcommand*{\dist}{\operatorname{D}}
% Subscripts: J, TV, KL, W_2 and W_P respectively.
\newcommand*{\jsd}{\dist_{\operatorname{J}}}
\newcommand*{\dtv}{\dist_{\operatorname{TV}}}
\newcommand*{\dkl}{\dist_{\operatorname{KL}}}
\newcommand*{\dwassII}{\dist_{\operatorname{W}_2}}
\newcommand*{\dwassP}{\dist_{\operatorname{W}_P}}

% Alias for the binary circle operator \circ.
\newcommand*{\compose}{\circ}

% Fraktur capital S (needs a package providing \mathfrak, e.g. amssymb).
\newcommand*{\system}{\mathfrak{S}}

% Single-letter symbols. In the primed variants (\scmi, \intervseti) the prime
% is written INSIDE the \mathcal group, so the primed letter forms one
% sub-formula; a following ^ or _ then attaches to that group and cannot
% trigger a double-superscript error.
\newcommand*{\scm}{\mathcal{M}}
\newcommand*{\scmi}{\mathcal{M'}}
\newcommand*{\envars}{\mathcal{X}}
\newcommand*{\exvars}{\mathcal{U}}
\newcommand*{\structfuncs}{\mathcal{F}}
\newcommand*{\probfuncs}{\mathcal{P}}
\newcommand*{\intervset}{\mathcal{I}}
\newcommand*{\intervseti}{\mathcal{I'}}
% Bold upright "Pa".
\newcommand*{\parentset}{\mathbf{Pa}}
% Alias for the relation symbol \sim.
\newcommand*{\distributes}{\sim}
% Blackboard-bold D.
\newcommand*{\domain}{\mathbb{D}}
% Calligraphic G.
\newcommand*{\dirag}{\mathcal{G}}


% NOTE: \abs here is a bold alpha, NOT an absolute-value delimiter.
\newcommand*{\abs}{\boldsymbol{\alpha}}
% Plain letters V and m.
\newcommand*{\Rset}{V}
\newcommand*{\amap}{m}
% \alphamap{X} -> alpha with subscript X.
\newcommand*{\alphamap}[1]{%
  \alpha_{#1}%
}
% e applied to the bold alpha defined by \abs.
\newcommand*{\abserr}{e(\abs)}

% Calligraphic B, plain and primed (prime kept inside the font group).
\newcommand*{\bandit}{\mathcal{B}}
\newcommand*{\banditi}{\mathcal{B'}}

% Calligraphic A, plain and primed.
\newcommand*{\actionset}{\mathcal{A}}
\newcommand*{\actionseti}{\mathcal{A'}}

% Fraktur A, plain and primed (needs a package providing \mathfrak).
\newcommand*{\agent}{\mathfrak{A}}
\newcommand*{\agenti}{\mathfrak{A'}}

% Calligraphic G (the same glyph that \dirag produces).
\newcommand*{\rewardfunc}{\mathcal{G}}
% Italic capital G, bare and with a subscript: \rewarddistr{a} -> G_{a}.
\newcommand*{\rewardvar}{G}
\newcommand*{\rewarddistr}[1]{\rewardvar_{#1}}
% Lowercase g and its decorated forms:
%   \rewardvali      -> g'
%   \rewardvalt{t}   -> g^{(t)}
%   \rewardvalit{t}  -> g^{'(t)}   (prime and index share one superscript)
\newcommand*{\rewardval}{g}
\newcommand*{\rewardvali}{\rewardval^{\prime}}
\newcommand*{\rewardvalt}[1]{\rewardval^{(#1)}}
\newcommand*{\rewardvalit}[1]{\rewardval^{\prime(#1)}}
% Disabled definitions, kept as found:
%\newcommand{\rewardset}{\mathcal{R}}
%\newcommand{\rewardseti}{\mathcal{R}^{\prime}}

% Calligraphic Q, plain and primed (prime inside the font group).
\newcommand*{\Q}{\mathcal{Q}}
\newcommand*{\Qi}{\mathcal{Q'}}
% Indexed forms: the argument becomes a parenthesised superscript attached to
% the (possibly primed) symbol as a whole.
\newcommand*{\Qt}[1]{\Q^{(#1)}}
\newcommand*{\Qit}[1]{\Qi^{(#1)}}

% Hatted calligraphic S. In the primed variant the hat spans the group "S'"
% because the prime sits inside \mathcal{...}.
\newcommand*{\suppstats}{\hat{\mathcal{S}}}
\newcommand*{\suppstatsi}{\hat{\mathcal{S'}}}
% Indexed forms: argument -> parenthesised superscript on the hatted symbol.
\newcommand*{\suppstatst}[1]{\suppstats^{(#1)}}
\newcommand*{\suppstatsit}[1]{\suppstatsi^{(#1)}}

% Greek pi and its decorated forms:
%   \policyi     -> pi'
%   \policyt{t}  -> pi^{(t)}
%   \policyit{t} -> pi^{'(t)}
%   \optpolicy   -> pi^{*}
%   \optpolicyi  -> pi^{'*}
\newcommand*{\policy}{\pi}
\newcommand*{\policyi}{\policy^{\prime}}
\newcommand*{\policyt}[1]{\policy^{(#1)}}
\newcommand*{\policyit}[1]{\policy^{\prime(#1)}}
\newcommand*{\optpolicy}{\policy^{*}}
\newcommand*{\optpolicyi}{\policy^{\prime*}}

% Lowercase a and its decorated forms (same scheme as the pi macros:
% prime, parenthesised index, star, and their combinations).
\newcommand*{\action}{a}
\newcommand*{\actioni}{\action^{\prime}}
\newcommand*{\actiont}[1]{\action^{(#1)}}
\newcommand*{\actionit}[1]{\action^{\prime(#1)}}
\newcommand*{\optaction}{\action^{*}}
\newcommand*{\optactioni}{\action^{\prime*}}
% Bold upright x with a star superscript.
\newcommand*{\optinterv}{\mathbf{x}^{*}}

% Algorithm names set in MATH typewriter (\mathtt), i.e. for use inside math
% mode. Text-mode (\texttt) names for UCB/TOpt/TExp/IMIT are defined
% separately in this file under different macro names.
\newcommand*{\alg}{\mathtt{ALG}}
\newcommand*{\algucb}{\mathtt{UCB}}
\newcommand*{\algto}{\mathtt{TOpt}}
\newcommand*{\algte}{\mathtt{TExp}}
\newcommand*{\algimit}{\mathtt{IMIT}}
\newcommand*{\algrep}{\mathtt{REP}}

% Blackboard-bold P.
\newcommand*{\prob}{\mathbb{P}}
% Disabled zero-argument variant, kept as found:
%\newcommand{\expval}{\mathbb{E}}
% Expectation-style operators. Argument convention:
%   #1 = subscript, #2 = bracketed body, #3 (where present) = parenthesised
%   superscript. The body is wrapped in auto-sized \left[ ... \right].
% "est" variants put a hat on the E.
\newcommand*{\expval}[2]{%
  \mathbb{E}_{#1}\left[{#2}\right]%
}
\newcommand*{\estexpval}[2]{%
  \hat{\mathbb{E}}_{#1}\left[{#2}\right]%
}
\newcommand*{\expvalt}[3]{%
  \mathbb{E}_{#1}^{(#3)}\left[{#2}\right]%
}
\newcommand*{\estexpvalt}[3]{%
  \hat{\mathbb{E}}_{#1}^{(#3)}\left[{#2}\right]%
}
% Hatted E with a bracketed body and no subscript.
\newcommand*{\empexpval}[1]{%
  \hat{\mathbb{E}}\left[{#1}\right]%
}
% Disabled subscripted variant, kept as found:
%\newcommand{\expvalop}[1]{\mathbb{E}_{#1}}
% Bare E. The macro still consumes one argument but discards it, so existing
% call sites written for the subscripted variant above keep compiling.
\newcommand*{\expvalop}[1]{\mathbb{E}}
% Same #1/#2 convention as \expval, with blackboard-bold "COV" and "V".
\newcommand*{\cov}[2]{%
  \mathbb{COV}_{#1}\left[{#2}\right]%
}
\newcommand*{\var}[2]{%
  \mathbb{V}_{#1}\left[{#2}\right]%
}
% Disabled zero-argument variants, kept as found:
%\newcommand{\cov}{\mathbb{COV}}
%\newcommand{\var}{\mathbb{V}}

% \counter{x} -> calligraphic C applied to x;  \gap{x} -> Delta applied to x.
\newcommand*{\counter}[1]{%
  \mathcal{C}(#1)%
}
\newcommand*{\gap}[1]{%
  \Delta(#1)%
}

% Calligraphic D.
\newcommand*{\trajectoryset}{\mathcal{D}}

% Algorithm names set in TEXT typewriter (\texttt). These are the text-font
% counterparts of the \mathtt-based names defined elsewhere in this file;
% B-UCB exists only in this text form here.
\newcommand*{\ucb}{\texttt{UCB}}
\newcommand*{\transferoptimum}{\texttt{TOpt}}
\newcommand*{\imitation}{\texttt{IMIT}}
\newcommand*{\transferexpect}{\texttt{TExp}}
\newcommand*{\bucb}{\texttt{B-UCB}}

% Capital R, its primed form, and primed forms carrying an upright text
% subscript (dir, imit, rep, Qt, Qe).
\newcommand*{\cregr}{R}
\newcommand*{\cregri}{\cregr^{\prime}}
\newcommand*{\cregrdir}{\cregri_{\textnormal{dir}}}
\newcommand*{\cregrimit}{\cregri_{\textnormal{imit}}}
\newcommand*{\cregrrep}{\cregri_{\textnormal{rep}}}
\newcommand*{\cregrQtr}{\cregri_{\textnormal{Qt}}}
\newcommand*{\cregrQex}{\cregri_{\textnormal{Qe}}}

% Barred R, plain and primed.
\newcommand*{\sregr}{\bar{R}}
\newcommand*{\sregri}{\sregr^{\prime}}

% \intervseti with a "+" subscript; \mathord makes the plus an ordinary
% symbol instead of a binary operator.
\newcommand*{\optintervseti}{\intervseti_{\mathord{+}}}
% s applied to the bold alpha produced by \abs.
\newcommand*{\rdiscr}{s(\abs)}
% alpha subscripted with a blackboard-bold E (via \alphamap).
\newcommand*{\alphaext}{\alphamap{\mathbb{E}}}
% Calligraphic F (the same glyph that \structfuncs produces).
\newcommand*{\functionclass}{\mathcal{F}}

% eta with an upright text subscript (imit / rep), applied to \abs.
\newcommand*{\etaimit}{\eta_{\textnormal{imit}}(\abs)}
\newcommand*{\etarep}{\eta_{\textnormal{rep}}(\abs)}

% \mh: an ordinary-class (\mathord) math symbol taken from slot "2D (decimal
% 45, the character code of "-") of the `operators' symbol font. Being
% \mathord, it gets no binary-operator spacing around it.
% Preamble-only declaration.
\DeclareMathSymbol{\mh}{\mathord}{operators}{"2D}

% \tdo{text}: prints [text] in blue. The outer brace pair confines the colour
% switch to the note itself. Requires a package that provides \color
% (color or xcolor) to be loaded elsewhere.
% Deliberately left unstarred so the note may contain paragraph breaks.
\newcommand{\tdo}[1]{%
  {\color{blue}[#1]}%
}