% !TEX root =  main.tex
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Recommended, but optional, packages for figures and better typesetting:
%\usepackage{microtype}
%\usepackage{graphicx}
%\usepackage{subfigure}
%\usepackage{booktabs} % for professional tables

\usepackage{minitoc}
\usepackage{placeins}

% Attempt to make hyperref and algorithmic work together better:
% defines \theHalgorithm to print the `algorithm' counter in arabic digits.
% NOTE(review): presumably hyperref reads \theH<counter> when it names link
% anchors -- confirm against the hyperref manual before removing this line.
\newcommand{\theHalgorithm}{\arabic{algorithm}}
%\usepackage{algorithm}
%\usepackage{algorithmic}

% toggle command
\usepackage{etoolbox}

%\usepackage{times}
\usepackage{savesym,multirow,url,xspace,graphicx,enumitem,color}

% Load appendix and hyperref only if they have not been loaded already
% (presumably to avoid an option clash with a class or main file that loads
% them first -- confirm). \makeatletter is required because the name
% \@ifpackageloaded contains `@'.
\makeatletter
% appendix, with options toc, page, header.
\@ifpackageloaded{appendix}{}{%
\usepackage[toc,page,header]{appendix}}
% hyperref, with option linktocpage.
\@ifpackageloaded{hyperref}{}{%
\usepackage[linktocpage]{hyperref}}
\makeatother
% Link styling: colorlinks is switched on; internal links and citations use
% blue, file links magenta, URLs black.
\hypersetup{%
    colorlinks=true,%
    linkcolor=blue, citecolor=blue,%
    filecolor=magenta, urlcolor=black,%
}


%\usepackage[tight]{subfigure}
\usepackage{subcaption}
\usepackage{dsfont}
\usepackage{multicol}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%% tikz %%%%%%%%%%%%%%%%%%%%%%
\usepackage{tikz}
\usetikzlibrary{automata, positioning, arrows, calc, backgrounds}
% Global TikZ defaults; \tikzset at top level applies to every tikzpicture.
% NOTE(review): the bare `->' key is global as well, so it presumably adds
% arrow tips to paths outside the automata drawings too -- confirm intended.
\tikzset{
	->, % makes the edges directed
%	>=stealth', % makes the arrow heads bold (disabled)
	every state/.style={thick, fill=gray!10}, % sets the properties for each 'state' node
	initial text=$ $, % sets the text that appears on the start arrow
}
\usetikzlibrary{arrows.meta}
\usepackage{subcaption}
\usepackage{pgfplots}
% pgfplots defaults: width key set to 10cm, compat key set to 1.9.
\pgfplotsset{width=10cm,compat=1.9}
\usepackage{diagbox}
\usepackage{caption}
\usepackage[normalem]{ulem}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\newcommand{\val}{{\text{val}}}
%\newcommand{\citet}[1]{\citeauthor{#1} (\citeyear{#1})}
% Inline review annotations. Each takes the note text as its only argument.
%   \ben{...}   orange, prefixed "Ben:"
%   \goran{...} green,  prefixed "GR:"
%   \todo{...}  red,    prefixed with a bold "TODO:"
%   \new{...}   the text itself, set in black
\newcommand{\ben}[1]{\textcolor{orange}{[Ben: #1]}}
\newcommand{\goran}[1]{\textcolor{green}{[GR: #1]}}
% \textbf replaces the obsolete two-letter font switch {\bf ...}, which some
% document classes no longer define.
\newcommand{\todo}[1]{{\textcolor{red}{\textbf{TODO:} #1}}}
\newcommand{\new}[1]{\textcolor{black}{#1}}
% mute todos (for example for presenting)
% \mutetodo redefines \ben, \goran and \todo to swallow their argument and
% print nothing. The % at each line end keeps the source newlines from turning
% into space tokens, so the switch can also be used inside a paragraph without
% adding spurious spaces.
\newcommand{\mutetodo}{%
  \renewcommand{\ben}[1]{}%
  \renewcommand{\goran}[1]{}%
  \renewcommand{\todo}[1]{}%
}
% Annotations are muted from this point on; comment out to show them again.
\mutetodo{}
% Revision markup.
% \annotate{text}: typesets text in blue. Two earlier variants (bold "Note:"
% prefix in blue; bold red) are kept commented out below.
%\newcommand{\annotate}[1]{{\textcolor{blue}{{\bf Note:} #1}}}
%\newcommand{\annotate}[1]{{\textcolor{red}{{\bf #1}}}}
\newcommand{\annotate}[1]{{\textcolor{blue}{#1}}}
% \deleted{text}: typesets text in red. The struck-out (\sout) variant is
% disabled.
% \newcommand{\deleted}[1]{{\sout{\textcolor{blue}{#1}}}}
\newcommand{\deleted}[1]{{\textcolor{red}{#1}}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Preparation for algorithm2e (and for amsthm, which is loaded right after).
% Declare the \if@restonecol switch by hand.
% NOTE(review): presumably algorithm2e expects the document class to provide
% this switch -- confirm.
\makeatletter
\newif\if@restonecol
\makeatother
% Reset the algorithm and proof environment commands to \relax so that the
% packages loaded next can define their own versions.
% NOTE(review): assumes the class or an earlier package already defines these
% environments -- confirm.
\let\algorithm\relax
\let\endalgorithm\relax
\let\proof\relax
\let\endproof\relax
\usepackage[lined, boxed, ruled, commentsnumbered]{algorithm2e}
%, noend
\usepackage{amsmath,amssymb,xfrac,amsthm}
\usepackage{mathtools}
% Compact itemize lists at every nesting level (no level is given, so the
% settings are not restricted to one depth). \setlist[itemize] is the current
% enumitem interface for what the legacy \setitemize shortcut did.
\setlist[itemize]{noitemsep,topsep=1pt,parsep=1pt,partopsep=1pt, leftmargin=12pt}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% THEOREMS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\theoremstyle{plain}
% Alternative set-up (disabled): one shared counter, reset per section, with
% the definition and remark styles for the non-result environments.
% \newtheorem{theorem}{Theorem}[section]
% \newtheorem{proposition}[theorem]{Proposition}
% \newtheorem{lemma}[theorem]{Lemma}
% \newtheorem{corollary}[theorem]{Corollary}
% \theoremstyle{definition}
% \newtheorem{definition}[theorem]{Definition}
% \newtheorem{assumption}[theorem]{Assumption}
% \theoremstyle{remark}
% \newtheorem{remark}[theorem]{Remark}

% Active set-up: every environment is declared under the plain style and
% carries its own counter.
% -- results
\newtheorem{theorem}{Theorem}
\newtheorem*{theorem*}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem{proposition}{Proposition}
\newtheorem{corollary}{Corollary}
\newtheorem{fact}{Fact}
% -- definitions, assumptions, remarks
\newtheorem{definition}{Definition}
\newtheorem{assumption}{Assumption}
\newtheorem{remark}{Remark}
% Roman-numeral helpers (\makeatletter because \@slowromancap contains `@').
\makeatletter
% \rmnum{n}: n as a lowercase roman numeral, via \romannumeral.
\newcommand{\rmnum}[1]{\romannumeral #1}
% \Rmnum{n}: \expandafter makes \romannumeral run first; its output, terminated
% by `@', is then handed to \@slowromancap.
% NOTE(review): presumably the kernel helper that uppercases the numeral --
% confirm.
\newcommand{\Rmnum}[1]{\expandafter\@slowromancap\romannumeral #1@}
%\usepackage{mathtools}
\makeatother
\usepackage{caption}
% Counter `copyrightbox'; not referenced elsewhere in the visible part of this
% preamble.
\newcounter{copyrightbox}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% our macros
% for comments
%\usepackage{algpseudocode}
\usepackage{enumitem}
\usepackage{wasysym}
\usepackage{verbatim}

% argmin and argmax
% Declared with the starred form, so sub/superscripts are placed as limits.
\DeclareMathOperator*{\argmin}{arg\,min}
\DeclareMathOperator*{\argmax}{arg\,max}
% \sign prints "sgn"; unstarred, so scripts stay at the side.
\DeclareMathOperator{\sign}{sgn}
%\renewcommand{\algorithmcfname}{Algorithm}

% Indentation helpers for algorithm listings.
% \pushline: indent (expands to \Indp).
\newcommand*{\pushline}{\Indp}%
% \popline: undent (expands to \Indm followed by \dosemic).
\newcommand*{\popline}{\Indm\dosemic}%

%% our macros
%\usepackage{enumerate}
\usepackage{makecell}

% Calligraphic shorthands

% \calX family: each one is \mathcal{X}, wrapped in an extra brace group.
\newcommand*{\calA}{{\mathcal{A}}}
\newcommand*{\calD}{{\mathcal{D}}}
\newcommand*{\calF}{{\mathcal{F}}}
\newcommand*{\calH}{{\mathcal{H}}}
\newcommand*{\calL}{{\mathcal{L}}}
\newcommand*{\calO}{{\mathcal{O}}}
\newcommand*{\calP}{{\mathcal{P}}}
\newcommand*{\calR}{{\mathcal{R}}}
\newcommand*{\calS}{{\mathcal{S}}}

% \cX family: short forms. Not all of them are calligraphic -- \cS is a plain
% S, and \cA / \cL are the digits 1 / 2 here.
\newcommand*{\cS}{{S}}
\newcommand*{\cD}{{\mathcal{D}}}
\newcommand*{\cM}{{\mathcal{M}}}
\newcommand*{\cP}{{\mathcal{P}}}
\newcommand*{\cR}{{\mathcal{R}}}
%\newcommand{\cA}{\mathcal{A}}
\newcommand*{\cA}{1}
% \cL has to stay defined at this point: a \renewcommand further down in this
% file replaces it.
%\newcommand{\cL}{{\mathcal{L}}}
\newcommand*{\cL}{2}

% from Ben:
% \cL received a placeholder value earlier in this file; from here on it is
% the calligraphic L.
\renewcommand*{\cL}{{\mathcal{L}}}
% Change functions
\newcommand*{\Pc}{\mathcal{P}}% the transition change function
\newcommand*{\Rc}{\mathcal{R}}% the reward change function
\newcommand*{\ch}{\texttt{Ch}}% the change function
% Probability notation
\newcommand*{\E}{\mathbb{E}}% expectation
\renewcommand*{\P}{\mathbb{P}}% probability
% Blackboard 1; \mathbbm is looked up when \one is used, not at this line.
\newcommand*{\one}{\mathbbm{1}}
% "Defined as" sign. \coloneqq (mathtools, loaded earlier in this preamble)
% sets the colon centred on the equals sign, which a literal := in math mode
% does not. \ensuremath keeps any existing use outside math mode compiling.
\newcommand{\defeq}{\ensuremath{\coloneqq}}
% Operator-style names.
% \on{name}: shorthand for \operatorname{name}.
\newcommand{\on}[1]{%
    \operatorname{#1}%
}
\newcommand*{\dist}{\operatorname{dist}}
\newcommand*{\distpr}{\operatorname{d}_{P,r}}
\newcommand*{\maxdist}{\operatorname{max}\operatorname{dist}}
% Name set in bold inside \text.
\newcommand*{\repdep}{\text{\textbf{RepeatedDeployment}}}
% \casebycase{<label>}{<statement>}: heading line for one case -- an
% underlined "Case <label>", a colon, the statement in emphasis followed by a
% full stop, then a forced line break.
% The % at the line ends keep the source newlines out of the expansion; without
% them the macro starts with a space token, which doubles the space when it is
% used after other text in the same paragraph.
\newcommand{\casebycase}[2]{%
    \underline{Case #1}: \emph{#2.}\\%
}
% Accents: \what is a wide hat; \bar is redefined to an overline.
\newcommand{\what}[1]{%
    \widehat{#1}%
}
\renewcommand{\bar}[1]{%
    \overline{#1}%
}
% Named quantities
\newcommand*{\GD}{\operatorname{GD}}
\newcommand*{\MR}{\operatorname{MR}}
\newcommand*{\hGD}{\widehat{\operatorname{GD}}}
\newcommand*{\approxrepdep}{\text{\textbf{ApproxRepeatedDeployment}}}
% Inline remark in a 65% black tint, replacing the existing \comment.
\renewcommand{\comment}[1]{{\color{black!65!white} [comment: #1]}}

% Blackboard R
\newcommand*{\R}{\mathbb{R}}

% "Opt" in small caps, followed by \xspace
\newcommand*{\opt}{\textsc{{Opt}}\xspace}

% Asymptotic-bound symbols, usable in text and in math (\ensuremath)
\newcommand*{\bigObound}{\ensuremath{\mathcal{O}}}
\newcommand*{\smallObound}{\ensuremath{o}}
\newcommand*{\Thetabound}{\ensuremath{\Theta}}
\newcommand*{\Omegabound}{\ensuremath{\Omega}}

\newcommand*{\epsP}{\ensuremath{\delta}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Algos
\newcommand*{\PiAttack}{\textsc{ImplicitAttack}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand*{\regret}{\textsc{Regret}}

% The sets S and A, and their sizes
\newcommand*{\setS}{S}
\newcommand*{\setA}{A}
\newcommand*{\sizeS}{|S|}
\newcommand*{\sizeA}{|A|}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{bm}
\usepackage{bbm}
% Make \mathbbm an alias of \mathds (dsfont is loaded earlier in this
% preamble); macros such as \one that call \mathbbm therefore end up using
% \mathds.
% NOTE(review): presumably has to stay after the bbm line above, which would
% otherwise define \mathbbm itself -- confirm.
\let\mathbbm\mathds
% Auto-sized (\left ... \right) delimiters.
% \norm: double bars on both sides.
\newcommand{\norm}[1]{%
    \left\lVert#1\right\rVert%
}
% \normL / \normR: double bar on the left / right only; the other side is the
% invisible delimiter.
\newcommand{\normL}[1]{%
    \left\lVert#1\right.%
}
\newcommand{\normR}[1]{%
    \left.#1\right\rVert%
}
% \abs: single bars.
\newcommand{\abs}[1]{%
    \left|#1\right|%
}
% \vecdot{a}{b}: angle brackets around "a, b".
\newcommand{\vecdot}[2]{%
    \left\langle#1, #2\right\rangle%
}
% \bracket: square brackets.
\newcommand{\bracket}[1]{%
    \left[#1\right]%
}

% Fixed-size floor and ceiling.
\newcommand{\floor}[1]{%
    \lfloor #1 \rfloor%
}
\newcommand{\ceil}[1]{%
    \lceil #1 \rceil%
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EXTRA

% Expectation: \expct{X} and, with a subscript, \expctu{sub}{X}.
\newcommand{\expct}[1]{%
    \mathbb{E}\left[#1\right]%
}
\newcommand{\expctu}[2]{%
    \mathbb{E}_{#1}\left[#2\right]%
}
% Probability: \Pr is redefined to take the event as an argument; \Pru adds a
% subscript. Both are wrapped in \ensuremath.
\renewcommand{\Pr}[1]{%
    \ensuremath{\mathbb{P}\left[#1\right]}%
}
\newcommand{\Pru}[2]{%
    \ensuremath{\mathbb{P}_{#1}\left[#2\right]}%
}
% Indicator \ind{cond} and bracket-with-plus \pos{x}.
\newcommand{\ind}[1]{%
    \mathds{1}\left[#1\right]%
}
\newcommand{\pos}[1]{%
    \left[#1\right]^+%
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ATTACK-DEFENSE (NOTATION)


\newcommand*{\eps}{\epsilon}

\newcommand*{\ACost}{\textsc{Cost}}

% Policies and policy sets indexed by \cA
\newcommand*{\pola}{\pi_{\cA}}
%\newcommand{\polap}{\pi_{\cA, \theta}}
\newcommand*{\polap}{\pi_{\theta}}
\newcommand*{\Pola}{\Pi^{\cA}}
\newcommand*{\Polad}{\Pi^{\cA}_{\text{det}}}

% Policies and policy sets indexed by \cL
\newcommand*{\poll}{\pi_{\cL}}
%\newcommand{\pollp}{\pi_{\cL, \phi}}
\newcommand*{\pollp}{\pi_{\phi}}
\newcommand*{\pollpo}{\pi_{\phi_0}}
\newcommand*{\Poll}{\Pi^{\cL}}
\newcommand*{\Polld}{\Pi^{\cL}_{\text{det}}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% MDP (NOTATION) %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Policies (wrapped in \ensuremath)
%\newcommand{\targetpi}{\ensuremath{{\pi_{\dagger}}}}
\newcommand*{\targetpi}{\ensuremath{{\pi^{\dagger}_{\cL}}}}
\newcommand*{\targetpiproxy}{\ensuremath{{\tilde \pi^{\dagger}_{\cL}}}}
\newcommand*{\initpi}{\ensuremath{{\pi_{\cA}^0}}}
\newcommand*{\optpil}{\pi^{*}_{\cL}}

\newcommand*{\dpi}{\ensuremath{ \delta }}

% Score and its hatted / barred variants
\newcommand*{\score}{{\rho}}
\newcommand*{\hatscore}{\widehat{\score}}
\newcommand*{\barscore}{\overline{\score}}

% Occupancy symbols
\newcommand*{\occupancy}{{\psi}}
\newcommand*{\Occupancy}{{\Psi}}
\newcommand*{\occstate}{{\mu}}
\newcommand*{\occdiffmatrix}{{\mathbf \Phi}}

% Small-caps names
\newcommand*{\Optl}{\textsc{{Opt}}_{\cL}^{\eps} }
\newcommand*{\Dist}{\textsc{Dist}}
\newcommand*{\Cost}{\textsc{Cost}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Algorithm

% Algorithm names, set in small caps
\newcommand*{\conspolsearch}{\textsc{Conservative Policy Search}}
\newcommand*{\metalgo}{\textsc{Meta-Gradient}}

%\usepackage[]{algorithm2e}

%\usepackage{algorithm}
% algorithmic, loaded with the noend option.
% NOTE(review): algorithm2e is loaded earlier in this preamble as well; the two
% pseudocode packages are usually alternatives -- confirm both are needed.
\usepackage[noend]{algorithmic}
% Comments in listings are printed as "// <text>".
\renewcommand{\algorithmiccomment}[1]{// #1}
% Sets the algorithmic indent option to 2em.
\algsetup{indent=2em}

%EXTRA
% \algorithmicrequire and \algorithmicensure now print bold "Input:" and
% "Output:".
\renewcommand{\algorithmicrequire}{\textbf{Input:}}
\renewcommand{\algorithmicensure}{\textbf{Output:}}

\usepackage{wrapfig}

% recommended by chat gpt ;)
\usepackage{booktabs} % For prettier tables
\usepackage{multirow} % To merge rows
\usepackage{threeparttable} % For table notes

\usepackage{tcolorbox}
% redroundedbox: white background, 2pt frame in red!80!gray, arc of 4mm, and
% 6pt of inner padding on all four sides.
\newtcolorbox{redroundedbox}{%
  colback=white, colframe=red!80!gray,%
  boxrule=2pt, arc=4mm,%
  left=6pt, right=6pt, top=6pt, bottom=6pt%
}