%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Including Packages
\usepackage{color}
\usepackage{algorithm}
% \usepackage{algorithmic}
\usepackage[noend]{algpseudocode}
\usepackage{mathrsfs}
\usepackage{dsfont}
\usepackage{lmodern}
\usepackage{array}
\usepackage{bbm}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage[mathscr]{eucal}
\usepackage{graphicx}
\usepackage{mathrsfs}
\usepackage{psfrag}
\usepackage{color}
\usepackage{here}
\usepackage{wasysym}
\usepackage{enumitem}
% \usepackage[dvipsnames, table]{xcolor}
% \usepackage{caption}
\usepackage{subcaption}
\usepackage{amsthm}
\usepackage{lipsum}
\usepackage{todonotes}
\usepackage{tabulary}
\usepackage{pifont}
\usepackage{outlines}
\usepackage{thmtools, thm-restate}
% Theorem-like environments.
% No declaration passes a shared-counter argument, so every environment
% below keeps its own independent numbering.
\newtheorem{duplicate}{Theorem}% own counter, but prints the same "Theorem" heading

\newtheorem{theorem}{Theorem}
\newtheorem{conjecture}{Conjecture}
\newtheorem{proposition}{Proposition}
\newtheorem{lemma}{Lemma}
\newtheorem{corollary}{Corollary}
\newtheorem{assumption}{Assumption}
\newtheorem{definition}{Definition}
\newtheorem{property}{Property}
\newtheorem{axiom}{Axiom}
\newtheorem{fact}{Fact}
\newtheorem{remark}{Remark}
\newtheorem{example}{Example}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Setting up macro shortcuts
% Double-struck symbols: \mathds is provided by dsfont, \mathbb by amssymb.
\newcommand*{\mdp}{\mathds{M}}
\newcommand*{\Exp}{\mathds{E}}
\newcommand*{\Expk}{\mathds{E}_{k}}
\newcommand*{\Prob}{\mathds{P}}
\newcommand*{\Real}{\mathds{R}}
\newcommand*{\Nat}{\mathbb{N}}
\newcommand*{\Ind}{\mathds{1}}
% Upright text labels in subscripts are set with \mathrm{...}.
% The older two-letter switch {\rm ...} is a LaTeX 2.09 command that not
% every document class still defines; in math mode \mathrm produces the
% same output wherever \rm was available.
\newcommand*{\Rmax}{R_{\mathrm{max}}}
% Preference relations: \succcurlyeq (amssymb) tagged with an upright label.
\newcommand*{\riskyopt}{\succcurlyeq_{\mathrm{ro}}}
\newcommand*{\cid}{\succcurlyeq_{\mathrm{CID}}}
\newcommand*{\so}{\succcurlyeq_{\mathrm{so}}}
\newcommand*{\single}{\succcurlyeq_{\mathrm{sc}}}
% Starred form: sub/superscripts are placed as limits in display style.
\DeclareMathOperator*{\argmin}{arg\,min}
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\softmax}{softmax}

% State space, action space, the state symbol s with an upright "e"
% subscript, and an upright "TD" label.
\newcommand*{\state}{\mathcal{S}}
\newcommand*{\action}{\mathcal{A}}
\newcommand*{\se}{s_{\mathrm{e}}}
\newcommand*{\TD}{\mathrm{TD}}

% Calligraphic capitals: \<Letter>c expands to \mathcal{<Letter>}.
\newcommand*{\Xc}{\mathcal{X}}
\newcommand*{\Yc}{\mathcal{Y}}
\newcommand*{\Zc}{\mathcal{Z}}
\newcommand*{\Ic}{\mathcal{I}}
\newcommand*{\Pc}{\mathcal{P}}
\newcommand*{\Qc}{\mathcal{Q}}
\newcommand*{\Ec}{\mathcal{E}}
\newcommand*{\Fc}{\mathcal{F}}
\newcommand*{\Gc}{\mathcal{G}}
\newcommand*{\Rc}{\mathcal{R}}
\newcommand*{\Sc}{\mathcal{S}}
\newcommand*{\Ac}{\mathcal{A}}
\newcommand*{\Nc}{\mathcal{N}}
\newcommand*{\Mc}{\mathcal{M}}
\newcommand*{\Tc}{\mathcal{T}}
\newcommand*{\Vc}{\mathcal{V}}
\newcommand*{\Dc}{\mathcal{D}}
\newcommand*{\Bc}{\mathcal{B}}
\newcommand*{\Hc}{\mathcal{H}}
\newcommand*{\Lc}{\mathcal{L}}
\newcommand*{\Oc}{\mathcal{O}}
% Probability / statistics notation.
\newcommand*{\E}{\mathbb{E}}
\newcommand*{\one}{\mathbf{1}}
\newcommand*{\Cov}{\mathrm{Cov}}
\newcommand*{\Var}{\mathrm{Var}}
\newcommand*{\dtrain}{\mathcal{D}_{\mathrm{train}}}
% Typewriter "ENN" label. The obsolete {\tt ...} switch (not defined by every
% document class) is replaced by \textnormal{\ttfamily ...}, which likewise
% resets to the normal font before selecting typewriter and, with amsmath
% loaded, is also usable inside math.
\newcommand*{\ENN}{\textnormal{\ttfamily ENN}}
% KL-divergence symbols: bold d with an upright KL subscript; \KLBAR adds an overline.
\newcommand*{\KL}{\mathbf{d}_{\mathrm{KL}}}
\newcommand*{\KLBAR}{\overline{\mathbf{d}}_{\mathrm{KL}}}
% Named aliases that expand to the same symbols as \Dc and \Ac.
\newcommand*{\data}{\mathcal{D}}
\newcommand*{\actions}{\mathcal{A}}

% Bold upright letters: lowercase x and y, and uppercase Y.
\newcommand*{\bx}{\mathbf{x}}
\newcommand*{\by}{\mathbf{y}}

\newcommand*{\bY}{\mathbf{Y}}


% Bold R (note: \Real, defined with \mathds, is a different glyph).
\newcommand*{\reals}{\mathbf{R}}
% Calligraphic E and its tilde-accented variant.
\newcommand*{\environment}{\mathcal{E}}
\newcommand*{\proxy}{\tilde{\mathcal{E}}}
% Tilde variant of the KL symbol. The subscript uses \mathrm rather than
% \textrm: \textrm inherits the surrounding text shape (e.g. italic inside a
% theorem body), which would make this subscript differ from \KL and \KLBAR.
\newcommand*{\KLp}{\mathbf{\tilde{d}}_{\mathrm{KL}}}
\newcommand*{\Regret}{\mathrm{Regret}}

% \ass: an equals sign followed by a centred dot, with hand-tuned horizontal
% spacing around both symbols.
\newcommand{\ass}{\hspace{-1mm} = \hspace{-0.5mm} \mathbf{\cdot} \hspace{0.5mm}}
% The three status marks below use colour names from xcolor's dvipsnames set.
% The only visible load of xcolor with that option is commented out, so the
% names are provided here as fallbacks (same CMYK values as dvipsnames).
% \providecolor never overrides an existing definition, so a document that
% does enable dvipsnames is unaffected.
% NOTE(review): \providecolor needs xcolor; it is expected to be pulled in by
% todonotes -- confirm if the package list changes.
\providecolor{ForestGreen}{cmyk}{0.91,0,0.88,0.12}
\providecolor{Dandelion}{cmyk}{0,0.29,0.84,0}
\providecolor{BrickRed}{cmyk}{0,0.89,0.94,0.28}
% pifont dingbats: 51 = check mark, 108 = filled circle, 55 = cross.
\newcommand{\tick}{\textcolor{ForestGreen}{\ding{51}}}
\newcommand{\ok}{\textcolor{Dandelion}{\ding{108}}}
\newcommand{\cross}{\textcolor{BrickRed}{\ding{55}}}
% Fixed 22mm vertical gap.
\newcommand{\fillpara}{\vspace{22mm}}
% s with a bar accent.
\newcommand{\exitstate}{\bar{s}}

% Custom dark-red colour given as 8-bit RGB components.
\definecolor{ian_highlight}{RGB}{100, 2, 2}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Repeat caption
% \repeatcaption{<label>}{<caption text>}
%   Captions a repeated figure using the number of the figure labelled <label>.
%   #1: label of the original figure (used with \ref and \pageref)
%   #2: caption text; " (repeated from page N)" is appended to it
\newcommand{\repeatcaption}[2]{%
  % Keep the repeated caption out of the list of figures.
  \captionsetup{list=no}%
  % Print the original figure's number instead of the current counter value.
  \renewcommand{\thefigure}{\ref{#1}}%
  \caption{#2 (repeated from page \pageref{#1})}%
  % Step the counter back by one so the next figure is numbered as if this
  % repeat had not been there.
  \addtocounter{figure}{-1}%
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
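% Vertical indent rules for algorithmicx blocks (e.g. for-loop bodies). The \patchcmd line that activates them is currently commented out; uncomment it to draw the rules.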

% Show the full macro-expansion context in error messages (debugging aid).
\errorcontextlines\maxdimen

% begin vertical rule patch for algorithmicx (http://tex.stackexchange.com/questions/144840/vertical-loop-block-lines-in-algorithmicx-with-noend-option)
\makeatletter
% Helper code first.
% \algrule[<width>]: a left-aligned box of the given width (default
% \algorithmicindent) holding a .5em offset, the \thealgruleextra hook and a
% vertical rule whose height/depth come from the two macros defined below.
    \newcommand*{\algrule}[1][\algorithmicindent]{\makebox[#1][l]{\hspace*{.5em}\thealgruleextra\vrule height \thealgruleheight depth \thealgruledepth}}%
% The rule's height and depth are kept in macros so they can be adjusted per line.
\newcommand*{\thealgruleextra}{}
\newcommand*{\thealgruleheight}{.75\baselineskip}
\newcommand*{\thealgruledepth}{.25\baselineskip}

% Scratch counter for the loop over indent levels.
\newcount\ALG@printindent@tempcnta
% \ALG@printindent: emits one \algrule per current nesting level, each as wide
% as that level's stored indent (\ALG@ind@<n>). It is only defined here; it
% takes effect once the \patchcmd line further down is enabled.
\def\ALG@printindent{%
    \ifnum \theALG@nested>0% only when at least one block is open
        \ifx\ALG@text\ALG@x@notext% an end-of-block line that carries no text?
            % do nothing
        \else
            \unskip
            \addvspace{-1pt}% FUDGE to make the rules line up
            % draw a rule for each indent level, starting at level 1
            \ALG@printindent@tempcnta=1
            \loop
                \algrule[\csname ALG@ind@\the\ALG@printindent@tempcnta\endcsname]%
                \advance \ALG@printindent@tempcnta 1
            \ifnum \ALG@printindent@tempcnta<\numexpr\theALG@nested+1\relax% \ifnum has no <=, so compare with < against nested+1
            \repeat
        \fi
    \fi
    }%
% etoolbox provides \patchcmd, which the (currently disabled) line below uses.
\usepackage{etoolbox}
% the following line injects our new indent handling code in place of the default spacing
% \patchcmd{\ALG@doentity}{\noindent\hskip\ALG@tlm}{\ALG@printindent}{}{\errmessage{failed to patch}}
\makeatother

% the required height and depth are set by measuring the content to be shown
% this means that the content is processed twice
% Box register used only to measure the content passed to \myState.
\newbox\statebox
% \myState{<content>}: a wrapper around \State that first sizes the vertical
% rule (\thealgruleheight / \thealgruledepth) to the content's measured height
% and depth, then typesets the content with \State.
% Because the content is set once into the box and once by \State, it is
% processed twice.
\newcommand{\myState}[1]{%
    % Trial-typeset the content in a vbox just to read off its dimensions.
    \setbox\statebox=\vbox{#1}%
    % Store the measured height/depth plus 1pt; \edef expands \the\ht / \the\dp
    % now, so the stored values do not depend on the box afterwards.
    \edef\thealgruleheight{\dimexpr \the\ht\statebox+1pt\relax}%
    \edef\thealgruledepth{\dimexpr \the\dp\statebox+1pt\relax}%
    % Enforce a minimum: anything below the single-line size is replaced by
    % the single-line size plus 1pt.
    \ifdim\thealgruleheight<.75\baselineskip
        \def\thealgruleheight{\dimexpr .75\baselineskip+1pt\relax}%
    \fi
    \ifdim\thealgruledepth<.25\baselineskip
        \def\thealgruledepth{\dimexpr .25\baselineskip+1pt\relax}%
    \fi
    % Debugging aids (disabled): dump the measured box to the log.
    %\showboxdepth=100
    %\showboxbreadth=100
    %\showbox\statebox
    % Typeset the content for real.
    \State #1%
    % Disabled alternatives that would reuse the measured box instead:
    %\State \usebox\statebox
    %\State \unvbox\statebox
    % Reset to the single-line size (+1pt) in case the next command is not wrapped in \myState.
    \def\thealgruleheight{\dimexpr .75\baselineskip+1pt\relax}%
    \def\thealgruledepth{\dimexpr .25\baselineskip+1pt\relax}%
}
% end vertical rule patch for algorithmicx
% end vertical rule patch for algorithmicx
