% \documentclass{uai2022} % for initial submission
\documentclass[accepted]{uai2022} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2022} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2022} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

\usepackage{algorithm}
\usepackage[noend]{algpseudocode}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{amsthm}
\usepackage{color}
\usepackage{comment}
\usepackage{enumitem}
\usepackage{epstopdf}
\usepackage{latexsym}
\usepackage{multicol}
\usepackage{multirow}
\usepackage{mathtools}
\usepackage{soul}

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
% \swap[<sep>]{<a>}{<b>}: typesets <b>, then <sep>, then <a>. <sep> is the
% optional first argument and defaults to a hyphen when omitted.
\newcommand{\swap}[3][-]{#3#1#2} % just an example

% Theorem-like environments declared with \newtheorem.
% None of them is given a shared-counter argument, so each environment
% below is numbered by its own independent counter.
\newtheorem{thm}{Theorem}
% Disabled alternative name for the Definition environment; the active one
% is `defn' below.
%\newtheorem{definition}{Definition}
\newtheorem{cor}{Corollary}
\newtheorem{lem}{Lemma}
\newtheorem{prop}{Proposition}
\newtheorem{defn}{Definition}
\newtheorem{obs}{Observation}
\newtheorem{ex}{Example}

% MATH -----------------------------------------------------------
% Auto-sized delimiters: each wraps its argument in a \left...\right pair.
\newcommand{\norm}[1]{\left\Vert#1\right\Vert} % double vertical bars
\newcommand{\abs}[1]{\left\vert#1\right\vert}  % single vertical bars
\newcommand{\set}[1]{\left\{#1\right\}}        % curly braces

% Symbol shorthands (math mode).
\newcommand{\Real}{\mathbb R}        % blackboard-bold R
\newcommand{\eps}{\varepsilon}
\newcommand{\To}{\longrightarrow}
\newcommand{\X}{\mathbf{X}}          % bold upright X
\newcommand{\x}{\mathbf{x}}          % bold upright x
\newcommand{\BX}{\mathbf{B}(X)}      % bold B applied to a plain (non-bold) X
\newcommand{\bb}{\mathbf{b}}         % bold upright b
\newcommand{\M}{\mathcal{M}}         % calligraphic capitals
\newcommand{\Li}{\mathcal{L}}
\newcommand{\T}{\mathcal{T}}
\newcommand{\R}{\mathcal{R}}
\newcommand{\ba}{\mathbf{a}}         % bold upright a
% NOTE(review): \bm is also the command name provided by the `bm' package;
% this definition would clash with it if that package were ever loaded.
\newcommand{\bm}{\mathbf{m}}         % bold upright m
\newcommand{\aframe}{\hat{\theta}}   % theta with a hat accent

% Model names, typeset in sans serif via \textsf.
\newcommand{\mdp}{\textsf{MDP}}
\newcommand{\pomdp}{\textsf{POMDP}}
\newcommand{\decpomdp}{\textsf{Dec-POMDP}}
\newcommand{\ipomdp}{\textsf{I-POMDP}}
\newcommand{\ipomdplite}{\textsf{IPOMDP-Lite}}
\newcommand{\nestedmdp}{\textsf{Nested-MDP}}
\newcommand{\cipomdp}{\textsf{CI-POMDP}}

% \numberthis: steps the equation counter and tags the current display line
% with the new number (presumably for numbering single lines of an
% otherwise unnumbered amsmath display -- confirm at the use sites).
\newcommand\numberthis{\addtocounter{equation}{1}\tag{\theequation}}

% \Sym{<text>}: typesets <text> in text italics inside an \mbox, so a
% multi-letter identifier is set as one word rather than as a product of
% math variables. Defined with \newcommand* and \textit instead of the
% primitive \def and the obsolete LaTeX 2.09 switch \it; the one-argument
% interface and the grouping are unchanged.
\newcommand*{\Sym}[1]{{\mbox{\textit{#1}}}}

\title{Decision-Theoretic Planning with Communication in Open Multiagent Systems (Supplementary material)}

% The standard author block has changed for UAI 2022 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{Anirudh Kakarlapudi}
\author[1]{Gayathri Anil}
\author[2]{\href{mailto:<aeck@oberlin.edu>?Subject=Your UAI 2022 paper}{Adam Eck}{}}
\author[1]{Prashant Doshi}
\author[3]{Leen-Kiat Soh}
% Add affiliations after the authors
\affil[1]{%
    Computer Science Department\\
    University of Georgia\\
    Athens, Georgia, USA
}
\affil[2]{%
    Computer Science Department\\
    Oberlin College\\ 
    Oberlin, Ohio, USA
}
\affil[3]{%
    Computer Science and Engineering Department\\
    University of Nebraska\\
    Lincoln, Nebraska, USA
  }
  
  \begin{document}
\maketitle

\appendix

\section{Monte Carlo Tree Search}

Here, we provide the pseudocode for single-agent POMCP \citep{Silver10:POMCP} in Alg.~\ref{alg:POMCP}, as well as an example AND-OR tree in Fig.~\ref{fig:pomcp_tree} referenced in Section 4.1.

\setcounter{algorithm}{1}

\algrenewcommand\algorithmicindent{0.4em}
\begin{algorithm}[!ht]
\caption{POMCP \citep{Silver10:POMCP}}
\begin{algorithmic}[1]
\Procedure{CreatePlan}{$b$}
\For{$traj \in 1, 2, \ldots, \tau$}
\State $s \leftarrow \text{SampleParticle}\left(b\right)$
\State UpdateTree$\left(s, 0, \varepsilon\right)$
\EndFor
\State return $\underset{a \in A}{\operatorname{argmax}} \text{ } Q(\varepsilon, a)$
\EndProcedure

\Procedure{UpdateTree}{$s, t, h$}
\If {$t \ge H$}
\State return 0
\EndIf
\If {$h$ is a leaf}
\State $\text{Expand}(h)$
\State return Rollout$\left(s, t\right)$
\EndIf
\State $a \leftarrow \text{ChooseAction}(h)$
\State $s', r, o \leftarrow \text{SimulateComm}(s, a)$
\State $R \leftarrow r + \gamma \cdot \text{UpdateTree}(s', t+1, hao)$
\State $\text{StoreResults}(h, s, a, R)$
\State return $R$
\EndProcedure

%\Procedure{Rollout}{$s, t$}
%\State $R \leftarrow 0$
%\For{$t' \in 0, 1, \cdots, H - t$} 
%\State $a \sim \text{Uniform}(A)$
%\State $s', r \leftarrow \text{Simulate}(s, a)$
%\State $R \leftarrow R + \gamma^{t'} r, ~~~s \leftarrow s'$
%\EndFor
%\State return $R$
%\EndProcedure

\Procedure{Expand}{$h$}
\State $B(h) \leftarrow \emptyset, ~~~~n(h) \leftarrow 0$
\State $n(ha) \leftarrow 0, ~~Q(h, a) \leftarrow 0 ~~\forall a \in A$
\EndProcedure

\Procedure{ChooseAction}{$h$}
\State return $\underset{a \in A}{\operatorname{argmax}} \text{ } Q(h, a) + \sqrt{\frac{\log{n(h)}}{n(ha)}}$
\EndProcedure

\Procedure{StoreResults}{$h, s, a, R$}
\State $B(h) \leftarrow B(h) \cup \{s\}$
\State $n(h) \leftarrow n(h) + 1, ~~~n(ha) \leftarrow n(ha) + 1$
\State $Q(h, a) \leftarrow Q(h, a) + \frac{R - Q(h, a)}{n(ha)}$
\EndProcedure
\end{algorithmic}
\label{alg:POMCP}
\end{algorithm}

\setcounter{figure}{4}

\begin{figure}[!ht]
\centering
\includegraphics[width=3.25in]{pomcp_tree.png}
\caption{The first two levels of an example AND-OR tree created by MCTS for a POMDP with 2 actions, 2 observations, and 4 states}
\label{fig:pomcp_tree}
\end{figure}

\section{Agent Actions Performed in Experiments}

In Figs.~\ref{fig:actions_ipomcppf_setup1}--\ref{fig:actions_cipomcppf_cost5_setup3}, we document the actions chosen by each agent in all three setups for both the $\text{I-POMCP-PF}_O$ and $\text{CI-POMCP-PF}_O$ algorithms (using communication costs of 0 and 1 to show both ends of the spectrum).  Note: for all setups, action 0 = the leftmost fire, action 1 = the middle fire, action 2 = the right fire, and action 3 = NOOP.

\begin{figure}[!ht]
\centering
\includegraphics[width=3.25in]{IPOMCPPF-actions_time-1.png}
\caption{Agent actions using I-POMCP-PF in Setup 1}
\label{fig:actions_ipomcppf_setup1}
\end{figure}

\begin{figure}[!ht]
\centering
\includegraphics[width=3.25in]{CIPOMCPPF-actions_time-1-0.png}
\caption{Agent actions using CI-POMCP-PF (cost = 0) in Setup 1}
\label{fig:actions_cipomcppf_cost0_setup1}
\end{figure}

\begin{figure}[!ht]
\centering
\includegraphics[width=3.25in]{CIPOMCPPF-actions_time-1-5.png}
\caption{Agent actions using CI-POMCP-PF (cost = 1) in Setup 1}
\label{fig:actions_cipomcppf_cost5_setup1}
\end{figure}

\begin{figure}[!ht]
\centering
\includegraphics[width=3.25in]{IPOMCPPF-actions_time-2.png}
\caption{Agent actions using I-POMCP-PF in Setup 2}
\label{fig:actions_ipomcppf_setup2}
\end{figure}

\begin{figure}[!ht]
\centering
\includegraphics[width=3.25in]{CIPOMCPPF-actions_time-2-0.png}
\caption{Agent actions using CI-POMCP-PF (cost = 0) in Setup 2}
\label{fig:actions_cipomcppf_cost0_setup2}
\end{figure}

\begin{figure}[!ht]
\centering
\includegraphics[width=3.25in]{CIPOMCPPF-actions_time-2-5.png}
\caption{Agent actions using CI-POMCP-PF (cost = 1) in Setup 2}
\label{fig:actions_cipomcppf_cost5_setup2}
\end{figure}

\begin{figure}[!ht]
\centering
\includegraphics[width=3.25in]{IPOMCPPF-actions_time-3.png}
\caption{Agent actions using I-POMCP-PF in Setup 3}
\label{fig:actions_ipomcppf_setup3}
\end{figure}

\begin{figure}[!ht]
\centering
\includegraphics[width=3.25in]{CIPOMCPPF-actions_time-3-0.png}
\caption{Agent actions using CI-POMCP-PF (cost = 0) in Setup 3}
\label{fig:actions_cipomcppf_cost0_setup3}
\end{figure}

\begin{figure}[!ht]
\centering
\includegraphics[width=3.25in]{CIPOMCPPF-actions_time-3-5.png}
\caption{Agent actions using CI-POMCP-PF (cost = 1) in Setup 3}
\label{fig:actions_cipomcppf_cost5_setup3}
\end{figure}

\section{Messages Sent in Experiments}

In Figs.~\ref{fig:messages_cipomcppf_cost0}--\ref{fig:messages_cipomcppf_cost5_setup3}, we document the messages sent by each agent in all three setups for the $\text{CI-POMCP-PF}_O$ algorithm (using communication costs of 0 and 1 to show both ends of the spectrum).

\begin{figure}[!ht]
\centering
\includegraphics[width=3.25in]{CIPOMCPPF-messages_time-0cost.png}
\caption{Messages sent using CI-POMCP-PF (cost = 0) in Setups 1--3}
\label{fig:messages_cipomcppf_cost0}
\end{figure}

\begin{figure}[!ht]
\centering
\includegraphics[width=3.25in]{CIPOMCPPF-messages_time-1cost.png}
\caption{Messages sent using CI-POMCP-PF (cost = 1) in Setups 1--3}
\label{fig:messages_cipomcppf_cost1}
\end{figure}

\begin{figure}[!ht]
\centering
\includegraphics[width=3.25in]{CIPOMCPPF-messages_time-1-0.png}
\caption{Messages sent using CI-POMCP-PF (cost = 0) in Setup 1}
\label{fig:messages_cipomcppf_cost0_setup1}
\end{figure}

\begin{figure}[!ht]
\centering
\includegraphics[width=3.25in]{CIPOMCPPF-messages_time-1-5.png}
\caption{Messages sent using CI-POMCP-PF (cost = 1) in Setup 1}
\label{fig:messages_cipomcppf_cost5_setup1}
\end{figure}

\begin{figure}[!ht]
\centering
\includegraphics[width=3.25in]{CIPOMCPPF-messages_time-2-0.png}
\caption{Messages sent using CI-POMCP-PF (cost = 0) in Setup 2}
\label{fig:messages_cipomcppf_cost0_setup2}
\end{figure}

\begin{figure}[!ht]
\centering
\includegraphics[width=3.25in]{CIPOMCPPF-messages_time-2-5.png}
\caption{Messages sent using CI-POMCP-PF (cost = 1) in Setup 2}
\label{fig:messages_cipomcppf_cost5_setup2}
\end{figure}

\begin{figure}[!ht]
\centering
\includegraphics[width=3.25in]{CIPOMCPPF-messages_time-3-0.png}
\caption{Messages sent using CI-POMCP-PF (cost = 0) in Setup 3}
\label{fig:messages_cipomcppf_cost0_setup3}
\end{figure}

\begin{figure}[!ht]
\centering
\includegraphics[width=3.25in]{CIPOMCPPF-messages_time-3-5.png}
\caption{Messages sent using CI-POMCP-PF (cost = 1) in Setup 3}
\label{fig:messages_cipomcppf_cost5_setup3}
\end{figure}

\bibliography{kakarlapudi_475}

\end{document}
