% \documentclass[mathfont=newtx]{uai2022} % for initial submission
\documentclass[accepted]{uai2022} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2022} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2022} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}
\usepackage{xr}
\externaldocument[main:]{bonjour_315}
%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

\usepackage{bm}             % bold math

\usepackage{booktabs}       % professional-quality tables
\usepackage{multirow,array} % For the table
\usepackage{makecell}       % multi-line in table
\usepackage{array}
\usepackage{booktabs}       % professional-quality tables
\usepackage{algorithm, algpseudocode}

\usepackage{cleveref}

\usepackage{fancyhdr}

% \usepackage[center]{caption}
%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
%\newcommand{\swap}[3][-]{#3#1#2} % just an example
\title{Information-Theoretic Approach to Detect Collusion in Multi-Agent Games: Supplementary Material}

% The standard author block has changed for UAI 2022 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:<tbonjour@purdue.edu>?Subject=UAI 2022 - Information-Theoretic Approach to Detect Collusion in Multi-Agent Games}{Trevor Bonjour}{}}
\author[2]{Vaneet Aggarwal}
\author[1]{Bharat Bhargava}
% Add affiliations after the authors
\affil[1]{%
    Department of Computer Science\\
    Purdue University\\
    West Lafayette, Indiana, USA
}
\affil[2]{%
    School of Industrial Engineering\\
    Purdue University\\
    West Lafayette, Indiana, USA
}
  \begin{document}
  
 \onecolumn
\maketitle

\appendix
\fancyhead{\textbf{Supplementary for: Information-Theoretic Approach to Detect Collusion in Multi-Agent Games}}

\section{State Space Specifications}\label{sec:state}
\subsection{Rock, Paper, Scissors}
In iterated rock, paper, scissors, we represent the state space as a combination of `action history' and `opponent action distribution'. We bound the action history to the last 20 actions taken by the players. The opponent action distribution is represented as a 3-dimensional vector with each element depicting the probability of taking a given action.
\subsection{Leduc Hold'em}
For Leduc Hold'em, we represent the state space as a combination of the following attributes, with the number of possible values shown in parentheses: current player position (3), current player hole card (6), current player raised (2), opponent position ($3\times 2$), opponent folded ($2\times 2$), opponent raised ($2\times 2$), board card (6), round (2), pot value (36), and action history ($9\times 9$).
\section{Collusion Detection Algorithm}\label{sec:cda_algo}
\Cref{CDA_algo} gives the procedure for the collusion detection method for multi-agent games.
% Algorithm float: pseudocode of the collusion detection procedure, referenced
% in the text as \Cref{CDA_algo}. Equation references carry the `main:' prefix
% and are resolved against the main paper via xr's \externaldocument.
%
% Structure of the procedure:
%   1. build the per-agent policy matrices,
%   2. fill the pair-wise individual influence matrix I,
%   3. fill the pair-wise net influence matrix N,
%   4. report a pair (c_1, c_2) only if exactly one pair exceeds the
%      threshold alpha in both directions.
%
% All pair loops iterate over unordered pairs (i < j). Scanning every ordered
% pair would match the same colluding pair twice, as (i,j) and as (j,i),
% pushing count to 2 and making the procedure always answer "No Collusion";
% it would also read the never-assigned diagonal entries N[i,i].
\begin{algorithm}
    \caption{Collusion Detection for Multi-Agent Games}
    \label{CDA_algo}
    \begin{algorithmic}[1]
        % \langle...\rangle in math mode: bare < and > in text mode are not
        % rendered as angle brackets under the default OT1 encoding.
        \State \textbf{Input:} Game $m$ record tuples $\langle \mathbf{a}, \mathbf{s}, \mathbf{r} \rangle$ for $n$ agents and collusion threshold $\alpha$
        \State \textbf{Output:} Colluding pair if any.
        \State \textbf{Initialize} $n\times n$ matrix $I$ \Comment{for pair-wise individual influence}
        \State \textbf{Initialize} $n\times n$ matrix $N$ \Comment{for pair-wise net influence}
        \State \textbf{Initialize} $\mathit{count} \leftarrow 0$ \Comment{count of colluding pairs}
        \State \textbf{Initialize} $c_1, c_2 \leftarrow -1$ \Comment{indices of colluding agents}
        \For{$i = 1:n$} \Comment{Construct policy matrix for each agent}
            \State Construct $\pi^i$ \Comment{from \Cref{main:eq:pi}}
        \EndFor
        \For{$i = 1:n-1$} \Comment{Construct joint policy matrices for each pair of agents}
            \For{$j = i+1:n$}
                \State Construct $\pi^{ij}$, $\pi^{ji}$ \Comment{from \Cref{main:eq:pipi}}
                % Entry [x,y] always stores the influence with arguments (x;y).
                \State $I[i,j] \leftarrow \gamma(i;j)$ \Comment{from \Cref{main:eq:ii}}
                \State $I[j,i] \leftarrow \gamma(j;i)$ \Comment{from \Cref{main:eq:ii}}
            \EndFor
        \EndFor
        \For{$i = 1:n-1$} \Comment{Calculate net influence}
            \For{$j = i+1:n$}
                \State $N[i,j] \leftarrow \Gamma(i;j)$ \Comment{from \Cref{main:eq:net_inf}}
                \State $N[j,i] \leftarrow \Gamma(j;i)$ \Comment{from \Cref{main:eq:net_inf}}
            \EndFor
        \EndFor
        \For{$i = 1:n-1$} \Comment{Test each unordered pair exactly once}
            \For{$j = i+1:n$}
                \If{$N[i,j] \geq \alpha$ and $N[j,i] \geq \alpha$}
                    \State $\mathit{count} \leftarrow \mathit{count} + 1$
                    \State $c_1 \leftarrow i$
                    \State $c_2 \leftarrow j$
                    \If{$\mathit{count} > 1$} \Comment{more than one candidate pair}
                        \State \Return{No Collusion}
                    \EndIf
                \EndIf
            \EndFor
        \EndFor
        \If{$\mathit{count} = 1$}
            \State \Return{$c_1, c_2$}
        \Else
            \State \Return{No Collusion}
        \EndIf
    \end{algorithmic}
\end{algorithm}
% % NOTE: necessary when ptmx or no mathfont class option is given
% \providecommand{\upGamma}{\Gamma}
% \providecommand{\uppi}{\pi}
% \section{Math font exposition}
% How math looks in equations is important:
% \begin{equation*}
%   F_{\alpha,\beta}^\eta(z) = \upGamma(\tfrac{3}{2}) \prod_{\ell=1}^\infty\eta \frac{z^\ell}{\ell} + \frac{1}{2\uppi}\int_{-\infty}^z\alpha \sum_{k=1}^\infty x^{\beta k}\mathrm{d}x.
% \end{equation*}
% However, one should not ignore how well math mixes with text:
% The frobble function \(f\) transforms zabbies \(z\) into yannies \(y\).
% It is a polynomial \(f(z)=\alpha z + \beta z^2\), where \(-n<\alpha<\beta/n\leq\gamma\), with \(\gamma\) a positive real number.
\section{Additional Experiment: 4-Player Rock, Paper, Scissors} \label{sec:add_exp}
We run an additional experiment for a 4-player version of rock, paper, scissors. We have the following players:
\begin{enumerate}
    \item \textbf{Player A} : Primary colluding agent.
    \item \textbf{Player B} : Assistant colluding agent.
    \item \textbf{Player C} : Non-colluding agent.
    \item \textbf{Player D} : Non-colluding agent.
\end{enumerate}
Players A, C, and D choose an action at random. Player B chooses an action that guarantees A a point with some \emph{collusion probability}, and a random action otherwise.

We run an experiment similar to Experiment 1 (\Cref{main:sec:exp1}), where we ask how the collusion strength affects the swiftness, or sample complexity, of the detection algorithm. We use data generated from games played between Players A, B, C, and D (manual collusion). We run multiple simulations for different numbers of games (sample sizes) and varying values of the collusion probability ($CP$). We plot the calculated net influence for the different settings in \Cref{fig:sample_cp4}. Each graph in the figure corresponds to a different $CP$ value. The y-axis gives the net influence values and the x-axis gives the number of games used to calculate them. The dashed horizontal line in each graph depicts the collusion threshold $\alpha$, which is set at $0.05$. Note that, as the $CP$ value increases, our algorithm can detect collusion using data from fewer games. The results are similar to those we obtain for the 3-player version of rock, paper, scissors.
% Figure float for the 4-player experiment: includes the image file
% `4_player_rps' at 90% of the line width; referenced in the text as
% \Cref{fig:sample_cp4}.
\begin{figure}[ht]
  \centering
  % NOTE(review): \captionsetup is provided by the `caption' package, whose
  % \usepackage line is commented out in the preamble -- confirm that the
  % document class (or another package) loads it.
  \captionsetup{justification=centering}
  \includegraphics[width=0.90\linewidth]{4_player_rps}
  \caption{Net Influence Calculated for a Different Number of Games for Varying Values of Collusion Probability ($CP$) for Rock Paper Scissors. }
  % \label comes after \caption so that it picks up the figure number.
  \label{fig:sample_cp4}
% Disabled accessibility description of the plots, kept for reference:
%   \Description{There are a total of 10 line graphs with five on each side. Each line graph shows the net influence values on the y axis and the number of games on the x axis for a given CP value. The CP values range from 0.1 to 1 with increments of 0.1. The number of games range from 50 to 1000 with increments of 50 for all plots. The net influence ranges are different for each plot. For lower values of CP the range for net influence is lower, going from -0.05 to 0.05 for CP = 0.1 and for higher values of CP this range is higher going from -1 to 1 for CP=1. There are seven lines in each plot, six showing the net influence and one showing the collusion threshold}
\end{figure}
\end{document}
