\documentclass[accepted]{uai2023} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like
%% Choose your variant of English; be consistent
\usepackage[american]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams
\usepackage{times}
\usepackage{soul}
\usepackage{url}
\usepackage[utf8]{inputenc}
\usepackage{graphicx,color}
\usepackage{amsmath, bm}
\usepackage{amssymb}
\usepackage{amsfonts}
\usepackage{amsthm}
\usepackage{dsfont}
\usepackage{booktabs}

\usepackage{bbm}
\usepackage{nicefrac}
% Comment out this line in the camera-ready submission
%\linenumbers
\urlstyle{same}

\usepackage[noend]{algorithm2e}

\RestyleAlgo{ruled}
\usepackage{mathtools}
\mathtoolsset{showonlyrefs}
\urlstyle{same}


\newtheorem{example}{Example}
\newtheorem{theorem}{Theorem}
\newtheorem{definition}{Definition}
\newtheorem{assumption}{Assumption}
\newtheorem{lemma}{Lemma}
\newtheorem{problem}{Problem}
\newtheorem{remark}{Remark}


% for cross referencing the main text
% PLEASE ONLY USE xr IN THE SUPPLEMENTARY MATERIAL. 
% In the main paper, hard code any cross-reference to the supplementary material. 
%----Helper code for dealing with external references----
% (by cyberSingularity at http://tex.stackexchange.com/a/69832/226)
\usepackage{xr}
%\makeatletter % disabled: the matching \makeatother below is commented out with the helper code

%\newcommand*{\addFileDependency}[1]{% argument=file name and extension
%\typeout{(#1)}% latexmk will find this if $recorder=0
% however, in that case, it will ignore #1 if it is a .aux or 
% .pdf file etc and it exists! If it doesn't exist, it will appear 
% in the list of dependents regardless)
%
% Write the following if you want it to appear in \listfiles 
% --- although not really necessary and latexmk doesn't use this
%
%\@addtofilelist{#1}
%%
%% latexmk will find this message if #1 doesn't exist (yet)
%\IfFileExists{#1}{}{\typeout{No file #1.}}
%}\makeatother
%
%\newcommand*{\myexternaldocument}[1]{%
%\externaldocument{#1}%
%\addFileDependency{#1.tex}%
%\addFileDependency{#1.aux}%
%}
%------------End of helper code--------------

% put all the external documents here!
%\myexternaldocument{chen_375}
\externaldocument{chen_375}

%%%%%%%% Tikz setup %%%%%%%%
\usepackage{pgfplots}
\DeclareUnicodeCharacter{2212}{\ensuremath{-}}
\usepgfplotslibrary{groupplots, dateplot}
\usetikzlibrary{patterns, shapes, automata, arrows, shapes.arrows, positioning, decorations.pathreplacing, calligraphy, calc}

\pgfplotsset{compat=newest}

\usepackage{tikz}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% CUSTOM LEGEND SETUP%%%%%%%%%%%%%%%%%%%%
\usepgfplotslibrary{external}
\pgfplotsset{compat=newest}
\newenvironment{customlegend}[1][]{%
	\begingroup
	% inits/clears the lists (which might be populated from previous
	% axes):
	\csname pgfplots@init@cleared@structures\endcsname
	\pgfplotsset{#1}%
}{%
	% draws the legend:
	\csname pgfplots@createlegend\endcsname
	\endgroup
}%

% makes \addlegendimage available (typically only available within an
% axis environment):
\def\addlegendimage{\csname pgfplots@addlegendimage\endcsname}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{Differential Privacy in Cooperative Multiagent Planning\\(Supplementary Material)}

\author[1]{Bo~Chen\thanks{Indicates equal contribution.}}
\author[1]{Calvin~Hawkins$^*$}
\author[2]{Mustafa~O.~Karabag$^*$}
\author[2]{Cyrus~Neary$^*$}
\author[1]{Matthew~Hale}
\author[2]{Ufuk~Topcu}


 \affil[1]{
The University of Florida
 }
  \affil[2]{
The University of Texas at Austin
 }
  
\begin{document}

%%%%%%%%%%%%%%%%% MACROS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Colors
\definecolor{minDependencyPolicy}{RGB}{180, 65, 161}
\definecolor{baselinePolicy}{RGB}{46, 36, 47}

%Math symbols
\newcommand{\expectation}{\mathbb{E}}
\newcommand{\kl}{KL}
\newcommand{\entropy}{H}
\newcommand{\distribution}{\Delta}
\newcommand{\probabilityMeasure}{\mu}
\newcommand{\genericRandomVar}{Y}
\newcommand{\genericRandomVarSupport}{\mathcal{\genericRandomVar}}
\newcommand{\genericDistribution}{Q}
\newcommand{\genericDistributionSupport}{\mathcal{\genericDistribution}}
\newcommand{\genericFunction}{f}
\newcommand{\genericFunctionAlt}{g}
\newcommand{\emptyString}{\varepsilon}

\newcommand{\epsilonTransition}{\alpha}

\newcommand{\genericString}{w}
\newcommand{\constantNumber}{K}
\newcommand{\genericSet}{V}

% MDP
\newcommand{\mdp}{\mathcal{M}}
\newcommand{\mdpState}{s}
\newcommand{\mdpStateAlt}{y}
\newcommand{\mdpInitialState}{\mdpState_{I}}
\newcommand{\mdpStateSpace}{\mathcal{S}}
\newcommand{\mdpAction}{a}
\newcommand{\mdpActionAlt}{b}
\newcommand{\mdpActionSpace}{\mathcal{A}}
\newcommand{\mdpReward}{\mathcal{R}}
\newcommand{\mdpTransition}{\mathcal{T}}
\newcommand{\outdegree}{\rho}
\newcommand{\feasibleSet}{D_{\jointPolicy,T}}
\newcommand{\policy}{\pi}

\newcommand{\mdpPath}{\xi}
\newcommand{\mdpStateSeq}{h}
\newcommand{\mdpPathDist}{\Gamma}
\newcommand{\mdpValue}{v}
\newcommand{\mdpStateActionProcess}{X}
\newcommand{\mdpJointPathProcess}{\bm{X}}
\newcommand{\mdpStationaryStateActionProcess}{\bar{X}}
\newcommand{\mdpStateActionProcessAlt}{Y}
\newcommand{\mdpMixedStateActionProcess}{\bar{\mdpStateActionProcess}}
\newcommand{\mdpStateRandomVar}{S}
\newcommand{\mdpActionRandomVar}{A}

\newcommand{\timeHorizon}{T}
\newcommand{\randomReachTime}{\eta}

%%%%%%%%% CHANGE THESE %%%%%%%%%%%%%%
\newcommand{\joint}{joint}
\newcommand{\fullcommunication}{full}
\newcommand{\fullyimaginary}{full\text{ }img}
\newcommand{\imaginary}{img}
\newcommand{\intermittent}{int}

% Markov game
\newcommand{\game}{\bm{\mdp}}
\newcommand{\gameState}{\bm{\mdpState}}
\newcommand{\gameStateAlt}{\bm{\mdpStateAlt}}
\newcommand{\gameActionAlt}{\bm{\mdpActionAlt}}
\newcommand{\gameInitialState}{\bm{\mdpInitialState}}
\newcommand{\gameStateSpace}{\bm{\mdpStateSpace}}
\newcommand{\gameAction}{\bm{\mdpAction}}
\newcommand{\gameActionSpace}{\bm{\mdpActionSpace}}
\newcommand{\gameTransition}{\bm{\mdpTransition}}
\newcommand{\gameReward}{\bm{\mdpReward}}
\newcommand{\gameStateRandomVar}{\bm{\mdpStateRandomVar}}
\newcommand{\gameActionRandomVar}{\bm{\mdpActionRandomVar}}
\newcommand{\gameStateActionProcessAlt}{\bm{\mdpStateActionProcessAlt}}
\newcommand{\len}{len}
\newcommand{\expectedLength}{l}
\newcommand{\pathSet}{W}
\newcommand{\reachPathSet}{R}
\newcommand{\gameAbsorbingState}{\gameState_\alpha}

\newcommand{\jointPolicy}{\bm{\policy}}
\newcommand{\localPolicy}{\policy}

\newcommand{\targetSet}{\gameStateSpace_{\mathcal{T}}}

\newcommand{\deadSet}{\gameStateSpace_{\mathcal{A}}}
\newcommand{\deadSetPrime}{\gameStateSpace_{\mathcal{D}}}
\newcommand{\doneSet}{\gameStateSpace_{\mathcal{E}}}
\newcommand{\gameProcessEndState}{\gameState_{\epsilonTransition}}
\newcommand{\mdpProcessEndState}{\mdpState_{\epsilonTransition}}


\newcommand{\gamePath}{\bm{\mdpPath}}
\newcommand{\gamePathDist}{\bm{\mdpPathDist}}
\newcommand{\gameValue}{\bm{\mdpValue}}
\newcommand{\gameStateActionProcess}{\bm{\mdpStateActionProcess}}
\newcommand{\totalCorrelation}{C}
\newcommand{\totalCorrelationUpperBound}{\bar{\totalCorrelation}}

% multi-agent macros
\newcommand{\numAgents}{N}

%Communication systems
\newcommand{\probabilityFailureForever}{p}
\newcommand{\probabilityFailureOneStep}{q}
\newcommand{\sequenceCommAvailibility}{\Lambda}
\newcommand{\oneStepCommAvailibility}{\lambda}

% Optimization problem macros
\newcommand{\expectedLengthCoef}{\delta}
\newcommand{\totalCorrelationCoef}{\beta}
\newcommand{\occupancyVar}{x}

% Running example
\newcommand{\rover}{R}
\newcommand{\robot}{R}
\newcommand{\agent}{R}
\newcommand{\base}{B}
\newcommand{\goal}{T}

% Privacy macros
\newcommand{\adj}{\textnormal{Adj}}
\newcommand{\oblivious}{tr}
\newcommand{\private}{pr}
\newcommand{\genericPath}{\bm{v}}
\newcommand{\genericAgentPath}{v}

\newcommand{\genericPathAlt}{\bm{w}}
\newcommand{\genericAgentPathAlt}{w}

\newcommand{\privatePath}{\Tilde{\bm{z}}}
\newcommand{\privateAgentPath}{\Tilde{z}}

\newcommand{\privateMdpState}{\Tilde{\mdpState}}
\newcommand{\privateGameState}{\Tilde{\gameState}}
\newcommand{\privacyLevel}{\epsilon}
\newcommand{\adjParam}{k}
\newcommand{\trueStateProb}{\tau}

\newcommand{\probONB}{p_{\textrm{onb}}}
\newcommand{\probOffline}{p_{\textrm{off}}}
\newcommand{\probRepair}{p_{\textrm{r}}}

\newcommand{\directedGraph}{G}
\newcommand{\graphEdges}{E}
\onecolumn

\maketitle

% Match the counters with the last numbers included in the actual paper, so that we don't have confusion with different equations being labeled by the same number in the main body vs the supplementary material.
\setcounter{theorem}{2}
\setcounter{equation}{3}

\appendix

\section{Proofs for Theoretical Results}

The Kullback--Leibler (KL) divergence~\cite{thomas2006elements} between discrete probability distributions $Q^1$ and $Q^2$ with supports $\bm{Q}^1$ and $\bm{Q}^2$, respectively, is
\[
K L\left(Q^1 \| Q^2\right)=\sum_{q \in \bm{Q}^1} Q^1(q) \log \left(\frac{Q^1(q)}{Q^2(q)}\right).
\]

\emph{Notation.} We first define some notation that will be used in the proofs. Let $\gameStateRandomVar_t$ be a random variable denoting the joint state of the agents at time $t$ under the joint policy with no privatization, $\gameActionRandomVar_t$ be a random variable denoting the joint action of the agents at time $t$, $\mdpStateRandomVar_t^i$ be a random variable denoting the state of agent $i$ at time $t$, and $\mdpActionRandomVar_t^i$ be a random variable denoting the action of agent $i$ at time $t$. Further, let $\gameStateRandomVar_t^{-i}$ be a random variable denoting the joint state of agent $i$'s teammates (i.e., all agents except agent $i$) at time $t$, and $\gameActionRandomVar_t^{-i}$ be a random variable denoting the joint action of agent $i$'s teammates at time $t$.
The total correlation $\totalCorrelation_{\jointPolicy}$ of a joint policy $\jointPolicy$ is
\begin{equation}
    \totalCorrelation_{\jointPolicy} = \sum_{i=1}^\numAgents \entropy(\mdpStateRandomVar_0^{i}\mdpActionRandomVar_0^{i}\ldots\mdpStateRandomVar_{\randomReachTime}^{i})-\entropy(\gameStateRandomVar_0\gameActionRandomVar_0\ldots\gameStateRandomVar_\randomReachTime)%\label{eq:proof_tc_def}
\end{equation}
where \(\randomReachTime\) denotes the random hitting time to \(\targetSet \cup \deadSetPrime\), i.e., the effective end of the trajectory in terms of the reach-avoid specification~\cite{Karabag2022}.  

Let \(\pathSet\) denote all trajectory fragments that end at a state in \(\targetSet \cup \deadSetPrime\), i.e., \(\pathSet = \lbrace \genericPath=\bm{s_0}\bm{a_0}\ldots\bm{s_T} \mid \bm{s_T} \in  \targetSet \cup \deadSetPrime \text{ and } \forall t < T,  \bm{s_t} \not\in \targetSet \cup \deadSetPrime \rbrace \), and let \(\pathSet'\) denote all trajectories that never reach \(\targetSet \cup \deadSetPrime\), i.e., \(\pathSet' = \lbrace \genericPath=\bm{s_0}\bm{a_0}\ldots \mid \forall t \geq 0,  \bm{s_t} \not\in \targetSet \cup \deadSetPrime \rbrace \). Note that every trajectory either starts with a trajectory fragment from \(\pathSet\) or is in \(\pathSet'\). Also, let \(\reachPathSet \subseteq \pathSet \cup \pathSet'\) denote all trajectory fragments that end at a state in \(\targetSet\), i.e., \(\reachPathSet = \lbrace \genericPath=\bm{s_0}\bm{a_0}\ldots\bm{s_T} \mid \bm{s_T} \in  \targetSet  \text{ and } \forall t < T,  \bm{s_t} \not\in \targetSet \cup \deadSetPrime  \rbrace \).

Let $\Gamma^{\oblivious}$ be the distribution of joint trajectories induced by the joint policy executed with truthful communications (i.e., no privacy). Also, let $\Gamma^{\private}$ be the distribution of joint trajectories with privacy enforced. Let $v^{\oblivious}$ be the probability of success under truthful communications and $v^{\private}$ be the probability of success under private communications. 

We use $\probabilityMeasure^{\oblivious}$ to denote the probability measure over the actual (finite or infinite) state-action process under the joint policy with truthful communications. $\probabilityMeasure^{\private}$ denotes the probability measure over the actual (finite or infinite) state-action process under joint policy with private communications. With abuse of notation, we also use $\probabilityMeasure_{\epsilon}$ to denote the conditional probability measure over private state trajectories given the actual state trajectory.

Let $\genericPath=\gameState_0\gameAction_0\gameState_1\gameAction_1\ldots\gameState_T\in (\bm{\mdpStateSpace}\times\bm{\mdpActionSpace})^T$ be a joint trajectory fragment and $\privatePath=\privateGameState_0\privateGameState_1\ldots\privateGameState_T\in \gameStateSpace^T$ be a private joint state trajectory fragment. We use $\hat{\gameState}_t^j=\{\Tilde{\mdpState}_t^1,\dots,\Tilde{\mdpState}_t^{j-1},\mdpState_t^j,\Tilde{\mdpState}_t^{j+1},\dots,\Tilde{\mdpState}_t^{\numAgents}\}$ to denote agent $j$'s copy of the private joint state.

The Kleene star applied to a set $V$ of symbols is the set $V^*=\cup_{i\geq 0}V^i$ of all finite-length words where $V^0=\{\Lambda\}$ and $\Lambda$ is the empty string. The set of all infinite-length words is denoted by $V^\omega$.

We introduce the following lemma, to be used in the proof of other theoretical results.

\begin{lemma} \label{lemma:expectedlogproboftrueword}
    \begin{align}
        &\expectation_{\genericPath \sim \probabilityMeasure^{\oblivious}}\left[\log \probabilityMeasure_{\privacyLevel}(\privatePath = \genericPath|\genericPath) \right] \geq  - \numAgents\log\left(\left(\outdegree_{\max}-1\right)\exp(-\frac{\privacyLevel}{\adjParam})+1\right)l^{\oblivious}
    \end{align} 
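    where $\outdegree(\mdpState)$ is the out-degree of state $\mdpState$, $\outdegree_{\max}=\max_{\mdpState\in \cup_{i=1}^{\numAgents} \mdpStateSpace^{i}}  \outdegree(\mdpState)$, and $l^{\oblivious}$ is the expected length of a trajectory (i.e., the expected hitting time $\randomReachTime$) under truthful communications.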
\end{lemma}

\begin{proof}[Proof of Lemma \ref{lemma:expectedlogproboftrueword}]

Due to the Markovianity of the online privacy mechanism (Algorithm \ref{alg:privacy_construction}), independence between the agents, and the acyclic property of the policy graph \(G\), we have  
\begin{align}
     \probabilityMeasure_{\privacyLevel}(\privatePath=\genericPath|\genericPath=\gameState_{0}\ldots \gameState_{\timeHorizon}) = \prod_{t=0}^{\timeHorizon-1}\prod_{i=1}^\numAgents\probabilityMeasure_\privacyLevel(\Tilde{\mdpState}_t^i = \mdpState_t^i|\mdpState_t^i,\Tilde{\mdpState}_{t-1}^i)
\end{align}
We note that if \(\privatePath=\genericPath\), then for all \(t\geq 0\) and \(j \in [\numAgents]\), we have \(\hat{\gameState}_{t}^j = \gameState_{t}\),  i.e., the copy of the private state for every agent always matches the actual joint state. Hence, 
\begin{align}
    \probabilityMeasure_{\privacyLevel}(\privatePath=\genericPath|\genericPath=\gameState_{0}\ldots \gameState_{\timeHorizon}) = \prod_{t=0}^{\timeHorizon-1}\prod_{i=1}^{\numAgents}\probabilityMeasure_\privacyLevel(\Tilde{\mdpState}_t^i = \mdpState_t^i|\mdpState_t^i,\Tilde{\mdpState}_{t-1}^i = \mdpState_{t-1}^i) 
\end{align}

From~\cite[Theorem 7]{chen2022differential}, we have
\begin{align}
\probabilityMeasure_\privacyLevel(\Tilde{\mdpState}_t^i = \mdpState_t^i|\mdpState_t^i,\Tilde{\mdpState}_{t-1}^i = \mdpState_{t-1}^i)=\frac{1}{\left(\outdegree(\mdpState_{t-1}^{i})-1\right)\exp(-\frac{\privacyLevel}{\adjParam})+1}, 
\end{align}
where $\outdegree(\mdpState_{t-1}^{i})$ is the out-degree of $\mdpState_{t-1}^{i}$. Let $\outdegree_{\max}=\max_{\mdpState\in \cup_{i=1}^{\numAgents} \mdpStateSpace^{i}}  \outdegree(\mdpState)$, which gives \[\probabilityMeasure_\privacyLevel(\Tilde{\mdpState}_t^i = \mdpState_t^i|\mdpState_t^i,\Tilde{\mdpState}_{t-1}^i = \mdpState_{t-1}^i)\geq\frac{1}{\left(\outdegree_{\max}-1\right)\exp(-\frac{\privacyLevel}{\adjParam})+1}.\] Using this, we get
\begin{align*}
\log \probabilityMeasure_{\privacyLevel}(\privatePath=\genericPath|\genericPath=\gameState_{0}\ldots \gameState_{\timeHorizon})
&=\log\left(\prod_{t=0}^{\timeHorizon-1}\prod_{i=1}^\numAgents\probabilityMeasure_\privacyLevel(\mdpState_t^i|\mdpState_t^i,\Tilde{\mdpState}_{t-1}^i = \mdpState_{t-1}^i)\right)
\\
&=\sum_{t=0}^{\timeHorizon-1}\sum_{i=1}^\numAgents\log\probabilityMeasure_\privacyLevel(\mdpState_t^i|\mdpState_t^i,\Tilde{\mdpState}_{t-1}^i = \mdpState_{t-1}^i)
\\
&\geq\sum_{t=0}^{\timeHorizon-1}\sum_{i=1}^\numAgents\log\frac{1}{\left(\outdegree_{\max}-1\right)\exp(-\frac{\privacyLevel}{\adjParam})+1}
\\
&=\sum_{t=0}^{{\timeHorizon-1}}-\numAgents\log\left(\left(\outdegree_{\max}-1\right)\exp(-\frac{\privacyLevel}{\adjParam})+1\right).
\end{align*}
Consequently,
\begin{align*}\expectation_{\genericPath \sim \probabilityMeasure^{\oblivious}}\left[\log \probabilityMeasure_{\privacyLevel}(\privatePath = \genericPath|\genericPath) \right]
&=
\sum_{\genericPath\in W}\probabilityMeasure^{\oblivious}(\genericPath)\log\left(\prod_{t=0}^{\timeHorizon-1}\prod_{i=1}^\numAgents\probabilityMeasure_\privacyLevel(\mdpState_t^i|\mdpState_t^i,\mdpState_{t-1}^i)\right)
\\
    &\geq    \sum_{\genericPath\in  W} -\probabilityMeasure^{\oblivious}(\genericPath)\sum_{t=0}^{\timeHorizon-1}\numAgents\log\left(\left(\outdegree_{\max}-1\right)\exp(-\frac{\privacyLevel}{\adjParam})+1\right)
    \\
    &= \expectation\left[\sum_{t=0}^{\randomReachTime -1}-\numAgents\log\left(\left(\outdegree_{\max}-1\right)\exp(-\frac{\privacyLevel}{\adjParam})+1\right)\,\middle|\,\probabilityMeasure^{\oblivious}\right]
    \\
    &=-\numAgents\log\left(\left(\outdegree_{\max}-1\right)\exp(-\frac{\privacyLevel}{\adjParam})+1\right)\expectation\left[\sum_{t=0}^{\randomReachTime -1}1\,\middle|\,\probabilityMeasure^{\oblivious}\right]
    \\
    &=-\numAgents\log\left(\left(\outdegree_{\max}-1\right)\exp(-\frac{\privacyLevel}{\adjParam})+1\right)l^{\oblivious}. \label{eq:prob_true_word}
\end{align*}


\end{proof}


\begin{proof}[Proof of Theorem \ref{lem:privacy_req}]

For any agent $i$, let $\genericPath=(\genericAgentPath^1,\genericAgentPath^2,\dots,\genericAgentPath^{\numAgents})\in \feasibleSet$ and $\genericPathAlt=(\genericAgentPathAlt^1,\genericAgentPathAlt^2,\dots,\genericAgentPathAlt^{\numAgents})\in \feasibleSet$ be two adjacent joint trajectories. By Definition~\ref{dfn:adjacency}, we have $d(\genericAgentPath^i,\genericAgentPathAlt^i)\leq \adjParam$ and $\genericAgentPath^j=\genericAgentPathAlt^j$ for all $j\neq i$. Let $\privateAgentPath^i$ denote a private output of the online mechanism $M$ for agent $i$. We will show that the probability that the online mechanism generates \(\privateAgentPath^{i}\) satisfies Definition~\ref{dfn:word_dp}. 

Let \(\{p_1,p_2,\dots,p_m\}\) be the agents that have directed paths to node \(i\) in the directed policy dependency graph \(G\). We note that 
\begin{align*}
    \Pr(M(\genericPath) = \privateAgentPath^{i}) = \Pr(M(\genericPath) = \privateAgentPath^{i} | \genericAgentPath^i,\genericAgentPath^{p_1},\dots,\genericAgentPath^{p_m}),
\end{align*}

since the private path \(\privateAgentPath^{i}\) of agent \(i\) is conditionally independent from \(\genericAgentPath^{j}\) for all $j\in [N]\setminus \{i,p_1,p_2,\dots,p_m\}$. 
This independence occurs because \(1)\) the policy dependency graph is acyclic, and \(\privateAgentPath^{i}\) is generated from a distribution that does not depend on \(\privateAgentPath^{j}\) for all $j\in [N]\setminus \{i,p_1,p_2,\dots,p_m\}$, and \(2)\) the online mechanism of agent \(i\) depends on its own private and true state trajectories. Next, we note that 
\begin{align*}
    \Pr(M(\genericPath) = \privateAgentPath^{i} | \genericAgentPath^i,\genericAgentPath^{p_1},\dots,\genericAgentPath^{p_m})&=\frac{\Pr(M(\genericPath) = \privateAgentPath^{i},\genericAgentPath^i,\genericAgentPath^{p_1},\dots,\genericAgentPath^{p_m})}{\Pr(\genericAgentPath^i,\genericAgentPath^{p_1},\dots,\genericAgentPath^{p_m})}\\
    &= \frac{\Pr(\genericAgentPath^{p_1},\dots,\genericAgentPath^{p_m}|M(\genericPath) = \privateAgentPath^{i},\genericAgentPath^i)\Pr(M(\genericPath) = \privateAgentPath^{i},\genericAgentPath^i)}{\Pr(\genericAgentPath^{p_1},\dots,\genericAgentPath^{p_m}|\genericAgentPath^i)\Pr(\genericAgentPath^i)}\\
    &= \frac{\Pr(\genericAgentPath^{p_1},\dots,\genericAgentPath^{p_m})\Pr(M(\genericPath) = \privateAgentPath^{i},\genericAgentPath^i)}{\Pr(\genericAgentPath^{p_1},\dots,\genericAgentPath^{p_m})\Pr(\genericAgentPath^i)}\\
    &= \Pr(M(\genericPath) = \privateAgentPath^{i}|\genericAgentPath^i)\\
    &=\prod^{\timeHorizon}_{t=1} \probabilityMeasure^i_{\privacyLevel}(\privateMdpState^{i}_t\ |\ \mdpState^{i}_t,\privateMdpState^{i}_{t-1}),
\end{align*}
since the probability of $\genericAgentPath^p$, $\forall p\in\{p_1,p_2,\dots,p_m\}$, does not depend on agent $i$'s private and true local state trajectories and
the online mechanism is Markovian.

Similarly, for $\genericAgentPathAlt^i=\mdpStateAlt_1^i\mdpStateAlt_2^i\dots\mdpStateAlt_\timeHorizon^i$,
\begin{equation*}
    \Pr(M(\genericPathAlt) = \privateAgentPath^{i} | \genericAgentPathAlt^i,\genericAgentPathAlt^{p_1},\dots,\genericAgentPathAlt^{p_m}) = \Pr(M(\genericPathAlt) = \privateAgentPath^{i}|\genericAgentPathAlt^i)=\prod^{\timeHorizon}_{t=1} \probabilityMeasure^i_{\privacyLevel}(\privateMdpState^{i}_t\ |\ \mdpStateAlt^{i}_t,\privateMdpState^{i}_{t-1})
\end{equation*}

Consequently, 
\begin{equation*}
    \frac{\Pr(M(\genericPath) = \privateAgentPath^{i})}{\Pr(M(\genericPathAlt) = \privateAgentPath^{i})} = \frac{\Pr(M(\genericPath) = \privateAgentPath^{i}|\genericAgentPath^i)}{\Pr(M(\genericPathAlt) = \privateAgentPath^{i}|\genericAgentPathAlt^i)}=\frac{\prod^{\timeHorizon}_{t=1} \probabilityMeasure^i_{\privacyLevel}(\privateMdpState^{i}_t\ |\ \mdpState^{i}_t,\privateMdpState^{i}_{t-1})}{\prod^{\timeHorizon}_{t=1} \probabilityMeasure^i_{\privacyLevel}(\privateMdpState^{i}_t\ |\ \mdpStateAlt^{i}_t,\privateMdpState^{i}_{t-1})}
\end{equation*}
The rest of the proof follows immediately from the proof of~\cite[Theorem 7]{chen2022differential}.

\end{proof}

\begin{proof} [Proof of Theorem \ref{thm:performance_bound}]
Due to the causality property of the online mechanism (Algorithm \ref{alg:privacy_construction}) and the joint policy execution (Algorithm  \ref{alg:policy_exec}), we have
\begin{align}
    \probabilityMeasure^{\private}(\genericPath)&= \sum_{\privatePath \in \gameStateSpace^T} \probabilityMeasure^{\private}(\genericPath,\privatePath)\nonumber\\
    &= \sum_{\privatePath\in \gameStateSpace^T}\prod_{t=0}^{T-1}\Pr(\gameAction_t\gameState_{t+1},\privateGameState_t|\gameAction_{t-1}\gameState_t\ldots\gameAction_0\gameState_1,\privateGameState_{t-1}\ldots\privateGameState_0),\nonumber
\end{align}
where,
\begin{align}\Pr(\gameAction_t\gameState_{t+1},\privateGameState_t|\gameAction_{t-1}\gameState_t \ldots\gameAction_0\gameState_1,\privateGameState_{t-1}\ldots\privateGameState_0)
    &= \Pr(\gameAction_t\gameState_{t+1},\privateGameState_t|\gameState_t,\privateGameState_{t-1})\label{eq:proof_1}\\
    &=  \prod_{i=1}^\numAgents\Pr(\mdpAction_{t}^i\mdpState_{t+1}^i,\privateGameState_t|\gameState_t,\privateGameState_{t-1})\label{eq:proof_2} \\
    &= \prod_{i=1}^\numAgents  \Pr(\mdpAction_{t}^i\mdpState_{t+1}^i|\gameState_t,\privateGameState_{t},\privateGameState_{t-1})\Pr(\privateGameState_{t}|\gameState_{t},\privateGameState_{t-1})\label{eq:proof_4}\\
    &= \prod_{i=1}^\numAgents \Pr(\mdpAction_{t}^i\mdpState_{t+1}^i|\gameState_{t},\privateGameState_t,\privateGameState_{t-1})\left(\prod_{k=1}^\numAgents\probabilityMeasure_\epsilon(\Tilde{s}_t^k|\mdpState_t^k,\Tilde{s}_{t-1}^k)\right)\label{eq:proof_5}\\
    &= \prod_{i=1}^\numAgents \Pr(\mdpAction_{t}^i\mdpState_{t+1}^i|\hat{\gameState}_t^i)\left(\prod_{k=1}^\numAgents\probabilityMeasure_\epsilon(\Tilde{\mdpState}_t^k|\mdpState_t^k,\Tilde{\mdpState}_{t-1}^k)\right)\label{eq:proof_6}\\
    &= \prod_{i=1}^\numAgents \mdpTransition(\mdpState_t^i,\mdpAction_{t}^i,\mdpState_{t+1}^i)\localPolicy^{i}(\hat{\gameState}_t^i, \mdpAction_{t}^i)\left(\prod_{k=1}^\numAgents\probabilityMeasure_\epsilon(\Tilde{\mdpState}_t^k|\mdpState_t^k,\Tilde{\mdpState}_{t-1}^k)\right)\label{eq:proof_7}.
\end{align}

Equation~\eqref{eq:proof_1} follows from the Markov property. Equation~\eqref{eq:proof_2} holds because each agent chooses its next action and state independently. Equation~\eqref{eq:proof_5} holds because each agent generates its private state independently. Equation~\eqref{eq:proof_6} holds because, for each agent $i$, its true next state $\mdpState_{t+1}^i$ is independent of the other agents' true states and of agent $i$'s own private state. 

Therefore, 
\begin{align}
    \probabilityMeasure^{\private}(\genericPath)
    &=\sum_{\privatePath\in \gameStateSpace^{T}}\prod_{t=0}^{T-1}\prod_{i=1}^\numAgents \mdpTransition(\mdpState_t^i,\mdpAction_{t}^i,\mdpState_{t+1}^i)\localPolicy^{i}(\hat{\mathbf{s}}_t^i, \mdpAction_{t}^i)\left(\prod_{i=1}^\numAgents\probabilityMeasure_\epsilon(\Tilde{s}_t^i|\mdpState_t^i,\Tilde{s}_{t-1}^i)\right)
    \nonumber\\
    &\geq \prod_{t=0}^{T-1}\prod_{i=1}^\numAgents \mdpTransition(\mdpState_t^i,\mdpAction_{t}^i,\mdpState_{t+1}^i)\localPolicy^{i}(\hat{\mathbf{s}}_t^i, \mdpAction_{t}^i)\left(\prod_{i=1}^\numAgents\probabilityMeasure_\epsilon(\Tilde{s}_t^i|\mdpState_t^i,\Tilde{s}_{t-1}^i)\right), \forall \privatePath\in \gameStateSpace^T,
     \label{eq:proof_8}
\end{align}
where Equation~\eqref{eq:proof_8} holds because the sum of the (nonnegative) probabilities over all possible private state trajectories is at least as large as the probability of any single private state trajectory. We only consider the case when $\privateGameState_t=\gameState_{t}$ for all $t$, which means that the online privacy mechanism outputs the true state at every time $t$. Therefore,
\begin{align}
    \probabilityMeasure^{\private}(\genericPath) &\geq \prod_{t=0}^{T-1}\prod_{i=1}^\numAgents \mdpTransition(\mdpState_t^i,\mdpAction_{t}^i,\mdpState_{t+1}^i)\localPolicy^{i}(\gameState_{t},\mdpAction_{t}^i)\left(\prod_{i=1}^\numAgents\probabilityMeasure_\epsilon(\mdpState_t^i|\mdpState_t^i,\mdpState_{t-1}^i)\right)
    \\
    &=\probabilityMeasure^{\oblivious}(\genericPath)\left(\prod_{t=0}^{T-1}\prod_{i=1}^\numAgents\probabilityMeasure_\epsilon(\mdpState_t^i|\mdpState_t^i,\mdpState_{t-1}^i)\right). \label{eq:privfullrel} 
\end{align}

Now we look at the following KL divergence:
\begin{align}
KL(\gamePathDist^{\oblivious}||\gamePathDist^{\private})
    &=\sum_{\genericPath\in \pathSet \cup \pathSet'}\probabilityMeasure^{\oblivious}(\genericPath)\log\left(\frac{\probabilityMeasure^{\oblivious}(\genericPath)}{\probabilityMeasure^{\private}(\genericPath)}\right)\nonumber
    \\
    &=\sum_{\genericPath\in \pathSet}\probabilityMeasure^{\oblivious}(\genericPath)\log\left(\frac{\probabilityMeasure^{\oblivious}(\genericPath)}{\probabilityMeasure^{\private}(\genericPath)}\right) \label{eq:everypathends}
    \\
        &\leq \sum_{\genericPath\in \pathSet }\probabilityMeasure^{\oblivious}(\genericPath)\log\left(\frac{\probabilityMeasure^{\oblivious}(\genericPath)}{\probabilityMeasure^{\oblivious}(\genericPath)\left(\prod_{t=0}^{T-1}\prod_{i=1}^\numAgents\probabilityMeasure_\epsilon(\mdpState_t^i|\mdpState_t^i,\mdpState_{t-1}^i)\right)}\right)\label{eq:proof_12}
        \\
    &= \sum_{\genericPath\in \pathSet}\probabilityMeasure^{\oblivious}(\genericPath)\log(\probabilityMeasure^{\oblivious}(\genericPath)) - \sum_{\genericPath\in \pathSet} \probabilityMeasure^{\oblivious}(\genericPath)\log(\probabilityMeasure^{\oblivious}(\genericPath))\nonumber
    \\
    &\qquad - \sum_{\genericPath\in \pathSet}\probabilityMeasure^{\oblivious}(\genericPath)\log\left(\prod_{t=0}^{T-1}\prod_{i=1}^\numAgents\probabilityMeasure_\epsilon(\mdpState_t^i|\mdpState_t^i,\mdpState_{t-1}^i)\right)\nonumber
    \\
    &= \entropy(\gameStateRandomVar_0\gameActionRandomVar_0\ldots\gameStateRandomVar_\randomReachTime) -\entropy(\gameStateRandomVar_0\gameActionRandomVar_0\ldots\gameStateRandomVar_\randomReachTime) -\sum_{\genericPath\in \pathSet} \probabilityMeasure^{\oblivious}(\genericPath)\log\left(\prod_{t=0}^{T-1}\prod_{i=1}^\numAgents\probabilityMeasure_\epsilon(\mdpState_t^i|\mdpState_t^i,\mdpState_{t-1}^i)\right) 
    \\
    &\leq \sum_{i=1}^\numAgents \entropy(\mdpStateRandomVar_0^i\mdpActionRandomVar_0^i\ldots\mdpStateRandomVar_\randomReachTime^i)-\entropy(\gameStateRandomVar_0\gameActionRandomVar_0\ldots\gameStateRandomVar_\randomReachTime) -  \sum_{\genericPath\in \pathSet} \probabilityMeasure^{\oblivious}(\genericPath)\log\left(\prod_{t=0}^{T-1}\prod_{i=1}^\numAgents\probabilityMeasure_\epsilon(\mdpState_t^i|\mdpState_t^i,\mdpState_{t-1}^i)\right)\label{eq:proof_13}
    \\
    &= \totalCorrelation_{\jointPolicy} - \sum_{\genericPath\in \pathSet}\probabilityMeasure^{\oblivious}(\genericPath)\log\left(\prod_{t=0}^{T-1}\prod_{i=1}^\numAgents\probabilityMeasure_\epsilon(\mdpState_t^i|\mdpState_t^i,\mdpState_{t-1}^i)\right)\label{eq:proof_14}
    \\
    &=  \totalCorrelation_{\jointPolicy}  - \expectation_{\genericPath \sim \probabilityMeasure^{\oblivious}}\left[\log \probabilityMeasure_{\privacyLevel}(\privatePath = \genericPath|\genericPath) \right],
\end{align}
where \eqref{eq:everypathends} is due to \(\sum_{\genericPath\in \pathSet'}  \probabilityMeasure^{\oblivious}(\genericPath) = 0\), \eqref{eq:proof_13} is due to the subadditivity of entropy, and \eqref{eq:proof_14} is due to the definition of \(\totalCorrelation_{\jointPolicy}\).

Using Lemma \ref{lemma:expectedlogproboftrueword} in \eqref{eq:proof_14} gives
\begin{equation}
\kl(\gamePathDist^{\oblivious}||\gamePathDist^{\private})
    \leq \totalCorrelation_{\jointPolicy} + \numAgents\log\left(\left(\outdegree_{\max}-1\right)\exp(-\frac{\privacyLevel}{\adjParam})+1\right)l^{\oblivious}. \label{eq:klbound}
\end{equation}

Finally, we show that \(    v^{\private} \geq v^{\oblivious} -1 + \exp(-\totalCorrelation_{\jointPolicy})\left((\outdegree_{\max}-1)\exp\left(-\frac{\privacyLevel}{\adjParam}\right)+1\right)^{-\numAgents l^{\oblivious}}/2.\)  Let \(R' \subseteq \pathSet \cup \pathSet'\) be an arbitrary set.
\begin{subequations}
\begin{align}
    \gameValue^{\oblivious} - \gameValue^{\private}
    &=  \sum_{\genericPath \in R} \probabilityMeasure^{\oblivious}(\genericPath ) - \probabilityMeasure^{\private}(\genericPath )
    \\
    &\leq \left | \sum_{\genericPath \in R} \probabilityMeasure^{\oblivious}(\genericPath ) - \probabilityMeasure^{\private}(\genericPath )\right |
    \\
    &\leq \sup_{R'} \left |\sum_{\genericPath \in R'} \probabilityMeasure^{\oblivious}(\genericPath ) - \probabilityMeasure^{\private}(\genericPath )\right |
    \\
    &\leq \sqrt{1-\exp(-\kl(\gamePathDist^{\oblivious} || \gamePathDist^{\private}))} \label{bretagnollehuber}
\end{align}
\end{subequations}
where \eqref{bretagnollehuber} is due to the Bretagnolle--Huber inequality~\cite{bretagnolle1979estimation}.
Applying \(\sqrt{1-x} \leq 1 - x/2\) for \(x \in [0,1]\) to \eqref{bretagnollehuber}, rearranging the terms, and using \eqref{eq:klbound} yields the desired result.

\end{proof}

We note that apart from Theorem~\ref{thm:performance_bound}, we can derive a tighter lower bound on $\mdpValue^{pr}$. 

\begin{theorem}\label{thm:performance_2_bound}
Given $\epsilon>0$, for $\numAgents$ agents, we have
\begin{equation}\label{eq:thm_2_equation}
    \mdpValue^{pr}\geq \mdpValue^{\oblivious}-1+\left(\left(\outdegree_{\max}-1\right)\exp(-\frac{\epsilon}{\adjParam})+1\right)^{-\numAgents l^{\oblivious}}.
\end{equation}
\end{theorem}

\begin{proof} [Proof of Theorem \ref{thm:performance_2_bound}]
    
As shown in the proof of Theorem \ref{thm:performance_bound}, we have
\begin{align}
    \mdpValue^{\private}&= \sum_{\genericPath=\gameState_0\gameAction_0\gameState_1\gameAction_1\ldots\gameState_T \in \pathSet} \probabilityMeasure^{\private}(\genericPath)  \mathds{1}(\genericPath \in \reachPathSet)  \\
    &\geq \sum_{\genericPath=\gameState_0\gameAction_0\gameState_1\gameAction_1\ldots\gameState_T \in \pathSet} \probabilityMeasure^{\oblivious} (\genericPath) \mathds{1}(\genericPath \in \reachPathSet) \left(\prod_{t=0}^{T-1} \prod_{k=1}^\numAgents \mu_\epsilon(\mdpState_t^k|\mdpState_t^k,\mdpState_{t-1}^k)\right)
    \\
    &= \Pr(\genericPath \in \reachPathSet \wedge \privatePath = \genericPath | \genericPath \sim \probabilityMeasure^{\oblivious}, \privatePath \sim \probabilityMeasure_{\privacyLevel}(\cdot| \genericPath)).
\end{align}

By the union bound, we have 
\begin{align}
    \mdpValue^{\private}&\geq \Pr(\genericPath \in \reachPathSet | \genericPath \sim \probabilityMeasure^{\oblivious}) + \expectation_{\genericPath \sim \probabilityMeasure^{\oblivious}}\left[ \probabilityMeasure_{\privacyLevel}(\privatePath = \genericPath|\genericPath) \right] - 1 
    \\
    &= \mdpValue^{\oblivious} + \expectation_{\genericPath \sim \probabilityMeasure^{\oblivious}}\left[ \probabilityMeasure_{\privacyLevel}(\privatePath = \genericPath|\genericPath) \right] -1 \label{eq:unionbound}
\end{align}

Then with \[\expectation_{\genericPath \sim \probabilityMeasure^{\oblivious}}\left[\probabilityMeasure_{\privacyLevel}(\privatePath = \genericPath|\genericPath) \right]=\sum_{\genericPath \in \pathSet}\probabilityMeasure^{\oblivious}(\genericPath)\probabilityMeasure_{\privacyLevel}(\privatePath = \genericPath|\genericPath) \]
and Jensen's inequality,  we have
\begin{align}
    \expectation_{\genericPath \sim \probabilityMeasure^{\oblivious}}\left[\probabilityMeasure_{\privacyLevel}(\privatePath = \genericPath|\genericPath) \right]
    &=\exp\left(\log\sum_{\genericPath \in \pathSet}\probabilityMeasure^{\oblivious}(\genericPath)\probabilityMeasure_{\privacyLevel}(\privatePath = \genericPath|\genericPath) \right)
    \\
    &\geq \exp\left(\sum_{\genericPath \in \pathSet}\probabilityMeasure^{\oblivious}(\genericPath)\log \probabilityMeasure_{\privacyLevel}(\privatePath = \genericPath|\genericPath) \right)\label{eq:expectation_of_true-word}
    \\
    & =\exp\left( \expectation_{\genericPath \sim \probabilityMeasure^{\oblivious}}\left[\log \probabilityMeasure_{\privacyLevel}(\privatePath = \genericPath|\genericPath) \right]\right).
\end{align}
Using Lemma \ref{lemma:expectedlogproboftrueword}, we get
\begin{align}
    \mdpValue^{\private} &\geq \mdpValue^{\oblivious}-1+\exp\left( \expectation_{\genericPath \sim \probabilityMeasure^{\oblivious}}\left[\log \probabilityMeasure_{\privacyLevel}(\privatePath = \genericPath|\genericPath) \right]\right)
    \\
    &\geq \mdpValue^{\oblivious}-1+\left(\left(\outdegree_{\max}-1\right)\exp(-\frac{\epsilon}{\adjParam})+1\right)^{-\numAgents \expectedLength^{\oblivious}},
\end{align}
which completes the proof.

\end{proof}

Compared to \eqref{eq:thm_1_eq}, \eqref{eq:thm_2_equation} does not take the total correlation $\totalCorrelation_{\jointPolicy}$ into account and only focuses on the success probability when the private state trajectories are the same as the original state trajectories. As a result, a joint policy $\jointPolicy = \lbrace \localPolicy^{i} \rbrace_{i=1}^{\numAgents}$ synthesized by optimizing the lower bound in \eqref{eq:thm_2_equation} does not enjoy the robustness brought by optimizing the bound in \eqref{eq:thm_1_eq}. The inclusion of total correlation in the objective function increases the team performance under private communications since the agents' policies are less sensitive to each other's state trajectories. 
\bibliography{bibliography}

\end{document}