\documentclass[accepted]{uai2022} % for initial submission
% \documentclass[accepted]{uai2022} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2022} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2022} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
%\usepackage[american]{babel}
 \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams
\usepackage{subcaption}
\usepackage{hyperref}
\usepackage[capitalise]{cleveref}
\usepackage{amssymb}
\usepackage{amsthm}
\usepackage{listings}
\usepackage{float}
\floatstyle{ruled}
\usepackage{stmaryrd}

\crefname{definition}{Def.}{Defs.}
\Crefname{definition}{Definition}{Definitions}
\crefname{algorithm}{Alg.}{Algs.}
\Crefname{algorithm}{Algorithm}{Algorithms}
\crefname{operator}{Operator}{Operators}
\Crefname{operator}{Operator}{Operators}

\hyphenation{Dec-POMDP}

\newtheorem{definition}{Definition}
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem{corollary}{Corollary}[lemma]

%\allowdisplaybreaks
\DeclareMathOperator*{\argmax}{arg\,max}

\newfloat{operator}{tbph}{lon}
\floatname{operator}{Operator}
\newfloat{listing}{tbph}{lop}
\floatname{listing}{Listing}


%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{Lifting in Multi-agent Systems under Uncertainty (Supplementary Material)}

% The standard author block has changed for UAI 2022 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:tanya.braun@uni-muenster.de?Subject=Your UAI 2022 paper}{Tanya Braun}{}}
\author[2]{Marcel Gehrke}
\author[3]{Florian Lau}
\author[2]{Ralf M\"oller}
% Add affiliations after the authors
\affil[1]{%
    Computer Science Department\\
    University of M\"unster\\
    M\"unster, Germany
}
\affil[2]{%
    Institute of Information Systems\\
    University of L\"ubeck\\
    L\"ubeck, Germany
}
\affil[3]{%
    Institute of Telematics\\
    University of L\"ubeck\\
    L\"ubeck, Germany
  }
  
  \begin{document}
\maketitle
%!TEX root = ../uai2022-template.tex

%The supplementary material provides detailed information on the following topics:
%\begin{enumerate}
%	\item Additive count conversion used in \cref{lem:sym}
%%	\item Full proof of \cref{lem:lemming}
%	\item Full description of the DecTiger example in \cref{sec:ldec:dis} %and a sketch of a nanoscale application
%\end{enumerate}

\section{Additive Count Conversion}\label{app:add}
With additive semantics, the combination function becomes summation. This applies to the join, which is usually done using multiplication, as well as to the count-conversion, where multiplication is likewise replaced by summation.
We need some further definitions for the formal operator that is based on the operator defined by \citet{Tag13}.
We use the framework of general parfactors under which reward functions with PRVs and CRVs as inputs fall, which is also based on the work cited above.

\begin{definition}[Logvar, PRV,  parfactor, model]\label{def:model}
	Let $\mathbf{R}$ be a set of random variable (randvar) names, $\mathbf{L}$ a set of logical variable (logvar) names, $\mathbf{\Phi}$ a set of factor names, and $\mathbf{U}$ a set of constants. % (universe).
	All sets are finite.
	Each logvar $L$ has a domain $dom(L) \subseteq \mathbf{U}$.
	A \emph{constraint} is a tuple $(\mathcal{X}, C_{\mathcal{X}})$ of a sequence of logvars $\mathcal{X} = (X_1, \dots, X_l)$ and a set $C_{\mathcal{X}} \subseteq \times_{i = 1}^l dom(X_i)$.
	The symbol $\top$ for $C$ marks that no restrictions apply, i.e., $C_{\mathcal{X}} = \times_{i = 1}^l dom(X_i)$.
	A \emph{PRV} $R(L_1, \dots, L_l), l \geq 0$, is a construct of a randvar $R \in \mathbf{R}$ possibly combined with logvars $L_1, \dots, L_l \in \mathbf{L}$. % to represent a set of randvars.
	If $l = 0$, the PRV is parameterless and forms a propositional randvar.
	The term $ran(A)$ denotes the possible values (range) of a PRV $A$.
	We use bold symbols for sets and calligraphic symbols for sequences of PRVs or logvars.
	For a set $\boldsymbol{A}$ or sequence $\mathcal{A}$, the range refers to all possible combinations of range values of the PRVs therein.
	An \emph{event} $A = a$ denotes the occurrence of PRV $A$ with range value $a \in ran(A)$.
	If $A$ is clear from the context, we write $a$.\\
%\end{definition}
%\begin{definition}[Parfactor, model]
	We denote a parametric factor \emph{(parfactor)} by $\phi(\mathcal{A})_{| C}$ with $\mathcal{A}$ a sequence of PRVs, $\phi : ran(\mathcal{A}) \to \mathbb{R}^+$ a function with name $\phi \in \mathbf{\Phi}$, and $C$ a constraint on the logvars in $\mathcal{A}$.
	A PRV $A$ or logvar $L$ under constraint $C$ is given by $A_{|C}$ or $L_{|C}$, respectively.
	We may omit $|\top$ in $A_{|\top}$, $L_{|\top}$, or $\phi(\mathcal{A})_{| \top}$.
	A \emph{model} $G$ is a set of parfactors $\{g_i\}_{i=1}^n$.\\
	A utility parfactor $g_{U}$ maps to a utility PRV $U(\boldsymbol{X})$ with a signature of $\phi_{U(\boldsymbol{X})}(\mathcal{A})_{|C}$, i.e., stores the result of $\phi$ in $U$.
\end{definition}
The term $lv(\Gamma)$ refers to the logvars in $\Gamma$, which may be a PRV, a constraint, a parfactor, or a model.                        
The term $rv(\Gamma)$ refers to the set of PRVs in a parfactor or model.
The term $gr(\Gamma)$ denotes the set of all instances of $\Gamma$ w.r.t.\ given constraints.
An instance is an instantiation (grounding) of $\Gamma$, substituting the logvars in $\Gamma$ with a set of constants from given constraints.
If $\Gamma$ is a constraint, $gr(\Gamma)$ refers to the second component $C_{\mathcal{X}}$.
%Given a parfactor $\phi(\mathcal{A})_{|C}$, $\phi$ is identical for the propositional randvars in $gr(\mathcal{A}_{|C})$.
We use the expression $lv_U(\cdot)$ to refer to the logvars of the utility PRV(s) in the input, which can be a PRV, a parfactor, or a model.

We assume familiarity with operators of relational algebra such as projection $\pi$ and join $\Join$.
An alignment replaces the occurrences of one object with another.
The notion of count-normalisation says that each instance of one logvar sequence $\mathbf{Z}$ 
needs to refer to the same number of instances of another logvar sequence $\mathbf{Y}$.
If this number exists, we refer to it as $ncount_{\mathbf{Y} | \mathbf{Z}}$.

\begin{operator}[H]
\caption{Count-conversion for utility parfactors}
\label{op:lift:cc}
	\textbf{Operator} \textsc{count-convert}\\
	\textbf{Inputs:}
	\begin{enumerate}[noitemsep,topsep=0pt,parsep=0pt,partopsep=0pt,label=(\arabic*)]
		\item Utility parfactor $g_{u} = \phi_{U(\boldsymbol{X})}(\mathcal{A})_{|C}$
		\item Logvar $X \in lv(\mathcal{A})$ and $X \in \boldsymbol{X}$, to count in $g_u$
	\end{enumerate}
	\textbf{Preconditions:}
	\begin{enumerate}[noitemsep,topsep=0pt,parsep=0pt,partopsep=0pt,label=(\arabic*)]
		\item There is exactly one atom $A_i \in \mathcal{A}$ with $X \in lv(A_i)$.
		\item $X$ is count-normalised w.r.t.\ $\boldsymbol{Z} = lv(\mathcal{A}) \setminus \{X\}$ in $C$.
		\item For all counted logvars $X^{\#}$ in $g_u$:\\ $\pi_{X,X^{\#}}(C) = \pi_X(\pi_{\boldsymbol{X}}(C)) \times \pi_{X^{\#}} (\pi_{\boldsymbol{X}}(C))$.
	\end{enumerate}
	\textbf{Output:} utility parfactor $\phi'_{U'}(\mathcal{A}')_{|C}$ such that
	\begin{enumerate}[noitemsep,topsep=0pt,parsep=0pt,partopsep=0pt,label=(\arabic*)]
		\item $U' = \#_X[U(\boldsymbol{X})]$,
		\item $\mathcal{A}' = (A_1, \dots, A_{i-1}) \circ A'_i \circ (A_{i+1}, \dots, A_n)$, $A'_i = \#_X[A_i]$, and
		\item for each valuation $\boldsymbol{a}'$ to $\mathcal{A}'$ with $a'_i = h$,
		\begin{align*}
			&\phi'_{U'}(\dots, a_{i-1}, h, a_{i+1}, \dots) \\
			&= \sum_{a_i \in ran(A_i)} h(a_i)\phi_{U(\boldsymbol{X})}(\dots, a_{i-1}, a_i, a_{i+1}, \dots) 
		\end{align*} 
		where $h$ is a histogram as defined in Eq.\ (9) %$\{(a_i, n_i)\}_{i=1}^m$ with $m = |ran(A_i)|$, $a_i \in ran(A_i)$, $n_i \in \mathbb{N}$, and $\sum_{a_i \in \mathcal{R}(A_i)} h(a_i) = ncount_{X | \boldsymbol{Z}}(C)$, and $h(a_i) = n_i$.
	\end{enumerate}
	\textbf{Postcondition:}\\ 
	$G_U \equiv G_U \setminus \{g_u\} \cup \textsc{count-convert}(g_{u}, X)$
\end{operator}
\begin{operator}[H]
\caption{Additive join of utility parfactors}
\label{op:full:join}
	\textbf{Operator} \textsc{join}\\
	\textbf{Inputs:}
	\begin{enumerate}[noitemsep,topsep=0pt,parsep=0pt,partopsep=0pt,label=(\arabic*)]
		\item Utility parfactor $g_{u'} = \phi_{u'}(\mathcal{A}_{u'})_{|C_{u'}}$
		\item Utility parfactor $g_{u''} = \phi_{u''}(\mathcal{A}_{u''})_{|C_{u''}}$
		\item Alignment $\theta = \{\boldsymbol{Z}_{u''} \rightarrow \boldsymbol{Z}_{u'}\}$, between the logvars of $g_{u'}$ and $g_{u''}$
	\end{enumerate}
	\textbf{Preconditions:}
	\begin{enumerate}[noitemsep,topsep=0pt,parsep=0pt,partopsep=0pt,label=(\arabic*)]
		\item for $v = u', u'': \boldsymbol{Y}_v = lv(\mathcal{A}_v) \setminus \boldsymbol{Z}_v$ is count-normalised w.r.t.\ $\boldsymbol{Z}_v$ in $\pi_{\boldsymbol{X}_v}(C_v)$
	\end{enumerate}
	\textbf{Output:} utility parfactor $\phi_u(\mathcal{A}_{u})_{|C}$ such that
	\begin{enumerate}[noitemsep,topsep=0pt,parsep=0pt,partopsep=0pt,label=(\arabic*)]
		\item $u = U(\boldsymbol{X}), \boldsymbol{X} = lv(u') \cup lv(u'')\theta$,
		\item $\mathcal{A}_u = \mathcal{A}_{u'} \Join \mathcal{A}_{u''}\theta$,
		\item $C = C_{u'} \Join C_{u''}\theta$ (component-wise), and
		\item for each valuation $\boldsymbol{a} \in ran(\mathcal{A}_{u})$ with $\boldsymbol{a}_{u'} = \pi_{\mathcal{A}_{u'}}(\boldsymbol{a})$ and $\boldsymbol{a}_{u''} = \pi_{\mathcal{A}_{u''}}(\boldsymbol{a})$
%			\begin{align*}
				\[\phi_u(\boldsymbol{a}) = \frac{1}{r_u''}\phi_{u'}(\boldsymbol{a}_{u'}) + \frac{1}{r_u'}\phi_{u''}(\boldsymbol{a}_{u''})\]
%			\end{align*}
			where $r'_u = ncount_{\boldsymbol{Y}_{u'} | \boldsymbol{Z}_{u'}} (\pi_{\boldsymbol{X}_{u'}}(C_{u'}))$ and $r''_u = ncount_{\boldsymbol{Y}_{u''} | \boldsymbol{Z}_{u''}} (\pi_{\boldsymbol{X}_{u''}}(C_{u''}))$.
	\end{enumerate}
	\textbf{Postcondition:} $G_U \equiv G_U \setminus \{g_{u'}, g_{u''}\} \cup \textsc{join}(g_{u'}, g_{u''}, \theta)$
\end{operator}

Please refer to \citet{Tag13} for more details on count-normalisation and count-conversions.

\Cref{op:lift:cc} shows the additive count-conversion.
The operator transforms a parfactor with a logvar to be counted directly into a parfactor with the logvar counted.
That is, it turns the PRV in which the logvar occurs into a CRV and transforms the mapped values accordingly.

The long version of this calculation is grounding the logvar, joining the (partially) grounded parfactors, determining counting symmetry, and transforming the grounded instances into CRVs.
Joining parfactors with multiplicative semantics (mapping to probabilities or potentials) is done using multiplication, which is also the name of the operator by \citet{Tag13} on which the join in \cref{op:full:join} is based.
Since rewards have additive semantics, combining two parfactors does not work with multiplication but with summation.
\Cref{op:full:join} shows a join for parfactors with additive semantics.



%\section{Proof of Lemma 3}\label{app:lem3}
%This section provides a full proof of the following statement of Lemma 3:
%%\begin{quote} %\label{lem:lemming}
%    \emph{In an isomorphic DecPOMDP with model $\bar{M}_i$, partition policies are defined over $ran(A_k)$ and $ran(O_k)$}.
%%\end{quote}
%
%The proof uses the semantics of the parameterised probabilistic sequential model embedded in $\bar{M}_i$:
%The model basically contains three parametric factors (parfactors), i.e., factors with PRVs as inputs, namely, $\bar{T}_i$, $\bar{R}_i$, and $\bar{\Omega}_i$.
%The inputs are time-stamped by $t$, i.e., $S_t$, $\bar{A}_t$, and $\bar{O}_t$.
%A policy prescribes an action $a$ to perform for each possible observation history $o_{0:\tau}$ of length $\tau$ for each agent.
%To determine a joint action $\boldsymbol{a}$ for a joint observation history $\boldsymbol{o}_{0:\tau}$, $\boldsymbol{o}_{0:\tau}$ becomes evidence in $\bar{M}_i$.
%Since there are only a bounded number of histories possible in a partition, the agents in a partition can be partitioned again based on the histories.
%That is we basically encode $\boldsymbol{o}_{0:\tau}$ as lifted evidence and then split the partitions on the evidence using the split operator \citep{Tag13}. 
%So, each of the $K$ partitions has the same evidence for the agents in them and each has its own logical variable $X_{k,j}$ with a domain that is disjoint from all other domains.
%
%Next, we unroll the model for $\tau$ time steps, i.e., instantiate the split functions by replacing $t$ with each value in $\{0,\dots,\tau\}$, to handle the evidence over $\tau$ time steps.
%Then, the lifted absorption operator \citep{Tag13} is applicable to absorb the evidence in each sub-partition, eliminating $O_k(X)$ from each sub-partition and each time step.
%For the different time steps, we now have $\bar{T}_i(\bar{S}',\bar{S}, \circ_{k=1}^K\circ_{j}{A_k(X_j)})$, $\bar{R}_i(\bar{S}, \circ_{k=1}^K\circ_{j}{A_k(X_j)})$, and $\bar{\Omega}_i(S)$ where $j$ iterates over all existing sub-partitions of a partition.
%
%Next, we follow the semantics of the model to show that considering only $ran(A_k)$ is correct.
%The semantics prescribe the following:
%\begin{enumerate}
%    \item Ground the model.
%    \item Join all instantiated factors into one large factor.
%    \item Sum out the state variables $S_t, S_t'$ for all instantiated $t$.
%    \item Pick the actions that lead to the maximum utility value for the different observation histories.
%\end{enumerate}
%Without $\bar{R}_i$, we would have built the full joint probability distribution of the probabilistic sequential model in $\bar{M}_i$ in the third step.
%With $\bar{R}_i$, we build a distribution over utilities.
%The task in terms of the proof is to show that the same $\argmax$ actions are chosen for agents from the same sub-partition.
%
%The first step of grounding the model makes every constant explicit that is hidden behind the template of the PRVs.
%Another way of doing this is by counting each logical variable in the model, count-converting the PRVs into CRVs, which accounts for each constant explicitly like grounding does.
%So, we tweak the procedure above by not grounding but count-converting the model.
%The rest remains the same.
%Then we need to show that the peak-shaped histograms are chosen as $\argmax$ actions, which implies that all agents from the same partition perform the same action as in the ground case.
%
%To show that only peak-shaped histograms are relevant, we look at the count-conversions of the first step.
%Consider a minimum example of a function $\phi_i$ with two states and two actions of a decision PRV $A(X)$ as well as its ground version $\phi$ and its count-converted version $\phi_c$ with two agents ($|dom(X)| = 2$):
%\begin{center}
%    \begin{tabular}{ccc}
%        \toprule
%        $S$ & $A(X)$ & $\phi_i$\\
%        \midrule
%        $s^0$ & $a^0$ & $p_1$ \\
%        $s^0$ & $a^1$ & $p_2$ \\
%        $s^1$ & $a^0$ & $p_3$ \\
%        $s^1$ & $a^1$ & $p_4$ \\
%        \bottomrule
%    \end{tabular}\quad
%    \begin{tabular}{cccc}
%        \toprule
%        $S$ & $A(x_1)$ & $A(x_2)$ & $\phi$\\
%        \midrule
%        $s^0$ & $a^0$ & $a^0$ & $p_1^2$ \\
%        $s^0$ & $a^0$ & $a^1$ & $p_1 \cdot p_2$ \\
%        $s^0$ & $a^1$ & $a^0$ & $p_2 \cdot p_1$ \\
%        $s^0$ & $a^1$ & $a^1$ & $p_2^2$ \\
%        $s^1$ & $a^0$ & $a^0$ & $p_3^2$ \\
%        $s^1$ & $a^0$ & $a^1$ & $p_3 \cdot p_4$ \\
%        $s^1$ & $a^1$ & $a^0$ & $p_4 \cdot p_3$ \\
%        $s^1$ & $a^1$ & $a^1$ & $p_4^2$ \\
%        \bottomrule
%    \end{tabular}\\\medskip
%    \begin{tabular}{ccc}
%        \toprule
%        $S$ & $\#_{X}[A(X)]$ & $\phi_c$\\
%        \midrule
%        $s^0$ & $[0,2]$ & $p_2^0 \cdot p_1^2$ \\
%        $s^0$ & $[1,1]$ & $p_2^1 \cdot p_1^1$ \\
%        $s^0$ & $[2,0]$ & $p_2^2 \cdot p_1^0$ \\
%        $s^1$ & $[0,2]$ & $p_4^0 \cdot p_3^2$ \\
%        $s^1$ & $[1,1]$ & $p_4^1 \cdot p_3^1$ \\
%        $s^1$ & $[2,0]$ & $p_4^2 \cdot p_3^0$ \\
%        \bottomrule
%    \end{tabular}
%\end{center}
%The versions $\phi$ and $\phi_c$ show exactly how a CRV is simply another encoding of the same information stored in a grounded version.
%It also highlights that an isomorphic symmetry is always also a counting symmetry.
%The important part for the proof is that whatever the actual values of the different $p$'s are, when the PRVs are count-converted (or grounded), the maximum value for a given state $s$ will always occur where the exponent is largest as one of the $p$'s will be the largest and it will bring the most to take this to the power of the largest number possible.
%And the largest possible exponent occurs in a peak-shaped histogram where one bucket takes all the available elements.
%With the additive semantics of rewards, the rewards are added up (and not multiplied) but the effect is the same: the maximum value occurs where the individual maximum values meet, i.e., where the inputs are peak-shaped.
%
%The only way for a non-peak-shaped histogram to lead to a maximum value is if values are equal.
%Then, peak-shaped and non-peak-shaped histograms lead to the same maximum value and the solution will not be unique.
%However, since we are not interested in all solutions but only one, restricting ourselves to peak-shaped histograms still is correct.
%
%So, we have that one of the inputs with a peak-shaped histogram maps to the largest value, i.e., a peak-shaped histogram is the $\argmax$ action at the moment.
%In the functions of a DecPOMDP model, we have $K$ PRVs that need count-converting.
%However, the result still remains the same:
%For each of the count-conversions individually, it holds with the arguments above that the $\argmax$ action is one of the peak-shaped histograms.
%Since the count-conversions are applied iteratively, each CRV still has the maximum value where a peak-shaped histogram occurs, and these maximum values occur for those inputs in which the previous count-conversion had maximum values, which were also peak-shaped.
%So, after count-converting every logical variable in $\bar{T}_i$ and $\bar{R}_i$ of each sub-partition ($\bar{\Omega}_i$ does not contain any logical variable as we eliminated them using the observation histories as evidence), the lines with the largest values will be those where peak-shaped histograms occur together.
%
%The second step says to join all these functions where the largest values remain largest in combinations of peak-shaped histograms as for each possible state there are the histogram combinations where those peak-shaped histograms meet that bring with them the largest values.
%
%The third step requires summing out all non-decision CRVs, i.e., all state variables over the different time steps.
%Summing out adds up values that occur for the different states given the same input sequence of the CRVs, which again does not change any $\argmax$ actions as non-peak-shaped inputs cannot catch up with those peak-shaped inputs, where the largest values reside and are now added up.
%
%The forth step is already the decision on the $\argmax$ action, where we pick the input mapping to the largest value, which has peak-shaped histograms by the arguments above.
%Peak-shaped histograms prescribe the same action for all agents in a sub-partition, meaning it is enough to consider $ran(A_k)$ in each sub-partition.
%
%The remaining part of the proof has to show that it is also enough to consider $ran(O_k)$ and to not consider each possible sub-partition as we have done so far.
%
%Each sub-partition has peak-shaped histograms as $\argmax$ actions.
%An effect is that the $\argmax$ actions do not depend on partition sizes.
%The size of the peak changes but nothing else.
%Therefore, for all the other possible observation histories $\boldsymbol{o}'_{0:\tau}$, we again get sub-partitions that have observation histories as evidence as with $\boldsymbol{o}_{0:\tau}$ but with different sub-partition sizes.
%However, the result in terms of the $\argmax$ actions per sub-partition will be the same.
%Therefore, it is enough to consider what happens for one representative agent for each possible observation history of that agent as looking at $n_{k,j}$ agents in a sub-partition does not change the outcome compared to looking at one agent.
%That is, considering only $ran(O_k)$ and the histories possible with $ran(O_k)$ is sufficient.
%This concludes our proof.
%
%Some interesting points to note:
%\begin{itemize}
%    \item This proof only works because the CRVs come from PRVs.
%        If one would have a CRV in the original modelling, a non-peak-shaped histogram could get such a high reward that it will outweigh everything else and be the $\argmax$ action at a point.
%    \item The setting in terms of lifting is relatively easy: In $\bar{M}_i$, all domains are disjoint and the constraints are $\top$ constraints, i.e., cross-products of domains.
%        There are no inequalities between logical variables.
%        No PRVs reoccur in the same function.
%        No logvar occurs more than once in a function.
%        An effect is that all logical variables are count-normalised with respect to each other.
%    \item If we would have $\Omega$ depend on the joint action as well, the proof here would hold as well.
%        What would change, is the proof of Lemma~2 as count-converting $X_k$ in $\bar{\Omega}$ would require generalised counting, leading to a CRV $\#_{X_k}[O_k(X_k),A_k(X_k)]$, which is not the standard form of two CRVs used in the counting model.
%        We would need to split this merged CRV again into two parts, which is not something that has been relevant in lifted inference so far.
%        Therefore, standard lifting operators do not apply.
%        We would basically need an inverse operator to the generalised counting operator of merging counting formulas (CRVs) by \citet{Tag13}.
%    \item Lemma~3 goes hand in hand with the quasi independence between agents.
%        The independence on partition sizes for a decision mirrors this agent independence and allows for using partition sizes as an exponent in groundings.
%        Let us refer to a set $\boldsymbol{V}$ without an element indexed $i$ as $\boldsymbol{V}_{-i}$.
%        Then, for given partition actions $\bar{\boldsymbol{a}}_{-k}$ of other partitions and observations $\bar{\boldsymbol{o}}_{-k}$ and current state $\bar{s}$ and next state $\bar{s}'$, it holds for each partition $\mathfrak{I}_k$ ($x_k$ a representative constant for agents in $\mathfrak{I}_k$):
%        \begin{align*}
%            \bar{T}(\bar{s}',\bar{s},\bar{\boldsymbol{a}}) 
%            &= \prod_{i=1}^{n_k} \bar{T}(\bar{s}',\bar{s}, \bar{\boldsymbol{a}}_{-k} \circ a_k(x_{k,i}))\\
%            &= \bar{T}(\bar{s}',\bar{s}, \bar{\boldsymbol{a}}_{-k} \circ a_k(x_k))^{n_k} \\
%            \bar{R}(\bar{s}, \bar{\boldsymbol{a}}) 
%            &= \sum_{i=1}^{n_k} \bar{R}(\bar{s}, \bar{\boldsymbol{a}}_{-k} \circ a_k(x_{k,i})) \\
%            &= n_k \cdot \bar{R}(\bar{s}',\bar{s}, \bar{\boldsymbol{a}}_{-k} \circ a_k(x_k)) \\
%            \bar{\Omega}(\bar{\boldsymbol{o}}, \bar{s}, \bar{\boldsymbol{a}})
%            &= \prod_{i=1}^{n_k} \bar{T}(\bar{\boldsymbol{o}}_{-k} \circ o_k(x_{k,i}),\bar{s}) \\
%            &= \bar{\Omega}(\bar{\boldsymbol{o}}_{-k} \circ o_k(x_{k,i}),\bar{s})^{n_k}
%        \end{align*}
%\end{itemize}
%


% Policies are evaluated regarding their expected utility according to Eq.\ (1), which shows a max-sum calculation, in which the state variable $S$ is summed out before the $\argmax$ policy is chosen at the end.
% At the end, after summing out all observation variables, we have a function that depends on $S$ and $\boldsymbol{A}$, in which we have to sum out $S$ and then choose the $\argmax$ action of $\boldsymbol{A}$.

% In $\bar{M}_i$, $\boldsymbol{A}$ consists of a set of PRVs $A_k(X_k)$, which we can count-convert into CRVs $\#_{X_k}[A_k(X_k)]$ and for this proof, we do so as a first step.
% Then, we have all constants behind each $X_k$ explicitly accounted for in the CRVs and we can do arithmetic calculations without having to consider lifting restrictions as no uncounted logical variable remains.
% So, whatever the numbers in any function ($T$, $R$, $\Omega$) say, the hypothesis is that count-converting the different $X_k$ transforms the numbers in such a way that the maximum numbers will be occur for the peak-shaped histograms.



% One way to solve the problem is to perform the sum-out operations first and then pick the $\argmax$ argument in the remaining function.
% So, whatever happens before, at the end, it is a sum-out operation followed by a max-out operation.
% Looking at the bare bones of the calculation, the reward function $R$ is multiplied with probabilities and summed over until we reach the horizon $\tau$.
% Thus, in the end, we have a probability distribution multiplied with a number.

%\section{Example}
%This section lists the complete DecTiger example and looks at a nanoscale system application as a case study.

\section{Full DecTiger Example}\label{app:dectiger}
We use the specification of the DecTiger benchmark from the MADP Toolbox.\footnote{\url{https://github.com/MADPToolbox/MADP/blob/master/problems/dectiger.dpomdp}}
\Cref{lst:dectiger} shows the original DecTiger version in the MADP input format (on the last page).
The DecPOMDP model reads as follows:
\begin{itemize}[noitemsep]
	\item $\boldsymbol{I} = \{agent_1, agent_2\}$, 
	\item $S$, $ran(S) = \{$\emph{tiger-left}, \emph{tiger-right}$\} = \{tl,tr\}$,
	\item $\boldsymbol{A} = \{A_i\}_{i\in \boldsymbol{I}}$, $\forall i : ran(A_i) = \{$\emph{listen}, \emph{open-left}, \emph{open-right}$\} = \{li,ol,or\}$, and
%	\item $T(S', S, \boldsymbol{A}) = P(S' \mid S, \boldsymbol{A})$,
%	\item $R(S, \boldsymbol{A})$, 
	\item $\boldsymbol{O} = \{O_i\}_{i \in \boldsymbol{I}}$, $\forall i : ran(O_i) = \{$\emph{hear-left}, \emph{hear-right}$\} = \{hl,hr\}$. % ,
%	\item $\Omega(\boldsymbol{O}, S) = P( \boldsymbol{O} \mid S, \boldsymbol{A})$
\end{itemize}
with $T$, $R$, and $\Omega$ (= O in the listing) in tabular notation in full below (lines are reordered compared to the listing to match the order of the inputs in the definitions). 
\begin{center}
\begin{tabular}{cccll}
    \toprule
    $S$   & $A_1$   & $A_2$ & $T_{tl}$  & $T_{tr}$ \\
    \midrule
    $tl$  & $li$    & $li$  & $1$       & $0$   \\
    $tl$  & $li$    & $ol$  & $0.5$     & $0.5$ \\
    $tl$  & $li$    & $or$  & $0.5$     & $0.5$ \\
    $tl$  & $ol$    & $li$  & $0.5$     & $0.5$ \\
    $tl$  & $ol$    & $ol$  & $0.5$     & $0.5$ \\
    $tl$  & $ol$    & $or$  & $0.5$     & $0.5$ \\
    $tl$  & $or$    & $li$  & $0.5$     & $0.5$ \\
    $tl$  & $or$    & $ol$  & $0.5$     & $0.5$ \\
    $tl$  & $or$    & $or$  & $0.5$     & $0.5$ \\
    $tr$  & $li$    & $li$  & $0$       & $1$   \\
    $tr$  & $li$    & $ol$  & $0.5$     & $0.5$ \\
    $tr$  & $li$    & $or$  & $0.5$     & $0.5$ \\
    $tr$  & $ol$    & $li$  & $0.5$     & $0.5$ \\
    $tr$  & $ol$    & $ol$  & $0.5$     & $0.5$ \\
    $tr$  & $ol$    & $or$  & $0.5$     & $0.5$ \\
    $tr$  & $or$    & $li$  & $0.5$     & $0.5$ \\
    $tr$  & $or$    & $ol$  & $0.5$     & $0.5$ \\
    $tr$  & $or$    & $or$  & $0.5$     & $0.5$ \\
    \bottomrule
\end{tabular}\hfill
\begin{tabular}{cccr}
    \toprule
    $S$   & $A_1$   & $A_2$ & $R$   \\
    \midrule
    $tl$  & $li$    & $li$  & $-2$  \\
    $tl$  & $li$    & $ol$  & $-101$\\
    $tl$  & $li$    & $or$  & $9$   \\
    $tl$  & $ol$    & $li$  & $-101$\\
    $tl$  & $ol$    & $ol$  & $-50$ \\
    $tl$  & $ol$    & $or$  & $-100$\\
    $tl$  & $or$    & $li$  & $9$   \\
    $tl$  & $or$    & $ol$  & $-100$\\
    $tl$  & $or$    & $or$  & $20$  \\
    $tr$  & $li$    & $li$  & $-2$  \\
    $tr$  & $li$    & $ol$  & $9$   \\
    $tr$  & $li$    & $or$  & $-101$\\
    $tr$  & $ol$    & $li$  & $9$   \\
    $tr$  & $ol$    & $ol$  & $20$  \\
    $tr$  & $ol$    & $or$  & $-100$\\
    $tr$  & $or$    & $li$  & $-101$\\
    $tr$  & $or$    & $ol$  & $-100$\\
    $tr$  & $or$    & $or$  & $-50$ \\
    \bottomrule
\end{tabular} \\\medskip
\begin{tabular}{cccllll}
    \toprule
    $S$   & $A_1$   & $A_2$ & $\Omega_{hl,hl}$ & $\Omega_{hr,hl}$ & $\Omega_{hl,hr}$ & $\Omega_{hr,hr}$ \\
    \midrule
    $tl$  & $li$    & $li$  & $0.7225$  & $0.1275$  & $0.1275$  & $0.0225$  \\
    $tl$  & $li$    & $ol$  & $0.25$    & $0.25$    & $0.25$    & $0.25$  \\
    $tl$  & $li$    & $or$  & $0.25$    & $0.25$    & $0.25$    & $0.25$  \\
    $tl$  & $ol$    & $li$  & $0.25$    & $0.25$    & $0.25$    & $0.25$  \\
    $tl$  & $ol$    & $ol$  & $0.25$    & $0.25$    & $0.25$    & $0.25$  \\
    $tl$  & $ol$    & $or$  & $0.25$    & $0.25$    & $0.25$    & $0.25$  \\
    $tl$  & $or$    & $li$  & $0.25$    & $0.25$    & $0.25$    & $0.25$  \\
    $tl$  & $or$    & $ol$  & $0.25$    & $0.25$    & $0.25$    & $0.25$  \\
    $tl$  & $or$    & $or$  & $0.25$    & $0.25$    & $0.25$    & $0.25$  \\
    $tr$  & $li$    & $li$  & $0.0225$  & $0.1275$  & $0.1275$  & $0.7225$  \\
    $tr$  & $li$    & $ol$  & $0.25$    & $0.25$    & $0.25$    & $0.25$  \\
    $tr$  & $li$    & $or$  & $0.25$    & $0.25$    & $0.25$    & $0.25$  \\
    $tr$  & $ol$    & $li$  & $0.25$    & $0.25$    & $0.25$    & $0.25$  \\
    $tr$  & $ol$    & $ol$  & $0.25$    & $0.25$    & $0.25$    & $0.25$  \\
    $tr$  & $ol$    & $or$  & $0.25$    & $0.25$    & $0.25$    & $0.25$  \\
    $tr$  & $or$    & $li$  & $0.25$    & $0.25$    & $0.25$    & $0.25$  \\
    $tr$  & $or$    & $ol$  & $0.25$    & $0.25$    & $0.25$    & $0.25$  \\
    $tr$  & $or$    & $or$  & $0.25$    & $0.25$    & $0.25$    & $0.25$  \\
    \bottomrule
\end{tabular}
\end{center}
with $T_{s'} = T(s',s,a_1,a_2), s' \in \{tl,tr\}$ and $\Omega_{o_1,o_2} = \Omega((o_1,o_2),s,a_1,a_2)$, $o_1, o_2 \in \{hl,hr\}$.
The transition function only states that as long as both agents only listen, the state does not change (identity).
When at least one agent opens a door, the game basically restarts with the new state being set according to a uniform distribution.
It is basically a way of keeping the game infinite by resetting the state to an arbitrary one whenever the agents end the game by opening a door (to either lose---tiger, or win---gold).
One could argue that opening a door only ends the game and might not necessarily imply a restart.
In that case, one would keep the state as is in all cases ($(1,0)$ distribution for all $tl$ lines, $(0,1)$ distribution for all $tr$ lines) and re-spawn the game with an arbitrary starting state, sampled from a $(0.5,0.5)$ distribution, for do-overs. 

The DecTiger model has the same action and observation sets for both agents and exhibits a counting symmetry.
Thus, it can be rewritten into a counting model with $K=1$.
We index the one partition with $c$ to distinguish it from the later case where we take an isomorphic viewpoint.
\begin{itemize}[noitemsep] %,topsep=0pt,parsep=0pt,partopsep=0pt
	\item $\mathfrak{I}_c = \{agent_1, agent_2\}$, 
	\item $\bar{S} = S = \{tl,tr\}$,
	\item $\bar{A}_c = \{\#_{X}[A(X)]\}$, $ran(A(X)) = \{li,ol,or\}$,
	\item $T_c(S', S, \boldsymbol{A}) = P(S' \mid S, \boldsymbol{A})$,
	\item $R_c(S, \boldsymbol{A})$, 
	\item $\bar{O}_c = \{\#_{X}[O(X)]\}$, $ran(O(X)) = \{hl,hr\}$,
	\item $\Omega_c(\boldsymbol{O}, S, \boldsymbol{A}) = P( \boldsymbol{O} \mid S, \boldsymbol{A})$.
\end{itemize}
with $\bar{T}_c$, $\bar{R}_c$, and $\bar{\Omega}_c$ as follows ($\#A$ short for $\#_X[A(X)]$; histogram positions: $[li,ol,or]$):
\begin{center}
\begin{tabular}{ccll}
\toprule
	$S$	& $\#A$		& $T_{c,tl}$& $T_{c,tr}$\\
\midrule
	$tl$	& $[2,0,0]$	& $1.0$	& $0.0$	\\
	$tl$	& $[1,1,0]$	& $0.5$	& $0.5$	\\
	$tl$	& $[1,0,1]$	& $0.5$	& $0.5$	\\
	$tl$	& $[0,1,1]$	& $0.5$	& $0.5$	\\
	$tl$	& $[0,2,0]$	& $0.5$	& $0.5$	\\
	$tl$	& $[0,0,2]$	& $0.5$	& $0.5$	\\
	$tr$	& $[2,0,0]$	& $0.0$	& $1.0$	\\
	$tr$	& $[1,1,0]$	& $0.5$	& $0.5$	\\
	$tr$	& $[1,0,1]$	& $0.5$	& $0.5$	\\
	$tr$	& $[0,1,1]$	& $0.5$	& $0.5$	\\
	$tr$	& $[0,2,0]$	& $0.5$	& $0.5$	\\
	$tr$	& $[0,0,2]$	& $0.5$	& $0.5$	\\
    \bottomrule
\end{tabular} \hfill
\begin{tabular}{ccr}
\toprule
	$S$	& $\#A$		& $R_c$	\\
\midrule
	$tl$	& $[2,0,0]$	& $-2$	\\
	$tl$	& $[1,1,0]$	& $-101$	\\
	$tl$	& $[0,2,0]$	& $-50$	\\
	$tl$	& $[0,1,1]$	& $-100$	\\
	$tl$	& $[0,0,2]$	& $20$	\\
	$tl$	& $[1,0,1]$	& $9$	\\
	$tr$	& $[2,0,0]$	& $-2$	\\
	$tr$	& $[1,1,0]$	& $9$	\\
	$tr$	& $[0,2,0]$	& $20$	\\
	$tr$	& $[0,1,1]$	& $-100$	\\
	$tr$	& $[0,0,2]$	& $-50$	\\
	$tr$	& $[1,0,1]$	& $-101$	\\
\bottomrule
\end{tabular}\\\medskip
\begin{tabular}{cclll}
\toprule
	$S$	& $\#A$		& $\Omega_{c,[2,0]}$& $\Omega_{c,[1,1]}$& $\Omega_{c,[0,2]}$   \\
\midrule
	$tl$	& $[2,0,0]$	& $0.7225$	& $0.1275$	& $0.0225$	\\
	$tl$	& $[1,1,0]$	& $0.25$		& $0.25$		& $0.25$		\\
	$tl$	& $[0,2,0]$	& $0.25$		& $0.25$		& $0.25$		\\
	$tl$	& $[0,1,1]$	& $0.25$		& $0.25$		& $0.25$		\\
	$tl$	& $[0,0,2]$	& $0.25$		& $0.25$		& $0.25$		\\
	$tl$	& $[1,0,1]$	& $0.25$		& $0.25$		& $0.25$		\\
	$tr$	& $[2,0,0]$	& $0.0225$	& $0.1275$	& $0.7225$	\\
	$tr$	& $[1,1,0]$	& $0.25$		& $0.25$		& $0.25$		\\
	$tr$	& $[1,0,1]$	& $0.25$		& $0.25$		& $0.25$		\\
	$tr$	& $[0,2,0]$	& $0.25$		& $0.25$		& $0.25$		\\
	$tr$	& $[0,1,1]$	& $0.25$		& $0.25$		& $0.25$		\\
	$tr$	& $[0,0,2]$	& $0.25$		& $0.25$		& $0.25$		\\
\bottomrule
\end{tabular}
\end{center}
As the histogram $[1,1]$ in $\Omega_c$ stands for two joint observations, $(hl,hr)$ and $(hr,hl)$, the probability for $\Omega_{c,[1,1]}$ counts twice.
In general, a multinomial coefficient provides the number of inputs represented by a histogram, i.e., ${n_k!}/{\prod_l n_l!}$ in reference to Eq.~(9).

For an isomorphic model, one would need to be able to factorise $\bar{T}_c$, $\bar{R}_c$, and $\bar{\Omega}_c$ into identical functions per agent, which is immediately possible for $\bar{\Omega}_c$ (and also provided like this in the book by \citet{OliAm16}):
\begin{center}
\begin{tabular}{ccll}
\toprule
	$S$	& $A(X)$	& $\Omega_{i,hl}$& $\Omega_{i,hr}$   \\
\midrule
	$tl$	& $li$	& $0.85$	& $0.15$	\\
	$tl$	& $ol$	& $0.5$	& $0.5$	\\
	$tl$	& $or$	& $0.5$	& $0.5$	\\
	$tr$	& $li$	& $0.15$	& $0.85$	\\
	$tr$	& $ol$	& $0.5$	& $0.5$	\\
	$tr$	& $or$	& $0.5$	& $0.5$	\\
\bottomrule
\end{tabular}
\end{center}

Functions $\bar{T}_c$ and $\bar{R}_c$ do not factorise accordingly.
$\bar{T}_c$ does not factorise because it mixes uniform and identity distributions: a per-agent factor would have to assign an identity distribution to \emph{listen}, which would lead to identity distributions whenever at least one \emph{listen} operation is involved.
However, the ground case only has identity distributions when both agents listen.
If not encoding the reset into $T$, i.e., using identity distributions in all lines, then $\bar{T}_c$ would factorise trivially:
\begin{center}
\begin{tabular}{cccc}
\toprule
	$S$	& $A(X)$	& $T_{i,tl}$& $T_{i,tr}$\\
\midrule
	$tl$	& $li$	& $1$	& $0$	\\
	$tl$	& $ol$	& $1$	& $0$	\\
	$tl$	& $or$	& $1$	& $0$	\\
	$tr$	& $li$	& $0$	& $1$	\\
	$tr$	& $ol$	& $0$	& $1$	\\
	$tr$	& $or$	& $0$	& $1$	\\
\bottomrule
\end{tabular}
\end{center}

A factorisation of $\bar{R}_c$ does not work out for all inputs.
Matching the peak-shaped histograms in their reward, we could factorise parts of it as follows, leading to different values in the lines marked with $\lightning$. % in the counted version (and in the corresponding lines of the ground version):
\begin{center}
\begin{tabular}[t]{ccr}
\toprule
	$S$   & $A(X)$  & $R_i$   \\
\midrule
	$tl$  & $li$    & $-1$  \\
	$tl$  & $ol$    & $-25$ \\
	$tl$  & $or$    & $10$  \\
	$tr$  & $li$    & $-1$  \\
	$tr$  & $ol$    & $10$ \\
	$tr$  & $or$    & $-25$  \\
\bottomrule
\end{tabular}\quad
\begin{tabular}[t]{ccrl}
\toprule
	$S$	& $\#_{X}[A(X)]$& $R_c$	& 	\\
\midrule
	$tl$	& $[2,0,0]$	& $-2$	& 	\\
	$tl$	& $[1,1,0]$	& $-26$	& $\lightning$\\
	$tl$	& $[0,2,0]$	& $-50$	& 	\\
	$tl$	& $[0,1,1]$	& $-15$	& $\lightning$\\
	$tl$	& $[0,0,2]$	& $20$	& 	\\
	$tl$	& $[1,0,1]$	& $9$	& 	\\
	$tr$	& $[2,0,0]$	& $-2$	& 	\\
	$tr$	& $[1,1,0]$	& $9$	& 	\\
	$tr$	& $[0,2,0]$	& $20$	& 	\\
	$tr$	& $[0,1,1]$	& $-15$	& $\lightning$\\
	$tr$	& $[0,0,2]$	& $-50$	& 	\\
	$tr$	& $[1,0,1]$	& $-26$	& $\lightning$\\
\bottomrule
\end{tabular}
\end{center}
There are two other options.
One option would be to match the rewards for the histograms that represent combinations of listening and opening a door:
\begin{center}
\begin{tabular}[t]{ccr}
\toprule
	$S$	& $A(X)$	& $R_{i}$	\\
\midrule
	$tl$	& $li$	& $-1$	\\
	$tl$	& $ol$	& $-100$	\\
	$tl$	& $or$	& $10$	\\
	$tr$	& $li$	& $-1$	\\
	$tr$	& $ol$	& $10$	\\
	$tr$	& $or$	& $-100$	\\
\bottomrule
\end{tabular}\quad
\begin{tabular}[t]{ccrl}
\toprule
	$S$	& $\#_{X}[A(X)]$& $R_c$	&	\\
\midrule
	$tl$	& $[2,0,0]$	& $-2$	& 	\\
	$tl$	& $[1,1,0]$	& $-101$	& 	\\
	$tl$	& $[0,2,0]$	& $-200$	& $\lightning$ \\
	$tl$	& $[0,1,1]$	& $-90$	& $\lightning$\\
	$tl$	& $[0,0,2]$	& $20$	& 	\\
	$tl$	& $[1,0,1]$	& $9$	& 	\\
	$tr$	& $[2,0,0]$	& $-2$	& 	\\
	$tr$	& $[1,1,0]$	& $9$	& 	\\
	$tr$	& $[0,2,0]$	& $20$	& 	\\
	$tr$	& $[0,1,1]$	& $-90$	& $\lightning$\\
	$tr$	& $[0,0,2]$	& $-200$	& $\lightning$ \\
	$tr$	& $[1,0,1]$	& $-101$	& 	\\
\bottomrule
\end{tabular}
\end{center}
The other option would be to match the rewards for those histograms with combinations of opening different doors:
\begin{center}
\begin{tabular}{ccr}
\toprule
	$S$	& $A(X)$	& $R_i$	\\
\midrule
	$tl$	& $li$	& $-1$	\\
	$tl$	& $ol$	& $-110$	\\
	$tl$	& $or$	& $10$	\\
	$tr$	& $li$	& $-1$	\\
	$tr$	& $ol$	& $10$	\\
	$tr$	& $or$	& $-110$	\\
\bottomrule
\end{tabular}\quad
\begin{tabular}{ccrl}
\toprule
	$S$	& $\#_{X}[A(X)]$& $R_c$	& 	\\
\midrule
	$tl$	& $[2,0,0]$	& $-2$	& 	\\
	$tl$	& $[1,1,0]$	& $-111$	& $\lightning$\\
	$tl$	& $[0,2,0]$	& $-220$	& $\lightning$ \\
	$tl$	& $[0,1,1]$	& $-100$	& 	\\
	$tl$	& $[0,0,2]$	& $20$	& 	\\
	$tl$	& $[1,0,1]$	& $9$	& 	\\
	$tr$	& $[2,0,0]$	& $-2$	& 	\\
	$tr$	& $[1,1,0]$	& $9$	& 	\\
	$tr$	& $[0,2,0]$	& $20$	& 	\\
	$tr$	& $[0,1,1]$	& $-100$	& 	\\
	$tr$	& $[0,0,2]$	& $-220$	& $\lightning$ \\
	$tr$	& $[1,0,1]$	& $-111$	& $\lightning$\\
\bottomrule
\end{tabular}
\end{center}
So, an isomorphic model using one of the three options would not be able to capture that both agents agreeing on an action, even though it opens the door to the tiger, costs them less than opening different doors.
Here, one would need the expressiveness of a counting model.
This limitation is a direct consequence of what we observed in the proof of Lemma~3:
A non-peak-shaped histogram cannot map to a larger value than a peak-shaped histogram when the CRV came from a PRV as given here.

However, an isomorphic model would automatically exclude any policy where one agent opens the door while the other agent either listens or opens the other door.
The first one is sort of a waste of the listening operation as the game ends nonetheless and the observation result cannot be used.
The second one means that the door to the tiger is definitely opened, therefore, it should be avoided, highlighting how isomorphic models can help to determine sensible policies.

Using the ground version, the counting version, and one of the isomorphic versions, let us briefly look at how the example behaves under rising $N$ with respect to the complexities derived in the paper, with one exception:
We use the binomial coefficient to actually calculate the number of possible histograms and do not use the upper bound of $n^a$ and $n^o$.
In addition, we include artificial partitions of the same dimensions to see the effect of rising $K$.
So, $a=3$, $o=2$, $s=2$, $n=N$ if $K=1$.
Otherwise, $n = 100$ and $N = K \cdot n$.
We set $\tau=2$ arbitrarily.
\Cref{fig:dectiger:t,fig:dectiger:c,fig:dectiger:p,fig:dectiger:k} show the behaviour according to the complexities derived in the paper for $T$ (as the worst case of the three functions in this setup), the evaluation cost, and the policy space.
Please note the log-scale on the y-axis.
The figures impressively highlight the differences between the models in terms of complexity.
%\newpage
\begin{figure}[H]
	\centering
	\includegraphics[width=.87\columnwidth]{tsize.pdf}
	\caption{The transition function size $\mathbb{T}$ under rising $N$}
	\label{fig:dectiger:t}
\end{figure}
\begin{figure}[H]
	\centering
	\includegraphics[width=.87\columnwidth]{csize.pdf}
	\caption{The policy evaluation cost $\mathbb{C}$ under rising $N$}
	\label{fig:dectiger:c}
\end{figure}
\begin{figure}[H]
	\centering
	\includegraphics[width=.87\columnwidth]{psize.pdf}
	\caption{The policy space size $\mathbb{P}$ under rising $N$}
	\label{fig:dectiger:p}
\end{figure}
\begin{figure}[H]
	\centering
	\includegraphics[width=.87\columnwidth]{kplot.pdf}
	\caption{Transition function sizes and evaluation cost for all settings and policy space sizes for the isomorphic setting under rising $K$ with $n=10$}
	\label{fig:dectiger:k}
\end{figure}
%\begin{figure*}
%\begin{minipage}[t]{.48\textwidth}
%	\centering
%	\includegraphics[width=.94\columnwidth]{fig/tsize.pdf}
%	\caption{The transition function size $\mathbb{T}$ under rising $N$}
%	\label{fig:dectiger:t}
%\end{minipage}\hfill
%\begin{minipage}[t]{.48\textwidth}
%	\centering
%	\includegraphics[width=.94\columnwidth]{fig/csize.pdf}
%	\caption{The policy evaluation cost $\mathbb{C}$ under rising $N$}
%	\label{fig:dectiger:c}
%\end{minipage}\\
%\begin{minipage}[t]{.48\textwidth}
%	\centering
%	\includegraphics[width=.94\columnwidth]{fig/psize.pdf}
%	\caption{The policy space size $\mathbb{P}$ under rising $N$}
%	\label{fig:dectiger:p}
%\end{minipage}\hfill
%\begin{minipage}[t]{.48\textwidth}
%	\includegraphics[width=.94\columnwidth]{fig/kplot.pdf}
%	\caption{Transition function sizes and evaluation cost for all settings and policy space sizes for the isomorphic setting under rising $K$ with $n=10$}
%	\label{fig:dectiger:k}
%\end{minipage}
%\end{figure*}

\begin{listing*}
\caption{DecTiger specification in the MADP toolbox (without the comments from the source)}
\label{lst:dectiger}
\begin{lstlisting}
agents: 2
discount: 1
values: reward
states: tiger-left tiger-right
start: uniform
actions: 
listen open-left open-right
listen open-left open-right
observations:
hear-left hear-right
hear-left hear-right
# Transition probabilities
T: * : uniform
T: listen listen : identity
# Observation probabilities: <2actions> : <state> : <2observations> : probability
O: * : uniform
O: listen listen : tiger-left : hear-left hear-left : 0.7225
O: listen listen : tiger-left : hear-left hear-right : 0.1275
O: listen listen : tiger-left : hear-right hear-left : 0.1275
O: listen listen : tiger-left : hear-right hear-right : 0.0225
O: listen listen : tiger-right : hear-right hear-right : 0.7225
O: listen listen : tiger-right : hear-left hear-right : 0.1275
O: listen listen : tiger-right : hear-right hear-left : 0.1275
O: listen listen : tiger-right : hear-left hear-left : 0.0225
# Rewards: <2 actions> : <state> : * : * : reward
R: listen listen: * : * : * : -2
R: open-left open-left : tiger-left : * : * : -50
R: open-right open-right : tiger-right : * : * : -50
R: open-left open-left : tiger-right : * : * : 20
R: open-right open-right : tiger-left : * : * : 20
R: open-left open-right : tiger-left : * : * : -100
R: open-left open-right : tiger-right : * : * : -100
R: open-right open-left : tiger-left : * : * : -100
R: open-right open-left : tiger-right : * : * : -100
R: open-left listen : tiger-left : * : * : -101
R: listen open-right : tiger-right : * : * : -101
R: listen open-left : tiger-left : * : * : -101
R: open-right listen : tiger-right : * : * : -101
R: listen open-right : tiger-left : * : * : 9
R: listen open-left : tiger-right : * : * : 9
R: open-right listen : tiger-left : * : * : 9
R: open-left listen : tiger-right : * : * : 9
\end{lstlisting}
\end{listing*}


% \subsection{Nanoscale Medical System Case Study}

% \paragraph{An Introduction}
% DNA-based nanonetworks have been proposed as an alternative to polymerase chain reaction, PCR for short, for detecting arbitrary diseases on the basis of DNA.
% In this scenario, a disease sample is mixed with a medical nanosystem that computes a programmed function depending on environmental parameters to decide if a disease is present.
% This section explains the basic mechanisms and ideas behind this novel technology and sets up how DecPOMDPs can be used as a model for it. 

% In general, a computational process at nanoscale is subject to resource constraints, and collaboration between nanodevices might be necessary to achieve a goal \citep{akyildiz2008nanonetworks}.
% In \citeyear{SEEMAN1982237}, \citeauthor{SEEMAN1982237} first proposed DNA as a construction material for nanoscale systems.
% Based on this idea, \citeauthor{Rothemund2006} \shortcite{Rothemund2006} developed the DNA-origami method, which allows for creating almost arbitrary shapes using DNA at nanoscale.
% In \citeyear{andersen2009self}, \citeauthor{andersen2009self} created a box with a controllable lid using the DNA-origami method.
% These boxes serve as a basis for medical nanosystem technology.
% They can be filled with either medication or DNA-tiles that serve as an input for a computation \cite{winfree1998design}.
% Certain DNA-tile systems form a Turing-complete computational model and are much more capable than most widely used medical diagnostic tools \cite{LAU2019}.
% For a detailed introduction, we refer to \citeauthor{winfree1998design} \shortcite{winfree1998design}.  

% \Cref{subfig:andtileset,subfig:assembly} show an example system of DNA-tiles that computes a 4-bit \textsc{and} operation.
% The blocks in \cref{subfig:andtileset} represent DNA-tiles.
% They are modelled as non-rotatable blocks with colour-coded \emph{glues} at possibly all sides.
% The number of black boxes represents the \emph{strength} of glue and a label next to it a condition.
% Tiles with the same number of boxes and identical labels can form a binding.
% That binding is subject to an environmental parameter called \emph{temperature} $\tau$.
% The temperature $\tau$ encodes the necessary number of fitting glues over all neighbours to form a stable binding.
% In the presented example, the temperature is $2$, and all tiles need at least two neighbours to stably bind together.

% An assembly process begins with a \emph{seed-tile} $\sigma$, shown in \cref{subfig:assembly} at the right.
% Due to the temperature requirement of $2$, three tiles can bind to the seed-tile $\sigma$: tile T, B, or 1.
% The assembly process continues until the entire DNA molecule in \cref{subfig:assembly} is formed.
% The \emph{receptors} R can only bind with the molecule if tiles 1, 2, 3, and 4 are present.
% If those tiles are only conditionally present, they can represent the input values of a computation, in this case of a 4-bit \textsc{and}.
% The molecule can only fully assemble if all four tiles are present.
% The other tiles are assumed to be present at all times in the medium.

% \input{./fig/and.tex}

% \input{./fig/example_network_and.tex}

% \Cref{scenario} shows the same DNA molecule assembly process incorporated into a network structure.
% In the first phase, a possibly large number of predefined markers are detected by a very large number of indistinguishable nanosensors of four different types.
% Upon detection, the lids of the nanosensors open, and they release their tiles 1--4 into the medium.
% If all four tiles are present in high enough numbers, message molecules can fully assemble and later be detected by a number of indistinguishable nanobots.
% Those react by predefined programming, e.g., releasing a fluorescence marker, medication, or additional tiles.
% Many operations are possible depending on the desired response by the nanonetwork.

% The entire assembly process is guided by diffusion and Brownian motion.
% It can only be controlled by the types of supplied tiles, their concentration, and the environmental temperature.
% Due to the stochastic nature, it is never fully clear where or when a message molecule forms and if it is erroneous. 
% Further, the nanodevices themselves have no global and very limited local information that is also noisy about their environment.

% Now that the basic functionality of DNA-based nanonetworks is clear, we can model part of them as DecPOMDPs.
% The goal of this model is to coordinate the actions of the large number of indistinguishable devices in the nanonetwork to decide a predefined problem.
% Since the nanobots are extremely resource-constrained, they only have partial information about the global state of the system.
% In addition to that, communication in nanonetworks is expensive and should be reduced to a minimum.


% \paragraph{An Application}
% To model a nanoscale medical system as a partitioned DecPOMDP, we need to specify the components of a partitioned DecPOMDP.

% The \emph{set of agents} $\boldsymbol{I}$ with its $K$ partitions consists of the different nanosensors and nanobots.
% There are $\kappa$ types of nanosensors, each type reacting to one of $\kappa$ different markers.
% So, for each type, there is a set of nanosensors, forming a partition $\mathfrak{I}_k^{\kappa}$ in $\boldsymbol{I}$.
% For the nanobots, the setting is the same w.r.t.\ the types of messages that different types of nanobots react to.
% With $\iota$ message types, there are $\iota$ sets of nanobots, each forming a partition $\mathfrak{I}_k^{\iota}$ in $\boldsymbol{I}$.
% Preliminary experiments have shown that each partition may have around $64{,}000$ agents in such a nanoscale medical system, making the agent set at least of size $(\kappa + \iota) \cdot 64{,}000$.

% Each type of agent basically has one action, which it can select to perform, and one possible observation.
% So, in terms of the model, there are two actions in each partition: 
% \begin{inparaenum}[(i)]
% 	\item outputting its load, which are tiles for nanosensors and medication for nanobots, or 
% 	\item doing nothing,
% \end{inparaenum}
% and two observations:
% \begin{inparaenum}[(i)]
% 	\item for nanosensors, sensing a marker and for nanobots, receiving a message or
% 	\item sensing / receiving nothing.
% \end{inparaenum}

% The physical state space can be described in terms of the presence of markers and assembled messages of certain types.
% Considering only assembled messages is a simplification as messages undergo a series of different states themselves during assembly, but only the assembled message is of importance for a nanobot.
% With $\kappa$ different markers and $\iota$ different messages, there are $2^{\kappa} \cdot 2^{\iota}$ states.
% Of course, other representations of the physical state space are possible, e.g., focusing on the medical context in a more detailed way.

% With the given physical state space, the transition model $T$ would need to model the presence of markers, which could possibly follow a Poisson distribution, as well as the presence of messages, which we assume to follow a log-normal distribution.
% To be able to compute a solution, further approximations might be necessary.
% The overall goal is that nanobots output their medication if corresponding markers are present, which the reward function needs to encode.
% The sensor model $\Omega$ would need to capture the probability of sensing correct inputs, which can vary greatly depending on outside influences.
% In general, we have the following sources of error:
% Nanosensors might sense a marker even though there is none or it might sense a marker of a wrong type as its own.
% Nanoagents might sense that they received a correct message even though the message is not there, the message is of another type, or the message is incorrectly assembled.
% Both types of nanodevices might also mistake a received input as there not being an input.

% To close out this application example, let us consider the worst case space requirements of a nanoscale medical system modelled as a lifted DecPOMDP:
% If we consider four types of marker and one type of message, e.g., for detecting a specific disease, which means $\kappa = 4$ and $\iota = 1$, we have a state space of size $s = 2^4 \cdot 2^1 = 32$ whereas our agent set is of size $N = (4+1) \cdot 64{,}000 = 320{,}000$ partitioned into $K= 4 + 1 = 5$ partitions.
% With these parameters and in reference to \cref{eq:decpomdpsize,eq:lifteddecpomdpsize}, the model sizes of $T$ are 
% \begin{align}
% 	S_{T}^{dec} &\in O( 32 \cdot 32 \cdot 2^{320{,}000})  \label{eq:nanosize:t}\\
% 	S_{T}^{lif} &\in O( 32 \cdot 32 \cdot (64{,}000^2)^5) \label{eq:liftednanosize:t}\\
% %\end{align}
% \intertext{and of $\Omega$ are}
% %\begin{align}
% 	S_{\Omega}^{dec} &\in O (32 \cdot 2^{320{,}000}) \label{eq:nanosize:o}\\
% 	S_{\Omega}^{lif} &\in O(32 \cdot (64{,}000^2)^5) \label{eq:liftednanosize:o}
% \end{align}
% in the worst case.
% \Cref{eq:nanosize:t,eq:liftednanosize:t,eq:nanosize:o,eq:liftednanosize:o} highlight to what a large degree the number of agents represent the dominating parameter in a lifted DecPOMDP with a nanoscale system as an application.\todo{update}

\bibliography{braun_143}

\end{document}