% !TEX root =  ../main.tex
\section{Encoding causal knowledge with convex spaces}\label{sec:encoding_ck}
Let us consider the measurable space $(\myendogenousvals,\, \Omega)$ associated with the endogenous variables, where $\Omega$ is a $\sigma$-algebra over \myendogenousvals.
The set of probability measures on $(\myendogenousvals,\, \Omega)$, namely $\Delta_{(\myendogenousvals,\, \Omega)}$, is a convex space, subset of the vector space $\reall^\Omega$.
Following \cite{fritz2009convex}, we define the latter convex space as follows.
\begin{restatable}[Convex space of probability measures, $\langle \Delta_{(\myendogenousvals,\, \Omega)}, cc_{\lambda} \rangle$]{lemma}{convexspaceprob}\label{lem:convexspace_prob}
    The convex space of probability measures, namely $\langle \Delta_{(\myendogenousvals,\, \Omega)}, cc_{\lambda} \rangle$, is given by the set $\Delta_{(\myendogenousvals,\, \Omega)}$ of probability measures $\chi$ on $(\myendogenousvals,\, \Omega)$ together with a convex combination operation defined by 
    \begin{equation}\label{eq:ccl}
        cc_{\lambda}(\chi_1, \chi_2)(\mathcal{O}) \coloneqq \lambda \chi_1(\mathcal{O}) + \bar{\lambda} \chi_2(\mathcal{O})\,, 
    \end{equation}
    for all $\mathcal{O} \in \Omega$, with $\lambda \in [0,1]$ and $\bar{\lambda} \coloneqq 1 - \lambda$.
\end{restatable}
\begin{proof}
    See \cref{app:proofs}.
\end{proof}
Convex spaces $\langle \Delta_{(\myendogenousvals,\, \Omega)}, cc_{\lambda} \rangle$ are the objects of the category of convex spaces of probability measures, namely \CSprob, that we introduce below.
\begin{restatable}{lemma}{csprobcat}    
    The category \CSprob has convex spaces $\langle \Delta_{(\myendogenousvals,\, \Omega)}, cc_{\lambda} \rangle$ as objects, and affine measurable maps -- i.e., measurable maps commuting with $cc_\lambda$ -- as morphisms.
\end{restatable}
\begin{proof}
    See \cref{app:proofs}.
\end{proof}
It turns out that \CK{\scm{}} is closed w.r.t.\ the convex combination operation in \Cref{lem:convexspace_prob}.
\begin{restatable}{theorem}{convexcomb}\label{th:convex_comb_prob_meas}
    Every convex combination of probability measures corresponding to a causal knowledge \CK{\scm{}} is a valid soft-interventional probability measure for \CK{\scm{}}.
\end{restatable}
\begin{proof}
    See \cref{app:proofs}.
\end{proof}
Hence, in light of \Cref{th:convex_comb_prob_meas}, it is legitimate to question whether we can establish a functorial encoding of \scm{} in a convex space.
While the encoding seems straightforward on objects, we have to be careful on morphisms.
Indeed, given \scm{} and $\scm{\prime}$, the natural transformation $\eta$ in \Cref{def:SCMcat} tells us nothing about the existence of natural transformations between the intervened states of \scm{} and $\scm{\prime}$.
If the latter natural transformations do not exist in \SCMcat, an encoding functor will fail to map $\eta$ to an affine morphism between the convex spaces corresponding to \scm{} and $\scm{\prime}$, since such a morphism does not exist in \CSprob.
Conversely, if the intervened states of \scm{} and \scm{\prime} are related by a natural transformation in \SCMcat, meaning that they are IC, the affine morphism will exist in \CSprob.\\
The latter observation naturally links our work to the theory of \emph{causal abstraction} (CA), electing CA as the necessary formalism for relating causal knowledge.
Specifically, in the following, we build upon the \alphaabs introduced by \cite{rischel2020category}.
Given a micro-level \scm{} and a macro-level $\scm{\prime}$, an \alphaabs is a triple $\boldsymbol{\alpha}\coloneqq \langle \Rset, \amap, \alphamap{} \rangle$ where \emph{(i)} \Rset is a set of endogenous variables in \scm{} that are abstracted to the macro-level \emph{(ii)} structurally via the surjective map $\amap:\Rset\rightarrow \myendogenous^\prime$, and \emph{(iii)} functionally by  $\alphamap{}: \mathcal{D}[\Rset] \rightarrow \mathcal{D}[\myendogenous^\prime]$.
Here, for each $X_i^\prime \in \myendogenous^\prime$, we have a surjective function mapping the values of the micro-level to the macro-level, viz.\ $\alphamap{X_i^\prime}: \mathcal{D}[\amap^{-1}\left(X_i^\prime\right)] \rightarrow \mathcal{D}[X_i^\prime]$.
Given an \alphaabs between \scm{} and $\scm{\prime}$, we say that \abst is IC if, \emph{for any intervention} \intervention on the endogenous $\myendogenous^\prime_\intervention \subseteq \myendogenous^\prime$ and for any set of targets $\mathcal{T}^\prime \subseteq \myendogenous^\prime \setminus \myendogenous^\prime_\intervention$, we can obtain the values in $\mathcal{D}[\mathcal{T}^\prime]$ starting from those in $\mathcal{D}[\amap^{-1}\!\left(\myendogenous^\prime_\intervention \right)]$ in two alternative ways: 
\emph{(i)} by computing the values in $\mathcal{D}[\amap^{-1}\!\left(\mathcal{T}^\prime \right)]$ at the micro-level, then abstracting via \alphamap{\mathcal{T}^\prime}; or \emph{(ii)} by abstracting via \alphamap{\myendogenous^\prime_\intervention} to $\mathcal{D}[\myendogenous^\prime_\intervention]$ and then computing the values in $\mathcal{D}[\mathcal{T}^\prime]$ at the macro-level.\\
The \alphaabs manifests in \SCMcat as a natural transformation between the micro-level \scm{} and the macro-level $\scm{\prime}$.
As shown in \Cref{def:scm_fun,def:SCMcat}, our category-theoretic formalism emphasizes the role of the exogenous.
Accordingly, we need to consider the exogenous to properly extend the \alphaabs to \SCMcat.
Specifically, \amap and \alphamap{} have two components, the first for the exogenous, and the second for the endogenous, as formalized below.
\begin{definition}[$\abst$-abstraction in \SCMcat]\label{def:alpha_abstraction_scmcat}
    Given micro- and macro-level SCMs, \scm{} and $\scm{\prime}$, respectively, an \alphaabs is a tuple $\abst = \langle \Rset, \Qset, \amap, \alphamap{} \rangle$, where: (i) $\Rset \subseteq \myendogenous$ is a set of relevant endogenous variables; (ii) $\Qset \subseteq \myexogenous$ is a set of relevant exogenous variables given by the union of the set of exogenous corresponding to the endogenous in \Rset and those corresponding to their ancestors; (iii) $\amap=\langle \amap_\myexogenous, \amap_\myendogenous \rangle$ is a pair of surjective functions mapping sets, $\amap_\myexogenous: \Qset \rightarrow \myexogenous^\prime$ and $\amap_\myendogenous: \Rset \rightarrow \myendogenous^\prime$, respectively; (iv) $\alphamap{}=\langle \alphamap{\myexogenous}, \alphamap{\myendogenous} \rangle$ is a natural transformation composed of measurable functions mapping probability spaces, $\alphamap{\myexogenous}$ for the exogenous and $\alphamap{\myendogenous}$ for the endogenous, respectively.  
\end{definition}
In addition, interventional consistency translates into commutation of diagrams in \SCMcat.
Here, we look at an intervention on the endogenous $\myendogenous_\intervention^\prime \subseteq \myendogenous^\prime$ on the macro-level model \scm{\prime} as a collection of measurable maps, as given in \Cref{lem:intervention_prob}, and denote it by $\intervention^\prime$.
Similarly, we denote by $\intervention$ the corresponding intervention on the micro-level variables $\amap_\myendogenous^{-1}(\myendogenous_\intervention^\prime) \subseteq \myendogenous$.
To aid visualization, we use \emph{(i)} violet for the micro-level \scm{} layer, \emph{(ii)} periwinkle for the macro-level $\scm{\prime}$ layer, and \emph{(iii)} orange for the \alphaabs.
\begin{definition}[IC $\abst$-abstraction in \SCMcat]\label{def:IC_alpha_abstraction_scmcat}
    An $\abst$-abstraction is IC in \SCMcat if, for all the interventions $\eta^{\intervention^\prime}=\langle \catidentity_{\scm{\prime}(I)}, \intervention^\prime \rangle$ on the macro-level model $\scm{\prime}$, the faces of the following diagram commute
    \tdplotsetmaincoords{0}{0}
    \begin{tikzpicture}[tdplot_main_coords, node distance=2cm]
    
      \node (A) at (0,0,0) {$\scm{}(I)$};
      \node (B) at (3,0,0) {$\scm{}(I')$};
      \node (C) at (0,-2,0) {$\scm{\prime}(I)$};
      \node (D) at (3,-2,0) {$\scm{\prime}(I')$};
      
      \draw[->,Mulberry] (A) -- node[above] {$\mymixing$} (B);
      \draw[->,Melon] (A) -- node[left]  {$\alphamap{\myexogenous}$} (C);
      \draw[->,Melon] (B) -- node[right] {$\alphamap{\myendogenous}$} (D);
      \draw[->,Periwinkle] (C) -- node[below] {$\mymixing^\prime$} (D);
      
      \coordinate (shift) at (4,2,1);
      
      \node (A2) at ($(A)+(shift)$) {$\scm{}_\intervention(I)$};
      \node (B2) at ($(B)+(shift)$) {$\scm{}_\intervention(I')$};
      \node (C2) at ($(C)+(shift)$) {$\scm{\prime}_{\intervention^\prime}(I)$};
      \node (D2) at ($(D)+(shift)$) {$\scm{\prime}_{\intervention^\prime}(I')$};
      
      \draw[->,Mulberry] (A2) -- node[above] {$\mymixing_\intervention$} (B2);
      \draw[->,Melon] (A2) -- node[left]  {$\alphamap{\myexogenous}$} (C2);
      \draw[->,Melon] (B2) -- node[right] {$\alphamap{\myendogenous}$} (D2);
      \draw[->,Periwinkle] (C2) -- node[below] {$\mymixing^\prime_{\intervention^\prime}$} (D2);
      
      \draw[->,dashed,Mulberry] (A) -- node[left, xshift=-5mm] {$\catidentity_{\scm{}(I)}$} (A2);
      \draw[->,dashed,Mulberry] (B) -- node[right, xshift=5mm] {$\intervention$}(B2);
      \draw[->,dashed,Periwinkle] (C) -- node[left, xshift=-5mm] {$\catidentity_{\scm{\prime}(I)}$} (C2);
      \draw[->,dashed,Periwinkle] (D) -- node[right, xshift=5mm] {$\intervention^\prime$} (D2);
    \end{tikzpicture}
\end{definition}
Starting from \Cref{def:IC_alpha_abstraction_scmcat} and exploiting \Cref{th:convex_comb_prob_meas}, we obtain the following.
\begin{restatable}{theorem}{caaffinefunctions}\label{th:ca_affine_functions}
    The component $\alphamap{\myendogenous}$ of \alphamap{} within an IC \alphaabs commutes with $cc_{\lambda}$, and is thus affine.
\end{restatable}
\begin{proof}
    See \cref{app:proofs}.
\end{proof}
At this point, we denote by \NI the subcategory of \SCMcat whose objects are non-intervened SCMs and whose morphisms are IC \alphaabs.
Exploiting \Cref{th:convex_comb_prob_meas} and \Cref{th:ca_affine_functions}, we have the following.
\begin{restatable}{theorem}{encodingfunctor}\label{th:encoding_functor}
    There exists an IC encoding functor $E: \NI \rightarrow \CSprob$ mapping (i) each non-intervened $\scm{} \coloneqq \langle(\myexogenousvals,\, \Upsilon, \zeta), \, (\myendogenousvals,\, \Omega, \chi)\, , \mymixing \rangle$ to the convex space of probability measures $\langle \Delta_{(\myendogenousvals,\, \Omega)}, cc_{\lambda} \rangle$, and (ii) each IC \alphaabs between \scm{} and \scm{\prime} to its endogenous component \alphamap{\myendogenous}.
\end{restatable}
\begin{proof}
    See \cref{app:proofs}.
\end{proof}