% !TEX root =  ../main.tex
\section{Categorical SCM and causal knowledge}\label{sec:cat_scm_ck}
Category theory \cite{mac2013categories} is a branch of pure mathematics that studies abstract structures and their relationships through objects and morphisms, focusing on how they compose and interact.
A \emph{category} \Ccat is composed of objects having a certain structure (e.g., measurable spaces, vector spaces) and arrows (morphisms) between them preserving the structure (e.g., measurable maps, linear maps) and satisfying certain axioms (cf. \Cref{def:category}).
Objects and morphisms form the collections $\mathcal{C}_0$ and $\mathcal{C}_1$, respectively.
Given \Ccat, the opposite category $\Ccat^\mathrm{op}$ has the same objects as \Ccat and the same, but reversed, arrows.
Taking subcollections $\mathcal{S}_0\subseteq \mathcal{C}_0$ and $\mathcal{S}_1\subseteq \mathcal{C}_1$ that are closed under identities, sources, targets, and composition, we can form a \emph{subcategory} of \Ccat.
Given two categories \Ccat and \Dcat, we can have an arrow between them, namely a \emph{functor} $F:\Ccat \rightarrow \Dcat$.
Functors define mappings between categories in a consistent way (cf. \Cref{def:functor}).
Notably, functors cannot destroy relations existing in \Ccat when mapping to \Dcat.
Given two functors $F$ and $G$ from \Ccat to \Dcat, we can have an arrow $\eta: F \rightarrow G$ between them called a \emph{natural transformation}, which is a peculiar arrow as its components commute with the images under $F$ and $G$ of the morphisms of \Ccat in the target category \Dcat (cf. \Cref{def:nat_transf}).
Functors can be arranged in categories whose objects are functors and morphisms are natural transformations.
More details in \Cref{app:background}.\\
We look at the probabilistic SCM over continuous random variables, denoted by \scm{}, as a subjective and imperfect world representation.
We consider the \emph{Markovian setting} \cite{pearl2009causality}, thus \scm{} entails a \emph{directed acyclic graph} (DAG), viz., $G^{\scm{}}$.
The nodes of $G^{\scm{}}$ correspond to the \emph{endogenous} variables $\myendogenous$, that is, the causal variables on which we can intervene.
The edges of $G^{\scm{}}$ are determined by structural functions $\myfunctional \coloneqq\{f_1, \ldots, f_n\}$ determining the value of each causal variable $X_i$, $i\in[n]$, in terms of a set of \emph{parents}, viz. $\parents_i \subseteq \myendogenous \setminus \{X_i\}$, and a node-specific \emph{exogenous} variable, $Z_i \in \myexogenous$.
Denote by $\myexogenous^{\ancestors_i} \subseteq \myexogenous \setminus \{ Z_i\}$ the set of exogenous variables corresponding to the ancestors of $X_i$, where $\ancestors_i \subseteq [n] \setminus \{i\}$ collects the indices of those ancestors.
\myfunctional induces a set of mixing functions $\mymixing\coloneqq\{m_1, \ldots, m_n\}$, such that the values of the endogenous random variables are equivalently expressed as $x_i=m_i\left(\{z_j\}_{j\in \ancestors_i}, z_i\right)$, $\forall \; i \in [n]$.\\
Accordingly, we can define \scm{} as a triple $\langle (\myexogenousvals,\, \Upsilon, \zeta), \, (\myendogenousvals,\, \Omega, \chi)\, , \mymixing \rangle$ made of the probability space of the exogenous $(\myexogenousvals,\, \Upsilon, \zeta)$, the probability space of the endogenous $(\myendogenousvals,\, \Omega, \chi)$, and measurable mixing functions \mymixing.
\begin{definition}[Measure-theoretic SCM]\label{def:SCM_meas}
    A Markovian SCM \scm{} is a triple $\langle (\myexogenousvals,\, \Upsilon, \zeta), \, (\myendogenousvals,\, \Omega, \chi)\, , \mymixing \rangle$  where:
    \begin{squishlist}
        \item $(\myexogenousvals,\, \Upsilon, \zeta)$ is a probability space associated with exogenous variables \myexogenous. Specifically, it consists of the product probability measure $\zeta=\zeta_1 \times \ldots \times \zeta_n$ on the product measurable space $(\myexogenousvals,\, \Upsilon)=(\myexogenousvals_1 \times \ldots \times \myexogenousvals_n,\, \Upsilon_1 \otimes \ldots \otimes \Upsilon_n)$.
        The probability measure is such that, for each $\mathcal{W}_1 \in \Upsilon_1, \ldots, \, \mathcal{W}_n \in \Upsilon_n$, we have 
        \begin{equation}
            (\zeta_1 \times \ldots \times \zeta_n) (\mathcal{W}_1 \times \ldots \times \mathcal{W}_n)=\zeta_1(\mathcal{W}_1) \cdot \ldots \cdot \zeta_n(\mathcal{W}_n)\,;
        \end{equation}
        \item $(\myendogenousvals,\, \Omega, \chi)$ is a probability space associated with endogenous variables \myendogenous consisting of a joint probability measure $\chi$ on the product measurable space $(\myendogenousvals,\, \Omega)=(\myendogenousvals_1 \times \ldots \times \myendogenousvals_n,\, \Omega_1 \otimes \ldots \otimes \Omega_n)$;
        \item $\mymixing\coloneqq\{m_1, \ldots, m_n\}$ is a collection of measurable mixing functions allowing us to recursively rewrite the causal assignments only in terms of the exogenous variables. 
        Accordingly, the joint probability measure $\chi$ factorizes as 
        \begin{equation}\label{eq:scm_obs_push}
            \chi = \bigtimes_{i=1}^n m_i\left(\mu_i \left( \myexogenousvals_i \times \myexogenousvals^{\ancestors_i} \right) \right)\,;
        \end{equation}
        where (i) $\myexogenousvals^{\ancestors_i}=\bigtimes_{j \in \ancestors_i} \myexogenousvals_j$; 
        and (ii) $\mu_i$ is a probability measure on the product measurable space $\left( \myexogenousvals_i \times \myexogenousvals^{\ancestors_i},\, \Upsilon_i \otimes \Upsilon^{\ancestors_i} \right)$, with $\Upsilon^{\ancestors_i}=\bigotimes_{j \in \ancestors_i} \Upsilon_j$.
    \end{squishlist}
\end{definition}

\medskip
\Cref{app:examples} provides an example for the case of linear SCM with additive noise.
An important property of \mymixing that holds for the main classes of Markovian SCMs, such as additive noise and post-nonlinear models \cite[Chapter~4]{peters2017elements}, is invertibility (cf. \Cref{app:invertibility}).
We will leverage such a property to prove that the proposed \SCMcat preserves expressiveness.
At this point, we can leverage \Cref{def:SCM_meas} to give a functorial definition of \scm{}.
Consider the following two categories, \I and \Prob.
The former is a small category made only of two objects, a source node and a target node, and a unique arrow from the source to the target node.
Specifically, \I has shape $\bullet \rightarrow \bullet$.
The latter category instead has as objects probability spaces and as morphisms measurable maps.
The functorial representation follows by viewing \scm{} as an arrow between \I and \Prob, assigning \emph{(i)} to the source node in \I the probability space associated with exogenous variables; \emph{(ii)} to the target node the probability space associated with endogenous variables; 
and \emph{(iii)} to the only arrow in \I the collection of measurable maps \mymixing.
\begin{definition}[Category-theoretic SCM]\label{def:scm_fun}
    An SCM is a functor $\scm{}: \I \rightarrow \Prob$ defined as follows
    \begin{equation}
        \centering
        \begin{tikzpicture}[]
    
        \node (I) at (0, 2.25) {\I};
        \node (P) at (3, 2.25) {\Prob};
        
        \node[circle, draw, fill,inner sep=1pt] (A) at (0, 1.5) {};
        \node[circle, draw, fill,inner sep=1pt] (B) at (0, 0) {};
        \node (A1) at (-0.3, 1.5) {$I$};
        \node (B1) at (-0.3, 0) {$I^\prime$};
        \node (C) at (3, 1.5) {$(\myexogenousvals,\, \Upsilon, \zeta)$};
        \node (D) at (3, 0) {$(\myendogenousvals,\, \Omega, \chi)$};
    
        \coordinate (A1shift) at ([yshift=-5pt]A);
        \draw[->,shorten >=2pt] (A1shift) -- (B);
    
        \coordinate (Ishift) at ([xshift=10pt]I);
        \coordinate (Pshift) at ([xshift=-20pt]P);
        \draw[->] (Ishift) -- node[above] {\scm{}} (Pshift);
        \draw[->] (C) -- node[right] {\mymixing} (D);
        \end{tikzpicture}
        \label{fig:scmfunctor}
    \end{equation}
\end{definition}
Please refer to \Cref{app:examples} for an example in the case of linear SCM with additive noise.
At this point, we can define a category of SCMs whose objects are functors as in \Cref{def:scm_fun}, and whose morphisms are natural transformations between the functors.
\begin{definition}\label{def:SCMcat}
    The category of SCMs, namely \SCMcat, consists of (i) functors $\scm{}: \I \rightarrow \Prob$ as objects; and (ii) natural transformations $\eta: \scm{} \rightarrow \scm{\prime}$ as morphisms, such that:
    \begin{squishlist}
        \item for each $I$ in \I, a measurable map $\eta_I: \scm{}(I) \rightarrow \scm{\prime}(I)$ in \Prob, called component at $I$;
        \item for the unique morphism $f:I \rightarrow I^\prime$ in \I, the following commutes:
            \begin{equation}\label{eq:nat_transf_SCMcat}
                \begin{tikzcd}[row sep=1.5cm, column sep=1.5cm]
                    \scm{}(I) \arrow[r, "\scm{}(f) = \mymixing"] \arrow[d, "\eta_I"'] & \scm{}(I^\prime) \arrow[d, "\eta_{I^\prime}"] \\
                    \scm{\prime}(I) \arrow[r, "\scm{\prime}(f) = \mymixing^\prime"'] & \scm{\prime}(I^\prime)
                \end{tikzcd}
                \begin{tikzpicture}[overlay]
                    \draw[dashed, rounded corners] (-4,-1.5) rectangle (-2.8,1.5);
                    \draw[dashed, rounded corners] (-1.2,-1.5) rectangle (0,1.5);
                    \node at (-3.4,-1.8){Exogenous };
                    \node at (-0.6,-1.8){Endogenous };
                \end{tikzpicture}
            \end{equation}  
        \end{squishlist}
\end{definition}
In \Cref{eq:nat_transf_SCMcat}, we added dashed rectangles to remark that the functor images \emph{(i)} on the left correspond to the probability spaces of the exogenous variables of \scm{} and $\scm{\prime}$;
\emph{(ii)} on the right, to probability spaces of the endogenous variables.\\
The knowledge of an SCM allows us to run \emph{interventions} to act or to simulate \enquote{new worlds}, that is, obtain new post-interventional distributions.
This enables the second and third layers of the ladder of causation \cite{pearl2018book} described in \Cref{sec:introduction}. 
In our work, we consider \emph{hard} and \emph{soft} interventions.\\
A hard intervention $\operatorname{do}\left(\{X_i = x_i\}_{X_i \in \widetilde{\mathcal{X}}}\right)$, where $\widetilde{\mathcal{X}} \subseteq \myendogenous$, replaces each assignment function $f_i$ corresponding to $X_i \in \widetilde{\mathcal{X}}$ with the constant $x_i$, thus generating a new post-interventional SCM, viz. $\scm{}_\hard$.
We associate a hard intervention with a collection $\mathcal{F}_\hard$ such that $f_i^\hard=x_i$, for all $X_i \in \widetilde{\mathcal{X}}$; and $f_j^\hard=f_j$, for all $X_j \in \myendogenous \setminus \widetilde{\mathcal{X}}$.
Graphically, a hard intervention mutilates $G^{\scm{}}$ by removing the incoming edges of the variables in $\widetilde{\mathcal{X}}$.
Consequently, according to the truncated product formula \cite{pearl2009causality}, a hard intervention entails a post-interventional distribution $P({\myendogenous}\mid \hard)= \prod_{X_j \in \myendogenous \setminus \widetilde{\mathcal{X}}} P(X_j \mid \parents_j, Z_j)$ (evaluated at $\{X_i = x_i\}_{X_i \in \widetilde{\mathcal{X}}}$).\\
In \SCMcat, the post-interventional SCM is the functor mapping \emph{(i)} the source node in \I to $(\myexogenousvals,\, \Upsilon, \zeta)$, as done by \scm{}; \emph{(ii)} the target node to $(\myendogenousvals,\, \Omega, \chi)_{\mathcal{H}}$, where, for each $X_i \in \widetilde{\mathcal{X}}$, $\chi_i$ degenerates to the point measure $\indicatorf{x_i}{X_i}$; and \emph{(iii)} the unique arrow in \I to the measurable maps $\mymixing_\hard \coloneqq \{m_1^\hard, \ldots, m_n^\hard\}$.
The components of $\mymixing_\hard$ are \emph{(i)} constant functions $m_i^\hard=x_i$ for $X_i \in \widetilde{\mathcal{X}}$,
and \emph{(ii)} $m_j^\hard = m_j^\hard(\{Z_j\} \cup \myexogenous^{\widetilde{\ancestors}_j})$ for $X_j \in \myendogenous \setminus \widetilde{\mathcal{X}}$, where $\myexogenous^{\widetilde{\ancestors}_j}$ is the set of exogenous variables corresponding to the ancestors of $X_j$ that are not screened by the intervention (cf. \Cref{app:examples}).
At this point, in the same spirit of the truncation formula above, the post-interventional probability measure reads as 
\begin{equation}\label{eq:scm_hint_push}
    \chi_{\mathcal{H}} = \bigtimes_{X_j\in \myendogenous\setminus\widetilde{\mathcal{X}}} m_j^\hard\left(\mu_j \left( \myexogenousvals_j \times \myexogenousvals^{\widetilde{\ancestors}_j} \right) \right)\,.
\end{equation}
Running hard interventions on SCMs is not always possible, and in certain cases, this way of intervening is unethical \cite{eberhardt2007interventions}.
Therefore, a more general notion of intervention has been considered in the literature.
Indeed, the mutilation of the DAG is not the only possible informative intervention.
Rather, we can be interested in simply modifying the causal mechanisms, without removing any incoming causal relations.
Such a family of interventions is called soft.\\
The soft intervention generates a post-interventional $\scm{}_\soft$ by substituting \myfunctional with $\myfunctional_\soft$, where each $f_i$ associated with the intervened variables $X_i \in \widetilde{\mathcal{X}}$ is replaced by another function $\widetilde{f}_i$, and the rest is unchanged.
Hence, the soft intervention can change the functional form of an SCM.
In principle, soft interventions can add new causal relations.
However, in our work, we consider only soft interventions that do not alter the parent set of the endogenous variables.
At this point, $\myfunctional_\soft$ induces a new collection of mixing functions $\mymixing_\soft \coloneqq \{m^\soft_1, \ldots, m^\soft_n\}$ (cf. \Cref{app:examples}), such that the post-interventional probability measure reads as
\begin{equation}\label{eq:scm_sint_push}
    \chi_{\soft} = \bigtimes_{X_i \in \myendogenous} m_i^\soft\left(\mu_i \left( \myexogenousvals_i \times \myexogenousvals^{\ancestors_i} \right) \right) \,.
\end{equation}
The resulting $\scm{}_\soft$ is an object in \SCMcat mapping \emph{(i)} the source node in \I to $(\myexogenousvals,\, \Upsilon, \zeta)$, as done by \scm{}; \emph{(ii)} the target node to $(\myendogenousvals,\, \Omega, \chi)_{\soft}$, where $\chi_\soft$ follows \Cref{eq:scm_sint_push}; and \emph{(iii)} the unique arrow in \I to the measurable maps $\mymixing_\soft$.\\
An important point we must ensure is that the proposed category-theoretic SCM formulation preserves the expressiveness: given either a hard or soft intervention \intervention, we have a suitable morphism in \SCMcat from \scm{} to $\scm{}_\intervention$.
This is ensured by the following.
\begin{restatable}[Interventions in \SCMcat]{lemma}{interventionlemma}\label{lem:intervention_prob}
    Given (i) \scm{} as in \Cref{def:scm_fun}; and (ii) the collection of measurable maps $\mathcal{F}_\intervention\coloneqq \{f^\intervention_1, \ldots, f^\intervention_n\}$;
    an intervention in \SCMcat is a natural transformation $\eta^\intervention\coloneqq\langle \catidentity_{\scm{}(I)},\, \intervention \rangle$, with $\intervention = \mymixing_\intervention \circ \mymixing^{-1}$ such that the following holds 
\end{restatable}
\begin{equation}\label{eq:nat_transf_inter_SCMcat}
    \begin{tikzcd}[row sep=1.5cm, column sep=1.5cm]
        \scm{}(I) \arrow[r, "\mymixing"] \arrow[d, "\catidentity_{\scm{}(I)}"'] & \scm{}(I^\prime) \arrow[d, "\intervention"] \\
        \scm{}_{\intervention}(I) \arrow[r, "\mymixing_\intervention"'] & \scm{}_{\intervention}(I^\prime)
    \end{tikzcd} \Longrightarrow  \mymixing_\intervention = \intervention \circ \mymixing\,. 
\end{equation}
\begin{proof}
    See \cref{app:proofs}.
\end{proof}
The proof leverages the fact that the intervention acts only on the endogenous variables, thus the component $\eta^\intervention_I$ mapping $\scm{}(I)$ to $\scm{}_\intervention(I)$ is simply the identity $\catidentity_{\scm{}(I)}$.
Then, for the second component $\eta^\intervention_{I^\prime}$, we leverage invertibility of \mymixing mentioned above.
The commutation in \Cref{eq:nat_transf_inter_SCMcat} follows by construction.
\Cref{eq:nat_transf_inter_SCMcat} highlights that our proposed \SCMcat is as rich as the canonical SCM framework.
Specifically, starting from a non-intervened SCM \scm{}, we can obtain all possible observational and interventional states of the causal model \scm{} through the application of all the possible hard and soft interventions since \Cref{lem:intervention_prob} guarantees the existence of corresponding measurable maps \intervention.
\begin{remark}
    By leveraging the invertibility of \mymixing, \Cref{lem:intervention_prob} preserves expressiveness not only at the level of probability measures, but also with respect to the values of exogenous and endogenous variables.
    This is a stronger result, as interventional consistency concerns only the distributional level.
    In fact, \SCMcat retains the expressiveness of canonical SCMs also in terms of counterfactual consistency.
    If counterfactual consistency is relaxed, a more general result on the existence of the morphism \intervention can be established via Brenier's polar factorization theorem \cite[Chapter~3]{villani2021topics}, without requiring the invertibility of \mymixing, as shown in \cite{d2025causal}.
\end{remark}
\begin{definition}[Causal knowledge]\label{def:causal_knowledge}
    The causal knowledge \CK{\scm{}} entailed by \scm{} is the subcategory of \SCMcat whose objects are \scm{} together with its intervened states $\{\scm{}_\intervention\}$ generated by the application of all the possible interventions $\{\intervention\}$, and whose morphisms are the natural transformations $\eta^\intervention$ between these objects in \SCMcat.  
\end{definition}
Consequently, \CK{\scm{}} corresponds to \emph{(i)} a product distribution over the exogenous variables together with the identity morphism, representing the component $\eta_I^\intervention$ of the natural transformations; and \emph{(ii)} the set of probability measures over the endogenous as given in \Cref{eq:scm_obs_push,eq:scm_hint_push,eq:scm_sint_push} together with the morphisms \hard and \soft, representing the natural transformation component $\eta_{I'}^\intervention$.
Consider now two different non-intervened SCMs, namely \scm{} and $\scm{\prime}$, and their corresponding \CK{\scm{}} and $\CK{\scm{\prime}}$, respectively.
Additionally, suppose that \scm{} and $\scm{\prime}$ are not isolated entities; rather, they are immersed in a certain network where other non-intervened SCMs exist.
We are interested in relating \CK{\scm{}} and $\CK{\scm{\prime}}$ within the network in a way that is both \emph{causally and category-theoretically} consistent.
From now on, we will focus on the endogenous layer, since exogenous variables are latent.
Hence, when we say \CK{\scm{}}, with slight abuse of notation we refer to the observational and interventional probability measures over the endogenous.