% !TEX root = ../main.tex

\section{Efficient Algorithm}\label{sec:implementation} GES, if naively implemented, is
slow. In this section, we develop new ways of implementing its details to significantly speed it up. As we will see in the empirical studies, these details are crucial to scaling up XGES to large dense graphs. We now review how GES manipulates MECs in
practice, and then show how to make it more efficient.

\subsection{Manipulating MECs with CPDAGs}\label{sec:manipulating_mecs}
MECs are sets of DAGs whose size can grow exponentially with the number of nodes $d$
\citep{he2015counting}. To manipulate them practically, GES builds on the
following theorem.

\begin{theorem}[\citealp{verma1991equivalence}]
    Two DAGs are Markov equivalent if and only if they have the same skeleton and the
    same v-structures.
    \label{thm:mec}
 \end{theorem} 

 The \textit{skeleton} of a graph is the undirected graph obtained
 by removing the direction of all edges; a \textit{v-structure} is a triple of nodes
such that $x \rightarrow y \leftarrow z$ with no edge between $x$ and
 $z$.\looseness=-1

\Cref{thm:mec} shows that all the graphs of a MEC share the same skeleton and differ
only on edges that can be reversed without changing the set of v-structures. So within a
MEC, some edges are consistently oriented in one direction while others may have
different orientations between graphs. They are respectively called \textit{compelled}
and \textit{reversible} edges.


\parhead{CPDAGs.}
Each MEC can be represented by a \textit{partially directed acyclic
graph} (PDAG). A PDAG is a graph with both directed and undirected edges and no cycles of
directed edges. The \textit{canonical PDAG of a MEC} contains all the compelled edges as directed
edges and all the reversible edges as undirected edges (see \Cref{fig:mec}). A PDAG that is the canonical
representation of a MEC is called a \textit{completed PDAG} (CPDAG).
A PDAG $P$ that is not a CPDAG but has the same skeleton and v-structures as a CPDAG $P'$
can be transformed into $P'$ with a method called
\textit{completing} the PDAG \citep{meek1995causal,chickering2002learning}.

We will use the following terminology when discussing a PDAG. For node $x$: its \textit{neighbors}
$\Ne(x)$ are its neighbors from undirected edges, its \textit{children} $\Ch(x)$ are its
children from directed edges, its \textit{parents} $\Pa(x)$ are its parents from
directed edges, and its \textit{adjacent} nodes $\Ad(x)$ are any of all three. 
A \textit{semi-directed path} from $x$ to $y$ is a path from $x$ to $y$ whose edges
are each either undirected or directed toward $y$. A \textit{clique} is a
set of pairwise adjacent nodes.


\parhead{Operators on CPDAGs.}
GES associates each operation on a MEC $M$  with an operator acting on its CPDAG $P$,
such that an operation changing $M$ into $M'$ is associated with an operator changing
$P$ into $P'$, the CPDAG of $M'$. 

For insertions, the operators used by GES are of the form Insert$(x,y,T)$ where
$x,y \in V$ and $T \subset V$. The action of Insert$(x,y,T)$ on $P$ is to insert the
edge $x \rightarrow y$, orient any undirected edges $t - y$ as $t \rightarrow y$ for $t
\in T$ and finally complete the resulting PDAG into a CPDAG. 

Given a MEC $M$ and its CPDAG $P$, \citet{chickering2002optimal} shows that there is a
bijection between (a) the set of possible insertions on $M$ and (b) the set of operators
Insert$(x,y,T)$ satisfying the following validity conditions relative to $P$:
\begin{align}
    \bm{I1.}\quad&x \not\in \mathrm{Ad}(y). \label{eq:valid1} \\
    \bm{I2.}\quad&T \subset \mathrm{Ne}(y) \setminus \mathrm{Ad}(x). \label{eq:valid2} \\
    \bm{I3.}\quad&(\mathrm{Ne}(y) \cap \mathrm{Ad}(x)) \cup T \text{ is a clique.} \label{eq:valid3} \\
    \bm{I4.}\quad&\text{All semi-directed paths from } y \text{ to } x \text{ have a node in }\nonumber \\
    &(\mathrm{Ne}(y) \cap \mathrm{Ad}(x)) \cup T. \label{eq:valid4} 
\end{align}
Insert operators satisfying these conditions, with $\Ad, \Ne,$ $\Pa,$ clique and paths
computed in $P$, are called \textit{valid} for $P$. To navigate from one MEC to another
with an insertion, GES applies the corresponding valid Insert operator from one CPDAG to
another.

\parhead{Score of Operators.}
The increase in score after an Insert operation can be efficiently computed when the
score is BIC. Indeed, the BIC for a graph $G$ can be equivalently rewritten as:
\begin{equation}
    \textstyle
    S(G; \data) = \sum_{j=1}^d s(j, \Pa_j^G ; \data), \label{eq:bic-decomp}
\end{equation}
where $s(j, \Pa_j^G ; \data)$ is called the \textit{local score} of $j$ and equals:
\begin{equation}
    \sum\limits_{i=1}^n \log p_{\hat
\theta}(x^i_j \mid x^i_{\Pa_j^G}) - \frac{\alpha}{2}\log n \cdot |\Pa_j^G|.
\end{equation}
A score decomposing as \Cref{eq:bic-decomp} is called \textit{decomposable}.


With a decomposable score, the increase in score for an operator Insert($x,y,T$) applied
to $P$ is:
\begin{multline}
    \delta = s(y, (\Ne(y) \cap \Ad(x)) \cup T \cup \Pa(y) \cup \{x\}) \\
    - s(y, (\Ne(y) \cap \Ad(x)) \cup \Pa(y )),
    \label{eq:insert-score}
\end{multline}
where each term $\Ad, \Ne, \Pa$ is computed relative to $P$. For convenience, we say
that $\delta$ is the \textit{score} of the operator.

Similar derivations are made for Delete and Reversal in \citet{chickering2002optimal}
 and \citet{hauser2012characterization} (reversal is called turning). We review them in
 \Cref{appendix:sec:ges_parametrization}.

\parhead{GES with CPDAGs.}
In sum, GES implements \Cref{alg:ges-vanilla} using CPDAGs and operators. It begins with
the empty CPDAG, identifies all the Insert (or Delete, Reversal) that are valid for the
current CPDAG, computes their scores, applies the best one if it has a positive score,
and repeats.

XGES could proceed similarly. However, whether for GES or XGES, constructing the list of
valid operators and scoring them at each step is computationally expensive. We now turn
to new ways to more efficiently implement these operations.

\subsection{Efficient Algorithmic Formulation}
\label{sec:efficient_algorithmic_formulation}
When applying an operator on $P$ to form $P'$, the validity conditions of the other
operators (\Cref{eq:valid1,eq:valid2,eq:valid3,eq:valid4}) can become true or false.
Similarly, the score of the other operators in \Cref{eq:insert-score} can
change. Yet, as noticed by \citet{ramsey2017million}, only a few edges change from $P$ to $P'$. As a result, most
other operators that were computed for $P$ but not applied remain valid operators for
$P'$. Similarly, the scores of most operators remain identical.

Each step of XGES involves the following sub-steps:
\begin{enumerate}
    \item Start with a CPDAG $P$ and a list of candidate operators $\mathcal{C}$,
    where $\mathcal{C}$ is guaranteed to include all the valid operators for $P$, and
    their scores.
    \item Choose the best operator $O^*$ from
    $\mathcal{C}$ using XGES's heuristic (deletion before reversal, before insertion).
    \item Verify that $O^*$ is valid for $P$, otherwise re-run the heuristic on
    $\mathcal{C} \setminus \{O^*\}$ until a valid operator is found.
    \item Apply $O^*$ to $P$ to form $P'$ and add to $\mathcal{C}$ all the operators that
    became valid for $P'$, with their scores. Return to 1, with $P \leftarrow P'$, as we
    have just guaranteed that $\mathcal{C}$ includes all the valid operators for $P$.
\end{enumerate}

The operators that became invalid for $P'$ are not removed from $\mathcal{C}$. It is more efficient to leave them in the list and only check the validity of an operator in step 3 just before applying it (and discarding it if invalid). Indeed, if we recheck the validity of all operators after each operation, a single operator will be rechecked at each step until it is applied, instead of being checked only once before being applied.

No steps were included to recompute the scores of any operators in
$\mathcal{C}$. We explain how we can avoid it next.

\subsubsection{Updating the Score of Operators.}
\label{sec:updating_score}
To avoid recomputing the scores of operators at each step, we
change the parametrization of the
operators to make their scores independent of the CPDAG they are applied to. 

We parametrize each Insert by an additional set $E \subset V$ and add an extra validity
condition that completes \Cref{eq:valid1,eq:valid2,eq:valid3,eq:valid4}:
\begin{flalign}
    ~~~\bm{I5.}\quad & E = (\Ne(y) \cap \Ad(x)) \cup T \cup \Pa(y).&&
\end{flalign} 
The score of Insert($x,y,T,E$) from \Cref{eq:insert-score} becomes $s(y, E \cup \{x\}) -
s(y, E )$, which only depends on the Insert parameters. We reparametrize Delete and
Reversal operators similarly in \Cref{appendix:sec:xges_parametrization}.

With \citet{chickering2002optimal}'s parametrization, the score of an Insert would
change if $(\Ne(y) \cap \Ad(x)) \cup T \cup \Pa(y)$ changes. Now, with $E$ as a fixed
parameter of the operator, it is the status of condition \textbf{I5} that would change.
We turned a change in score into a change in validity.

We now turn to efficiently updating the list of valid operators.

\begin{table}[t]
    \centering
    \begin{tabular}{ L{1.7cm}C{3.4cm}C{1.8cm} }
    \toprule
    Pre-update & $a \quad b$ & $a
    \rightarrow b$ \\
    Post-update& $a - b$ &  $a- b$  \\
    \midrule
    Necessary conditions
    &\begin{itemize}[leftmargin=*, itemsep=-1pt]
        \vspace*{-4mm}
        \item[] $y \in \{a,b\}$ 
        \item[or] $y \in \Ne(a)\cap \Ne(b)$
        \item[or] $(x = a) \wedge (y \in \Ne(b))$
        \item[or] $(x = b) \wedge (y \in \Ne(a))$
    \end{itemize}
    & \begin{itemize}[leftmargin=*]
        \item[] $y \in \{a,b\}$
    \end{itemize}
    \\[-3mm]
    \bottomrule
    \end{tabular}
    \caption{Necessary conditions for an Insert($x,y,T,E$) to become valid after the
    $(a,b)$ update. Excerpt of \Cref{tab:operator_updates} from
    \Cref{appendix:sec:efficient_algorithmic_formulation} with only two types of
    updates. }
    \label{tab:operator_updates_example}
\end{table}

\subsubsection{Updating the Validity of Operators.}
After updating a CPDAG $P$ into $P'$, our goal is to efficiently add to $\mathcal{C}$
the operators that became valid for $P'$.

To do so, we decompose the update from $P$ to $P'$ into a succession of single edge
updates $P_1, \ldots, P_k$, with $P_1=P$, $P_k = P'$ and where $P_i$ and $P_{i+1}$ only
differ on the orientation or presence of a single edge, e.g., $a \rightarrow b$ vs.\ $a -
b$. We then have the following theorem.

\begin{theorem}
    Write $P_1, \ldots, P_k$ a sequence of single edge updates that transforms $P$ into
    $P'$. Take an operator $O$ that is invalid for $P$ and becomes valid for $P'$ and
    write $\{c_1, \ldots, c_m \}$ its validity conditions, e.g., $\bm{I1}$ to $\bm{I5}$
    for an Insert. Then there exists $i^* \in \{1, \ldots, k-1\}$ and one validity condition
    $c_{j^*}$ such that $c_{j^*}$ is false for $P_{i^*}$, true for $P_{i^*+1}$, and all
    other conditions $c_{j} \neq c_{j^*}$ are true for $P_{i^*+1}$.
    \label{thm:update_validity}
\end{theorem}
\begin{proof}
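    All $c_j$ are true for $P'$, i.e., $P_k$. So let us step back from $P'$ to $P$ until
    one of the conditions, $c_{j^*}$, becomes false for some $P_{i^*}$; that is, let
    $i^*$ be the largest index $i < k$ such that some condition is false for $P_i$. Such
    an $i^{*}$ must exist since some condition is false for $P$, i.e., $P_1$. By
    maximality of $i^*$, all conditions are true for $P_{i^*+1}$, so $P_{i^*}$ and
    $c_{j^*}$ satisfy the theorem.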
\end{proof}

With \Cref{thm:update_validity}, we can efficiently update $\mathcal{C}$ if we can
identify which operators are susceptible to having one of their conditions
become true after single-edge updates.

In \Cref{appendix:sec:efficient_algorithmic_formulation} we study the necessary
conditions on the parameters of an operator to have one of its validity conditions
become true after a single-edge update. We report the necessary conditions for all
validity conditions of all operators against all types of edge updates in
\Cref{tab:operator_updates} in \Cref{appendix:sec:efficient_algorithmic_formulation}. We
provide an excerpt in \Cref{tab:operator_updates_example} with only two types of edge
updates, for the Insert operator only, and where we grouped the necessary conditions for
each validity condition into a single set of necessary conditions (with or).

For example, if edge $a \rightarrow b$ is changed into $a - b$,
\Cref{tab:operator_updates_example} shows that the only Insert($x,y,T,E$) operators that
can become
valid are those with $y \in \{a,b\}$.
If an undirected edge $a - b$ is added between two previously non-adjacent nodes $a$
and $b$, then the necessary conditions for an Insert operator to become valid are more
involved but can still be checked efficiently.\looseness=-1

In sum, we can efficiently update $\mathcal{C}$ after each CPDAG update using
\Cref{tab:operator_updates} in \Cref{appendix:sec:efficient_algorithmic_formulation}.

\subsubsection{XGES Implementation.}
We implement the efficient algorithmic formulation of XGES-0 and XGES at \url{https://github.com/ANazaret/XGES}. We provide
code in C++ as well as a Python wrapper.
