\documentclass[accepted]{uai2023}

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

\title{Adaptivity Complexity for Causal Graph Discovery\\(Supplementary Material)}

% Add authors
\author[1]{\href{mailto:davin@u.nus.edu}{Davin Choo\thanks{Equal contribution}}{}}
\author[2]{Kirankumar Shiragur$^*$}
\affil[1]{
    School of Computing\\
    National University of Singapore
}
\affil[2]{
    Broad Institute of MIT and Harvard
}

%%% MY STUFF %%%
\usepackage{algorithm}
\usepackage{algorithmicx}
\usepackage{algpseudocode}
\usepackage{subcaption}

\hypersetup{hidelinks}
\usepackage[capitalize,nameinlink]{cleveref}
\usepackage{amsthm}
\usepackage{thm-restate}
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}[theorem]{Lemma}
\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}

\usetikzlibrary{calc, graphs, graphs.standard, shapes, arrows, arrows.meta, positioning, decorations.pathreplacing, decorations.markings, decorations.pathmorphing, fit, matrix, patterns, shapes.misc, tikzmark}

\newcommand{\cA}{\mathcal{A}}
\newcommand{\cB}{\mathcal{B}}
\newcommand{\cC}{\mathcal{C}}
\newcommand{\cE}{\mathcal{E}}
\newcommand{\cO}{\mathcal{O}}
\newcommand{\cI}{\mathcal{I}}
\newcommand{\skel}{\mathrm{skel}}
%%% MY STUFF %%%
  
\begin{document}
\onecolumn
\maketitle

\appendix

\section{Meek rules}
\label{sec:appendix-meek-rules}

Meek rules are a set of 4 edge orientation rules that are sound and complete with respect to any given set of arcs that has a consistent DAG extension \citep{meek1995}.
Given any edge orientation information, one can always repeatedly apply Meek rules till a fixed point to maximize the number of oriented arcs.

\begin{definition}[Consistent extension]
A set of arcs is said to have a \emph{consistent DAG extension} for a graph $G$ if there exists a permutation $\pi$ on the vertices such that (i) every edge $\{u,v\}$ in $G$ is oriented $u \to v$ whenever $\pi(u) < \pi(v)$, (ii) there is no directed cycle, (iii) all the given arcs are present.
\end{definition}

\begin{definition}[The four Meek rules \citep{meek1995}, see \cref{fig:meek-rules} for an illustration]
\hspace{0pt}
\begin{description}
    \item [R1] Edge $\{a,b\} \in E \setminus A$ is oriented as $a \to b$ if $\exists$ $c \in V$ such that $c \to a$ and $c \not\sim b$.
    \item [R2] Edge $\{a,b\} \in E \setminus A$ is oriented as $a \to b$ if $\exists$ $c \in V$ such that $a \to c \to b$.
    \item [R3] Edge $\{a,b\} \in E \setminus A$ is oriented as $a \to b$ if $\exists$ $c,d \in V$ such that $d \sim a \sim c$, $d \to b \gets c$, and $c \not\sim d$.
    \item [R4] Edge $\{a,b\} \in E \setminus A$ is oriented as $a \to b$ if $\exists$ $c,d \in V$ such that $d \sim a \sim c$, $d \to c \to b$, and $b \not\sim d$.
\end{description}
\end{definition}

\begin{figure}[htbp]
\centering
\resizebox{\linewidth}{!}{%
\begin{tikzpicture}
%
% R1
%
\node[draw, circle, inner sep=2pt] at (0,0) (R1a-before) {\small $a$};
\node[draw, circle, inner sep=2pt, right=of R1a-before] (R1b-before) {\small $b$};
\node[draw, circle, inner sep=2pt, above=of R1a-before](R1c-before) {\small $c$};
\draw[thick, -stealth] (R1c-before) -- (R1a-before);
\draw[thick] (R1a-before) -- (R1b-before);

\node[draw, circle, inner sep=2pt] at (3,0) (R1a-after) {\small $a$};
\node[draw, circle, inner sep=2pt, right=of R1a-after] (R1b-after) {\small $b$};
\node[draw, circle, inner sep=2pt, above=of R1a-after](R1c-after) {\small $c$};
\draw[thick, -stealth] (R1c-after) -- (R1a-after);
\draw[thick, -stealth] (R1a-after) -- (R1b-after);

\node[single arrow, draw, minimum height=2em, single arrow head extend=1ex, inner sep=2pt] at (2.2,0.75) (R1arrow) {};
\node[above=5pt of R1arrow] {\footnotesize R1};

%
% R2
%
\node[draw, circle, inner sep=2pt] at (6,0) (R2a-before) {\small $a$};
\node[draw, circle, inner sep=2pt, right=of R2a-before] (R2b-before) {\small $b$};
\node[draw, circle, inner sep=2pt, above=of R2a-before](R2c-before) {\small $c$};
\draw[thick, -stealth] (R2a-before) -- (R2c-before);
\draw[thick, -stealth] (R2c-before) -- (R2b-before);
\draw[thick] (R2a-before) -- (R2b-before);

\node[draw, circle, inner sep=2pt] at (9,0) (R2a-after) {\small $a$};
\node[draw, circle, inner sep=2pt, right=of R2a-after] (R2b-after) {\small $b$};
\node[draw, circle, inner sep=2pt, above=of R2a-after](R2c-after) {\small $c$};
\draw[thick, -stealth] (R2a-after) -- (R2c-after);
\draw[thick, -stealth] (R2c-after) -- (R2b-after);
\draw[thick, -stealth] (R2a-after) -- (R2b-after);

\node[single arrow, draw, minimum height=2em, single arrow head extend=1ex, inner sep=2pt] at (8.2,0.75) (R2arrow) {};
\node[above=5pt of R2arrow] {\footnotesize R2};

%
% R3
%
\node[draw, circle, inner sep=2pt] at (12,0) (R3d-before) {\small $d$};
\node[draw, circle, inner sep=2pt, above=of R3d-before](R3a-before) {\small $a$};
\node[draw, circle, inner sep=2pt, right=of R3a-before] (R3c-before) {\small $c$};
\node[draw, circle, inner sep=2pt, right=of R3d-before](R3b-before) {\small $b$};
\draw[thick, -stealth] (R3c-before) -- (R3b-before);
\draw[thick, -stealth] (R3d-before) -- (R3b-before);
\draw[thick] (R3c-before) -- (R3a-before) -- (R3d-before);
\draw[thick] (R3a-before) -- (R3b-before);

\node[draw, circle, inner sep=2pt] at (15,0) (R3d-after) {\small $d$};
\node[draw, circle, inner sep=2pt, above=of R3d-after](R3a-after) {\small $a$};
\node[draw, circle, inner sep=2pt, right=of R3a-after] (R3c-after) {\small $c$};
\node[draw, circle, inner sep=2pt, right=of R3d-after](R3b-after) {\small $b$};
\draw[thick, -stealth] (R3c-after) -- (R3b-after);
\draw[thick, -stealth] (R3d-after) -- (R3b-after);
\draw[thick] (R3c-after) -- (R3a-after) -- (R3d-after);
\draw[thick, -stealth] (R3a-after) -- (R3b-after);

\node[single arrow, draw, minimum height=2em, single arrow head extend=1ex, inner sep=2pt] at (14.2,0.75) (R3arrow) {};
\node[above=5pt of R3arrow] {\footnotesize R3};

%
% R4
%
\node[draw, circle, inner sep=2pt] at (18,0) (R4a-before) {\small $a$};
\node[draw, circle, inner sep=2pt, above=of R4a-before](R4d-before) {\small $d$};
\node[draw, circle, inner sep=2pt, right=of R4d-before] (R4c-before) {\small $c$};
\node[draw, circle, inner sep=2pt, right=of R4a-before](R4b-before) {\small $b$};
\draw[thick, -stealth] (R4d-before) -- (R4c-before);
\draw[thick, -stealth] (R4c-before) -- (R4b-before);
\draw[thick] (R4d-before) -- (R4a-before) -- (R4c-before);
\draw[thick] (R4a-before) -- (R4b-before);

\node[draw, circle, inner sep=2pt] at (21,0) (R4a-after) {\small $a$};
\node[draw, circle, inner sep=2pt, above=of R4a-after](R4d-after) {\small $d$};
\node[draw, circle, inner sep=2pt, right=of R4d-after] (R4c-after) {\small $c$};
\node[draw, circle, inner sep=2pt, right=of R4a-after](R4b-after) {\small $b$};
\draw[thick, -stealth] (R4d-after) -- (R4c-after);
\draw[thick, -stealth] (R4c-after) -- (R4b-after);
\draw[thick] (R4d-after) -- (R4a-after) -- (R4c-after);
\draw[thick, -stealth] (R4a-after) -- (R4b-after);

\node[single arrow, draw, minimum height=2em, single arrow head extend=1ex, inner sep=2pt] at (20.2,0.75) (R4arrow) {};
\node[above=5pt of R4arrow] {\footnotesize R4};

% Separating lines
% \draw[thick] (0,-1) -- (10.5,-1);
\draw[thick] (5.25,1.75) -- (5.25,-0.25);
\draw[thick] (11.25,1.75) -- (11.25,-0.25);
\draw[thick] (17.25,1.75) -- (17.25,-0.25);
\end{tikzpicture}
}
\caption{An illustration of the four Meek rules}
\label{fig:meek-rules}
\end{figure}

There exists an algorithm \citep[Algorithm 2]{pmlr-v161-wienobst21a} that runs in $\cO(d \cdot |E|)$ time and computes the closure under Meek rules, where $d$ is the degeneracy of the graph skeleton.\footnote{A $d$-degenerate graph is an undirected graph in which every subgraph has a vertex of degree at most $d$. Note that the degeneracy of a graph is typically smaller than the maximum degree of the graph.}

\section{Deferred details}
\label{sec:appendix-deferred-details}

\subsection{Basic results}
\label{sec:appendix-basic-results}

\begin{lemma}[Equation 3.10 of \citep{graham1994concrete}]
\label{lem:eq310}
Let $f(x)$ be any continuous, monotonically increasing function with the property that $x$ is an integer if $f(x)$ is an integer.
Then, $\lceil f(x) \rceil = \lceil f(\lceil x \rceil) \rceil$.
\end{lemma}

\setcounter{theorem}{16}
\begin{lemma}
\label{lem:nested-divisions}
For any positive integer $n$ and arbitrary real numbers $x$ and $m \neq 0$, we have
$
\left\lceil \frac{\left\lceil \frac{x}{m} \right\rceil}{n} \right\rceil
= \left\lceil \frac{x}{mn} \right\rceil
$.
\end{lemma}
\begin{proof}
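Apply \cref{lem:eq310} with the function $f(y) = y/n$ on input $y = x/m$: $f$ is continuous and monotonically increasing, and since $n$ is a positive integer, $y = n \cdot f(y)$ is an integer whenever $f(y)$ is an integer.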
\end{proof}

\setcounter{theorem}{3}
\begin{lemma}
\label{lem:lowerboundineq}
For $r \geq 2$, we have
$
\frac{r-1}{2} \cdot \left( \frac{2}{r} \right)^{\frac{1}{r-1}}
\geq \frac{r}{4}
$.
\end{lemma}
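\begin{proof}
% Note: the direct bound $(2/r)^{1 + 1/(r-1)} \geq 2/r$ is false for $r > 2$ (e.g. $r = 3$), hence the monotonicity argument below.
Multiplying both sides by $4/r > 0$, the claim is equivalent to
\[
(r-1) \cdot \left( \frac{2}{r} \right)^{1 + \frac{1}{r-1}} \geq 1 .
\]
Both sides are positive, so taking logarithms and multiplying by $r - 1 > 0$, this is in turn equivalent to $g(r) \geq 0$, where
\[
g(r) = r \ln 2 + (r-1) \ln (r-1) - r \ln r .
\]
Now, $g(2) = 2 \ln 2 + 0 - 2 \ln 2 = 0$ and
\[
g'(r) = \ln 2 + \ln (r-1) - \ln r = \ln \frac{2(r-1)}{r} \geq 0
\]
for $r \geq 2$, since $2(r-1) \geq r$ whenever $r \geq 2$.
So, $g$ is non-decreasing on $[2, \infty)$ and $g(r) \geq g(2) = 0$ for all $r \geq 2$.
Thus, the inequality holds.
\end{proof}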
% \url{https://www.wolframalpha.com/input?i=solve+\%28x-1\%29\%2F2+*+\%282\%2Fx\%29\%5E\%281\%2F\%28x-1\%29\%29+\%3E\%3D+x\%2F4}

\begin{lemma}[Theorem 12 of \citep{choo2022verification}]
\label{lem:bounded-size-lb}
For any causal DAG $G^*$, we have $\nu_k(G^*) \geq \lceil \frac{\nu_1(G^*)}{k} \rceil$.
\end{lemma}

\subsection{Algorithm for bounded size interventions}

\setcounter{algorithm}{2}
\begin{algorithm}[htb]
\caption{Adaptivity-sensitive search.}
\label{alg:adaptive-search-bounded}
\begin{algorithmic}[1]
    \Statex \textbf{Input}: Essential graph $\cE(G^*)$, adaptivity round parameter $r \geq 1$, intervention size upper bound $k \geq 1$.
    \Statex \textbf{Output}: A sequence of intervention sets $\cI_1, \ldots, \cI_r$ such that $\cE_{\cI_1, \ldots, \cI_r}(G^*) = G^*$ and $|I| \leq k$ for every intervention $I \in \cI_i$, $1 \leq i \leq r$.
    \State Initialize $L = \lceil n^{1/r} \rceil$.
    \For{$i = 1, \ldots, r-1$}
        \State Initialize $\cI_i \gets \emptyset$
        \For{chain comp.\ $H \in CC(\cE_{\cI_1, \ldots, \cI_{i-1}}(G^*))$}
            \If{$H$ is a clique}
                \State Set $V' \gets V(H)$.
            \Else
                \State Compute clique tree $T_H$ of $H$.
                \State Compute $L$-balanced partitioning $S$ of $T_H$ via Algorithm 1.
                \State Let $V' \gets \cup_{K_j \in S} V(K_j)$.
            \EndIf
            \State Add output of \cref{alg:intervention-subroutine} on $V'$ to $\cI_i$.
        \EndFor
        \State Intervene on all interventions in $\cI_i$.
    \EndFor
    \State Define $\cI_r$ as output of \cref{alg:intervention-subroutine} on remaining relevant vertices and intervene on all interventions in $\cI_r$.
    \State \Return $\cI_1, \ldots, \cI_r$
\end{algorithmic}
\end{algorithm}

\begin{algorithm}[htb]
\caption{Intervention subroutine.}
\label{alg:intervention-subroutine}
\begin{algorithmic}[1]
    \Statex \textbf{Input}: Set of vertices $A$, size upper bound $k \geq 1$.
    \Statex \textbf{Output}: A $k$-separating system $B \subseteq 2^A$.
    \If{$k = 1$}
        \State Set $B \gets A$.
    \Else
        \State Define $k' = \min\{k, |A|/2\}$, $a = \lceil |A|/k' \rceil \geq 2$, and $\ell = \lceil \log_a n \rceil$.
        \State Compute labelling scheme of \cite[Lemma 1]{shanmugam2015learning} on $A$ with $(|A|, k', a)$.
        \State Set $B \gets \{S_{x,y}\}_{x \in [\ell], y \in [a]}$, where $S_{x,y} \subseteq A$ is the subset of vertices whose $x^{th}$ letter in the label is $y$.
    \EndIf
    \State \Return $B$
\end{algorithmic}
\end{algorithm}

\section{Deferred proofs}
\label{sec:appendix-proofs}

\setcounter{theorem}{1}
\begin{restatable}[Atomic worst case]{theorem}{atomicworstcase}
\label{thm:atomic-worst-case}
In the worst case, any $r$-adaptive algorithm needs to use at least
$\Omega(\min\{r,\log n\} \cdot n^{1/\min\{r,\log n\}} \cdot \nu_1(G^*))$ atomic interventions against an adaptive adversary.
\end{restatable}
\begin{proof}
Without loss of generality, we may assume $r \leq \log n$ and prove a lower bound of $\Omega(r \cdot n^{1/r} \cdot \nu_1(G^*))$.

Consider the case where the essential graph is a path on $n$ nodes and the adversary can adaptively choose the source node as long as it is consistent with the arc directions revealed thus far.
On a path essential graph, $\nu_1(G^*) = 1$.

Suppose $r = 1$.
Then, by Theorem 13, we need to intervene on a $G$-separating system, which has size $\Omega(n)$.
The claim follows since $\nu_1(G^*) = 1$.

Now, suppose $r \geq 2$.
If the current segment has length $\ell$ and $k$ interventions are performed, then there must be some segment of length at least $\ell/(k+1)$.
We recurse on that segment.
If the segment in the final round has length $\ell$, then at least $\ell/2$ interventions are needed because any $G$-separating system on a segment of length $\ell$ has size at least $\ell/2$.

Suppose the algorithm intervenes on $k_i \geq 1$ vertices in the $i$-th round, for $1 \leq i \leq r$.
Then, $k_i + 1 \leq 2 k_i$ and so $1/(k_i + 1) \geq 1/(2 k_i)$.

Then, from the above discussion,
\begin{align*}
k_r
& \geq \frac{1}{2} \cdot n \cdot \frac{1}{k_1 + 1} \cdot \frac{1}{k_2 + 1} \cdot \ldots \cdot \frac{1}{k_{r-1} + 1}\\
& \geq \frac{1}{2^r} \cdot \frac{n}{k_1 \cdot k_2 \ldots \cdot k_{r-1}}
\end{align*}

So, the number of overall interventions used is
\begin{align*}
&\; k_1 + \ldots + k_r\\
\geq &\; k_1 + \ldots + k_{r-1} + \frac{1}{2^r} \cdot \frac{n}{k_1 \cdot k_2 \ldots \cdot k_{r-1}}\\
\geq &\; (r-1) \cdot \left( \prod_{i=1}^{r-1} k_i \right)^{\frac{1}{r-1}}
+ \frac{1}{2^r} \cdot \frac{n}{k_1 \cdot k_2 \ldots \cdot k_{r-1}}
\end{align*}
where the last inequality is the AM-GM inequality.

Let $x = k_1 \cdot k_2 \ldots \cdot k_{r-1}$.
Then,
\[
\sum_{i=1}^r k_i
= k_1 + \ldots + k_r
\geq (r-1) \cdot x^{\frac{1}{r-1}} + \frac{1}{2^r} \cdot \frac{n}{x}
\]

\textbf{Case 1}: $\frac{1}{2^r} \cdot \frac{n}{x} \geq \frac{r}{4} \cdot n^{\frac{1}{r}}$

Then,
\[
\sum_{i=1}^r k_i
\geq \frac{1}{2^r} \cdot \frac{n}{x}
\geq \frac{r}{4} \cdot n^{\frac{1}{r}}
\in \Omega(r \cdot n^{\frac{1}{r}})
\]
Thus, the claim holds as $\nu_1(G^*) = 1$.

\textbf{Case 2}: $\frac{1}{2^r} \cdot \frac{n}{x} < \frac{r}{4} \cdot n^{\frac{1}{r}}$

Then,
\[
x
> \frac{4 \cdot n^{1 - 1/r}}{2^r \cdot r}
= \frac{2 \cdot n^{\frac{r-1}{r}}}{2^{r-1} \cdot r}
\]
and \cref{lem:lowerboundineq} in \cref{sec:appendix-basic-results} tells us that
\begin{align*}
(r-1) \cdot x^{\frac{1}{r-1}}
& > n^{\frac{1}{r}} \cdot \frac{r-1}{2} \cdot \left( \frac{2}{r} \right)^{\frac{1}{r-1}}\\
& \geq n^{\frac{1}{r}} \cdot \frac{r}{4} && \text{For $r \geq 2$}
\end{align*}
So,
\[
\sum_{i=1}^r k_i
\geq (r-1) \cdot x^{\frac{1}{r-1}}
\geq \frac{r}{4} \cdot n^{\frac{1}{r}}
\in \Omega(r \cdot n^{\frac{1}{r}})
\]
Thus, the claim holds as $\nu_1(G^*) = 1$.
\end{proof}

\begin{restatable}[Bounded upper bound]{theorem}{boundedupperbound}
\label{thm:bounded-upper-bound}
Let $\cE(G^*)$ be the observational essential graph of an underlying causal DAG $G^*$ on $n$ nodes.
There is a polynomial time $r$-adaptive algorithm that uses $\cO(\min\{r,\log n\} \cdot n^{1/\min\{r,\log n\}} \cdot \log k \cdot \nu_k(G^*))$ bounded sized interventions to recover $G^*$ from $\cE(G^*)$, where each intervention involves at most $k > 1$ vertices.
\end{restatable}
\begin{proof}
We invoke \cref{alg:adaptive-search-bounded} with $k > 1$.

\textbf{Number of interventions}

The high-level proof approach is exactly the same as the proof of Theorem 1, except for how we compute intervention sets from the maximal clique vertices (obtained by ``balanced partitioning'' in the first $r-1$ rounds, within the for loop) and from the remaining relevant vertices (in the final $r$-th round, outside the for loop).

In each iteration of the for loop, we intervene on at most $L$ cliques for each connected component.
To orient the edges incident to these cliques we use the labelling scheme of Lemma 14 via \cref{alg:intervention-subroutine}.
So, the number of bounded size interventions we perform per round is
\[
\cO \left( L \cdot \log k \cdot \frac{\nu_{1}(G^*)}{k} \right)
\]
By \cref{lem:bounded-size-lb}, we know that $\nu_k(G^*) \geq \lceil \frac{\nu_1(G^*)}{k} \rceil$.
So, we can re-express the above bound as $\cO \left( L \cdot \log k \cdot \nu_{k}(G^*) \right)$.
Similarly, we use $\cO \left( L \cdot \log k \cdot \nu_{k}(G^*) \right)$ bounded size interventions in the final round.
Thus, over all $r$ adaptive rounds, we use a total of
\[
\cO \left( r \cdot L \cdot \log k \cdot \nu_{k}(G^*) \right)
\]
bounded size interventions.
Substituting $L = \lceil n^{1/r} \rceil$ yields our desired bound.

\textbf{Running time}

\cref{alg:adaptive-search-bounded} only differs from Algorithm 2 by invoking \cref{alg:intervention-subroutine}, which runs in polynomial time (see Lemma 14).
Thus, \cref{alg:adaptive-search-bounded} runs in polynomial time.
\end{proof}

\section{Experiments}
\label{sec:appendix-experiments}

The experiments are conducted on an Ubuntu server with two AMD EPYC 7532 CPUs and 256GB of DDR4 RAM.
Our code and entire experimental setup are available at \url{https://github.com/cxjdavin/adaptivity-complexity-for-causal-graph-discovery}.

\subsection{Implementation details}

\paragraph{Checks to avoid redundant interventions}

The current implementation of \citep{choo2022verification}'s \texttt{separator} algorithm is actually $n$-adaptive because it performs ``checks'' before performing each intervention --- if the vertices in the proposed intervention set $S$ do \emph{not} have any unoriented incident arcs, then the intervention set $S$ will be skipped.
One may think of such interventions as ``redundant'' since they do not yield any new information about the underlying causal graph.
As such, we ran two versions of their algorithm: one without checks (i.e.\ $\cO(\log n)$-adaptive) and one with checks (i.e.\ $n$-adaptive).
Note that each check corresponds to an adaptivity round because an intervention within a batch of interventions may turn out to be redundant, but we will only know this after performing a check after some of the interventions within that batch have been executed.

\paragraph{Scaling our algorithm with checks}

Since $n^{\frac{1}{\log n}} = 2$, running Algorithm 2 (as it is) with adaptivity parameters $r \in \Omega(\log n)$ does not make much sense.
As such, we define a checking budget $b = r - \lceil \log n \rceil$ and greedily perform up to $b$ checks whilst executing Algorithm 2.
This allows Algorithm 2 to scale naturally for $r \in \Omega(\log n)$.

\paragraph{Non-adaptive intervention round}

For the final round of interventions, let $V'$ be the set of remaining relevant vertices.
From our algorithm, we know that $|V'| \leq L$ but we may even intervene on fewer vertices in the final round.
By \citep{kocaoglu2017cost}, we only need to intervene on a graph-separating system of the subgraph $G[V']$.
For atomic interventions, this exactly corresponds to a minimum vertex cover of $G[V']$.
To obtain this, we first compute a maximum independent set $S$ of $G[V']$ (which can be computed efficiently on chordal graphs \citep{gavril1972algorithms,leung1984fast}), then only intervene on $V' \setminus S$.

\paragraph{Optimization before final round}

Note that we can always compute the intervention set $F \subseteq V$ which we \emph{would} have intervened if $r=1$.
At any point in time of the algorithm, if $F$ involves fewer vertices than the number of vertices required from the $L$-partitioning, then we simply treat the current adaptivity round as the final round, choose to intervene on $F$ and use any remaining adaptive budget for performing checks.

\subsection{Synthetic graphs}

We use randomly generated synthetic moral graphs from prior works \citep{choo2022verification,squires2020active,choo2023subset}.
For each of the graph classes and parameters, we generate 100 DAGs and plot the average with an error bar.

\begin{enumerate}
    \item Erd\H{o}s-R\'{e}nyi styled graphs (used by \citep{squires2020active,choo2022verification})\\
    These graphs are parameterized by 2 parameters: number of nodes $n$ and density $\rho$.
    Generate a random ordering $\sigma$ over $n$ vertices.
    Then, set the in-degree of the $n^{th}$ vertex (i.e.\ the last vertex in the ordering) to be $X_n = \max\{1, \texttt{Binomial}(n-1, \rho)\}$, and sample $X_n$ parents uniformly from the nodes earlier in the ordering.
    Finally, chordalize the graph by running the elimination algorithm of \citep{koller2009probabilistic} with elimination ordering equal to the reverse of $\sigma$.\\
    \textbf{Parameters used:} $n = \{10, 15, 20, \ldots, 95, 100\}$ and $\rho = 0.1$.
    \item Tree-like graphs (used by \citep{squires2020active,choo2022verification})\\
    These graphs are parameterized by 4 parameters: number of nodes $n$, degree $d$, $e_{\min}$, and $e_{\max}$.
    First, generate a complete directed $d$-ary tree on $n$ nodes.
    Then, add $\texttt{Uniform}(e_{\min}, e_{\max})$ edges to the tree.
    Finally, compute a topological order of the graph by DFS and triangulate the graph using that order.
    As the original definition of this graph class by \citep{squires2020active} becomes very sparse as $n$ grows, we tweaked the other parameters to scale accordingly by defining new parameters $d_{prop}, e_{\min, prop}, e_{\max, prop} \in [0,1]$ as follows: $d = n \cdot d_{prop}$, $e_{\min} = n \cdot e_{\min, prop}$, and $e_{\max} = n \cdot e_{\max, prop}$.\\
    \textbf{Parameters used:} $n = \{100, 150, 200, \ldots, 450, 500\}$, $d_{prop} = 0.4$, $e_{\min, prop} = 0.2$, $e_{\max, prop} = 0.5$.
    \item $G(n,p)$-union-tree (used by \citep{choo2023subset})\\
    These graphs are parameterized by 2 parameters: number of nodes $n$ and edge probability $p$.
    An Erd\H{o}s-R\'{e}nyi $G(n,p)$ and a random tree $T$ on $n$ vertices are generated.
    Take the union of their edge sets, orient the edges in an acyclic fashion, then add arcs to remove v-structures.\\
    \textbf{Parameters used:} $n = \{10, 15, 20, \ldots, 95, 100\}$ and $p=0.03$.
\end{enumerate}

\subsection{Algorithms benchmarked}

While both the algorithm of \citep{choo2022verification} and Algorithm 2 have been implemented to take in a parameter $k$ for bounded-size interventions, our experiments focused on the case of atomic interventions, i.e.\ $k = 1$.

\texttt{separator}:\quad
Algorithm of \citep{choo2022verification}. With checks, it allows for full adaptivity.

\texttt{separator\_no\_check}:\quad
\texttt{separator} but we remove checks that avoid redundant interventions, i.e.\ $\cO(\log n)$ rounds of adaptivity.

\texttt{adaptive\_r1}:\quad
Algorithm 2 with $r = 1$, i.e.\ non-adaptive

\texttt{adaptive\_r2}:\quad
Algorithm 2 with $r = 2$

\texttt{adaptive\_r3}:\quad
Algorithm 2 with $r = 3$

\texttt{adaptive\_rlogn}:\quad
Algorithm 2 with $r = \log_2 n$

\texttt{adaptive\_r2logn}:\quad
Algorithm 2 with $r = 2 \log_2 n$. Can perform checks that avoid redundant interventions.

\texttt{adaptive\_r3logn}:\quad
Algorithm 2 with $r = 3 \log_2 n$. Can perform checks that avoid redundant interventions.

\texttt{adaptive\_rn}:\quad
Algorithm 2 with $r = n$, i.e.\ full adaptivity allowed

\subsection{Experimental results}

As expected, we observe that more rounds of adaptivity lead to a lower number of interventions required.
When $r \in \cO(\log n)$, Algorithm 2 can match \citep{choo2022verification} with checks disabled.
When $r=n$, Algorithm 2 can match \citep{choo2022verification} with its full adaptivity.

\begin{figure}[htb]
\centering
\begin{subfigure}[t]{\linewidth}
    \centering
    \includegraphics[width=\linewidth]{exp1_interventioncount.png}
    \caption{Number of interventions}
\end{subfigure}
\\
\begin{subfigure}[t]{\linewidth}
    \centering
    \includegraphics[width=\linewidth]{exp1_time.png}
    \caption{Time}
\end{subfigure}
\caption{Experiment 1}
\label{fig:exp1}
\end{figure}

\begin{figure}[htb]
\centering
\begin{subfigure}[t]{\linewidth}
    \centering
    \includegraphics[width=\linewidth]{exp2_interventioncount.png}
    \caption{Number of interventions}
\end{subfigure}
\\
\begin{subfigure}[t]{\linewidth}
    \centering
    \includegraphics[width=\linewidth]{exp2_time.png}
    \caption{Time}
\end{subfigure}
\caption{Experiment 2}
\label{fig:exp2}
\end{figure}

\begin{figure}[htb]
\centering
\begin{subfigure}[t]{\linewidth}
    \centering
    \includegraphics[width=\linewidth]{exp3_interventioncount.png}
    \caption{Number of interventions}
\end{subfigure}
\\
\begin{subfigure}[t]{\linewidth}
    \centering
    \includegraphics[width=\linewidth]{exp3_time.png}
    \caption{Time}
\end{subfigure}
\caption{Experiment 3}
\label{fig:exp3}
\end{figure}

\bibliography{uai2023-template}

\end{document}
