\section{Proof of Theorem \ref{theorem: LB-Tree}}\label{secappendix: proof of lower bound for tree}

%\VIN{Consider an n-ary tree $\mathcal{T}$ with root $X_1$ and all the nodes at the last level being connected to the outcome node $Y$. We assume that $\mathcal{T}$ has at least four nodes and each node except the leaves have at least two children. Construct a subgraph $\mathcal{T}_M$ of $\mathcal{T}$, by selecting $M$ nodes from $\mathcal{T}$, such that a node is selected only after all its children are selected. Let the maximum distance of $Y$ from a node in $\mathcal{T}_M$ be $h$. Now we define distributions $\mathbb{P}_{0}, \ldots , \mathbb{P}_{M}$ all compatible with $\mathcal{G}$ such that the optimal arm in the CBN $\mathcal{C}_i = (\mathcal{G}, \mathbb{P}_i)$ is $a_{i,x}$ for $i\in [N]$, and for $\mathcal{C}_0 = (\mathcal{G}, \mathbb{P}_0)$ every arm is an optimal arm.}

\VIN{Throughout this proof we assume the following terminology: a) a node is a root node if it has no parents, b) a node is a leaf node if it has no children. Consider an n-ary tree $\mathcal{T} \in \mathsf{T}$ on $N$ intervenable nodes. Note that since $\mathcal{T}$ is a tree, each node $X_i$ for $i\in [N]$ has at most one parent. In addition, $\mathcal{T}$ has one special node $Y$, called the outcome. There is a directed edge from every leaf node in $\mathcal{T}$ to $Y$; let $L_{\mathcal{T}}$ be the set of all leaf nodes. We use $\mathbf{V}$ to denote the set of nodes in $\mathcal{T}$, that is, $\mathbf{V} = \{X_1, \ldots, X_N, Y\}$. Without loss of generality, we assume that $X_1, \ldots, X_N$ are in reverse topological order, that is, $X_1$ is a leaf node, $X_N$ is a root node, $X_{N-1}$ is either a root node or a child of $X_N$, and so on. Let $\mathcal{T}_M$ be the sub-graph of $\mathcal{T}$ defined by the nodes $X_1, \ldots, X_M$. An edge belongs to $\mathcal{T}_M$ if both its endpoints belong to $\{X_1, \ldots, X_M\}$. Further, let $h$ be the maximum number of nodes in a (directed) path from a root node to $Y$. Now we define distributions $\mathbb{P}_{0}, \ldots , \mathbb{P}_{M}$ all compatible with $\mathcal{T}$ such that the optimal arm in the CBN $\mathcal{C}_i = (\mathcal{T}, \mathbb{P}_i)$ is $a_{i,1}$ for $i\in [M]$, and for $\mathcal{C}_0 = (\mathcal{T}, \mathbb{P}_0)$ every arm is an optimal arm.}

\begin{comment}
\AUR{Consider an n-ary tree $\mathcal{T}$ with root $X_N$ and all the nodes at the last level being connected to the outcome node $Y$. We assume that $\mathcal{T}$ has at least four nodes and each node except the leaves have at least two children. Also without loss of generality assume that the $X_1, \dots, X_N$ are in the reverse topological order. Construct a subgraph $\mathcal{T}_M$ of $\mathcal{T}$, by selecting $X_1, \dots, X_M$ nodes from $\mathcal{T}$. Let the maximum distance of $Y$ from a node in $\mathcal{T}_M$ be $h$. Now we define distributions $\mathbb{P}_{0}, \ldots , \mathbb{P}_{M}$ all compatible with $\mathcal{T}$ such that the optimal arm in the CBN $\mathcal{C}_i = (\mathcal{T}, \mathbb{P}_i)$ is $a_{i,1}$ for $i\in [M]$, and for $\mathcal{C}_0 = (\mathcal{T}, \mathbb{P}_0)$ every arm is an optimal arm.}
\end{comment}

\textbf{Defining} $\mathbb{P}_0$: For $X_i$ not belonging to $\mathcal{T}_M$ let $\mathbb{P}_0(X_i = 1 | .) = 0.5$, and for $X_i$ belonging to $\mathcal{T}_M$ and for an appropriately chosen $\alpha$ let 
\begin{align*}
&\mathbb{P}_0(X_i = 1) = \alpha ~~~&\text{ If }X_i \text{ is a root node,} \\
&\mathbb{P}_0(X_i = 1 \mid \mathbf{Pa}(X_i) = 0) = \alpha ~~~&\text{ If }X_i \text{ is not a root node,} \\
&\mathbb{P}_0(X_i = 1 \mid \mathbf{Pa}(X_i) = 1) = 1-\alpha ~~~&\text{ If }X_i \text{ is not a root node,} \\
&\mathbb{P}_0(Y=1 \mid . ) = 0.5 ~~~~ \mathbb{P}_0(Y=0 \mid . ) = 0.5 
\end{align*}
The value of $\alpha$ is appropriately chosen later to achieve the desired lower bound. Note that in the above equations if $X_i$ is not a root node then $\mathbf{Pa}(X_i)$ is a singleton set. Also, $\mathbb{P}_0(Y=1| . )$ denotes the probability of $Y=1$ conditioned on any value of its parents. Next, we define $\mathbb{P}_i$ for $i\in [M]$.

\textbf{Defining} $\mathbb{P}_i$: Let $L_i$ be the set of leaf nodes that are reachable from $X_i$, that is, there is a directed path from $X_i$ to every leaf node in $L_i$. Note that if $X_i$ is a leaf then $L_i = \{X_i\}$. We use $L_i = \mathbf{1}$ and $L_i = \mathbf{0}$ to denote all nodes in $L_i$ evaluated to $1$ and $0$ respectively. Also, let $L_{\mathcal{T}}^M$ be the set of all leaves in $\mathcal{T}_M$ and $L'_i = L_{\mathcal{T}}^M \setminus L_i$. Then
%Define for $X_i$, a distribution $\mathbb{P}_i$ as follows. The rest of the conditional distributions remain same.
\begin{align*}
\mathbb{P}_i(Y = 1 \mid L_i = \mathbf{1}, L'_i = \mathbf{0}) = 0.5 + \epsilon~.
\end{align*}
The value of $\epsilon$ is appropriately chosen later to achieve the desired lower bound. The distributions of $X_i$ given its parents corresponding to $\mathbb{P}_i$ are the same as those defined for $\mathbb{P}_0$.

We set $\alpha = \min\{(2h|L_\mathcal{T}| + 2^{h+1})^{-1}, (2^{h}|L_\mathcal{T}|M)^{-1}\}$ and hence $\alpha < \frac{1}{M}$. Using this it is easy to see that $m(\mathcal{C}_i) = M$ for $i\in [0,M]$ and $M > 4$. Additionally, in $\mathcal{C}_i$ arm $a_{i,1}$ is the optimal arm for $i\in [1,M]$ and the reward for every arm in $\mathcal{C}_0$ is $0.5$.
% It is easy to see that under the distribution $P_i$, the optimal action is $do(X_i = 1)$. 
We will denote $a^*$ as the optimal arm for every $\mathcal{C}_i$, and note that $a^* = a_{i,1}$ for $\mathcal{C}_i$, where $i\in [M]$. First, in Lemma \ref{lemma: lower bound on regret}, we lower bound the regret of returning a sub-optimal arm in $\mathcal{C}_i$ at the end of $T$ rounds. Further, in Lemma \ref{lemma: lower bound on the probability of choosing a sub-optimal arm}, we show that any algorithm would have a non-trivial probability of returning a sub-optimal arm in at least one of the constructed CBNs. Finally, we would use Lemmas \ref{lemma: lower bound on regret} and \ref{lemma: lower bound on the probability of choosing a sub-optimal arm} to lower bound the expected regret of any algorithm. 
\VIN{Let $\text{rew}_i(a_{j, x})$ denote the expected reward of action $do(X_j=x)$ under the distribution $\mathbb{P}_i$. We deviate from the usual notation of $\mu$ in this case, because the reward now depends on the arm and the corresponding distribution.} We require the following sets in Lemmas \ref{lemma: lower bound on regret} and \ref{lemma: lower bound on the probability of choosing a sub-optimal arm}: $V_1 = L_i \setminus L_j$, $V_2 = L_i \cap L_j$, $V_3 = L_j \setminus L_i$, $V_4 = L_{\mathcal{T}}^M \setminus (L_i \cup L_j)$, and $V_5 = V \setminus L_{\mathcal{T}}^M$. 

\begin{lemma}\label{lemma: lower bound on regret}
For every $i \in [1,M]$, $j \in [1,N]$, $x\in \{0,1\}$, and $(j,x) \neq (i,1)$ the following holds: $\text{rew}_i(a_{i,1}) - \text{rew}_i(a_{j,x}) \geq 0.5\epsilon$.
\end{lemma}
\begin{proof}
For any $i,j \in [M]$, we have
\begin{align}
\text{rew}_i(a_{i,1}) &= 0.5 + \mathbb{P}_i(V_4 = \mathbf{0}, V_1=\mathbf{1}, V_2=\mathbf{1}, V_3=\mathbf{0} \mid do(X_i =1))(\epsilon) \label{eqn: rew for i} \\
\text{rew}_i(a_{j,1}) &= 0.5 + \mathbb{P}_i(V_4 = \mathbf{0}, V_1=\mathbf{1}, V_2=\mathbf{1}, V_3=\mathbf{0}\mid do(X_j =1))(\epsilon) \label{eqn: rew for j}
\end{align}
Subtracting Equation \ref{eqn: rew for j} from Equation \ref{eqn: rew for i} we have
\begin{align*}
&\text{rew}_i(a_{i,1}) - \text{rew}_i(a_{j,1}) \nonumber\\
&= \mathbb{P}_i(V_4 = \mathbf{0}) \big[\mathbb{P}_i(V_1=\mathbf{1}, V_2=\mathbf{1}, V_3=\mathbf{0} \mid do(X_i = 1)) - \mathbb{P}_i(V_1=\mathbf{1}, V_2=\mathbf{1}, V_3=\mathbf{0} \mid do(X_j = 1))\big] \epsilon \nonumber\\
&= \mathbb{P}_i(V_4 = \mathbf{0}) \big[\mathbb{P}_i(V_3 = \mathbf{0}) \mathbb{P}_i(V_1=\mathbf{1}, V_2=\mathbf{1} \mid do(X_i = 1)) - \mathbb{P}_i(V_1=\mathbf{1}) \mathbb{P}_i(V_2=\mathbf{1}, V_3=\mathbf{0} \mid do(X_j = 1))\big] \epsilon \nonumber\\
&\underset{(i)}{\geq} (1-\alpha)^{h|V_4|}\big[(1-\alpha)^{h(|L_i|+|V_3|)} - (2^h\alpha)\big] \epsilon \nonumber \\
&\geq ((1-\alpha)^{h|L_\mathcal{T}|} - 2^h\alpha)\epsilon \nonumber \\
&\geq ((1-h|L_\mathcal{T}|\alpha) - 2^h\alpha)\epsilon \nonumber \\
&\geq 0.5\epsilon
\end{align*}
(i) in the above equations follows from the definitions of $h$ and $\mathbb{P}_i$.
Similarly, it can be shown that $\text{rew}_i(a_{i,1}) - \text{rew}_i(a_{j,0}) \geq 0.5 \epsilon$ for $j \in [N]$, and $\text{rew}_i(a_{i,1}) - \text{rew}_i(a_{j,1}) \geq 0.5 \epsilon$ for $j \in [M+1,N]$. Also $\text{rew}_i(a_{i,1}) - \text{rew}_i(a_0) \geq 0.5 \epsilon$.
\end{proof}
Let \texttt{ALG} be an algorithm that outputs arm $a_T$ at the end of $T$ rounds. We choose $\epsilon = \min \{\frac{1}{4}, \sqrt{\frac{M}{18T}}\}$. Note that corresponding to every $\mathcal{C}_i$ for $i\in [0,M]$, \texttt{ALG} and $\mathbb{P}_i$ together define a probability measure on all the sampled values of the nodes of $\mathcal{T}$ over $T$ rounds. Denote $\mathbb{D}_i$ as this measure and $\mathbb{E}_i$ as the expectation over $\mathbb{D}_i$ for $i \in [0,M]$. Let $\mathcal{G}_{t}$ be the sampled values of the nodes of $\mathcal{T}$ at time $t$ and let $\mathbf{G}_{t} = \{\mathcal{G}_{1}, \ldots, \mathcal{G}_{t}\}$. Also, for $i \in [0,M]$ let $\mathbb{D}_i(.|\mathbf{G}_{t-1}) = \mathbb{P}_i^t(.)$; here $\mathbb{D}_i(.|\mathbf{G}_{t-1})$ denotes the probability of the sampled values of the nodes of $\mathcal{T}$ conditioned on the history up to time $t-1$. Observe that conditioned on history $\mathbf{G}_{t-1}$, $\texttt{ALG}$ determines an arm, say $a_t$, to pull at time $t$ (either deterministically or in a randomized way), and for $j,j'\in [1,N]$ if $a_t = a_{j,x}$ then $\mathbb{P}_i^t(X_{j'} = x | do(X_j=x)) = \mathbb{P}_i(X_{j'}=x| do(X_j=x))$.



\begin{lemma}\label{lemma: lower bound on the probability of choosing a sub-optimal arm}
For any algorithm \texttt{ALG} there exists an $i \in [M]$ such that 
$\mathbb{D}_i(a_T \neq a_{i,1}) \geq \frac{\frac{M}{4e} - 1}{M}$.   
\end{lemma}
\begin{proof}
We use $KL(\mathbb{D}_0, \mathbb{D}_i)$ to denote the KL divergence between $\mathbb{D}_0$ and $\mathbb{D}_i$ for any $i\in [M]$. Let $N^{(i,1)}_T$ be the number of times  \texttt{ALG} plays the arm $a_{i,1}$ at the end of $T$ rounds. Also, let $\mathcal{B} = \{ a_{i,1} \mid  i \leq M \textit{ and } \mathbb{E}_0[N^{(i,1)}_T] \leq 2T/M \}$. Observe that $|\mathcal{B}| \geq M/2$, as otherwise the sum of the expected number of arm pulls of arms not in $\mathcal{B}$ would be greater than $T$. First, using Lemma 2.6 from \cite{10.5555/1522486}, we have, 
\begin{equation*}
\mathbb{D}_0(a_T = a_{i,1}) + \mathbb{D}_i(a_T \neq a_{i,1}) \geq 
\frac{1}{2}\cdot  \exp{(-KL(\mathbb{D}_0, \mathbb{D}_i))} 
\end{equation*}
Rearranging and summing the above equation over arms in $\mathcal{B}$, and observing that $\sum_{a_{i,1} \in \mathcal{B}} \mathbb{D}_0(a_T = a_{i,1}) \leq 1$ we have
\begin{equation}\label{equation: lower bound in terms of KL Divergence}
\sum_{a_{i,1} \in \mathcal{B}} \mathbb{D}_{i}(a_T \neq a_{i,1}) \geq \frac{1}{2}\cdot \sum_{a_{i,1} \in \mathcal{B}} \exp(-KL(\mathbb{D}_0, \mathbb{D}_{i}))-1 
\end{equation} 
Now we bound $\exp(-KL(\mathbb{D}_0, \mathbb{D}_{i}))$ for every $i$ such that $a_{i,1} \in \mathcal{B}$.
Using the chain rule for product distributions (see \cite{492488} and Chapter 2 in \cite{Slivkins_Book}) the KL divergence of $\mathbb{D}_0$ and $\mathbb{D}_i$ for any $i \in [M]$ can be written as 
\begin{align}
KL(\mathbb{D}_0, \mathbb{D}_i) = \sum_{t=1}^T KL(\mathbb{D}_0(\mathcal{G}_{t} | \mathbf{G}_{t-1}), \mathbb{D}_i(\mathcal{G}_{t} | \mathbf{G}_{t-1})) = \sum_{t=1}^T KL(\mathbb{P}_0^t(\mathcal{G}_t), \mathbb{P}_i^t(\mathcal{G}_t)) \label{kl-divergence-summation-tree}
\end{align}
Each term on the right hand side of the above summation can be computed as follows:
\begin{align}
KL(\mathbb{P}_0^t, \mathbb{P}_i^t) &= \sum_{\mathbf{v}} \mathbb{P}_0^t(\mathbf{V} = \mathbf{v}) \log \frac{\mathbb{P}_0^t(\mathbf{V} = \mathbf{v})}{\mathbb{P}_i^t(\mathbf{V}= \mathbf{v})} \nonumber \\
    &\underset{(i)}{=} \sum_{x,\mathbf{v_5}} \mathbb{P}_0^t(Y=x, L_i = \mathbf{1}, L'_i = \mathbf{0}, V_5 = \mathbf{v_5}) \log \frac{\mathbb{P}_0^t(Y=x | L_i = \mathbf{1}, L'_i = \mathbf{0}, V_5 = \mathbf{v_5})}{\mathbb{P}_i^t(Y=x | L_i = \mathbf{1}, L'_i = \mathbf{0}, V_5 = \mathbf{v_5})} \nonumber \\
    &\underset{(ii)}{=} 0.5 \cdot \mathbb{P}_0^t(L_i = \mathbf{1}, L'_i = \mathbf{0}) \Big[ \log\frac{0.5}{0.5 + \epsilon} + \log\frac{0.5}{0.5 - \epsilon} \Big] \nonumber \\
    &\underset{(iii)}{\leq} 0.5 \Big(\mathbb{P}_0^t\{do(X_i=1)\} + 2^h|L_\mathcal{T}|\alpha \Big) \log\frac{0.25}{0.25 - \epsilon^2} \nonumber \\
    &= -0.5 \Big(\mathbb{P}_0^t\{do(X_i=1)\} + 2^h|L_\mathcal{T}|\alpha \Big) \log(1-4\epsilon^2) \nonumber \\
    &= 0.5 \Big(\mathbb{P}_0^t\{do(X_i=1)\} + 2^h|L_\mathcal{T}|\alpha \Big) \Big(4\epsilon^2 + \frac{(4\epsilon^2)^2}{2} + \frac{(4\epsilon^2)^3}{3} + \dots \Big) \nonumber \\
    &\leq 6 \Big(\mathbb{P}_0^t\{do(X_i=1)\} + 2^h|L_\mathcal{T}|\alpha \Big) \epsilon^2~. \label{equation: individual term KL divergence}
\end{align}
In the above equations: (i) follows by observing that for every other evaluation of $\mathbf{V}$ the distributions $\mathbb{P}_0^t$ and $\mathbb{P}_i^{t}$ are the same, hence the corresponding terms in the KL divergence amount to zero, (ii) follows from the definitions of $\mathbb{P}_0^t$ and $\mathbb{P}_i^t$, and (iii) follows by observing that 
\[\mathbb{P}_0^t(L_i = \mathbf{1}, L'_i = \mathbf{0}) \leq \mathbb{P}_0^t\{do(X_i=1)\} + 2^h|L_\mathcal{T}|\alpha ~.\]
Using Equations \ref{kl-divergence-summation-tree} and \ref{equation: individual term KL divergence}, we have
for every $a_{i,1} \in \mathcal{B}$,
\begin{equation}\label{equation: upper bound on KL divergence}
KL(\mathbb{D}_0, \mathbb{D}_i) \leq 6 \big(\mathbb{E}_0[N^{(i,1)}_T] + 2^h|L_\mathcal{T}|\alpha T \big) \epsilon^2  \underset{(i)}{\leq} \frac{18T}{M}\epsilon^2 \leq 1~,
\end{equation}
where (i) follows from the definition of $\mathcal{B}$. Finally, using Equations \ref{equation: lower bound in terms of KL Divergence} and \ref{equation: upper bound on KL divergence}, and $|\mathcal{B}|\geq M/2$, we have
\begin{align*}
\sum_{a_{i,1} \in \mathcal{B}} \mathbb{D}_{i}(a_T \neq a_{i,1}) &\geq \frac{1}{2} \sum_{a_{i,1} \in \mathcal{B}} \exp(-KL(\mathbb{D}_0, \mathbb{D}_{i}))-1 \nonumber \\
    &\geq \frac{|\mathcal{B}|}{2e} - 1 \nonumber\\
    &\geq \frac{M}{4e} - 1~.
\end{align*}
Therefore, as $|\mathcal{B}| \leq M$, by an averaging argument there exists an $i \in [M]$ such that 
\begin{align*}
\mathbb{D}_i(a_T \neq a_{i,1}) \geq \frac{\frac{M}{4e} - 1}{M} ~.   
\end{align*}
\end{proof}

From Lemmas \ref{lemma: lower bound on regret} and \ref{lemma: lower bound on the probability of choosing a sub-optimal arm} for any algorithm \texttt{ALG}, \AUR{if $\epsilon < \frac{1}{4}$} then the  expected simple regret of \texttt{ALG} can be lower bounded as follows
\begin{align}
r_{\texttt{ALG}}(T) \geq \mathbb{D}_i(a_T \neq a_{i,1})\frac{1}{2}\epsilon \geq \frac{\frac{M}{4e} - 1}{M} \cdot (\frac{1}{2} \epsilon) \geq \frac{\frac{M}{4e}-1}{2M} \sqrt{\frac{M}{18T}}~.
\end{align}
\AUR{Otherwise, if $\epsilon \geq \frac{1}{4}$ then $M \geq T$, so $\sqrt{M/T} = \Omega(1)$ and regret $r_{\texttt{ALG}}(T) \geq \Omega(1)$}. 
Hence, for any algorithm there exists an $i\in [0,M]$ such that the expected simple regret of the algorithm on $\mathcal{C}_i$ is $\Omega\bigg(\sqrt{\frac{m(\mathcal{C}_i)}{T}} \bigg)$.

%%%%%%%%%%%%%%%%%%%%%%%%%%Comments%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{comment}Let $\mathbb{S}_i$ denote the measure on all observations over T rounds for $i \in [0, M]$ and $E_i$ is the expectation in $\mathbb{S}_i$. Let $G_t$ be the observation at time $t$ and $\mathbf{G}_t$ is the observation till time $t$, that is $\mathbf{G}_t = \{\mathbf{G}_1, \dots, \mathbf{G}_t\}$. Denote $\mathbb{S}_i(.|\mathbf{G}_{t-1}) = \mathbb{P}_i^t(.)$, and recall that for any node $X_j$ that is not acted upon by an algorithm, $\mathbb{P}_i^t(X_j | .) = \mathbb{P}_i(X_j | .)$ 
\end{comment}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Proof of Theorem \ref{theorem: LB-given-q}}\label{secappendix: lower bound for tree given q}
%\VIN{We begin by constructing the causal graph $\mathcal{G}$ on $N+1$ nodes $\{X_1, \ldots, X_N, Y\}$, where $N\geq 2$. In $\mathcal{G}$, $X_1$ is the parent of $X_2, \dots, X_N$ and there are edges from all node to $Y$. The strategy remains the same as in the proof of Theorem \ref{theorem: LB-given-q}; we will construct $\mathcal{P}_0, \ldots , \mathcal{P}_{N}$ such that on at least one CBN $\mathcal{C}_i = (\mathcal{G},\mathbb{P}_i)$ the expected simple regret of any algorithm is tight. Now, we define the distribution $\mathbb{P}_0$, and then define $\mathbb{P}_1, \ldots, \mathbb{P}_{N}$ dependent on the value of $m(\mathcal{C}_0)$. Recall, we are given a valid $\mathbf{q}$.}

\AUR{We begin by constructing the causal graph $\mathcal{G}$ on $N+1$ nodes $\{X_1, \ldots, X_N, Y\}$, where $N\geq 3$. In $\mathcal{G}$, $X_N$ is the parent of $X_1, \dots, X_{N-1}$ and there is a directed edge from each node to the outcome node $Y$. The strategy remains the same as in the proof of Theorem \ref{theorem: LB-Tree}. Now, given $q_1, q_2, \dots, q_N$ compatible with the graph $\mathcal{G}$, we will construct $\mathbb{P}_0, \ldots , \mathbb{P}_{N}$ such that on at least one CBN $\mathcal{C}_i = (\mathcal{G},\mathbb{P}_i)$ the expected simple regret of any algorithm is tight. Also, without loss of generality, assume that $q_1 \leq q_2 \leq \dots \leq q_N$.}

\textbf{Defining} $\mathbb{P}_0$: For all the nodes in the graph $\mathcal{G}$, we define the distribution $\mathbb{P}_0$ as follows:
\begin{align*}
&\mathbb{P}_0(X_N = 1) = q_N \\
&\mathbb{P}_0(X_i = 1 | X_N = 0) = \frac{q_i}{1-q_N} \\
&\mathbb{P}_0(X_i = 1 | X_N = 1) = \frac{1}{2} \\
&\mathbb{P}_0(Y = 1 | . ) = 0.5
\end{align*}

$\mathbb{P}_0(Y = 1 | . )$ denotes the probability of $Y = 1$ conditioned on any value of the parents. Also, note that since $q_1, \dots, q_N$ are compatible with the given graph $\mathcal{G}$, we have, for any $i \neq N$, $q_i = \min_{x_i, x_N} \mathbb{P}_0(X_i = x_i, X_N = x_N) \leq \mathbb{P}_0(X_i = 1, X_N = 1) = q_N/2$.  In addition, $\mathbb{P}_0(X_i = 1 | X_N = 0) = q_i/(1-q_N) \leq 2q_i$. Let $M = m(\mathcal{C}_i)$ for all $i \in [N]$ and $M' = M-1$. 

%Let the $N \geq 2$ nodes of the Causal Bayesian Network be $X_1, \dots, X_N$ and the outcome node be $Y$. Further, let 
%Then define the distribution of the graph as follows.

% Note that, when $i \neq 1$, then $q_{i} \leq q_{1}/2$. \VIN{Now, let $M = m(\mathcal{C}_0)$}. The conditional distributions can be written as follows:

% \begin{align}
% \mathbb{P}_0(X_i = 1| X_1 = 0) &= \frac{P_0(X_i=1, X_1=0)}{P_0(X_1 = 0)} \nonumber \\
% &\leq 2q_{i,1} \nonumber
% \end{align}

\textbf{Case a:} $M \geq 12$.

\textbf{Defining} $\mathbb{P}_i$: For $i = N$, define $\mathbb{P}_N(Y = 1 | X_N = 1) = 0.5 + \epsilon$, and for $i \neq N$, $\mathbb{P}_i(Y = 1 | X_i = 1, X_N = 0) = 0.5 + \epsilon$. The remaining conditional distributions are the same as those of $\mathbb{P}_0$.

Now, it is easy to see that the optimal action for $\mathbb{P}_i$ is $a_{i,1}$. \VIN{As in proof of Theorem \ref{theorem: LB-Tree}, let $\text{rew}_i(a_{j, x})$ denote the expected reward of action $do(X_j=x)$ under the distribution $\mathbb{P}_i$}.

\begin{lemma}\label{lemma 4.2: LB of regret of sub-optimal arm}
For every $i \in [M']$, $j \in [N]$, $x\in \{0,1\}$, and $(j,x) \neq (i,1)$ the following holds: $\text{rew}_i(a_{i,1}) - \text{rew}_i(a_{j,x}) \geq 0.1\epsilon$.
\end{lemma}
\begin{proof}

For $i=N$, the regret for choosing a sub-optimal arm $a$ is $\text{rew}_N(a_{N,1}) - \text{rew}_N(a) \geq (1-q_{N}) \epsilon \geq 0.5\epsilon$. For $i \neq N$, the regret for choosing a sub-optimal arm $a_{j,x}$, where $j \neq N$ is as follows:
\begin{align}
\text{rew}_i(a_{i,1}) - \text{rew}_i(a_{j,x}) &\geq (1-q_{N}) \epsilon - q_{i}\epsilon \nonumber \\
&\geq \bigg(1-\frac{3q_{N}}{2}\bigg) \epsilon \nonumber \\
&\geq 0.25 \epsilon \nonumber
\end{align}

For $j=N$, the regret is as follows:
\begin{align}
\text{rew}_i(a_{i,1}) - \text{rew}_i(a_{N,0}) &= (1-q_{N}) \epsilon - \mathbb{P}_i(X_i = 1 | X_N = 0)\epsilon \geq (0.5-2q_{i}) \epsilon \nonumber \\
\text{rew}_i(a_{i,1}) - \text{rew}_i(a_{N,1}) &= (1-q_{N}) \epsilon \geq 0.5 \epsilon \nonumber
\end{align}

Hence, if $q_{i} \leq 1/M' \leq \frac{1}{5}$, the regret of pulling a sub-optimal arm is at least $0.1 \epsilon$.

\end{proof}

Let \texttt{ALG} be an algorithm that outputs arm $a_T$ at the end of $T$ rounds. We choose $\epsilon = \min \{\frac{1}{4}, \sqrt{\frac{M'}{24T}}\}$. For $i \in [N]$, denote $\mathbb{D}_i$ as the measure on all the sampled values of the nodes of $\mathcal{G}$ over $T$ rounds and $\mathbb{E}_i$ as the expectation over $\mathbb{D}_i$. Let $\mathcal{G}_{t}$ be the sampled values of the nodes of $\mathcal{G}$ at time $t$ and let $\mathbf{G}_{t} = \{\mathcal{G}_{1}, \ldots, \mathcal{G}_{t}\}$. Also, for $i \in [0,M']$ let $\mathbb{D}_i(.|\mathbf{G}_{t-1}) = \mathbb{P}_i^t(.)$. Note that \texttt{ALG} determines the arm $a_t$ conditioned on $\mathbf{G}_{t-1}$ (either in a deterministic or randomized way). Also for $j, j' \in [1,N]$, if $a_t = a_{j,x}$ and $j' \neq j$, then $\mathbb{P}_i^t(X_{j'} = x | do(X_j=x)) = \mathbb{P}_i(X_{j'}=x| do(X_j=x))$.

\begin{lemma}\label{lemma 4.2: LB of probability of selecting a sub-optimal arm}
For any algorithm \texttt{ALG}, there exists an $i \in [M']$, such that $\mathbb{D}_i(a_T \neq a_{i,1}) \geq \frac{\frac{M'}{4e}-1}{M'}$.
\end{lemma}

\begin{proof}
We use $KL(\mathbb{D}_0, \mathbb{D}_i)$ to denote the KL divergence between $\mathbb{D}_0$ and $\mathbb{D}_i$ for any $i\in [N]$. Let $N^{(i,1)}_T$ be the number of times  \texttt{ALG} plays the arm $a_{i,1}$ at the end of $T$ rounds. Also, let $\mathcal{B} = \{ a_{i,1} \mid  i \leq M' \textit{ and } \mathbb{E}_0[N^{(i,1)}_T] \leq 2T/M' \}$. Observe that $|\mathcal{B}| \geq M'/2$, as otherwise the sum of the expected number of arm pulls of arms not in $\mathcal{B}$ would be greater than $T$. First, using Lemma 2.6 from \cite{10.5555/1522486}, we have, 
\begin{equation*}
\mathbb{D}_0(a_T = a_{i,1}) + \mathbb{D}_i(a_T \neq a_{i,1}) \geq 
\frac{1}{2}\cdot  \exp{(-KL(\mathbb{D}_0, \mathbb{D}_i))} 
\end{equation*}
Rearranging and summing the above equation over arms in $\mathcal{B}$, and observing that $\sum_{a_{i,1} \in \mathcal{B}} \mathbb{D}_0(a_T = a_{i,1}) \leq 1$ we have
\begin{equation}\label{equation: lower bound in terms of KL Divergence 2}
\sum_{a_{i,1} \in \mathcal{B}} \mathbb{D}_{i}(a_T \neq a_{i,1}) \geq \frac{1}{2}\cdot \sum_{a_{i,1} \in \mathcal{B}} \exp(-KL(\mathbb{D}_0, \mathbb{D}_{i}))-1 
\end{equation} 
Now we bound $\exp(-KL(\mathbb{D}_0, \mathbb{D}_{i}))$ for every $i$ such that $a_{i,1} \in \mathcal{B}$.
Using the chain rule for product distributions (see \cite{492488} and Chapter 2 in \cite{Slivkins_Book}) the KL divergence of $\mathbb{D}_0$ and $\mathbb{D}_i$ for any $i \in [M]$ can be written as 
\begin{align}
KL(\mathbb{D}_0, \mathbb{D}_i) = \sum_{t=1}^T KL(\mathbb{D}_0(\mathcal{G}_{t} | \mathbf{G}_{t-1}), \mathbb{D}_i(\mathcal{G}_{t} | \mathbf{G}_{t-1})) = \sum_{t=1}^T KL(\mathbb{P}_0^t(\mathcal{G}_t), \mathbb{P}_i^t(\mathcal{G}_t)) \label{kl-divergence-summation}
\end{align}

Now each term in the summation can be written as, for $i \neq N$,
\begin{align}
&KL(\mathbb{P}_0^t, \mathbb{P}_i^t) \nonumber \\
&= \sum_\mathbf{v} \mathbb{P}_0^t(\mathbf{v}) \log{\frac{\mathbb{P}_0^t(\mathbf{v})}{\mathbb{P}_i^t(\mathbf{v})}} \nonumber \\
&= \sum_y \mathbb{P}_0^t(Y = y| X_N = 0, X_i = 1) \mathbb{P}_0^t(X_N = 0, X_i = 1) \log{\frac{\mathbb{P}_0^t(Y = y | X_N = 0, X_i = 1)}{\mathbb{P}_i^t(Y = y | X_N = 0, X_i = 1)}} \nonumber \\
&= 0.5\mathbb{P}_0^t(X_N = 0, X_i = 1) \bigg[ \log{\frac{0.5}{0.5 + \epsilon}} + \log{\frac{0.5}{0.5 - \epsilon}} \bigg] \nonumber \\
&\leq 6\mathbb{P}_0^t(X_N = 0, X_i = 1)\epsilon^2 \label{kl-i-neq-1}
\end{align}

For $i = N$,
\begin{align}
KL(\mathbb{P}_0^t, \mathbb{P}_i^t) &= \sum_\mathbf{v} \mathbb{P}_0^t(\mathbf{v}) \log{\frac{\mathbb{P}_0^t(\mathbf{v})}{\mathbb{P}_i^t(\mathbf{v})}} \nonumber \\
&= \sum_y \mathbb{P}_0^t(Y = y | X_N = 1) \mathbb{P}_0^t(X_N = 1) \log{\frac{\mathbb{P}_0^t(Y = y | X_N = 1)}{\mathbb{P}_i^t(Y = y | X_N = 1)}} \nonumber \\
&= 0.5\mathbb{P}_0^t(X_N = 1) \bigg[ \log{\frac{0.5}{0.5 + \epsilon}} + \log{\frac{0.5}{0.5 - \epsilon}} \bigg] \nonumber \\
&\leq 6\mathbb{P}_0^t(X_N = 1)\epsilon^2 \label{kl-i-eq-1}
\end{align}

Using Equations \ref{kl-i-neq-1} and \ref{kl-i-eq-1} in Equation \ref{kl-divergence-summation}, we get, when $q_i \leq \frac{1}{M'}$,
\begin{align*}
KL(\mathbb{D}_0, \mathbb{D}_i) &\leq 6 \bigg[ \mathbb{E}_0[N_T^{(i,1)}] + \frac{2}{M'}T \bigg] \epsilon^2 \\
&\leq \frac{24T}{M'} \epsilon^2 \\
&\leq 1
\end{align*}

Now putting the value of $KL(\mathbb{D}_0, \mathbb{D}_i)$ in Equation \ref{equation: lower bound in terms of KL Divergence 2} we get the following,
\begin{align*}
\sum_{a_{i,1} \in \mathcal{B}} \mathbb{D}_{i}(a_T \neq a_{i,1}) &\geq \frac{1}{2} \sum_{a_{i,1} \in \mathcal{B}} \exp(-KL(\mathbb{D}_0, \mathbb{D}_{i}))-1 \nonumber \\
    &\geq \frac{|\mathcal{B}|}{2e} - 1 \nonumber\\
    &\geq \frac{M'}{4e} - 1~.
\end{align*}
Therefore, as $|\mathcal{B}| \leq M'$, by an averaging argument there exists an $i \in [M']$ such that 
\begin{align*}
\mathbb{D}_i(a_T \neq a_{i,1}) \geq \frac{\frac{M'}{4e} - 1}{M'} ~.   
\end{align*}

From Lemmas \ref{lemma 4.2: LB of regret of sub-optimal arm} and \ref{lemma 4.2: LB of probability of selecting a sub-optimal arm} for any algorithm \texttt{ALG}, \AUR{if $\epsilon < \frac{1}{4}$} then the  expected simple regret of \texttt{ALG} can be lower bounded as follows
\begin{align} \label{M-greater-than=5}
r_{\texttt{ALG}}(T) \geq \mathbb{D}_i(a_T \neq a_{i,1})\cdot (0.1\epsilon) \geq \frac{\frac{M'}{4e} - 1}{M'} \cdot (0.1 \epsilon) \geq \frac{\frac{M'}{4e}-1}{10M'} \sqrt{\frac{M'}{24T}}~.
\end{align}

\AUR{Otherwise, if $\epsilon \geq \frac{1}{4}$, $M' \geq T$, so $\sqrt{M'/T} = \Omega(1)$ and regret $r_{\texttt{ALG}}(T) \geq \Omega(1)$}.

Hence, it is proved that regret is lower bounded by $\Omega\big(\sqrt{\frac{M}{T}}\big)$.

\end{proof}

\textbf{Case b:} $M < 12$. We choose $\epsilon = \sqrt{\frac{1}{45T}}$ and define $N$ distributions $\mathbb{P}_1, \dots, \mathbb{P}_N$ as follows; the remaining conditional distributions are the same as those of $\mathbb{P}_0$. For all $i \in [N]$, 
\begin{align*}
&\mathbb{P}_i(Y = 1 | X_i = 1) = 0.5 + \epsilon
\end{align*}

Now, the optimal arm under $\mathbb{P}_i$ is $a_{i,1}$, and the regret of pulling a sub-optimal arm in place of the optimal arm $a_{i,1}$ is $(1-q_i) \epsilon \geq 0.5\cdot\epsilon$. Each term in the summation of Equation \ref{kl-divergence-summation} can be written as
\begin{align*}
KL(\mathbb{P}_0^t, \mathbb{P}_i^t) &= \sum_\mathbf{v} \mathbb{P}_0^t(\mathbf{v}) \log{\frac{\mathbb{P}_0^t(\mathbf{v})}{\mathbb{P}_i^t(\mathbf{v})}} \\
&= \sum_y \mathbb{P}_0^t(Y = y | X_i = 1) \mathbb{P}_0^t(X_i = 1) \log{\frac{\mathbb{P}_0^t(Y = y| X_i = 1)}{\mathbb{P}_i^t(Y = y | X_i = 1)}} \\
&= 0.5\mathbb{P}_0^t(X_i = 1) \bigg[ \log{\frac{0.5}{0.5 + \epsilon}} + \log{\frac{0.5}{0.5 - \epsilon}} \bigg] \\
&\leq 6\mathbb{P}_0^t(X_i = 1)\epsilon^2
\end{align*}

Since $\mathbb{P}_0(X_i = 1 | .) \leq 0.5$, we have
\begin{align}
KL(\mathbb{D}_0, \mathbb{D}_i) &\leq 6 \bigg[ \mathbb{E}_0[N_T^{(i,1)}] + \frac{T}{2} \bigg] \epsilon^2
\end{align}

Note that $\mathbb{E}_0[N_T^{(i,1)}] \leq T$, and hence
\begin{align}
KL(\mathbb{D}_0, \mathbb{D}_i) &\leq 9T \epsilon^2 \leq 0.2
\end{align}

Now putting the value of $KL(\mathbb{D}_0, \mathbb{D}_i)$ in Equation \ref{equation: lower bound in terms of KL Divergence 2} we get the following,
\begin{align*}
\sum_{i \in [N]} \mathbb{D}_{i}(a_T \neq a_{i,1}) &\geq \frac{1}{2} \sum_{i \in [N]}  \exp(-KL(\mathbb{D}_0, \mathbb{D}_{i}))-1 \nonumber \\
    &\geq \frac{N}{2e^{0.2}} - 1~.
\end{align*}

Hence, for any algorithm \texttt{ALG} there exists an $i$ such that the regret incurred by it is
\begin{align}
r_{\texttt{ALG}}(T) &\geq 0.5 \mathbb{D}_i(a_T \neq a_{i,1}) \epsilon \geq \frac{\frac{N}{2e^{0.2}}-1}{2N} \sqrt{\frac{1}{45T}}  
\label{M-less-than-6}
\end{align}

Finally, from Equations \ref{M-greater-than=5} and \ref{M-less-than-6} it follows that the expected simple regret of any algorithm is $\Omega\big(\sqrt{\frac{M}{T}}\big)$, where $M$ depends on $\mathbf{q}$ and $k_i$ for $i \in [N]$.