% \documentclass{uai2023} % for initial submission
\documentclass[accepted]{uai2023} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like

%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2023} % ptmx math instead of Computer
% Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2023} % newtx fonts (improves upon
 % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

% for cross referencing the main text
% PLEASE ONLY USE xr IN THE SUPPLEMENTARY MATERIAL. 
% In the main paper, hard code any cross-reference to the supplementary material. 
\usepackage{xr} 
\externaldocument{sawarni_406}

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
% \swap[<sep>]{<a>}{<b>} typesets <b><sep><a>: the two mandatory arguments in
% reversed order, joined by the optional separator (default: -).
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{Learning Good Interventions in Causal Graphs via Covering\\(Supplementary Material)}

% \title{Technical Appendix: }

% The standard author block has changed for UAI 2023 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is authomatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
% Each author name is a hyperlink to a mailto: URI; the optional argument of
% \author is the affiliation index used by \affil below.
% The address is written bare inside the URI: the angle brackets in the class
% template only mark the placeholder and are not part of mailto syntax.
\author[1]{\href{mailto:ayushsawarni@iisc.ac.in?Subject=Your UAI 2023 paper}{Ayush Sawarni}{}}
\author[1]{\href{mailto:mrahul@iisc.ac.in?Subject=Your UAI 2023 paper}{Rahul Madhavan}{}}
\author[2]{\href{mailto:gauravsinha@microsoft.com?Subject=Your UAI 2023 paper}{Gaurav Sinha}{}}
\author[1]{\href{mailto:barman@iisc.ac.in?Subject=Your UAI 2023 paper}{Siddharth Barman}{}}

% Add affiliations after the authors
\affil[1]{%
    % Dept. of Computer Science and Automation\\
    Indian Institute of Science, Bangalore
}
\affil[2]{%
    Microsoft Research\\
    Bangalore
}



%% PACKAGES I'VE INCLUDED
% \usepackage[margin=0.8in]{geometry}
% \renewcommand{\familydefault}{ppl}
\usepackage{graphicx}
\usepackage{framed}
\usepackage[normalem]{ulem}
\usepackage{xstring}
\usepackage{xfrac, amsthm,thm-restate,thmtools}
\usepackage{amssymb}
\usepackage{amsbsy}
\usepackage{amsfonts}
\usepackage{enumerate}
\usepackage{etoolbox}
\usepackage{subcaption}
\usepackage{url}
\usepackage{enumitem}
\usepackage{mathtools}
\usepackage{comment}
\usepackage[font={small,it}]{caption}
\usepackage{bbm}
% \usepackage[usenames,dvipsnames]{color}
\usepackage{xcolor}
\usepackage{algorithmic} %[noend]
\usepackage{algorithm}
\usepackage{array}
% Column type x{<width>}: an m-column (vertically centred) of the given width
% whose contents are set with \centering.
% Inside the cell, \newline is aliased to \centering's version of \\ (for line
% breaks within a cell) and \arraybackslash then restores \\ as the row
% terminator. The leading \hspace{0pt} is the usual trick to let the first
% word of a cell hyphenate.
\newcolumntype{x}[1]{>{\centering\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}

\usepackage{nicefrac}
\usepackage{bm}
\usepackage{tikz}
\usepackage{ifthen}
\usepackage{svg}
% \usepackage{subfigure}
% \usepackage{todonotes}


% \usepackage[colorlinks=true, allcolors=black]{hyperref}
% \hypersetup{
%   colorlinks   = true, %Colours links instead of ugly boxes
%   urlcolor     = blue, %Colour for external hyperlinks
%   linkcolor    = blue, %Colour of internal links
%   citecolor   = blue %Colour of citations, could be ``red''
% }
\usepackage{hyperref}
% \usepackage[colorlinks=true]{hyperref}
% Named colours, all given in the rgb model.
\definecolor{DarkBlue}{rgb}{0.3, 0.3, 0.70}
\definecolor{DarkGreen}{rgb}{0.3, 0.7, 0.3}
\definecolor{azure}{rgb}{0.0, 0.5, 1.0}
\definecolor{darkcerulean}{rgb}{0.03, 0.27, 0.49}
\definecolor{denim}{rgb}{0.08, 0.38, 0.74}
% Coloured link text instead of boxes: blue for internal links, DarkBlue for
% external URLs, DarkGreen for links to the bibliography.
\hypersetup{colorlinks=true, linkcolor=blue, urlcolor=DarkBlue, citecolor=DarkGreen}


\usepackage{thmtools,thm-restate}


%% Selfdefined Theorem for amsthm

% Theorem-like environments.
% theorem, lemma, definition, observation and remark each have their own
% counter. claim, corollary and proposition are numbered on the lemma counter
% (optional argument [lemma]), so e.g. a claim following Lemma 3 is Claim 4.
% example is declared with the starred form and is therefore unnumbered.
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem{claim}[lemma]{Claim}
\newtheorem{corollary}[lemma]{Corollary}
\newtheorem{definition}{Definition}
\newtheorem{observation}{Observation}
\newtheorem*{example}{Example}
\newtheorem{remark}{Remark}
\newtheorem{proposition}[lemma]{Proposition}


%% Self-defined macros

% Manual proof delimiters: \bproof inserts a \bigskip followed by a bold
% "Proof. " heading; \eproof pushes \qedsymbol to the right margin.
% \textbf replaces the obsolete two-letter font switch {\bf ...}.
\newcommand{\bproof}{\bigskip \textbf{Proof. }}
\newcommand{\eproof}{\hfill\qedsymbol}
% \fac expands to \Phi.
\newcommand{\fac}{\Phi}
% Small-caps names (presumably of algorithms -- confirm against the main text).
\newcommand{\welf}{\textsc{Wel-Alg}}
\newcommand{\alg}{\textsc{Alg}}

% Symbol shorthands: bold x, bold omega (via bm), calligraphic W, and a
% typewriter-style "arm".
\newcommand{\x}{\mathbf x}
\newcommand{\opt}{\bm{\omega}}
\newcommand{\W}{\mathcal{W}}
\newcommand{\arm}{\mathtt{arm}}

% Blackboard-bold operators: \E and \prob are used in the body for
% expectation and probability; \indic is a blackboard-bold I.
\newcommand{\E}{\mathbb{E}}
\newcommand{\prob}{\mathbb{P}}
% \newcommand{\ind}{\mathbb{I}} % disabled; same symbol as \indic below
\newcommand{\indic}{\mathbb{I}}
% Small-caps labels NR and R.
\newcommand{\NRg}{\textsc{NR}}
\newcommand{\Rg}{\textsc{R}}

% Upright labels NCB and UCB.
\newcommand{\NCB}{\mathrm{NCB}}
\newcommand{\UCB}{\mathrm{UCB}}

% \Sample expands to a plain S.
\newcommand{\Sample}{S}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Causal-graph notation (math mode).
% \pa and \qa typeset Pa and Pa'; the body uses \qa(j) for the "pseudo
% parents" of a vertex. \ac typesets Ac and \Do the intervention operator do.
\newcommand{\pa}{\mathrm{Pa}}
\newcommand{\qa}{\mathrm{Pa'}}
\newcommand{\ac}{\mathrm{Ac}}
\newcommand{\Do}{\mathrm{do}}
% Calligraphic letters. \mathcal replaces the obsolete \cal font switch; the
% outer braces are kept so that each symbol remains a single ordinary atom.
\newcommand{\cI}{{\mathcal{I}}}
\newcommand{\cV}{{\mathcal{V}}}
\newcommand{\cU}{{\mathcal{U}}}
\newcommand{\cA}{{\mathcal{A}}}
\newcommand{\cC}{{\mathcal{C}}}
\newcommand{\cG}{{\mathcal{G}}}
% \cL and \cH carry a fixed bold-z subscript (now braced explicitly), i.e. they
% typeset the per-assignment terms L_z and H_z.
\newcommand{\cL}{{\mathcal{L}}_{\mathbf{z}}}
\newcommand{\cH}{{\mathcal{H}}_{\mathbf{z}}}
% \newcommand{\cP}[3]{%
%   \IfEqCase{#2}{%
%   {na}{\cal{P}}_{#3}\left(#1, #2 \right )}
%   {{\cal{P}}_{#3}\left(#1, #2 \right )}
% } %{\cal{P} \left\{ #1 \left|  #2 , #3 \right\}}
% \zd: bold z. \muh: mu with a wide hat.
\newcommand{\zd}{\mathbf{z}}
\newcommand{\muh}{\widehat{\mu}}
% \cP{<event>}{<condition>}{<subscript>} typesets a calligraphic P with the
% given subscript applied to its argument:
%   - empty <condition>:     P_<subscript>(<event>)
%   - non-empty <condition>: P_<subscript>(<event> | <condition>)
% The previous definition repeated the emptiness test inside the else-branch,
% where it could never succeed; a single test is equivalent. \mathcal replaces
% the obsolete \cal switch.
\newcommand{\cP}[3]{%
    \ifthenelse{\equal{#2}{}}%
        {{\mathcal{P}}_{#3}\left(#1\right)}%
        {{\mathcal{P}}_{#3}\left(#1 \mid #2\right)}%
}

% \cPh: same interface as \cP, with a wide hat over the P (used in the body
% for estimated quantities).
\newcommand{\cPh}[3]{%
    \ifthenelse{\equal{#2}{}}%
        {{\widehat{\mathcal{P}}}_{#3}\left(#1\right)}%
        {{\widehat{\mathcal{P}}}_{#3}\left(#1 \mid #2\right)}%
}

% \textcal{<text>}: typesets <text> in italic shape after switching to font
% family qzc. The family is selected by name only, so its font definition
% files must be available on the system.
\newcommand*{\textcal}[1]{%
  % family qzc: Font TeX Gyre Chorus (package tgchorus)
  % family pzc: Font Zapf Chancery (package chancery)
  \textit{\fontfamily{qzc}\selectfont#1}%
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%



% Upright "arg max" / "arg min" operators. The starred declaration places
% sub/superscripts as limits (below/above) in display style.
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}



% Some definitions
% Single-letter and operator shorthands (math mode).
% \O, \P and \S already exist in LaTeX and are overridden on purpose with
% \renewcommand; the original text symbols are unavailable afterwards.
% NOTE(review): \mathscr is not defined by any package loaded in the visible
% preamble -- presumably the class or a font package supplies it; confirm
% before using \CSet, \Hs or \P.
\newcommand{\grad}{\nabla}
\newcommand{\A}{\mathcal{A}}
\newcommand{\Cover}{\mathcal{I}}
\newcommand{\C}{\mathbb{C}}
\newcommand{\CSet}{\mathscr{C}}
\newcommand{\D}{\mathcal{D}}
% \newcommand{\E}{\mathbb{E}}
\newcommand{\F}{\mathbb{F}}
% \renewcommand{\H}{\mathcal{H}}
\newcommand{\Hs}{\mathscr{H}}
\newcommand{\I}{\mathcal{I}}
\newcommand{\M}{\mathcal{M}}
\newcommand{\N}{\mathbb{N}}
\renewcommand{\O}{\mathcal{O}}
\renewcommand{\P}{\mathscr{P}}
\newcommand{\R}{\mathbb{R}}
\renewcommand{\S}{\mathcal{S}}
% \T was first declared as \mathscr{T} and overridden on the very next line;
% only the effective definition is kept.
\newcommand{\T}{\Gamma}
\newcommand{\V}{\mathbb{V}}
% \newcommand{\W}{\mathbb{W}}
% \tr is the transpose mark; \0 and \1 are bold 0 and 1; \e is e with a wide hat.
\newcommand{\tr}{\top}
\newcommand{\0}{\mathbf{0}}
\newcommand{\1}{\mathbf{1}}
\newcommand{\e}{\widehat{e}}
% Upright linear-algebra labels.
\newcommand{\Rank}{\mathrm{Rank}}
\newcommand{\Null}{\mathrm{Null}}
\newcommand{\Range}{\mathrm{Range}}
\newcommand{\Dim}{\mathrm{Dim}}
\newcommand{\Span}{\mathrm{Span}}
% Same symbol as \prob.
\newcommand{\Prob}{\mathbb{P}}

% Small-caps names (presumably of the paper's procedures -- confirm against
% the main text). Note that only \UE contains a space in its printed name.
\newcommand{\CI}{\textsc{CoveringInterventions}}
\newcommand{\UE}{\textsc{Uniform Exploration}}
\newcommand{\DE}{\textsc{DirectExploration}}
\newcommand{\PI}{\textsc{PropInf}}



  
  \begin{document}
  
\onecolumn %% Turn this off if single column is desired for the supplement
\maketitle

\appendix

%\input{Include/relatedWorkAppendix.tex}

\section{Missing Proofs from Section \ref{section:regret-analysis}}
\label{appendix:regret-analysis}

We first provide a standard concentration bound which will be used in the analysis. Then, we restate and prove Lemmas \ref{lem:er_bound}, \ref{lem:l-bound}, and \ref{lem:h-bound}.

\begin{lemma} [Hoeffding's Inequality] \label{lem:hoeff}
Let $Z_1, \ldots, Z_n$ be independent bounded random variables with $Z_i \in [a_i, b_i]$, for all $i \in [n]$. Then, for all $\varepsilon \geq 0$:
\begin{align*}
\mathbb{P}\left\{ \left| \sum_{i=1}^n\left(Z_i-\mathbb{E}\left[Z_i\right]\right) \right|  \geq \varepsilon \right\} \leq 2 \exp \left(-\frac{2  \varepsilon ^2}{\sum_{i=1}^n (b_i-a_i)^2}\right).
\end{align*}
\end{lemma}


\begin{lemma} 
For estimates obtained via a covering intervention set $\cI$, as in Algorithm \ref{algo: main algo fully obs setting}, write $\mathcal{E}$ to denote the event that $|\Delta \cP{\zd_i}{\zd_{\pa(i)}}{}| \leq \sqrt{\frac{|\cI| (d+ \log{(NT)})}{T}}$ for all vertices $i \in \cV$. Then, $\Pr\{ \mathcal{E}\} \geq \left( 1-\frac{2}{T} \right)$.
%	\begin{align*}
%		|\Delta \cP{\zd_i}{\zd_{\pa(i)}}{}| \leq \sqrt{\frac{|\cI| (d+ \log{(NT)})}{T}} & \qquad \text{for all $i \in \cV$.}
%	\end{align*}
\end{lemma}
\begin{proof}
Since $\cI$ is a covering intervention set, for each conditional distribution $\cP{\zd_i}{\zd_{\pa(i)}}{}$, we have at least $\frac{T}{|\cI|}$ independent samples. Now, we invoke Lemma \ref{lem:hoeff}, with $\varepsilon = \sqrt{\frac{|\cI| \log{(2^d NT)}}{T}}$, and apply the union bound over all $i \in [N]$ and all assignments to $\pa(i)$. This gives us the desired probability bound.
\end{proof}

\begin{lemma}
For estimates obtained via a covering intervention set $\cI$, as in Algorithm \ref{algo: main algo fully obs setting}, the following event holds with probability at least $\left( 1-\frac{2}{T} \right)$:
\begin{align*}
\sum_{\zd \in Z(A)} \left| \mathcal{L}_\zd  \right| \leq 4(N\eta)^2 \qquad \text{for all $A \in \cA$.}
\end{align*}
Here, parameter $\eta = \sqrt{\frac{|\cI| (d+\log{(NT)})}{T}}$ and $T$ is moderately large.
\end{lemma}
%\begin{lemma}\label{lem:l-bound}
%For estimates obtained via a covering intervention set $\cI$, as in Algorithm \ref{algo: main algo fully obs setting}, the following event holds with probability at least $\left( 1-\frac{2}{T} \right)$:
%\begin{align*}
%\sum_{\zd \in Z(A)} \left| \mathcal{L}_\zd  \right| \leq 4(N\eta)^2 \qquad \text{for all $A \in \cA$.}
%\end{align*}
%Here, parameter $\eta = \sqrt{\frac{|\cI| (d+\log{(NT)})}{T}} $ and $T$ is moderately large $T$.
%\end{lemma}
\begin{proof}
    We will use the fact that each error term in $\mathcal{L}_\zd$ satisfies the bound stated in Lemma \ref{lem:er_bound}. Moreover, we utilize the graph structure to marginalize variables that do not appear in an expansion of $\mathcal{L}_\zd$.
	\begin{align*}
		\sum_{\zd \in Z(A)} |\mathcal{L}_\zd | \leq \sum_{\zd \in Z (A)} \sum_{k = 2}^{|\cV (A)|} \sum_{\substack{U \subseteq \cV(A) \\ |U| = k \ \ }} \left(\prod_{i \in U }  \left| \Delta \cP{\zd_i}{\zd_{\pa(i)}}{}\right| \right) \left(\prod_{j \in \cV(A) \setminus U} \cP{\zd_j}{\zd_{\pa(j)}}{} \right)\\
		= \sum_{k = 2}^{|\cV (A)|} \sum_{\zd \in Z (A)}  \sum_{\substack{U \subseteq \cV(A) \\ |U| = k \ \ }} \left(\prod_{i \in U } \left |\Delta \cP{\zd_i}{\zd_{\pa(i)}}{} \right |  \right) \left(\prod_{j \in \cV(A) \setminus U} \cP{\zd_j}{\zd_{\pa(j)}}{} \right).
	\end{align*}

	First, we upper bound each term considered in the outer-most sum. Towards this, let $U =\{V_{x_1}, V_{x_2}, \ldots, V_{x_k}\}$ be a subset of vertices that appears in the inner sum. Here, $x_1 < x_2 < \ldots < x_k$ and, as mentioned previously, the indexing of the vertices respects a topological ordering over the causal graph. In the derivation below, we will split the sum into $k+1$ parts, $\sum_{\zd_{[1:x_1]}} \sum_{\zd_{(x_1:x_2]}} \ldots \sum_{\zd_{(x_k:N]}} $, and individually bound each marginalized probability distribution.
 %Let $S = \{x_1, x_2 \ldots , x_k \}$ where  $x_i$ denotes the index in set $\cV(A)$ and let $\cV_U(A) = \cV(A) \cap  U$ for some $U \subset \cV$.
	\begin{align}
		&\sum_{\zd \in Z (A)}  \sum_{\substack{U \subseteq \cV(A) \\ |U| = k}} \left(\prod_{i \in U } \left |\Delta \cP{\zd_i}{\zd_{\pa(i)}}{} \right |  \right) \left(\prod_{j \in \cV(A) \setminus U} \cP{\zd_j}{\zd_{\pa(j)}}{} \right) \nonumber\\ 
		&\leq \sum_{\substack{U \subseteq \cV(A) \\ |U| = k}} \sum_{\zd \in Z (A)} \eta^k   \left(\prod_{j \in \cV(A) \setminus U} \cP{\zd_j}{\zd_{\pa(j)}}{} \right) \nonumber \tag{via Lemma \ref{lem:er_bound}, $\left|\Delta \cP{\zd_i}{\zd_{\pa(i)}}{} \right| \leq \eta$ }\\ 
		&= \sum_{\substack{U \subseteq \cV(A) \\ |U| = k}} \eta ^k 
		\sum_{\zd_{[1: x_1]} \in Z_{[1: x_1]}(A)} 
		\left(\prod_{j_1 \in  \cV_{[1:x_1) }(A) } \cP{\zd_{j_1}}{\zd_{\pa(j_1)}}{} \right) \sum_{\zd_{(x_1: x_2]} \in Z_{(x_1: x_2]}(A)} \left(\prod_{j_2 \in  \cV_{(x_1:x_2) }(A) } \cP{\zd_{j_2}}{\zd_{\pa(j_2)}}{} \right)  \nonumber
		\ldots
		 \\& \sum_{\zd\in Z_{(x_i: x_{i+1}]}(A)} \left(\prod_{j_i \in \cV_{(x_i:x_{i+1}) }(A) } \cP{\zd_{j_i}}{\zd_{\pa(j_i)}}{} \right) \ldots \sum_{\zd_{(x_k : N ]} \in Z_{(x_k: N]}(A)} \left(\prod_{j_k \in \cV_{(x_k:N] }(A) } \cP{\zd_{j_k}}{\zd_{\pa(j_k)}}{} \right) \label{eqn:sec}
	\end{align}

	The last term in the above expression can be bounded as follows 
	\begin{align*}
		\sum_{\zd_{(x_k: N]} \in Z_{(x_k: N]}(A)} \left(\prod_{j \in \cV_{(x_k:N] }(A) } \cP{\zd_j}{\zd_{\pa(j)}}{} \right) &= \sum_{\zd_{(x_k: N]} \in Z_{(x_k: N]}(A)} \prob_{\Do(A)} \left[ \cV_{(x_k: N]}(A) = \zd_{(x_k: N]} | \pa (\cV_{(x_k: N]}(A)) \right]\\
  &=\prob_{\Do(A)} \left[ V_N = 1 | \pa \left( \cV_{(x_k:N] }(A) \right)\right] \leq 1.
	\end{align*}
	For all other terms, we have the following inequality 
	\begin{align*}
		\sum_{\zd\in Z_{(x_i: x_{i+1}]}(A)}& \left(\prod_{j \in \cV_{(x_i:x_{i+1}) }(A) } \cP{\zd_j}{\zd_{\pa(j)}}{} \right) 
		\\ &= \sum_{\zd_{x_{i+1}} \in \{0,1\}} \ \sum_{\zd_{(x_i: x_{i+1})} \in Z_{(x_i: x_{i+1})}(A)} \left(\prod_{j \in \cV_{(x_i:x_{i+1}) }(A) } \cP{\zd_j}{\zd_{\pa(j)}}{} \right)
		\\ &= \sum_{\zd_{x_{i+1}} \in \{0,1\}} \sum_{\zd_{(x_i: x_{i+1})} \in Z_{(x_i: x_{i+1})}(A)} \prob_{\Do(A)} \left[ \cV_{(x_i:x_{i+1}) }(A) = \zd_{(x_i: x_{i+1})} | \pa \left( \cV_{(x_i:x_{i+1}) }(A) \right)\right]
  \\ & \leq \sum_{\zd_{x_{i+1}} \in \{0,1\}} 1  \\&=  2.
	\end{align*}

Substituting in (\ref{eqn:sec}), we get 
\begin{align*}
	\sum_{\zd \in Z (A)}  \sum_{\substack{U \subseteq \cV(A) \\ |U| = k}} \left(\prod_{i \in U } \left |\Delta \cP{\zd_i}{\zd_{\pa(i)}}{} \right |  \right) \left(\prod_{j \in \cV(A) \setminus U} \cP{\zd_j}{\zd_{\pa(j)}}{} \right) \leq \sum_{\substack{U \subseteq \cV(A) \\ |U| = k}} \left( 2 \eta\right) ^k \leq \binom{N}{k} \left( 2 \eta\right) ^k.
\end{align*}

Therefore, the sum $\sum_{\zd \in Z(A)} \left| \mathcal{L}_\zd  \right|$ satisfies
\begin{align*}
	\sum_{\zd \in Z(A)} |\mathcal{L}_\zd | &\leq \sum_{k=2}^{N}\binom{N}{k} \left( 2 \eta\right) ^k \\
	&= \sum_{k=0}^{N}\binom{N}{k} \left( 2 \eta\right) ^k  - 2N\eta - 1 \\
	&=(1+2\eta)^N-2N\eta-1\\
	&\leq e^{2N\eta}-2N\eta -1\\
	&\leq 1 + 2N\eta + (2N\eta)^2 - 2N \eta -1 \tag{with $\eta \leq \frac{1}{2N}$}\\
	&\leq 4N^2\eta^2.
\end{align*}
The lemma stands proved. 
\end{proof}

\begin{lemma}
For estimates obtained via a covering intervention set $\cI$, as in Algorithm \ref{algo: main algo fully obs setting}, the following event holds with probability at least $\left( 1-\frac{2}{T} \right)$:
    \begin{align*}
        \left| \sum_{\zd \in Z(A)}\mathcal{H}_\zd \right| \leq \sqrt{\frac{N |\cI| \log{(|\cA| T)}}{T}} \qquad \text{for all $A \in \cA$.}
    \end{align*}
\end{lemma}
\begin{proof}
The definition of $\mathcal{H}_\zd$ gives us
{\allowdisplaybreaks
	\begin{align*} 
		&\left| \sum_{\zd \in Z(A)}\mathcal{H}_\zd \right|\\
            &= \left|  \sum_{\zd \in Z(A)}\sum_{ i \in \cV (A)} \Delta \cP{\zd_i}{\zd_{\pa(i)}}{} \prod_{ j \in \cV(A), j\neq i}  \cP{\zd_j}{\zd_{\pa(j)}}{} \right| \\
		&=  \left|  \sum_{ i \in \cV (A)} \sum_{\zd \in Z(A)} \Delta \cP{\zd_i}{\zd_{\pa(i)}}{} \prod_{ j \in \cV(A), j\neq i}  \cP{\zd_j}{\zd_{\pa(j)}}{} \right| \\
		&= \left|  \sum_{ i \in \cV (A)} \sum_{\substack{\zd_{[1:i]} \in \\  Z_{[1:i]}(A)}} \Delta \cP{\zd_i}{\zd_{\pa(i)}}{} \prod_{ j \in \cV_{[1:i)}(A)} \cP{\zd_j}{\zd_{\pa(j)}}{} \sum_{\substack{\zd_{(i: N ]} \in \\ Z_{(i:N]}(A)}} \prod_{ k \in \cV_{(i:N]}(A)}  \cP{\zd_k}{\zd_{\pa(k)}}{} \right| \\
		&= \left| \sum_{i \in \cV (A) } \sum_{\substack{\zd_{[1:i]} \in \\  Z_{[1:i]}(A)}} \Delta \cP{\zd_i}{\zd_{\pa(i)}}{} \prod_{ j \in \cV_{[1:i)}(A)} \cP{\zd_j}{\zd_{\pa(j)}}{} \sum_{\substack{\zd_{(i: N ]} \in \\ Z_{(i:N]}(A)}} \prob_{\Do(A)} \left[\cV_{(i:N]}(A) = \zd_{(i:N ]} \mid \pa\left(\cV_{(i:N]}(A)\right)  \right] \right|\\
		&= \left| \sum_{ i \in \cV (A) } \sum_{\substack{\zd_{[1:i]} \in \\  Z_{[1:i]}(A)}} \Delta \cP{\zd_i}{\zd_{\pa(i)}}{} \prod_{ j \in \cV_{[1:i)}(A)} \cP{\zd_j}{\zd_{\pa(j)}}{} \prob_{\Do(A)} \left[V_N = 1 \mid \pa\left(\cV_{(i:N]}(A)\right)  \right] \right|\\    
		%
		&= \left| \sum_{ i \in \cV (A) } \sum_{\zd_i \in \{0,1\}}\sum_{\substack{\zd_{[1:i)} \in \\ Z_{[1:i)}(A)}} \Delta \cP{\zd_i}{\zd_{\pa(i)}}{} \prod_{ j \in \cV_{[1:i)}(A)} \cP{\zd_j}{\zd_{\pa(j)}}{}
		\prob_{\Do(A)} \left[V_N = 1 \mid \pa\left(\cV_{(i:N]}(A)\right)  \right] \right|\\
  &= \left| \sum_{ i \in \cV (A) } \sum_{\zd_i \in \{0,1\}}\sum_{\substack{\zd_{\pa(i)} \in \\ Z_{\pa(i)}(A)}} \Delta \cP{\zd_i}{\zd_{\pa(i)}}{} \sum_{\substack{\zd_{\ac(i)} \in \\ Z_{\ac(i)}(A)}} \prod_{ j \in \cV_{[1:i)}(A)} \cP{\zd_j}{\zd_{\pa(j)}}{} \prob_{\Do(A)} \left[V_N = 1 \mid \pa\left(\cV_{(i:N]}(A)\right)  \right] \right|.
\end{align*}
}
 Recall that $\ac(i) = [1,i) \setminus \pa(i) $ and write 
 \begin{align}
     c_i(z_i, \zd_{\pa (i)}) \coloneqq  \sum_{\substack{\zd_{\ac(i)} \in \\ Z_{\ac(i)}(A)}} \ \prod_{ j \in \cV_{[1:i)}(A)} \cP{\zd_j}{\zd_{\pa(j)}}{} \prob_{\Do(A)} \left[V_N = 1 \mid \pa\left(\cV_{(i:N]}(A)\right)  \right] \label{eqn:def-c}
 \end{align}
Also, as a shorthand for $z_i=1$ and $z_i=0$ we will write $1_i$ and $0_i$, respectively. With these notations, we have 
\begin{align*}
\left| \sum_{\zd \in Z(A)}\mathcal{H}_\zd \right| 
  &= \left| \sum_{ i \in \cV (A) } \sum_{\zd_i \in \{0,1\}} \sum_{\zd_{\pa (i)} \in Z_{\pa(i)}(A) } \Delta \cP{\zd_i}{\zd_{\pa(i)}}{} c_i(\zd_i,\zd_{\pa(i)}) \right| \\
%
		&=\left| \sum_{ i \in \cV (A) } \sum_{\zd_{\pa (i)} \in Z_{\pa(i)}(A) } \Delta \cP{1_i}{\zd_{\pa(i)}}{} \left(c_i(1_i,\zd_{\pa(i)}) - c_i(0_i,\zd_{\pa(i)}) \right)\right| \tag{since  $\Delta \cP{1_i}{\zd_{\pa(i)}}{} = -  \Delta \cP{0_i}{\zd_{\pa(i)}}{}$}
	\end{align*}

Since $\cI$ is a covering intervention set, for each pair $(i,\zd_{\pa(i)})$, there exists an intervention $I \in \cI$ such that performing $\Do(I)$ provides a sample from the conditional probability distribution $\prob [ V_i =1 \mid \pa(V_i) = \zd_{\pa(i)}]$. Hence, Line \ref{line:do-intervene} of the algorithm provides at least $\frac{T}{|\cI|}$ independent samples from the conditional distribution $\prob [ V_i =1 \mid \pa(V_i) = \zd_{\pa(i)}]$. We denote the $s^{\text{th}}$ sample from this conditional distribution by $Y_s(i,\zd_{\pa(i)})$. Now, we have 
\begin{align*}
	\left| \sum_{\zd \in Z(A)}\mathcal{H}_\zd \right|  = \left| \sum_{ i \in \cV (A) } \sum_{\substack{\zd_{\pa (i)} \in \\ Z_{\pa(i)}(A)}} \frac{|\cI|}{T} \sum_{s=1}^{T/|\cI|} \left( Y_s(i,\zd_{\pa(i)}) - \cP{1_i}{\zd_{\pa(i)}}{} \right)   (c_i(1_i,\zd_{\pa(i)}) - c_i(0_i,\zd_{\pa(i)}))\right|
\end{align*}
We will apply Hoeffding's inequality (Lemma \ref{lem:hoeff}) to bound the above expression. Note that in this expression, besides $Y_s(i,\zd_{\pa(i)})$-s, all the other terms are deterministic. In particular, we show in Claim \ref{lem:bound_c} (stated and proved below) that $\sum_{\zd_{\pa (i)} \in Z_{\pa(i)}(A)} (c(1_i,\zd_{\pa(i)}) - c(0_i,\zd_{\pa(i)}))^2  \leq 1$, for all $i$. Hence, for any $A \in \cA$, Lemma \ref{lem:hoeff} gives us 
\begin{align*}
	\prob \left( \left| \sum_{\zd \in Z(A)}\mathcal{H}_\zd \right| \geq \varepsilon \right) &\leq 2 \ \mathrm{exp}\left( \frac{ - T \varepsilon^2}{|\cI| \sum_{ i \in \cV (A) } \sum_{\zd_{\pa (i)} \in Z_{\pa(i)}(A) } (c_i(1_i,\zd_{\pa(i)}) - c_i(0_i,\zd_{\pa(i)}))^2} \right) \\
		&\leq 2 \ \mathrm{exp} \left( \frac{ - T \varepsilon^2}{ |\cI| \ |\cV(A)| } \right) \tag{via Claim \ref{lem:bound_c}}\\ 
		&\leq 2 \ \mathrm{exp} \left( \frac{ - T \varepsilon^2}{ |\cI| \ N } \right).
	\end{align*}
	Setting $\varepsilon = \sqrt{\frac{N \ |\cI| \log{(|\cA|  T)}}{T}}$ and taking union bound over all $A \in \cA$, gives us the required probability bound. This completes the proof of the lemma. 
 \end{proof}
We next establish the claim used in the proof of Lemma \ref{lem:h-bound}.
\begin{claim} \label{lem:bound_c}
	\begin{align*}
		\sum_{\zd_{\pa (i)} \in Z_{\pa(i)}(A)} (c(1_i,\zd_{\pa(i)}) - c(0_i,\zd_{\pa(i)}))^2  \leq 1.
	\end{align*}
\end{claim}
\begin{proof}
The definition of $c(\zd_i, \zd_{\pa(i)})$ (see equation (\ref{eqn:def-c})) gives us 
{ \allowdisplaybreaks
	\begin{align*}
		&| c(1_i,\zd_{\pa(i)}) - c(0_i,\zd_{\pa(i)}) | 
		\\&= \biggl| \sum_{\zd \in Z_{\ac(i)}(A) } \prod_{ j \in \cV_{[1:i)}(A)} \cP{\zd_j}{\zd_{\pa(j)}}{} \prob_{\Do(A)} \left[\cV_{[i+1:N]}(A) = \zd_{[i+1:N ]} \mid \pa\left(\cV_{[1: i]}(A) \right) = (\zd_{[1:i)} \cup 1_{i} )  \right] - 
		\\ & \qquad \qquad \sum_{\zd \in Z_{\ac(i)}(A) } \prod_{ j \in \cV_{[1:i)}(A)} \cP{\zd_j}{\zd_{\pa(j)}}{} \prob_{\Do(A)} \left[\cV_{[i+1:N]}(A) = \zd_{[i+1:N ]} \mid \pa\left(\cV_{[1: i]}(A) \right)= (\zd_{[1:i)} \cup 0_{i} )  \right] \biggr|\\
        &=  \biggl| \sum_{\zd \in Z_{\ac(i)}(A) } \prod_{ j \in \cV_{[1:i)}(A)} \cP{\zd_j}{\zd_{\pa(j)}}{}  \biggl( \prob_{\Do(A)} \left[\cV_{[i+1:N]}(A) = \zd_{[i+1:N ]} \mid \pa (\cV_{[1: i]}(A)) = (\zd_{[1:i)} \cup 1_{i} ) \ \right] - \\ & \qquad \prob_{\Do(A)} \left[\cV_{[i+1:N]}(A) = \zd_{[i+1:N ]} \mid \pa(\cV_{[1: i]}(A) )= (\zd_{[1:i)} \cup 0_{i} )  \right] \biggr)  \biggr| \\
		&\leq   \sum_{\zd \in Z_{\ac(i)}(A) } \prod_{ j \in \cV_{[1:i)}(A)} \cP{\zd_j}{\zd_{\pa(j)}}{} \biggl| \prob_{\Do(A)} \left[\cV_{[i+1:N]}(A) = \zd_{[i+1:N ]} \mid \pa (\cV_{[1: i]}(A)) = (\zd_{[1:i)} \cup 1_{i})\right] - \\ & \qquad \qquad \prob_{\Do(A)} \left[\cV_{[i+1:N]}(A) = \zd_{[i+1:N ]} \mid \pa (\cV_{[1: i]}(A)) = (\zd_{[1:i)} \cup 0_{i} )  \right] \biggr| \\
		& \leq \sum_{\zd \in Z_{\ac(i)}(A) } \prod_{ j \in \cV_{[1:i)}(A)} \cP{\zd_j}{\zd_{\pa(j)}}{} \\
		& = \prob_{\Do(A)}\left[ \cV_{\pa(i)}(A) = \zd_{\pa(i)}\right].
	\end{align*}
}
Hence, under intervention $A \in \cA$, we have 
	\begin{align*}
		\sum_{\zd_{\pa (i)} \in Z_{\pa(i)}(A)} (c(1_i,\zd_{\pa(i)}) - c(0_i,\zd_{\pa(i)}))^2  &\leq \sum_{\zd_{\pa (i)} \in Z_{\pa(i)}(A)} |c(1_i,\zd_{\pa(i)}) - c(0_i,\zd_{\pa(i)})|\\
		&\leq \sum_{\zd_{\pa (i)} \in Z_{\pa(i)}(A)} \prob_{\Do(A)}\left[ \cV_{\pa(i)}(A) = \zd_{\pa(i)}\right] \\
		&\leq 1.
	\end{align*}
This completes the proof of the claim.
\end{proof}

\section{Regret Analysis for Semi Markov Bayesian Networks (SMBNs)}
\label{appendix:regret-analysis SMBN}

%The next lemma states that one can find a covering intervention set of size $\widetilde{O}(\log N)$ even in the case of Semi Markovian Bayesian Networks (SMBNs). This is a direct implication of Lemma 4.2 in \cite{acharya2018learning}.
%\begin{lemma} [\cite{acharya2018learning}] \label{lem:covers SMBN}
 %   For any moderately large $T \in \mathbb{Z}_+$ and any causal graph $\cG$---with in-degree at most $d$ and c-components of size at most $\ell$---there exists a covering intervention set $\cI$ of size $k = (3d)^\ell \cdot  2^{\ell d} (\log N +2\ell d + \log T )$. Furthermore, such a set $\cI$ can be found with probability at least $1 - \frac{1}{T}$.
%\end{lemma}


 %Next, 
 We introduce the notion of \emph{pseudo parents} of a vertex in an SMBN graph $\cG$, which we will use throughout the proof. Recall that $\cV$ denotes the set of vertices, and they conform to a topological ordering. We assume that each c-component $C_i$ maintains the ordering. For an intervention $A$, consider any c-component $C \in \cC(A)$ with vertices $(U_1, U_2, \ldots, U_m)$. The pseudo parents of a vertex $U_j$ are defined as

\begin{align}
    \qa(j) \coloneqq \pa(\{U_1, U_2, \ldots U_{j} \}) \cup \left\{U_1, U_2, \ldots U_{j-1} \right\}  \label{definition: pseudo_pa}
\end{align}

For any SMBN graph with in-degree at most $d$ and c-components of size at most $\ell$, the size $|\qa(j)|$ is at most $d \ell + \ell$. Furthermore, note that the set $\qa(j)$ will always precede the vertex $V_j$ in any topological ordering of the graph.

The next lemma shows that the distribution of any c-component conditioned on its parents, $\cP{\zd_{C}}{}{\zd_{\pa(C)}}$, can be  factorized into the distribution of individual vertices conditioned on its pseudo parents. This allows us to extend the techniques used for the regret analysis of fully observable graphs  (Section \ref{section:regret-analysis}) to the case of SMBNs. Intuitively, one can view the factorization of an SMBN (under an intervention $A$) as a factorization over a fully observable graph where each vertex $V_j$ has the set $\qa(j)$ as its parents.   

\begin{lemma} \label{lem:pseudo_pa}
    For any intervention $A$ and any c-component $C \in \cC(A)$, consisting of vertices $\{U_1, U_2 \ldots U_m \}$, we have  
    \begin{equation*}
        \cP{\zd_{C}}{}{\zd_{\pa(C)}} = \prod_{j \in C} \cP{\zd_j}{\zd_{\qa(j)}}{A}  
    \end{equation*}
    Here $\qa(j)$ denotes the set of pseudo parents as defined in equation (\ref{definition: pseudo_pa}).
\end{lemma}

\begin{proof}
    First, note that intervening on parent vertices of a c-component (under intervention $A$) is the same as conditioning on them. Specifically, %\footnote{It is useful to note the similarity between this equation and equality (\ref{eqn:all-parent})}
    \begin{align*}
         \cP{\zd_{C}}{}{\zd_{\pa(C)}} = \cP{\zd_{C}}{\zd_{\pa{(C)}}}{A} 
    \end{align*}
    Further, the chain rule of conditional probability gives us 
     \begin{align*}
        \cP{\zd_{C}}{\zd_{\pa{(C)}}}{A} &= \prod_{j \in C} \prob_{\Do(A)} \left[U_j = \zd_{j} \mid \pa(C) = \zd_{\pa(C)}, (U_1 \ldots U_{j-1}) = \zd_{(U_1 \ldots U_{j-1})}\right] 
    \end{align*}
    Next, we use the notion of d-separation \citep[see][]{pearl2009causality} to argue that conditioning on just the set $\qa(j)$ is sufficient. In particular, note that the set $Y = \pa(\{U_{j+1} \ldots U_m\})$ is d-separated from vertex $U_j$ by the set $X = \pa(\{U_{1} \ldots U_j\}) \cup (\{U_{1} \ldots U_{j-1}\}) $. This is due to the fact that all paths from a vertex in $Y$ to $U_j$ are either blocked by a collider vertex in $\{U_{j+1} \ldots U_m\}$ (and the collider vertex is not included in $X$), or the path is blocked by a vertex in $X$. This implies that, conditioned on $X$, $U_j$ is independent of all vertices in $Y$ \citep{pearl2009causality}. Formally, we write
    \begin{align*}
         &\prob_{\Do(A)} \left[U_j = \zd_{j} \mid \pa(C) = \zd_{\pa(C)}, (U_1 \ldots U_{j-1}) = \zd_{(U_1 \ldots U_{j-1})}\right] \\
         &= \prob_{\Do(A)} [U_j = \zd_{j} \mid \pa(\{U_1 \ldots U_{j}\}) = \zd_{\pa(U_1 \ldots U_j)}, \pa(\{U_{j+1} \ldots U_{m}\}) = \zd_{\pa(U_{j+1}\ldots U_m)}, (U_1 \ldots U_{j-1}) = \zd_{(U_1 \ldots U_{j-1})}] \\
        &= \prob_{\Do(A)} \left[U_j = \zd_{j} \mid \pa(\{U_1 \ldots U_{j}\}) = \zd_{\pa(U_1 \ldots U_j)}, (U_1 \ldots U_{j-1}) = \zd_{(U_1 \ldots U_{j-1})}\right]  \tag{since $\pa(\{U_1 \ldots U_{j} \}) \cup \{U_1 \ldots U_{j-1} \}$  d-separates $U_j$ from $\pa(\{U_{j+1}\ldots U_m \})$ }\\
        &= \prob_{\Do(A)} \left[U_j = \zd_{j} \mid \qa(j)\right] 
        \tag{by definition of $\qa(j)$}
    \end{align*}
    Therefore, 
     \begin{align*}
        \cP{\zd_{C}}{}{\zd_{\pa(C)}} &= \cP{\zd_{C}}{\zd_{\pa{(C)}}}{A} \\
        &=  \prod_{j \in C} \prob_{\Do(A)}  \left [V_j = \zd_{j} \mid \qa(j) = \zd_{\qa(j)} \right] \\
        &= \prod_{j \in C} \cP{\zd_j}{\zd_{\qa(j)}}{A}
    \end{align*}
This completes the proof of the lemma.
\end{proof}

Now, recall that the estimate $\muh(A)$ can be written as 
\begin{alignat*}{2}
	\widehat{\mu} \left(A \right) &=\sum_{\zd \in Z(A)} \prod_{ C_i \in \cC(A)}  \cPh{\zd_{C_i}}{}{\zd_{\pa(C_i)} }  \\
 &= \sum_{\zd \in Z(A)}  \prod_{ C_i \in \cC(A)} \left( \cP{\zd_{C_i}}{\zd_{\pa(C_i)}}{A} +  \Delta \cP{\zd_{C_i}}{\zd_{\pa(C_i)}}{A}\right) \\
     &= \mu (A) + \sum_{\zd \in Z(A)} \Biggl( \sum_{ C_i \in \cC (A)}  \Delta \cP{\zd_{C_i}}{\zd_{\pa(C_i)}}{A} \prod_{ C_j \in \cC(A), j\neq i}  \cP{\zd_{C_j}}{\zd_{\pa(C_j)}}{A}+  \\
    &\sum_{\substack{U \subseteq \cC(A) \\ |U| = 2}} \left(\prod_{C_i \in U } \Delta \cP{\zd_{C_i}}{\zd_{\pa(C_i)}}{A} \right) \left(\prod_{C_j \in \cC(A) \setminus U} \cP{\zd_{C_j}}{\zd_{\pa(C_j)}}{A}\right) + \\
    &\sum_{\substack{U \subseteq \cC(A) \\ |U| =3 }} \left(\prod_{C_i \in U } \Delta \cP{\zd_{C_i}}{\zd_{\pa(C_i)}}{A} \right) \left(\prod_{C_j \in \cC(A) \setminus U} \cP{\zd_{C_j}}{\zd_{\pa(C_j)}}{A} \right)    + \cdots \Biggr) \tag{expanding product terms} 
    {}% + \textbf{ B}\right ) 		 \\	
\end{alignat*}
Here, $\Delta \cP{}{}{} $ denotes the error in the estimate of the conditional probabilities. Let  $\cL$ represent all the product entries in the expansion that include more than one error term ($\Delta \cP{}{}{}$). Specifically,
\begin{align*}
	\cL & = \sum_{k = 2}^{|\cC (A)|} \sum_{\substack{U \subseteq \cC(A) \\ |U| = k}} \left(\prod_{C_i \in U } \Delta \cP{\zd_{C_i}}{\zd_{\pa(C_i)}}{A} \right) \left(\prod_{C_j \in \cC(A) \setminus U} \cP{\zd_{C_j}}{\zd_{\pa(C_j)}}{A} \right)\\
  &= \sum_{k = 2}^{|\cC (A)|} \sum_{\substack{U \subseteq \cC(A) \\ |U| = k}} \left(\prod_{C_i \in U } \Delta \cP{\zd_{C_i}}{\zd_{\pa(C_i)}}{A} \right) \left( \prod_{\substack{C \in \cC(A) \setminus U, \\ j \in C }} \cP{\zd_j}{\zd_{\qa(j)}}{A} \right) \tag{via Lemma \ref{lem:pseudo_pa}}
\end{align*}
We further represent all the entries with a single $\Delta \cP{}{}{}$ term as
\begin{align}
	\cH =  \sum_{ C_i \in \cC (A)} \Delta \cP{\zd_{C_i}}{\zd_{\pa(C_i)}}{A} \prod_{ \substack{C_k \in \cC(A)\\ k\neq i}}  \cP{\zd_{C_k}}{\zd_{\pa(C_k)}}{A} \nonumber \\
        =\sum_{ C_i \in \cC (A)} \Delta \cP{\zd_{C_i}}{\zd_{\pa(C_i)}}{A} \prod_{ \substack{j\in \cV(A) \setminus C_i}}  \cP{\zd_{j}}{\zd_{\qa(j)}}{A} \label{eqn:SMBN-exp-Hz}
\end{align}
Here, the last equality follows from Lemma \ref{lem:pseudo_pa}.
Hence, we have 
\begin{equation}
    \muh(A) - \mu(A) = \sum_{\zd \in Z(A)} \left( \cH + \cL \right) \label{eq:error}
\end{equation}
We will establish upper bounds on the sums of $\mathcal{L}_\zd$s and $\mathcal{H}_\zd$s in Lemma \ref{lem:l-bound-smbn} and Lemma \ref{lem:h-bound-smbn}, respectively. These lemmas show that the sum of the $\mathcal{H}$ terms dominates the sum of $\mathcal{L}$ terms. Furthermore, these bounds imply that the estimated reward  $\muh(A)$ is sufficiently close to the true expected reward $\mu(A)$ for each intervention $A \in \cA$.
\begin{lemma} \label{lem:er_bound SMBN}
For estimates obtained via a covering intervention set $\cI$, as in Algorithm \ref{algo: main algo fully obs setting}, write $\mathcal{E}$ to denote the event that $|\Delta \cP{\zd_{C_i}}{}{\zd_{\pa(C_i)}}| \leq \sqrt{\frac{|\cI|( \ell d+\ell+\log({NT}))}{T}} $ for all c-components $C_i \in \cC(A)$ and for all $A \in \cA$. Then, $\Pr\left\{ \mathcal{E} \right\} \geq \left( 1-\frac{2}{T} \right)$. 
\end{lemma}

\begin{proof}
    Since $\cI$ is a covering intervention set (see Definition \ref{defn:CIS-SMBN}), for each distribution $\cP{\zd_{C_i}}{}{\zd_{\pa(C_i)}}$, we have at least $\frac{T}{|\cI|}$ independent samples. Also, note that the total number of distributions to be estimated is at most $2^{(\ell d+ \ell)}N$. This follows from the fact that each c-component---under any intervention---is a subset of a c-component in the original graph $\cG$, and the number of c-components in $\cG$ is at most $N$. Hence, the number of possible distinct c-components (across all interventions) is at most $N2^\ell$. Furthermore, each c-component can have at most $\ell d$ parents with at most $2^{\ell d}$ distinct binary assignments to the parents.

    With this count in hand, we invoke Lemma \ref{lem:hoeff}, with $\varepsilon = \sqrt{\frac{|\cI| ( \log{(2^{\ell d+ \ell}NT)})}{T}} $ and apply the union bound over all $(\zd_{C_i} , \zd_{\pa(C_i)})$ pairs. This gives us the desired probability bound and completes the proof of the lemma. 
\end{proof}

\begin{lemma}\label{lem:l-bound-smbn}
	For estimates obtained via a covering intervention set $\cI$, the following event holds with probability at least $(1 - \frac{2}{T})$: \[ \sum_{\zd \in Z(A)} \left| \cL \right| \leq 4^\ell(N\eta)^2  \qquad \text{for all $A \in \cA$.} \] Here, the parameter $\eta = \sqrt{\frac{ |\cI| ( \ell d + \ell + \log({NT}))}{T}}$ and $T$ is moderately large, so that $\eta \leq \frac{1}{2^\ell N}$.
\end{lemma}
\begin{proof}
    We use the fact that each error term in $\cL$ satisfies the bound stated in Lemma \ref{lem:er_bound SMBN}.  Moreover, we use the graph structure to marginalize variables that do not appear in the error terms. The idea is to split the sum $\sum_{\zd \in Z(A)}$ into $\sum_{\zd_{[1:x_1]}} \sum_{\zd_{(x_1:x_2]}} \ldots \sum_{\zd_{(x_k:N]}} $, where $\{x_1, x_2 \ldots , x_k \}$ denotes all the indices in $\cC(A)$ that show up as $\Delta \cP{}{}{}$ in the expression for $\cL$. 
	\begin{align*}
		\sum_{\zd \in Z(A)} |\cL| &\leq \sum_{\zd \in Z (A)} \sum_{k = 2}^{|\cC (A)|} \sum_{\substack{U \subseteq \cC(A) \\ |U| = k}} \left(\prod_{C_i \in U }  \left| \Delta \cP{\zd_{C_i}}{\zd_{\pa(C_i)}}{A}\right| \right) \left(\prod_{C_j \in \cC(A) \setminus U} \cP{\zd_{C_j}}{\zd_{\pa(C_j)}}{A} \right)\\
		&=  \sum_{k = 2}^{|\cC (A)|} \sum_{\zd \in Z (A)}  \sum_{\substack{U \subseteq \cC(A) \\ |U| = k}} \left(\prod_{C_i \in U }  \left| \Delta \cP{\zd_{C_i}}{\zd_{\pa(C_i)}}{A}\right| \right) \left(\prod_{C_j \in \cC(A) \setminus U} \cP{\zd_{C_j}}{\zd_{\pa(C_j)}}{A} \right) \\
   &\leq \sum_{k = 2}^{|\cC (A)|} \sum_{\substack{U \subseteq \cC(A) \\ |U| = k}} \sum_{\zd \in Z (A)} \eta^k   \left(\prod_{C_j \in \cC(A) \setminus U} \cP{\zd_{C_j}}{\zd_{\pa(C_j)}}{A} \right) \nonumber \tag{via Lemma \ref{lem:er_bound SMBN},  $\left|\Delta \cP{\zd_{C_i}}{\zd_{\pa(C_i)}}{A} \right| \leq \eta$ }
	\end{align*}

	First, we upper bound each term considered in the outer-most sum. Towards this, let $U$ denote the set of c-components that show up as $\Delta \cP{}{}{}$, and define $X \coloneqq \bigcup_{C_i \in U} C_i = \left\{ x_1, x_2 , \dots , x_m \right\}$, where $x_i$ denotes the vertex $V_{x_i} \in \cV(A)$. Note that, since c-components are of size at most $\ell$ and $|U| = k$, we have $|X| \leq \ell k$. Now, using Lemma \ref{lem:pseudo_pa}, we obtain 
  \begin{align}
  &\sum_{\substack{U \subseteq \cC(A) \\ |U| = k}} \sum_{\zd \in Z (A)} \eta^k   \left(\prod_{C_j \in \cC(A) \setminus U} \cP{\zd_{C_j}}{\zd_{\pa(C_j)}}{A} \right) \nonumber \\
  &=\sum_{\substack{U \subseteq \cC(A) \\ |U| = k}} \sum_{\zd \in Z (A)} \eta^k   \left(\prod_{j \in \cV(A) \setminus X} \cP{\zd_j}{\zd_{\qa(j)}}{A} \right) \nonumber \\
		 & =\sum_{\substack{U \subseteq \cC(A) \\ |U| = k}} \eta ^k 
		\sum_{\zd_{[1: x_1]} \in Z_{[1: x_1]}(A)} 
		\left(\prod_{j \in  \cV_{[1:x_1) }(A) } \cP{\zd_j}{\zd_{\qa(j)}}{A} \right) \sum_{\zd_{(x_1: x_2]} \in Z_{(x_1: x_2]}(A)} \left(\prod_{j \in  \cV_{(x_1:x_2) }(A) } \cP{\zd_j}{\zd_{\qa(j)}}{A} \right)  \nonumber
		\ldots
		 \\& \sum_{\zd_{(x_i: x_{i+1}]} \in Z_{(x_i: x_{i+1}]}(A)} \left(\prod_{j \in \cV_{(x_i:x_{i+1}) }(A) } \cP{\zd_j}{\zd_{\qa(j)}}{A} \right) \ldots \sum_{\zd_{(x_k : N ]} \in Z_{(x_k: N]}(A)} \left(\prod_{j \in \cV_{(x_k:N] }(A) } \cP{\zd_j}{\zd_{\qa(j)}}{A} \right). \label{eq:sec SMBN}
	\end{align}

	The last term in the above expression can be bounded as follows
	\begin{align*}
		\sum_{\zd_{(x_k: N]} \in Z_{(x_k: N]}(A)} \left(\prod_{j \in \cV_{(x_k:N] }(A) } \cP{\zd_j}{\zd_{\qa(j)}}{A} \right) &= \sum_{\zd_{(x_k: N]} \in Z_{(x_k: N]}(A)} \prob_{ \Do(A)} \left[ \cV_{(x_k: N]}(A) = \zd_{(x_k: N]} | \qa (\cV_{(x_k: N]}(A)) \right]\\
  &=\prob_{ \Do(A)} \left[ V_N = 1 | \qa \left( \cV_{(x_k:N] }(A) \right)\right] \leq 1.
	\end{align*}
	For all the other terms, we have the following bound   
	\begin{align*}
		\sum_{\zd_{(x_i: x_{i+1}]} \in Z_{(x_i: x_{i+1}]}(A)}& \left(\prod_{j \in \cV_{(x_i:x_{i+1}) }(A) } \cP{\zd_j}{\zd_{\qa(j)}}{A} \right) 
		\\&= \sum_{\zd_{x_{i+1}} \in \{0,1\}} \sum_{\zd_{(x_i: x_{i+1})} \in Z_{(x_i: x_{i+1})}(A)} \left(\prod_{j \in \cV_{(x_i:x_{i+1}) }(A) } \cP{\zd_j}{\zd_{\qa(j)}}{A} \right)
		\\ &= \sum_{\zd_{x_{i+1}} \in \{0,1\}} \sum_{\zd_{(x_i: x_{i+1})} \in Z_{(x_i: x_{i+1})}(A)} \prob_{ \Do(A)} \left[ \cV_{(x_i:x_{i+1}) }(A) = \zd_{(x_i: x_{i+1})} | \qa \left( \cV_{(x_i:x_{i+1}) }(A) \right)\right]
  \\ & \leq \sum_{\zd_{x_{i+1}} \in \{0,1\}} 1  \\&=  2.
	\end{align*}


%\sum_{\substack{U \subseteq \cC(A) \\ |U| = k}} \sum_{\zd \in Z (A)} \eta^k   \left(\prod_{C_j \in \cC(A) \setminus U} \cP{\zd_{C_j}}{\zd_{\pa(C_j)}}{A} \right)
Substituting in (\ref{eq:sec SMBN}), we get
\begin{align*}
\sum_{\substack{U \subseteq \cC(A) \\ |U| = k}} \sum_{\zd \in Z (A)} \eta^k   \left(\prod_{C_j \in \cC(A) \setminus U} \cP{\zd_{C_j}}{\zd_{\pa(C_j)}}{A} \right) &\leq \sum_{\substack{U \subseteq \cC(A) \\ |U|= k}} \eta^k \ 2^{\ell k}  \tag{since $|X| \leq \ell k$} \\  
 &\leq \binom{N}{k} \left( 2^\ell \eta\right) ^k. \tag{since $|\cC(A)| \leq N$}
\end{align*}

Therefore, the sum  $\sum_{\zd \in Z(A)} |\cL|$ satisfies

\begin{align*}
	\sum_{\zd \in Z(A)} |\cL| &\leq \sum_{k=2}^{N}\binom{N}{k} \left( 2^\ell \eta\right) ^k \\
	&= \sum_{k=0}^{N}\binom{N}{k} \left( 2^\ell \eta\right) ^k  - 2^\ell N\eta - 1 \\
	&=(1+2^\ell \eta)^N-2^\ell N\eta-1\\
	&\leq e^{2^\ell N\eta}-2^\ell N\eta -1\\
	&\leq 1 + 2^\ell N\eta + (2^\ell N\eta)^2 - 2^\ell N \eta -1 \tag{with $\eta \leq \frac{1}{2^\ell N}$}\\
	&\leq 4^\ell N^2\eta^2
\end{align*}
The lemma stands proved.
\end{proof}
\begin{lemma}\label{lem:h-bound-smbn}
For estimates obtained via a covering intervention set $\cI$, the following event holds with probability at least $1-\frac{2}{T}$: 
\begin{equation*}
  	\left| \sum_{\zd \in Z(A)}\cH \right| \leq \sqrt{\frac{N \ 4^\ell  \ 2^d \ |\cI|  \log{(|\cA| T)}}{T}}  \qquad \text{for all $A \in \cA$.} 
\end{equation*} 
\end{lemma}
\begin{proof}
Equation (\ref{eqn:SMBN-exp-Hz}) gives us 
%of the joint distribution expressed in terms of pseudo parents of the vertices
	\begin{align*}
		\left| \sum_{\zd \in Z(A)}\cH \right|
         = \left| \sum_{ C_i \in \cC (A)} \sum_{\zd \in Z(A)}  \Delta \cP{\zd_{C_i}}{\zd_{\pa(C_i)}}{A} \prod_{ \substack{j\in \cV(A) \setminus C_i}}  \cP{\zd_{j}}{\zd_{\qa(j)}}{A} \right|.
    \end{align*}
Let $X \coloneqq \left\{ x_1, x_2 \cdots x_m \right\}$ be the vertices in a c-component $C_i$ considered in the outer summation. Furthermore, for ease of exposition, write $(x_k:x_{k+1})'  \coloneqq (x_k:x_{k+1}) \setminus \pa(C_i)$, i.e., the set $(x_k:x_{k+1})'$ excludes the parents of the c-component $C_i$. We have 
\begin{align*}
		&\left| \sum_{\zd \in Z(A)}\cH \right| \\&= \Biggl|  \sum_{ C_i \in \cC (A)}\sum_{\substack{\zd_{\pa(C_i)} \in \\  Z_{\pa(C_i)}(A)}} \sum_{\substack{\zd_{C_i} \in \\  Z_{C_i}(A)}} \Delta \cP{\zd_{C_i}}{\zd_{\pa(C_i)}}{A} 
    \sum_{\substack{\zd_{[1 : x_1)' } \in \\ Z_{[1 : x_1 )'}(A)}} \prod_{ j_1 \in \cV_{[1:x_1)}(A)} \cP{\zd_{j_1}}{\zd_{\qa(j_1)}}{A} \\ 
    &\sum_{\substack{\zd_{(x_1: x_2 )'} \in \\ Z_{(x_1: x_2)'}(A)}} \prod_{ j_2  \in \cV_{(x_1:x_2)}(A)}  \cP{\zd_{j_2}}{\zd_{\qa(j_2)}}{A} \ldots
    \sum_{\substack{\zd_{(x_k: x_{k+1} )'} \in \\ Z_{(x_k: x_{k+1})'}(A)}} \prod_{ j_k  \in \cV_{(x_k:x_{k+1})}(A)}  \cP{\zd_{j_k}}{\zd_{\qa(j_k)}}{A} \ldots \Biggr| \\
    &=\left|\sum_{ C_i \in \cC (A)}\sum_{\substack{\zd_{\pa(C_i)} \in \\  Z_{\pa(C_i)}(A)}} \sum_{\substack{\zd_{C_i} \in \\  Z_{C_i}(A)}} \Delta \cP{\zd_{C_i}}{\zd_{\pa(C_i)}}{A} c_i\left( \zd_{C_i} , \zd_{\pa(C_i)} \right) \right|.
\end{align*}
Here, 
 \begin{align*}
 c_i&(\zd_{C_i}, \zd_{\pa (C_i)}) \coloneqq \\ & \sum_{\substack{\zd_{[1 : x_1)' } \in \\ Z_{[1 : x_1 )'}(A)}  } \prod_{ j_1 \in \cV_{[1:x_1)}(A)} \cP{\zd_{j_1}}{\zd_{\qa(j_1)}}{A} 
    \sum_{\substack{\zd_{(x_1: x_2 )'} \in \\ Z_{(x_1: x_2)'}(A)}} \prod_{ j_2  \in \cV_{(x_1:x_2)}(A)}  \cP{\zd_{j_2}}{\zd_{\qa(j_2)}}{A} \cdots \\
    &\sum_{\substack{\zd_{(x_k: x_{k+1} )'} \in \\ Z_{(x_k: x_{k+1})'}(A) }} \prod_{ j_k  \in \cV_{(x_k:x_{k+1})}(A)}  \cP{\zd_{j_k}}{\zd_{\qa(j_k)}}{A} \cdots \sum_{\substack{\zd_{(x_m: N ]'} \in \\ Z_{(x_m: N]' }(A) }} \prod_{ j_m  \in \cV_{(x_m:N]}(A)}  \cP{\zd_{j_m}}{\zd_{\qa(j_m)}}{A}.
\end{align*}

We show in Claim \ref{lem:bound_c_smbn} (proved below) that $c_i(\zd_{C_i}, \zd_{\pa (C_i)}) \leq 1$. Therefore, 
\begin{align}
\left| \sum_{\zd \in Z(A)}\cH \right| 
  &\leq  \left| \sum_{ C_i \in \cC (A) } 
  \sum_{\substack{\zd_{\pa(C_i)} \in \\  Z_{\pa(C_i)}(A)}} \sum_{\substack{\zd_{C_i} \in \\  Z_{C_i}(A)}} \Delta \cP{\zd_{C_i}}{\zd_{\pa(C_i)}}{A} \right|. \label{ineq:A} 
\end{align}

Since $\cI$ is a covering intervention set, for each pair $(C_i,\zd_{\pa(C_i)})$, there exists an intervention $I \in \cI $ such that intervening $\Do(I)$ provides a sample for the distribution $\prob [ \cV_{C_i} \mid \Do(\pa(C_i) = \zd_{\pa(C_i)}) ]$. Hence, we have at least $\frac{T}{|\cI|}$ samples for the distribution $\prob [ \cV_{C_i} \mid \Do(\pa(C_i) = \zd_{\pa(C_i)}) ]$. We represent the $s^{\text{th}}$ sample for the distribution by the indicator random variable $Y_{s}(\zd_{C_i}, \zd_{\pa(C_i)})$, which takes value one when $ \cV_{C_i} = \zd_{C_i} $ and zero otherwise. Hence, inequality (\ref{ineq:A}) reduces to 
\begin{align*}
	\left| \sum_{\zd \in Z(A)}\cH \right|  \leq \left| \sum_{ C_i \in \cC (A) } \sum_{\substack{\zd_{\pa(C_i)} \in \\  Z_{\pa(C_i)}(A)}} \frac{|\cI|}{T} \sum_{s=1}^{T/|\cI|} \left( \sum_{ \zd_{C_i} \in Z_{C_i}(A)} Y_s(\zd_{C_i},\zd_{\pa(C_i)}) - \cP{\zd_{C_i}}{\zd_{\pa(C_i)}}{A}  \right)  \right|.
\end{align*}

In the above expression, the term $\sum_{ \zd_{C_i} \in Z_{C_i}(A)} Y_s(\zd_{C_i},\zd_{\pa(C_i)}) - \cP{\zd_{C_i}}{\zd_{\pa(C_i)}}{A}$ is an independent random quantity bounded between $[-2^{|C_i|} ,2^{|C_i|}] $.
We now apply Hoeffding's inequality (Lemma \ref{lem:hoeff}):
\begin{align*}
	\prob_{ \Do(A)} \left[ \left| \sum_{\zd \in Z(A)}\cH \right| \geq \varepsilon \right] &\leq 2 \exp\left( \frac{ - T \varepsilon^2}{ 2 |\cI| \sum_{ C_i \in \cC (A) } \sum_{\zd_{\pa (C_i)} \in Z_{\pa(C_i)}(A)} 2^{2|C_i|}} \right) \\  \label{ineq:heof} 
   &\leq 2 \exp\left( \frac{ - T \varepsilon^2}{ 2 |\cI| \sum_{ C_i \in \cC (A) } \sum_{\zd_{\pa (C_i)} \in Z_{\pa(C_i)}(A)} 2^{2\ell}} \right) \\
   &\leq 2 \exp\left( \frac{ - T \varepsilon^2}{ 2 |\cI| N 2^{\ell d} \cdot 2^{2 \ell}} \right).
\end{align*}
Setting $\varepsilon = \sqrt{\frac{2 N \ |\cI| \ 2^{\ell d} \ 4^{\ell} \log{\left(|\cA| \cdot T \right)}}{T}}$ and taking a union bound over all $A \in \cA$ gives us the required probability bound. This completes the proof of the lemma.
\end{proof}

We next establish the claim used in the proof of Lemma \ref{lem:h-bound-smbn}.
	
\begin{claim}\label{lem:bound_c_smbn}
$$c_i(\zd_{C_i}, \zd_{\pa (C_i)}) \leq 1.$$
\end{claim}
\begin{proof}
    It holds that 
    \begin{align*}
 c_i&(\zd_{C_i}, \zd_{\pa (C_i)}) =\\ & \sum_{\substack{\zd_{[1 : x_1)' } \in \\ Z_{[1 : x_1 )'}(A)}  } \prod_{ j_1 \in \cV_{[1:x_1)}(A)} \cP{\zd_{j_1}}{\zd_{\qa(j_1)}}{A} 
    \sum_{\substack{\zd_{(x_1: x_2 )'} \in \\ Z_{(x_1: x_2)'}(A)}} \prod_{ j_2  \in \cV_{(x_1:x_2)}(A)}  \cP{\zd_{j_2}}{\zd_{\qa(j_2)}}{A} \cdots \\
    &\sum_{\substack{\zd_{(x_k: x_{k+1} )'} \in \\ Z_{(x_k: x_{k+1})'}(A) }} \prod_{ j_k  \in \cV_{(x_k:x_{k+1})}(A)}  \cP{\zd_{j_k}}{\zd_{\qa(j_k)}}{A} \cdots \sum_{\substack{\zd_{(x_m: N ]'} \in \\ Z_{(x_m: N]' }(A) }} \prod_{ j_m  \in \cV_{(x_m:N]}(A)}  \cP{\zd_{j_m}}{\zd_{\qa(j_m)}}{A}.
    \end{align*}
   We can upper bound each term in the above expression as shown below,
    \begin{align*}
        &\sum_{\substack{\zd_{(x_k: x_{k+1} )'} \in \\ Z_{(x_k: x_{k+1})'}(A) }} \prod_{ j_k  \in \cV_{(x_k:x_{k+1})}(A)}  \cP{\zd_{j_k}}{\zd_{\qa(j_k)}}{A} \\
        & =\sum_{\substack{\zd_{(x_k: x_{k+1} )'} \in \\ Z_{(x_k: x_{k+1})'}(A) }} \prob_{\Do(A)} \left[ \cV_{(x_k:x_{k+1})}(A) = \zd_{(x_k: x_{k+1} )} | \pa'(x_k:x_{k+1})\right]\\
        &= \prob_{\Do(A)} \left[ \cV_{(x_k:x_{k+1}) \cap \pa(C_i)}(A) = \zd_{(x_k: x_{k+1} )\cap \pa(C_i)} | \pa'(x_k:x_{k+1})\right]\\
        &\leq 1
    \end{align*}
    Substituting this in the expression for $c_i(\zd_{C_i}, \zd_{\pa (C_i)})$, we get the required bound. 
\end{proof}

Next, we restate and prove Theorem \ref{thm: main theorem SMBN}.

\begin{theorem}{}

	Let $\cG$ be any given causal graph over $N$ vertices and with c-components of size at most $\ell$. Also, let the in-degree of the vertices in $\cG$ be at most $d$. Then, for any (moderately large) time horizon $T$ and given any covering intervention set $\cI$ of $\cG$, Algorithm \ref{algo: main algo fully obs setting}  achieves simple regret 
 \begin{align*}
     \Rg_T = O \left( \sqrt{ \frac{N \ 2^d \ 4^\ell  \ |\cI|  \log{(|\cA | T)} }{T} } \right).
 \end{align*} 
 Hence, using Lemma \ref{lem:covers SMBN}, we obtain the following bound on the simple regret 
	\begin{align*}
		\Rg_T = O \left( \sqrt{ \frac{N \  (3  d \ 8^d )^\ell  \  \log{|\cA | } }{T}  }\log{T} \right).
	\end{align*}
\end{theorem}
\begin{proof}%[Proof of Theorem \ref{thm: main theorem SMBN}]
Lemma \ref{lem:covers SMBN} implies that, with probability at least $\left(1 - \frac{1}{T}\right)$, the set $\cI$ is indeed a covering intervention set for the graph $\cG$. We combine this guarantee with Lemmas \ref{lem:l-bound-smbn} and \ref{lem:h-bound-smbn}. In particular, with probability at least $\left(1- \frac{5}{T}\right)$, we have, for all $A \in \cA$:
\begin{align*}
        \left|\mu(A) - \muh(A)\right|   &= \left|\sum_{\zd \in Z(A)} \left( \mathcal{H}_\zd + \mathcal{L}_\zd \right) \right|\\
        & \leq \sqrt{\frac{N  ~ 4^\ell  ~ 2^d ~ |\cI|  \log{(|\cA| T)}}{T}} + \frac{4^\ell  N^2 |\cI| (\ell d+ \ell + \log{(NT)})}{T}\\
        &\leq 2\sqrt{\frac{N ~ 4^\ell  ~ 2^d ~ |\cI| \log(|\cA| T)}{T}} \tag{For $T \gtrsim  N^3$}
\end{align*}
Let $A_T$ be the output after $T$ rounds of interventions, i.e.,  $A_T = \argmax_{A\in \cA} \ \muh(A)$. In addition, let $A^* = \argmax_{A\in \cA} \ \mu(A)$ be the optimal intervention. Hence, with probability at least $1-\frac{5}{T}$ we have, 
    \begin{align}
        \mu(A^*)- \mu(A_T) \leq 4  \sqrt{\frac{N ~ 4^\ell  ~ 2^d ~ |\cI| \log(|\cA| T)}{T}}
    \end{align}
This gives the desired upper bound on the simple regret, $\Rg_T$:
\begin{align*}
    \Rg_T = \E \left[ \mu(A^*)- \mu(A_T) \right] &\leq  \left( 4  \sqrt{\frac{N ~ 4^\ell  ~ 2^d ~ |\cI| \log(|\cA| T)}{T}}\right) \left(1-\frac{5}{T}\right) + \frac{5}{T} \\
    &\leq 5  \sqrt{\frac{N ~ 4^\ell  ~ 2^d ~ |\cI| \log(|\cA| T)}{T}}.
\end{align*}
For SMBNs, since the size of the covering intervention set  satisfies 
$|\cI| = (3d)^\ell \cdot  2^{\ell d} (\log N +2\ell d + \log T )$ (see Lemma \ref{lem:covers SMBN}), we also have the following explicit form of the simple regret bound
\begin{align*}
		\Rg_T = O \left( \sqrt{ \frac{N \  (3  d \ 8^d )^\ell  \  \log{|\cA | } }{T}  }\log{T} \right).
	\end{align*}
The theorem stands proved. 
\end{proof}




\bibliography{sawarni_406}

\end{document}
