% \documentclass{uai2023} % for initial submission
 \documentclass[accepted]{uai2023} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like

%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2023} % ptmx math instead of Computer
% Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2023} % newtx fonts (improves upon
 % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

% -----------------------

\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:	
% \usepackage{natbib} % has a nice set of citation styles and commands	
% \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
% %%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{tikz}
\usetikzlibrary{external}
% The lastpage warning about the aux file can be silenced by
% switching to make externalization, but this does not solve
% the issue. You can test it by uncommenting the next line:
%\tikzset{external/mode=list and make}

\tikzexternalize
% \usepackage{cleveref}
% This implementation of the current answer from Ulrike Fischer
% does not seem to patch anything, as indicated by the tracingpatches output:
\usepackage{etoolbox}
% \tracingpatches asks etoolbox to log the outcome of every patch attempt.
\tracingpatches
% The patches below are attempted only while TikZ is externalizing a picture
% (first branch of \tikzifexternalizing); the second branch is empty.
% NOTE(review): etoolbox documents \patchcmd as
%   \patchcmd{<command>}{<search>}{<replace>}{<success>}{<failure>};
% each call below appears to supply one argument fewer, so the token that
% follows the call would be taken as <failure> -- confirm against the
% original snippet. \fail is not defined anywhere in the visible preamble.
\makeatletter
\tikzifexternalizing{%
\patchcmd\AtEndDocument{\lastpage@fileswtest{\thepage}{\lastpage@lastpage}}{}{\fail}
    \patchcmd\AtEndDocument{\lastpage@fileswtestHy}{}{\fail}}{}
\makeatother
%%%%%%%%%%%%%%%%%%%%%%%%%%%

% ------------Ours starts---------------
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{mathtools}
\usepackage{amsthm}
\usepackage{subfigure}


\usepackage{float}



% Theorem-like environments (amsthm). `theorem' is numbered within sections;
% every other environment below shares the `theorem' counter, so all of them
% are numbered in one sequence.
% amsthm `plain' style: theorem, proposition, lemma, corollary.
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{corollary}[theorem]{Corollary}
% amsthm `definition' style: definition, assumption.
\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{assumption}[theorem]{Assumption}
% amsthm `remark' style: remark.
\theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}
% Independence symbols (math mode only): two \perp glyphs pulled together
% with negative thin spaces; \notindep puts \not in front of the same symbol.
\newcommand*{\indep}{\perp\!\!\!\perp}
\newcommand*{\notindep}{\not\perp\!\!\!\perp}
% Amount of extra indentation used by \bindent.
\newlength\myindent
\setlength\myindent{2em}
% \bindent opens a group, sets \itemindent to \myindent and enlarges
% \algorithmicindent by \myindent; \eindent closes the group, which restores
% both lengths. \algorithmicindent is presumably provided by the algorithmic
% package loaded later in the preamble -- confirm.
% Every line inside the definition ends with `%' so that the macro body
% contains no space tokens and cannot add stray spaces where it is used.
\newcommand\bindent{%
  \begingroup
  \setlength{\itemindent}{\myindent}%
  \addtolength{\algorithmicindent}{\myindent}%
}
\newcommand\eindent{\endgroup}

% Text-mode shorthands for "<#1>-rooted C forest" (singular / plural);
% the argument is typeset in bold math.
% NOTE(review): both bodies end with a space before the closing brace, so a
% space is always emitted after the phrase, including before punctuation --
% confirm this is intended. They cannot be used inside math mode because they
% open their own $...$.
\newcommand{\rcforest}[1]{$\mathbf{#1}$-rooted C forest }
\newcommand{\rcforests}[1]{$\mathbf{#1}$-rooted C forests }

% Bold (\mathbf) shorthands for the letters X, x, Y, y, Z, z and R (math mode).
\newcommand*{\X}{\mathbf{X}}
\newcommand*{\x}{\mathbf{x}}
\newcommand*{\Y}{\mathbf{Y}}
\newcommand*{\y}{\mathbf{y}}
\newcommand*{\Z}{\mathbf{Z}}
\newcommand*{\z}{\mathbf{z}}
\newcommand*{\R}{\mathbf{R}}

% arg max / arg min operators; the starred form places limits underneath
% in display style.
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}

% G with an overlined bold X as subscript (math mode).
\newcommand{\GbarX}{G_{\overline{\mathbf{X}}}}
% Color helpers: typeset the argument in red / blue inside a local group.
\newcommand{\red}[1]{{\color{red}#1}}
\newcommand{\blue}[1]{{\color{blue}#1}}
% Label of the do-operator. \text alone inherits the surrounding text font,
% so the label came out italic inside italic theorem bodies; \normalfont
% keeps it upright everywhere while still working in text and math mode.
\newcommand{\DO}{\text{\normalfont do}}
% \mr expands to \textcolor{orange}; the text to color is the argument that
% follows \mr at the point of use.
\newcommand{\mr}{\textcolor{orange}}


\usepackage{algorithm2e}
% \usepackage{algorithmic}
\usepackage[noend]{algorithmic}
 \usepackage{caption}
% \usepackage{algorithm}%
% \usepackage[section]{algorithm}


%Fig. \ref{fig:pitt} and Eq \ref{eq:example} in the main paper can be cross referenced using \texttt{xr}. 







% ----------------------
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams


% for cross referencing the main text
% PLEASE ONLY USE xr IN THE SUPPLEMENTARY MATERIAL. 
% In the main paper, hard code any cross-reference to the supplementary material. 
% \usepackage{xr} 
% \externaldocument{main}





%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
% \swap[<sep>]{<a>}{<b>} typesets <b><sep><a>; <sep> defaults to "-".
\newcommand{\swap}[3][-]{#3#1#2} % just an example


%%---------------- from overleaf
\usepackage{xr-hyper}
\usepackage{hyperref}
\makeatletter
% \addFileDependency{<file>}: announces <file> (name including extension) as
% a dependency of this document. It writes "(<file>)" with \typeout, adds
% <file> to the file list via \@addtofilelist, and writes "No file <file>."
% when the file does not exist. The inline comments below describe how
% latexmk is expected to pick these messages up.
\newcommand*{\addFileDependency}[1]{% argument=file name and extension
\typeout{(#1)}% latexmk will find this if $recorder=0
% however, in that case, it will ignore #1 if it is a .aux or 
% .pdf file etc and it exists! If it doesn't exist, it will appear 
% in the list of dependents regardless)
%
% Write the following if you want it to appear in \listfiles 
% --- although not really necessary and latexmk doesn't use this
%
\@addtofilelist{#1}
%
% latexmk will find this message if #1 doesn't exist (yet)
\IfFileExists{#1}{}{\typeout{No file #1.}}
}\makeatother

% \myexternaldocument{<name>}: calls \externaldocument{<name>} so that labels
% of <name> can be referenced from this file, and registers <name>.tex and
% <name>.aux as file dependencies through \addFileDependency.
\newcommand*{\myexternaldocument}[1]{%
\externaldocument{#1}%
\addFileDependency{#1.tex}%
\addFileDependency{#1.aux}%
}
% Import the labels of lee_773 (presumably the main paper -- confirm).
\myexternaldocument{lee_773}



% \title{Title in Title Case\\(Supplementary Material)}
\title{Finding Invariant Predictors Efficiently via Causal Structure
\\(Supplementary Material)}

% The standard author block has changed for UAI 2023 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author{\href{mailto:<lee4094@purdue.edu>?Subject=Questions about ID4IP paper}{Kenneth Lee}{}}
\author{Md Musfiqur Rahman}
\author{Murat Kocaoglu}
% Add affiliations after the authors
\affil{%
    School of Electrical and Computer Engineering\\
    Purdue University\\
    West Lafayette, Indiana, USA
}
  
  \begin{document}
  
\onecolumn %% Remove this line if a two-column layout is desired for the supplement
\maketitle

% \section{Examples on the workings of \textbf{ID4IP}}
% In this section, we describe some possible scenarios where one can suspect that our algorithm might fail. We explain how our algorithm deals with each case and illustrate its correctness.
% \begin{figure}[h]
%   \centering
%   \includegraphics[width=0.5\linewidth]{Figures/corner-cases.pdf}
%   \caption{A causal graph }\label{fig:corner-case}
% \end{figure}
% \par \textbf{Graph 1:}
% We return FAIL,
% it seems $P(Y|W,V))$ is valid invariant predictor
% isn't $S$ d-connected with $Y$ when you condition on $W$?
% $S\rightarrow Z \leftarrow Y$ gets d-connected after conditioning on $W$
% if the children do not work, why the descendants will not work as well.

% \par \textbf{Graph 2:}
% Should we consider nodes that are not bi-directed neighbors but are connected through bi-directed paths?

% \par \textbf{Graph 3:}
% We are considering bidirected neighbors after adding one-by-one valid predictors. Is there some other subset of nodes that might be a valid invariant predictor?


\appendix
\section{Proofs of Lemmas and Theorems}\label{appex:proofs}
%
%
%
\subsection{Proof of Theorem \ref{thm:conditional-hedge-iff-non-id}} 
\begin{theorem}
     There exists a hedge for $P(\mathbf{Y}|\DO(\mathbf{X}),\mathbf{W})$ according to the generalized hedge condition if and only if $P(\mathbf{Y}|\DO(\mathbf{X}),\mathbf{W})$ is unidentifiable in $G$.
\end{theorem}
%
\begin{proof}
     % NOTE(review): the second theorem cited below is the theorem being proved; it presumably should cite the hedge criterion for unconditional effects from the cited reference -- confirm.
     Let $\mathbf{Z} \subseteq \mathbf{W}$ be the maximal set such that $P(\mathbf{Y}|do(\mathbf{X}), \mathbf{W}) = P(\mathbf{Y}|do(\mathbf{X}, \mathbf{Z}), \mathbf{W}\setminus \mathbf{Z})$. By Theorem 21 in \cite{shpitser2008complete}, $P(\mathbf{Y}|do(\mathbf{X}), \mathbf{W})$ is identifiable in $G$ if and only if $P(\mathbf{Y}, \mathbf{W} \setminus \mathbf{Z}|do(\mathbf{X}, \mathbf{Z}))$ is identifiable in $G$. By Theorem \ref{thm:conditional-hedge-iff-non-id}, there exists a hedge for $P(\mathbf{Y}, \mathbf{W}\setminus \mathbf{Z}|do(\mathbf{X}, \mathbf{Z}))$ if and only if $P(\mathbf{Y}, \mathbf{W}\setminus \mathbf{Z}|do(\mathbf{X}, \mathbf{Z}))$ is unidentifiable in $G$. Therefore, we can apply the definition of hedge for $P(\mathbf{Y}, \mathbf{W} \setminus \mathbf{Z}| do(\mathbf{X}, \mathbf{Z}))$ to formulate Definition \ref{def:generalized-hedge} such that there exists a hedge for $P(\mathbf{Y}|do(\mathbf{X}),\mathbf{W})$ according to Definition \ref{def:generalized-hedge} if and only if $P(\mathbf{Y}|do(\mathbf{X}),\mathbf{W})$ is unidentifiable in $G$.
\end{proof}


\subsection{Proof of Lemma \ref{lem:MAC=Y-rooted-Ctree}}
\begin{lemma}
\label{appex:lem:MAC=Y-rooted-Ctree}
Let $\mathbf{Y} = \{Y\}$. The output of \textbf{Find-MACS-on-set$(G,\mathbf{Y})$} is the MACS of $Y$. The MACS of $Y$ is a $Y$-rooted C-tree.
\end{lemma}
\begin{proof}
    Step~\textbf{3} of Algorithm~\ref{alg:find-macs-on-set} in Section~\ref{sec-alg:find-macs-on-set} first gets an induced subgraph of $G$ over $An(Y)$. This implies that every variable in the resulting graph has a directed path to $Y$ and that $Y$ does not have any child in it. Then, in step 4, the algorithm recursively calls \textbf{Find-MACS-on-set} on $G_{An(Y)}$. At this point, every variable in $G_{An(Y)}$ must be in $An(Y)$. The execution of the algorithm will then move to step 6 to get an induced subgraph of $G_{An(Y)}$ over $C(Y)$.
    
    If there is no bidirected path from any variable to $Y$, then the algorithm will return $Y$, which is a $Y$-rooted C-tree. Suppose otherwise that there is a bidirected path from some variables $\mathbf{M}$ to $Y$. Then we have two cases. Case (i): the variables $\mathbf{M}$ have bidirected paths to $Y$, but $De(\mathbf{M}) \cap An(Y) \setminus \mathbf{M}$ are not in $C(Y)$, so that $G_{C(Y)}$ is not a $Y$-rooted C-tree. Case (ii): the variables $\mathbf{M}$ have bidirected paths to $Y$ and $De(\mathbf{M}) \cap An(Y) \setminus \mathbf{M}$ are in $C(Y)$, so that $G_{C(Y)}$ is a $Y$-rooted C-tree. For case (i), the algorithm recursively calls itself at step 7. Since $De(\mathbf{M}) \cap An(Y) \setminus \mathbf{M}$ are not in $C(Y)$, the variables in $\mathbf{M}$ are not in $An(Y)$ in $G_{C(Y)}$. The algorithm will then return $Y$ as $G$. For case (ii), the result trivially follows.
\end{proof}


% \subsection{Proof of Lemma \ref{lem:y-rooted-ctree-superset}}
% \begin{proof}
%   We will show that there exists a hedge for $p(y|do(\mathbf{j}), \mathbf{w})$ according to the definition \ref{def:generalized-hedge}. By Theorem 20 in \cite{shpitser2008complete}, there exists a unique maximal set $\mathbf{Z}$ such that $p(y|do(\mathbf{j}), \mathbf{w}) = p(y|do(\mathbf{j, z}), \mathbf{w}\text{\textbackslash}\mathbf{z})$. Since $K \in F$\textbackslash $\{Y\}\text{ and } K \subseteq \mathbf{J}$, we have that $F \cap (\mathbf{J} \cup \mathbf{Z}) \ne \emptyset$. Next, we let $F' = \{Y\}$ such that $F' \cap (\mathbf{J} \cup \mathbf{Z}) = \emptyset$. By definition \ref{def:generalized-hedge}, there exists a hedge for $p(y|do(\mathbf{j}),\mathbf{w})$. By Theorem \ref{thm:conditional-hedge-iff-non-id}, $p(y|do(\mathbf{j}),\mathbf{w})$ is unidentifiable in $G$.
% \end{proof}
%
%
\subsection{Proof of Theorem \ref{thm:impossibility-of-invariant-predictor}}
\begin{theorem}\label{appex:thm:impossibility-of-invariant-predictor}
     For some $W \in Ch(S)$, if there exists a hedge for $P(Y|do(W))$, then for any $\mathbf{H}, \mathbf{J} \subseteq \mathbf{V}$, we have $(Y\not \indep S |\mathbf{J})_{G_{\overline{\mathbf{H}}}}$ or $P(Y|do(\mathbf{H}), \mathbf{J})$ is unidentifiable in $G$.
\end{theorem}
\begin{proof}
Suppose there exists a $Y$-rooted C-tree $F$ in $G$ such that there exists a hedge for $P(Y|do(W))$ for some $W\in Ch(S)$. We will show that for any $\mathbf{H}, \mathbf{J} \subseteq \mathbf{V}$, we have $(Y\not \indep S |\mathbf{J})_{G_{\overline{\mathbf{H}}}}$ or $P(Y|do(\mathbf{H}), \mathbf{J})$ is unidentifiable in $G$.

First, suppose $\mathbf{H}$ does not contain any member of the C-tree $F$. We will show that there exists an inducing path from $S$ to $Y$ such that $(Y \not \indep S| \mathbf{J})_{G_{\overline{\mathbf{H}}}}$. Since every member in $F \setminus Y$ must have only one child and $Y$ does not have any children in $G_{F}$, every member in $F$ is in $An(Y)$. By definition of hedge, $W \in F$.
%-re-write?
As $F$ is a C-component by the definition of C-tree and $W \in Ch(S)$ and $W \in F$, for a member $S_{W}$ of $S$ that is $Pa(W)$, we can have a path from $S_{W}$ to $Y$ through the directed path from $W$ to $Y$ in $F$ along which every variable on that path is a collider. 
%
Therefore, there exists an inducing path from $S_W$ to $Y$. By Theorem~4.2 in \cite{richardson2002ancestral}, $S_{W}$ cannot be d-separated from $Y$ in $G$ if and only if there exists an inducing path from $S_{W}$ to $Y$ in $G$. Therefore, $(Y \not \indep S | \mathbf{J})_{G_{\overline{\mathbf{H}}}}$ for any $\mathbf{J} \subseteq \mathbf{V}$. Next, suppose $\mathbf{H}$ contains some members of $F$. By Lemma~\ref{lem:y-rooted-ctree-superset},
% \ref{thm:impossibility-of-invariant-predictor},
$P(Y|do(\mathbf{H}), \mathbf{J})$ is unidentifiable in $G$ for any $\mathbf{J} \subseteq \mathbf{V}$.
\end{proof}


% \begin{proof}
%     The step 3 of the algorithm first get an induced subgraph of $G$ over $An(\mathbf{Y})$. That implies every variable in the resulting graph has a directed path to some $Y\in \mathbf{Y}$ and $\textbf{Y}$ does not have any child in $G_{An(\mathbf{Y})}$. In step 4, it recursively calls on \textbf{Find-MACS-on-set} with $G_{An(\mathbf{Y})}$. Since every variable in $G_{An(\mathbf{Y})}$ at this point, the execution will move to step 5 of the algorithm. If there exists a member in $G_{An(\mathbf{Y})}$ that does not belong to $C(Y)$ for any $Y \in \mathbf{Y}$, then the algorithm will move on to step 6 of the algorithm to get an induced subgraph of $G_{An(\mathbf{Y})}$ over $C(Y)$, which includes every variable in $\mathbf{Y}$ as $\mathbf{Y}$ is an ancestral confounded component as required by the input of the algorithm. By definition of an ancestral confounded component, $\mathbf{Y}$ is either a singleton e.g. $\mathbf{Y} = \{Y\}$ or a union of two distinct AC-components $\mathbf{Y}_{1}$, $\mathbf{Y}_{2}$ which have ancestral confounded sets $W_{1}$, $W_{2}$ respectively that are connected by a bidirected arc. Suppose $\mathbf{Y}= \{Y\}$, the resulting graph will be $Y$-rooted C-tree as the algorithm returns a graph where every variable is in $An(Y)$ and in the $C(Y)$. By definition of a $Y$-rooted C-tree, it is also a $Y$-rooted C-forest. Suppose otherwise that $\mathbf{Y}$ is a union of two distinct AC-components $\mathbf{Y}_{1}$, $\mathbf{Y}_{2}$ which have $W_{1}$, $W_{2}$. Let $W= W_{1} \cup W_{2}$. Since there is a bidirected arc from $W_1$ to $W_2$, for every variable $X \in W, W=C(X)_{G_{W}}$ and by construction $W=An(\mathbf{Y})_{G_{W}}$. Thus, $W$ is an ancestral confounded set for $\mathbf{Y}$. Since every variable in the resulting graph of \textbf{Find-MACS-on-set} is in $An(\mathbf{Y})$ and in $C(Y)$ for some $Y \in \mathbf{Y}$, only 
% \end{proof}


% \subsection{Proof of Theorem \ref{thm:valid-stable-estimator-v1}}
% \begin{proof} Let $T_Y$ be the MACS of $Y$ in $G$ and $\mathbf{D} = Pa(T_Y)  \setminus T_Y$. Recall that by the conditions of the theorem, $S$ is not a parent of $T_Y$. For $\mathbf{W} \subseteq T_{Y} \setminus Y$, since every variable in $\mathbf{W}$ has a back-door path to $Y$ in $G_{\overline{\mathbf{D}}}$ by lemma \ref{lem:MAC=Y-rooted-Ctree} and the definition of $\mathbf{D}$ , the maximal set $\mathbf{Z}$ such that $p(y|do(\mathbf{D}), \mathbf{W})$ = $p(y|do(\mathbf{D}, \mathbf{Z}), \mathbf{W} \setminus \mathbf{Z})$ is an empty set by rule 2 of do-calculus. Since $\mathbf{Z} = \emptyset$, there only exists a hedge for $p(y|do( \mathbf{D}), \mathbf{W})$ if there exists two $\mathbf{R}$-rooted C-forests $F, F'$ such that $F \cap \mathbf{D} \ne \emptyset$ and $F' \cap \mathbf{D} = \emptyset$ for some $\mathbf{R} \subset An(Y \cup \mathbf{W})_{G_{\overline{\mathbf{D}}}}$ by definition \ref{def:generalized-hedge}. For the sake of contradiction, assume there exists such $F$ and $F'$. By definition, $\mathbf{D} \not \in \mathbf{R}$; otherwise $F' \cap \mathbf{D} \ne \emptyset$, which is a contradiction to a hedge condition. Next, $\mathbf{D} \not \in \mathbf{R}$ and $F\cap \mathbf{D} \ne \emptyset$ imply that there exists some members in $\mathbf{D}$ that must have a child to some other variables in $F$ and is in the same C-component with $F$ by the definition of hedge. Since $\mathbf{D} \not \in \mathbf{R}$ and $\mathbf{R} \subset An(Y \cup \mathbf{W})_{G_{\overline{\mathbf{D}}}}$, we have that $\mathbf{R} \subseteq T_{Y}$, which is a contradiction, $\mathbf{R} \not \subseteq T_Y$ since any member in $C(Y) \cup Pa(T_Y) \setminus T_Y$ cannot be have a directed path to $Y$ and be in the same C-component with any member of $T_Y$ by definition.  Therefore,  there is no hedge for $P(Y|do(\mathbf{D}), \mathbf{W})$. By Theorem \ref{thm:conditional-hedge-iff-non-id},  $P(Y|do(\mathbf{D}), \mathbf{W})$ is identifiable in $G$.

% By problem set up, $S$ has no incoming edges but only outgoing edges in $G$. Since $S$ is not a parent of $T_Y$,  intervening on $\mathbf{D}$ blocks every backdoor path to $Y$ from $S$. Also, we only condition on the members $De(Y)$ that are non-descendants of $\mathbf{S}$ in $G_{\overline{\mathbf{D}}}$, we have $(Y \indep S | \mathbf{W})_{G_{\overline{\mathbf{D}}}}$.
% \end{proof}

\subsection{Proof of Theorem \ref{thm: parents-of-T_y-existence}}

\begin{theorem} \label{appex: parents-of-T_y-existence} If the selection variable $S$ is a parent of MACS $T_{Y}$, then there is no graph surgery estimator in $G$.
\end{theorem}

\begin{proof}
    Let $S$ be a parent of $T_{Y}$, where $T_{Y}$ is the MACS of $Y$ in $G$. By Lemma \ref{lem:MAC=Y-rooted-Ctree}, $T_{Y}$ is a $Y$-rooted C-tree. Since $S \in Pa(T_{Y})$, there exists some $W \in Ch(S)$ in $T_Y$ such that there exists a hedge for $P(Y|do(W))$ by the following construction: let $F=T_{Y}$ and $F'=\{Y\}$ be two $\mathbf{R}$-rooted C-forests, where $\mathbf{R}=\{Y\}$. By Theorem \ref{thm:impossibility-of-invariant-predictor}, the result follows, implying that there is no graph surgery estimator in $G$.
\end{proof}


\subsection{Proof of Theorem \ref{thm:valid-stable-estimator-v2}}
\begin{theorem}\label{appex:valid-stable-estimator-v2} Let $T_Y$ be the MACS of $Y$ in $G$, $\mathcal{H}\coloneqq \{H: H\in Ch(Y)$, $Pa(T_H)\not\ni S\}$ and $T_J\coloneqq \bigcup_{H\in \mathbf{K}} T_H$ for any $\mathbf{K}\subseteq \mathcal{H}$, where $T_{H}$ is the MACS with respect to the variable $H$. Let $\mathbf{D}= Pa(T_{Y}\cup T_{J})$.  If $S$ is not a parent of $T_Y$, then $P(Y|do(\mathbf{D}), \mathbf{K}, \mathbf{W})$ is identifiable in $G$ and $(Y\indep S |\mathbf{W},\mathbf{K})_{G_{\overline{\mathbf{D}}}}$ for any $\mathbf{W} \subseteq (T_{Y}\cup T_J) \setminus (Y\cup \mathbf{K})$.
\end{theorem}
\begin{proof} 
Let $T_Y$ be the MACS of $Y$ in $G$. Let $\mathcal{H}\coloneqq \{H: H\in Ch(Y),
 $ $Pa(T_H)\not\ni S\}$, $T_J\coloneqq \bigcup_{H\in \mathbf{K}} T_H$ for some $\mathbf{K}\subseteq \mathcal{H}$. Let $\mathbf{D}= Pa(T_{Y}\cup T_{J})$.  Recall that by the conditions of the theorem,  
$S$ is not a parent of $T_{Y} $. 

Consider any $\mathbf{W}\subseteq (T_{Y}\cup T_{J})\setminus (Y\cup \mathbf{K})$.
We first prove the following claim.

\textbf{Claim: $P(Y|do(\mathbf{D}), \mathbf{K}, \mathbf{W}) \neq P(Y|do(\mathbf{D}, \mathbf{Z}), (\mathbf{K} \cup \mathbf{W})\setminus \mathbf{Z})$ for any $\mathbf{Z}\subset \mathbf{K}\cup \mathbf{W}, \mathbf{Z}\neq \emptyset$.}
\begin{proof}
For the sake of contradiction, suppose there exists a non-empty subset $\mathbf{Z}$ where the equality holds. Under the extended faithfulness assumption, this would only be true if do-calculus Rule 2 is applicable through the following graphical condition:
\begin{equation}
    (Y\indep \mathbf{Z}|(\mathbf{K}\cup \mathbf{W})\setminus \mathbf{Z})_{G_{\underline{\mathbf{Z}},\overline{\mathbf{D}}}}.
\end{equation}

Now observe that any node $U$ in $T_{Y}\cup T_J$ must belong to $T_{Y}$ or to a $T_H$ for some $H\in \mathbf{K}$. By definition of $T_H$ (or $T_{Y}$), there must be a bidirected path from $U$ to $H$ (or $Y$) that only goes through nodes in $T_H$ (or $T_{Y}$). By definition of $T_H$ (or $T_{Y}$), any node along this bidirected path must be an ancestor of $H$ (or $Y$). Therefore, there is a d-connecting path from $U$ to $H$ (or $Y$) that starts with an arrow into $U$. Note that further conditioning cannot break this path since the path only consists of colliders. Finally, for the case where $U$ belongs to a $T_{H}$, as the conditioning set contains $H$, we can concatenate this path with the edge $Y\rightarrow H$ to obtain a d-connecting path to $Y$, since $H$ is a collider along this concatenated path. This contradicts the d-separation statement above.
\end{proof}

For the sake of contradiction, suppose for some $\mathbf{W}\subseteq (T_{Y}\cup T_{J})\setminus (Y\cup \mathbf{K})$,  either $P(Y|do(\mathbf{D}), \mathbf{K}, \mathbf{W})$ is unidentifiable in $G$ or $(Y\not \indep S | \mathbf{W},\mathbf{K})_{G_{\overline{\mathbf{D}}}}$.


\emph{\textbf{Suppose $P(Y|do(\mathbf{D}), \mathbf{K}, \mathbf{W})$ is unidentifiable in $G$}}. 

By Theorem \ref{thm:conditional-hedge-iff-non-id}, there must exist a hedge for $P(Y|do(\mathbf{D}), \mathbf{K}, \mathbf{W})$. The claim above implies that the maximal set $\mathbf{Z}$ such that $P(Y|do(\mathbf{D}), \mathbf{K}, \mathbf{W}) = P(Y|do(\mathbf{D}, \mathbf{Z}), (\mathbf{K} \cup \mathbf{W})\setminus \mathbf{Z})$ is the empty set by rule 2 of do-calculus.


Since $\mathbf{Z} = \emptyset$, by Definition \ref{def:generalized-hedge}, there exists a hedge for $P(Y|do( \mathbf{D}), \mathbf{K}, \mathbf{W})$ only if there exist two $\mathbf{R}$-rooted C-forests $F, F'$ such that $F \cap \mathbf{D} \ne \emptyset$ and $F' \cap \mathbf{D} = \emptyset$ for some $\mathbf{R} \subset An(Y \cup \mathbf{K} \cup \mathbf{W})_{G_{\overline{\mathbf{D}}}}$. %by the characterization as described in . 
% \mr{there might be multiple disjoint c-forests rooted at each $Y \cup Ch(Y) \in H$.}
Consider any such $\mathbf{R}, F, F'$. Since $F'\cap \mathbf{D} =\emptyset$ and $F'$ is a $\mathbf{R}$-rooted C-forest, it must be the case that $\mathbf{D} \cap \mathbf{R}=\emptyset$. %for some $\mathbf{R} \subset An(Y \cup (\mathcal{H}\setminus Y) \cup  \mathbf{W})_{G_{\overline{\mathbf{D}}}}$. 
Since $\mathbf{D}\cap \mathbf{R} = \emptyset$ and intervening on $\mathbf{D}$ removes the incoming edges of $\mathbf{D}$, any member of $An(Y\cup \mathbf{K}\cup \mathbf{W})$ will be in $T_Y \cup T_J$  such that $\mathbf{R} \subseteq T_Y \cup T_J$. But this is a contradiction due to the following:  

Suppose $\mathbf{R} \subseteq T_{Y} \cup T_{J}$. Since $\mathbf{D} \cap \mathbf{R} = \emptyset$ and $F\cap \mathbf{D} \ne \emptyset$, that implies some members of $\mathbf{D}$ must have a directed path to some members in $\mathbf{R}$. If that is the case, then that member of $\mathbf{D}$ must also be in $T_{Y} \cup T_H$, where $T_{Y}$ is a $Y$-rooted C-tree and $T_{H}$ is an $H$-rooted C-tree for some $H\in Ch(Y)$, which is also in $T_J$. But this is a contradiction as any member of $\mathbf{D}$ cannot be in $T_{Y} \cup T_J$ by the definition of $\mathbf{D}$. Thus, $\mathbf{R} \not \subseteq (T_{Y} \cup T_{J})$.

Therefore,  there is no hedge for $P(Y|do(\mathbf{D}), \mathbf{K}, \mathbf{W})$. By Theorem \ref{thm:conditional-hedge-iff-non-id},  $P(Y|do(\mathbf{D}), \mathbf{K}, \mathbf{W})$ is identifiable in $G$.

\par

\emph{\textbf{Suppose} $(Y\not \indep S | \mathbf{W}, \mathbf{K})_{G_{\overline{\mathbf{D}}}}$}.




Next, we will show $(Y \indep S|\mathbf{W}, \mathbf{K})_{G_{\overline{\mathbf{D}}}}$ for any $\mathbf{W}\subseteq (T_{Y}\cup T_{J}) \setminus (Y\cup \mathbf{K})$. Given that $S$ is not a parent of $T_Y$, we will consider two cases separately: (i) $S \in An(Y)$; (ii) $S \notin An(Y)$.

\underline{\emph{$S \in An(Y)$}}: 
Suppose there exists a d-connecting path from $S$ to $Y$ by conditioning on $\mathbf{W}\cup \mathbf{K}$ and intervening on $\mathbf{D}$ for some $\mathbf{W} \subseteq (T_{Y} \cup T_{J}) \setminus (Y\cup \mathbf{K})$. First, there is no directed path from $S$ to $Y$ in $G_{\overline{\mathbf{D}}}$ since $S$ is not a parent of $T_{Y}$ by the conditions of the theorem and we intervene on $\mathbf{D}$, which is a superset of $Pa(T_{Y})$ and $Y\in T_{Y}$. For some $\mathbf{W} \subseteq (T_{Y} \cup T_{J}) \setminus (Y \cup \mathbf{K})$, conditioning on $\mathbf{W}$ must have opened paths with colliders that are in $An(\mathbf{W})$. However, since any member of $\mathbf{W}$ is in $T_{Y} \cup T_{J}$ and any incoming edges of $Pa(T_{Y}\cup T_{J})$ are removed in $G_{\overline{\mathbf{D}}}$ and $S$ is not a parent of any member of $T_J$ by definition of $T_J$, there cannot be a d-connecting path from $S$ to $Y$ in $G_{\overline{\mathbf{D}}}$. This is a contradiction. Therefore, we have $(Y \indep S|\mathbf{W}, \mathbf{K})_{G_{\overline{\mathbf{D}}}}$ for any $\mathbf{W} \subseteq (T_{Y} \cup T_{J}) \setminus (Y\cup \mathbf{K})$ when $S\in An(Y)$.

\underline{\emph{$S \not \in An(Y)$}}: we will show that (i) there is no d-connecting path from $S$ to $Y$ that ends with a member of $Ch(Y)$ and (ii) there is no d-connecting path from $S$ to $Y$ that ends with a bidirected neighbor of $Y$.

\emph{\textbf{Show that there is no d-connecting path from $S$ to $Y$ that ends with a member of $Ch(Y)$}}. Any child of $Y$ such that the parents of its MACS do not contain $S$ will be in $T_{J}=\bigcup_{H\in \mathbf{K}} T_H$ for some $\mathbf{K}\subseteq \mathcal{H}$, and we intervene on $Pa(T_{Y}\cup T_{J})$ so that there is no path from $S$ to any such child of $Y$. Suppose the d-connecting path from $S$ to $Y$ ends with some other members of $Ch(Y)$. For any such path, we must have conditioned on the descendants of those children of $Y$ to open the path from $S$ to $Y$ through some descendants of $Y$. However, for any child of $Y$ that is in $\mathbf{K}$, its parents are also intervened on, so that any path from $S$ to $Y$ through those children is blocked. Any other children that are not in $\mathbf{K}$ either form a collider or have descendants that form a collider, which blocks any other active paths from $S$ to $Y$. Therefore, there is no d-connecting path from $S$ to $Y$ that ends with a member of $Ch(Y)$.

\emph{\textbf{Show that there is no d-connecting path from $S$ to $Y$ that ends with a bidirected neighbor of $Y$}}. Suppose first that the bidirected neighbor of $Y$ is a child of $Y$. We have proved above that there is no d-connecting path from $S$ to $Y$ that ends with a member of $Ch(Y)$. Suppose instead that the bidirected neighbor of $Y$ is not a child of $Y$ and that there exist some d-connecting paths from $S$ to $Y$ that end with such bidirected neighbors of $Y$. In the case where no descendant of those bidirected neighbors of $Y$ is in $\mathbf{W}$, any path from $S$ to $Y$ along such a bidirected neighbor of $Y$ is blocked, as there exists a collider along any such path and that bidirected neighbor is not in $T_J$. Therefore, there is no d-connecting path from $S$ to $Y$. In the case where some descendants of those bidirected neighbors $\mathbf{X}$ of $Y$ are in $\mathbf{W}$, any parent of those descendants must be in $\mathbf{D}$, so all the incoming edges of any such parent are removed and any d-connecting path from $S$ to any of those parents is blocked. Any d-connecting path from $S$ to the members along the upstream path of those parents to $\mathbf{X}$ is blocked by a collider along the path formed by concatenating the path $Y\rightarrow X$ or $M\rightarrow L$ for some descendants $M, L$ of $\mathbf{X}$, where $M \ne L$.

Therefore, we have $(Y \indep S | \mathbf{W}, \mathbf{K})_{G_{\overline{\mathbf{D}}}}$. 
\end{proof}

\subsection{Proof of Theorem \ref{thm:find-macs-on-bidirected-nbr-Y-and-Y}}
\begin{theorem}
\label{appex:find-macs-on-bidirected-nbr-Y-and-Y}
Let $T_Y$ be the MACS of $Y$ in $G$, $T_H$ be the MACS of a child $H$ of $Y$ in $G$. Define $T_{\mathbf{C}}\coloneqq \bigcup_{H\in Ch(Y)} T_{H}$, $\mathcal{Z}\coloneqq \{Z: Z \in (C(Y) \cap Nbr(Y) )\setminus (T_{Y}\cup T_{\mathbf{C}})$ s.t. $Pa(T_{Y \cup Z})\not\ni S \}$ and $T_B\coloneqq \bigcup_{Z\in \mathbf{M}} T_{Y\cup Z}$ for any $\mathbf{M}\subseteq \mathcal{Z}$  where $T_{Y \cup Z}$ is the MACS for the set $(Y\cup Z)$. Let $\mathbf{D}= Pa(T_{B})$.  If $S$ is not a parent of $T_Y$, then $P(Y|do(\mathbf{D}), \mathbf{M}, \mathbf{W})$ is identifiable in $G$ and $(Y\indep S |\mathbf{W},\mathbf{M})_{G_{\overline{\mathbf{D}}}}$ for any $\mathbf{W} \subseteq (T_{B})\setminus (Y\cup \mathbf{M})$.
\end{theorem}
\begin{proof} 
Let $T_Y$ be the MACS of $Y$ in $G$, $T_H$ be the MACS of a child $H$ of $Y$ in $G$. Define $T_{\mathbf{C}}\coloneqq \bigcup_{H\in Ch(Y)} T_{H}$. Let $\mathcal{Z}\coloneqq \{Z: Z \in (C(Y) \cap Nbr(Y) )\setminus (T_{Y}\cup T_{\mathbf{C}})$ s.t. $Pa(T_{Y \cup Z})\not\ni S \}$, $T_{B}\coloneqq \bigcup_{Z\in \mathbf{M}} T_{Y \cup Z}$ for some $\mathbf{M}\subseteq \mathcal{Z}$. Let $\mathbf{D}= Pa(T_{B})$. Recall that by the conditions of the theorem,
$S$ is not a parent of $T_{Y}$.


Consider any $\mathbf{W}\subseteq (T_{B})\setminus (Y\cup \mathbf{M})$.
We first prove the following claim.

\textbf{Claim: $P(Y|do(\mathbf{D}), \mathbf{M}, \mathbf{W}) \neq P(Y|do(\mathbf{D}, \mathbf{Q}), (\mathbf{M} \cup \mathbf{W})\setminus \mathbf{Q})$ for any $\mathbf{Q}\subset \mathbf{M}\cup \mathbf{W}, \mathbf{Q}\neq \emptyset$.}
\begin{proof}
For the sake of contradiction, suppose there exists a non-empty subset $\mathbf{Q}$ where the equality holds. Under the extended faithfulness assumption, this would only be true if do-calculus Rule 2 is applicable through the following graphical condition:
\begin{equation}
    (Y\indep \mathbf{Q}|(\mathbf{M}\cup \mathbf{W})\setminus \mathbf{Q})_{G_{\underline{\mathbf{Q}},\overline{\mathbf{D}}}}.
\end{equation}

Now observe that any node $U$ in $T_B$ must belong to a $T_{Y \cup Z}$ for some $Z\in \mathbf{M}$. By definition of $T_{Y\cup Z}$, there must be a bidirected path from $U$ to $Z$ and $Y$ that only goes through nodes in $T_{Y\cup Z}$. By definition of $T_{Y\cup Z}$, any node along this bidirected path must be an ancestor of $Z$ or $Y$. Therefore, there is a d-connecting path from $U$ to $Z$ or $Y$ that starts with an arrow into $U$. Note that further conditioning cannot break this path since the path only consists of colliders. Finally, for the case where there is a d-connecting path from $U$ to $Z$, as the conditioning set contains $Z$, we can concatenate this path with the edge $Y\leftrightarrow Z$ to obtain a d-connecting path to $Y$, since $Z$ is a collider along this concatenated path. This contradicts the d-separation statement above.
\end{proof}

For the sake of contradiction, suppose for some $\mathbf{W}\subseteq (T_{B})\setminus (Y\cup \mathbf{M})$,  either $P(Y|do(\mathbf{D}), \mathbf{M}, \mathbf{W})$ is unidentifiable in $G$ or $(Y\not \indep S | \mathbf{W},\mathbf{M})_{G_{\overline{\mathbf{D}}}}$.


\emph{\textbf{Suppose $P(Y|do(\mathbf{D}), \mathbf{M}, \mathbf{W})$ is unidentifiable in $G$}}. 

By Theorem \ref{thm:conditional-hedge-iff-non-id}, there must exist a hedge for $P(Y|do(\mathbf{D}), \mathbf{M}, \mathbf{W})$. The claim above implies that the maximal set $\mathbf{Q}$ such that $P(Y|do(\mathbf{D}), \mathbf{M}, \mathbf{W}) = P(Y|do(\mathbf{D}, \mathbf{Q}), (\mathbf{M} \cup \mathbf{W})\setminus \mathbf{Q})$ is the empty set by rule 2 of do-calculus.


Since $\mathbf{Q} = \emptyset$, by Definition \ref{def:generalized-hedge}, there exists a hedge for $P(Y|do( \mathbf{D}), \mathbf{M}, \mathbf{W})$ only if there exist two $\mathbf{R}$-rooted C-forests $F, F'$ such that $F \cap \mathbf{D} \ne \emptyset$ and $F' \cap \mathbf{D} = \emptyset$ for some $\mathbf{R} \subset An(Y \cup \mathbf{M} \cup \mathbf{W})_{G_{\overline{\mathbf{D}}}}$. %by the characterization as described in . 
% \mr{there might be multiple disjoint c-forests rooted at each $Y \cup Ch(Y) \in H$.}
Consider any such $\mathbf{R}, F, F'$. Since $F'\cap \mathbf{D} =\emptyset$ and $F'$ is a $\mathbf{R}$-rooted C-forest, it must be the case that $\mathbf{D} \cap \mathbf{R}=\emptyset$. %for some $\mathbf{R} \subset An(Y \cup (\mathcal{H}\setminus Y) \cup  \mathbf{W})_{G_{\overline{\mathbf{D}}}}$. 
Since $\mathbf{D}\cap \mathbf{R} = \emptyset$ and intervening on $\mathbf{D}$ removes the incoming edges of $\mathbf{D}$, any member of $An(Y\cup \mathbf{M}\cup \mathbf{W})$ will be in $ T_B$  such that $\mathbf{R} \subseteq T_B$. But this is a contradiction due to the following:  

Suppose $\mathbf{R} \subseteq  T_{B}$. Since $\mathbf{D} \cap \mathbf{R} = \emptyset$ and $F\cap \mathbf{D} \ne \emptyset$, that implies some members of $\mathbf{D}$ must have a directed path to some members in $\mathbf{R}$. If that is the case, then some  members of $\mathbf{D}$ must also be in $T_{Y\cup Z}$, which is also in $T_B$. But this is a contradiction as any member of $\mathbf{D}$ cannot be in $T_{B}$ by the definition of $\mathbf{D}$. Thus, $\mathbf{R} \not \subseteq T_{B}$.

Therefore,  there is no hedge for $P(Y|do(\mathbf{D}), \mathbf{M}, \mathbf{W})$. By Theorem \ref{thm:conditional-hedge-iff-non-id},  $P(Y|do(\mathbf{D}), \mathbf{M}, \mathbf{W})$ is identifiable in $G$.

\emph{\textbf{Suppose} $(Y\not \indep S | \mathbf{W}, \mathbf{M})_{G_{\overline{\mathbf{D}}}}$}.

Next, we will show $(Y \indep    S|\mathbf{W}, \mathbf{M})_{G_{\overline{\mathbf{D}}}}$ for any $\mathbf{W}\subseteq (T_{B}) \setminus (Y\cup \mathbf{M})$. Given that $S$ is not a parent of $T_Y$, we will consider two cases separately:  $i.) S \in An(Y)$; $ii.) S \not \in An(Y)$ 

\underline{\emph{$S \in An(Y)$}}: 
Suppose there exists a d-connecting path from $S$ to $Y$ by conditioning on $\mathbf{W}\cup \mathbf{M}$ and intervening on $\mathbf{D}$ for some $\mathbf{W} \subseteq T_{B} \setminus (Y\cup \mathbf{M})$. First, there is no directed path from $S$ to $Y$ in $G_{\overline{\mathbf{D}}}$ since $S$ is not a parent of $T_{Y}$ by the conditions of the theorem and we intervene on $\mathbf{D}$, which is a superset of $Pa(T_{Y})$ and $Y\in T_{Y}$ since $T_{Y} \subset T_{B}$. For some $\mathbf{W} \subseteq T_{B} \setminus (Y \cup \mathbf{M})$, conditioning on $\mathbf{W}$ must have opened paths with colliders that are in $An(\mathbf{W})$. However, since any member of $\mathbf{W}$ is in $T_{B}$ and any incoming edges of $Pa(T_{B})$ are removed in $G_{\overline{\mathbf{D}}}$ and $S$ is not a parent of any member of $T_{Y\cup Z}$ by definition of $T_B$, there cannot be a d-connecting path from $S$ to $Y$ in $G_{\overline{\mathbf{D}}}$. This is a contradiction. Hence, we have $(Y \indep S|\mathbf{W}, \mathbf{M})_{G_{\overline{\mathbf{D}}}}$ for any $\mathbf{W} \subseteq T_{B} \setminus (Y\cup \mathbf{M})$ when $S\in An(Y)$.

\underline{\emph{$S \not \in An(Y)$}}: we will show that $i.)$ there is no d-connecting path from $S$ to $Y$ that ends with a member of $Ch(Y)$ and $ii.)$ there is no d-connecting path from $S$ to $Y$ that ends with a bidirected neighbor of $Y$.

\emph{\textbf{Show that there is no d-connecting path from $S$ to $Y$ that ends with a member of $Ch(Y)$}}. First, consider the case where no descendant of any child of $Y$ is in $\mathbf{W}$. Since $\mathbf{W}$ does not contain any child of $Y$, for any d-connecting path from $S$ to any child $J$ of $Y$, concatenating this path with the edge $Y\rightarrow J$ yields a blocked path to $Y$, as $J$ is a collider along this concatenated path. Next, suppose some descendants of some children of $Y$ are in $\mathbf{W}$. Note that for any member of $\mathbf{W}$, its parents are in $\mathbf{D}$, so all incoming edges of those parents are removed. Hence, there cannot be a d-connecting path from $S$ to $Y$ obtained by concatenating the edge $Y\rightarrow J$ for any child $J$ of $Y$, since no such $J$ is in $\mathbf{W}$ and no such path can be opened by conditioning on $\mathbf{W}$.

\emph{\textbf{Show that there is no d-connecting path from $S$ to $Y$ that ends with a bidirected neighbor of $Y$}}. Suppose first that the bidirected neighbor of $Y$ is also a child of $Y$. From above, we have proved that there is no d-connecting path from $S$ to $Y$ that ends with a member of $Ch(Y)$. Any bidirected neighbor of $Y$ that is not a child of $Y$ and for which the parents of its MACS
do not contain $S$ will be in $T_{B}=\bigcup_{Z\in \mathbf{M}} T_{Y\cup Z}$ for some $\mathbf{M}\subseteq \mathcal{Z}$, and we intervene on $Pa(T_{B})$ so that there is no path from $S$ to any such bidirected neighbor of $Y$. Suppose the d-connecting path from $S$ to $Y$ ends with some other bidirected neighbors of $Y$ that are not children of $Y$. Since those bidirected neighbors $\mathbf{A}$ are not in $\mathbf{W}$, there must exist some descendants of $\mathbf{A}$ that are in $\mathbf{W}$. However, for any such descendant, its parent must be in $\mathbf{D}$, so all the incoming edges of that parent are removed. That implies any such path is blocked. Since no member of $\mathbf{A}$ is in $\mathbf{W}$, any d-connecting path will be blocked after concatenating the edge $Y\leftrightarrow A$, as $A \in \mathbf{A}$ is a collider along that concatenated path. Thus, there is no d-connecting path from $S$ to $Y$ that ends with a bidirected neighbor of $Y$.

Therefore, we have $(Y \indep S | \mathbf{W}, \mathbf{M})_{G_{\overline{\mathbf{D}}}}$. 
\end{proof}

% \begin{proof}  Let $\mathcal{Z}\coloneqq \{Z: Z\in C(Y) \cap Nbr(Y)$, s.t. $Pa(T_{Y \cup Z})\not\ni S \}$ . Let $\mathbf{D}= Pa(T_{Y} \cup T_{\mathbf{B}})$.  Recall that by the conditions of the theorem,  
% $S$ is not a parent of $T_{Y} $. 

% Consider any $\mathbf{W}\subseteq (T_{Y} \cup T_{ \mathbf{B}})\setminus (Y\cup \mathbf{B})$.
% We first prove the following claim.

% \textbf{Claim: $P(Y|do(\mathbf{D}), \mathbf{B}, \mathbf{W}) \neq P(Y|do(\mathbf{D}, \mathbf{M}), (\mathbf{B} \cup \mathbf{W})\setminus \mathbf{M})$ for any $\mathbf{M}\subset \mathbf{B}\cup \mathbf{W}, \mathbf{M}\neq \emptyset$.}
% \begin{proof}
% For the sake of contradiction, suppose there exists a non-empty subset $\mathbf{M}$ where the equality holds. Under the extended faithfulness assumption, this would only be true if do-calculus Rule 2 is applicable through the following graphical condition:
% \begin{equation}
%     (Y\indep \mathbf{M}|\mathbf{B}\cup \mathbf{W}\setminus M)_{G_{\underline{\mathbf{M}},\overline{\mathbf{D}}}}
% \end{equation}

% Now observe that any node $U$ in $T_{Y\cup \mathbf{B}}$ must belong to a $T_{Y\cup Z}$ for some $Z\in \mathcal{Z}$. By definition of $T_{Y\cup Z}$, there must be a bidirected path from $U$ to $Z$ that only goes through nodes in $T_{Y\cup Z}$. By definition of $T_{Y\cup Z}$, any node along this bidirected path must be an ancestor of $Y\cup Z$. Therefore, there is a d-connecting path from $U$ to $Z$ that starts with an arrow into $U$. Note that further conditioning cannot break this path since the path only consists of colliders. Finally, if the conditioning set contains $Z$, we can concatenate this path with the edge $Y\leftrightarrow Z$ to obtain a d-connecting path to $Y$, since $Z$ is a collider along this concatenated path. This contradicts with the d-separation statement above.   
% \end{proof}

% For the sake of contradiction, suppose for some $\mathbf{W}\subseteq T_{Y\cup \mathbf{B}}\setminus (Y\cup \mathbf{B})$,  either $P(Y|do(\mathbf{D}), \mathbf{B}, \mathbf{W})$ is unidentifiable in $G$ or $(Y\not \indep S | \mathbf{W},\mathbf{B})_{G_{\overline{\mathbf{D}}}}$.


% \emph{\textbf{Suppose $P(Y|do(\mathbf{D}), \mathbf{B}, \mathbf{W})$ is unidentifiable in $G$}}. 

% By Theorem \ref{thm:conditional-hedge-iff-non-id}, there must exists a hedge for $P(Y|do(\mathbf{D}), \mathbf{B}, \mathbf{W})$. The claim above implies that the maximal set $\mathbf{M}$ such that $P(Y|do(\mathbf{D}), \mathbf{B}, \mathbf{W})$ = $P(Y|do(\mathbf{D}, \mathbf{M}), \mathbf{B} \cup \mathbf{W}\setminus \mathbf{M})$ is an empty set by rule 2 of do-calculus.  


% Since $\mathbf{M} = \emptyset$, by Definition \ref{def:generalized-hedge}, there exists a hedge for $p(y|do( \mathbf{D}), \mathbf{B}, \mathbf{W})$ only if there exists two $\mathbf{R}$-rooted C-forests $F, F'$ such that $F \cap \mathbf{D} \ne \emptyset$ and $F' \cap \mathbf{D} = \emptyset$ for some $\mathbf{R} \subset An(Y \cup \mathbf{B} \cup \mathbf{W})_{G_{\overline{\mathbf{D}}}}$. %by the characterization as described in . 
% % \mr{there might be multiple disjoint c-forests rooted at each $Y \cup Ch(Y) \in H$.}
% Consider any such $\mathbf{R}$, $F, F'$. Since $F'\cap \mathbf{D} =\emptyset$ and $F'$ is a $\mathbf{R}$-rooted C-forest, it must be the case that $\mathbf{D} \cap \mathbf{R}=\emptyset$. %for some $\mathbf{R} \subset An(Y \cup (\mathcal{H}\setminus Y) \cup  \mathbf{W})_{G_{\overline{\mathbf{D}}}}$. 
% Since $\mathbf{D}\cap \mathbf{R} = \emptyset$ and intervening on $\mathbf{D}$ removes in incoming edges of $\mathbf{D}$, any member of $An(Y\cup \mathbf{B}\cup \mathbf{W})$ will be in $T_{Y \cup \mathbf{B}}$ such that $\mathbf{R} \subseteq T_{Y \cup \mathbf{B}}$. But this is a contradiction due to the following:  

% Suppose $\mathbf{R} \subseteq T_{Y \cup \mathbf{B}}$. Since $\mathbf{D} \cap \mathbf{R}$ =$\emptyset$ and $F\cap \mathbf{D} \ne \emptyset$, that implies some members of $\mathbf{D}$ must have a directed path to some members in $\mathbf{R}$. If that is the case, then that member of $\mathbf{D}$ must also be in $T_{Y\cup Z}$, a $\{Y,Z\}$-rooted C-forest for some $Z\in C(Y)\cap Nbr(Y)$, which is also in $T_{Y\cup \mathbf{B}}$. But this is a contradiction as any member of $\mathbf{D}$ cannot be in $T_{Y\cup \mathbf{B}}$ by the definition of $\mathbf{D}$. Thus, $\mathbf{R} \not \subseteq T_{Y\cup \mathbf{B}}$.

% Therefore,  there is no hedge for $P(Y|do(\mathbf{D}), \mathbf{B}, \mathbf{W})$. By Theorem \ref{thm:conditional-hedge-iff-non-id},  $P(Y|do(\mathbf{D}),\mathbf{B}, \mathbf{W})$ is identifiable in $G$.

% \par

% \emph{\textbf{Suppose} $(Y\not \indep S | \mathbf{W}, \mathbf{B})_{G_{\overline{\mathbf{D}}}}$}.




% Next, we will show $(Y \indep    S|\mathbf{W}, \mathbf{B})_{G_{\overline{\mathbf{D}}}}$ for any $\mathbf{W}\subseteq T_{Y\cup \mathbf{B}} \setminus (Y\cup \mathbf{B})$. Given that $S$ is not a parent of $T_Y$, we will consider two cases separately:  $i.) S \in An(Y)$; $ii.) S \not \in An(Y)$ 

% \underline{\emph{$S \in An(Y)$}}: 
% Suppose there exists a d-connecting path from $S$ to $Y$ by conditioning on $\mathbf{W}\cup \mathbf{B}$ and intervening on $\mathbf{D}$ for some $\mathbf{W} \subseteq T_{Y \cup \mathbf{B}} \setminus (Y\cup \mathbf{B})$. First, there is no directed path from $S$ to $Y$ in $G_{\overline{\mathbf{D}}}$ since $S$ is not a parent of $T_{Y}$ by the conditions of the theorem and we intervene on $\mathbf{D}$, which is a superset of $Pa(T_{Y})$ and $Y\in T_{Y}$. For some $\mathbf{W} \subseteq T_{Y\cup\mathbf{B}} \setminus (Y \cup \mathbf{B})$, conditioning on $\mathbf{W}$ must have opened paths with colliders that are in $An(\mathbf{W})$. However, since any member of $\mathbf{W}$ is in $T_{Y\cup \mathbf{B}}$ and any incoming edges of $Pa(T_{Y \cup \mathbf{B}})$ are removed in $G_{\overline{\mathbf{D}}}$ and $S$ is not a parent of any member of $T_{Y\cup \mathbf{B}}$ by definition of $T_{Y\cup \mathbf{B}}$, there cannot be a d-connecting path from $S$ to $Y$ in $G_{\overline{\mathbf{D}}}$. Therefore, a contradiction. We have $(Y\not \indep S|\mathbf{W}, \mathbf{B})_{G_{\overline{\mathbf{D}}}}$ for any $\mathbf{W} \subseteq T_{Y\cup \mathbf{B}}\setminus (Y\cup \mathbf{B})$ when $S\in An(Y)$.

% \underline{\emph{$S \not \in An(Y)$}}: we will show $i.)$ there is no d-connecting path from $S$ to $Y$ ends with a member of $Ch(Y)$ and $ii.)$ there is no d-connecting path from $S$ to $Y$ ends with a bidirected neighbor of $Y$.

% \emph{\textbf{Show that there is no d-connecting path from $S$ to $Y$ that ends with a member of $Ch(Y)$}}. Any child of $Y$ that is a bidirected neighbor of $Y$ where parents of its MACS
% do not contain $S$ will be in $T_{Y\cup \mathbf{B}}$ for some $\mathbf{B}\subseteq \mathcal{Z}$ and we intervene on $Pa(T_{Y\cup \mathbf{B}})$ so that there is no path from $S$ to any such child of $Y$. Suppose the d-connecting path from $S$ to $Y$ ends with some other members of $Ch(Y)$. However, for any such path, we must have conditioned on the descendants of those children of $Y$ to open the path from $S$ to $Y$ through some descendants of $Y$, but any children of $Y$ that is in $\mathbf{B}$, its parents are also intervened such that any path from $S$ to $Y$ through those children
% are blocked. For any other children that are not in $\mathbf{B}$ either form a collider or their descendants form a collider from some paths from $S$ to $Y$.  Therefore, there is no d-connecting path from $S$ to $Y$ that ends with a member of $Ch(Y)$.

% \emph{\textbf{Show that there is no d-connecting path from $S$ to $Y$ ends with a bidirected neighbor of $Y$}} Suppose further that bidirected neighbor of $Y$ is a child of $Y$. From above, we have proved there is no d-connecting path from $S$ to $Y$ ends with a member of $Ch(Y)$. Suppose that bidirected neighbor of $Y$ is not a child of $Y$, then any path from $S$ to $Y$ along that bidirected neighbor of $Y$ is blocked as there exists a collider along any such path and that bidirected neighbor is not in $T_{Y\cup \mathbf{B}}$. For any bidirected neighbor of $Y$ that is in $\mathcal{B}$, the parents of its MACS must be have been intervened such that there is no member in $\mathbf{B}$ along any d-connecting path from $S$ to $Y$. Therefore, there is no d-connecting path from $S$ to $Y$


 
% Therefore, we have $(Y \indep S | \mathbf{W}, \mathbf{B})_{G_{\overline{\mathbf{D}}}}$. 
% \end{proof}

\subsection{Proof of Theorem \ref{thm:gurantees-of-finding-invariant-predictor}}
We will first prove the following lemma.
\begin{lemma}
\label{lem:existence-of-parent}
If $S \not \in An(Y)$ and \textbf{ID4IP} (Algorithm \ref{alg:id4ip}) returns \textbf{FAIL}, then there is no parent of $Y$ in $G.$ 
\end{lemma}

\begin{proof}
For the sake of contradiction, assume there exists some parents of $Y$ in $G$.  Given as the conditions of the lemma, we know that $S \not \in An(Y)$ and \textbf{ID4IP} returns \textbf{FAIL}. Since $S\not \in An(Y)$,  \textbf{ID4IP} will only return \textbf{FAIL} at line \textbf{\ref{lineNum:failure}} since line \textbf{\ref{lineNum:failure1b}} requires that $S \in An(Y)$. $P(Y|do(Pa(T_{Y})), T_{Y}\setminus Y)$ will be a graph surgery estimator by Corollary \ref{cor:valid-stable-estimator-v1}. This is a contradiction because \textbf{ID4IP} returns \textbf{FAIL} at line \textbf{\ref{lineNum:failure}} due to $P_{set}= \emptyset$. Therefore, if $S \not \in An(Y)$ and \textbf{ID4IP} returns \textbf{FAIL}, there is no parent of $Y$ in $G$.
\end{proof}

\begin{theorem}
\label{appex:gurantees-of-finding-invariant-predictor}
If there exists a graph surgery estimator, \textbf{ID4IP} outputs a graph surgery estimator.
\end{theorem}


\textbf{Claim}: Given $S\in An(Y)$, if there exists a graph surgery estimator, \textbf{ID4IP} (Algorithm \ref{alg:id4ip}) outputs a graph surgery estimator.

We will first prove the above claim.

\begin{proof}
    As given by the condition, $S \in An(Y)$ and there exists a graph surgery estimator. By Theorem \ref{thm: parents-of-T_y-existence}, $S \not \in Pa(T_{Y})$, where $T_{Y}$ is the MACS of $Y$. $S$ has no incoming edges by problem set up so $S$ cannot be in $T_{Y}$. Also, since there is a directed path from $S$ to $Y$ and $S$ is not a parent of $T_{Y}$, $S \in An(M)$ for some $M \in Pa(T_{Y})$. By Corollary \ref{cor:valid-stable-estimator-v1}, \textbf{ID4IP} will output a graph surgery estimator at line \textbf{\ref{lineNum:search T_y}} of the algorithm.
\end{proof}

Next, we will prove the theorem.

\begin{proof}
With the claim above, we only need to consider the case where $S \not \in An(Y)$. Suppose $S \not \in An(Y)$. Note that since $S \not \in An(Y)$, this \textbf{FAIL} is returned by line  \textbf{\ref{lineNum:failure}}  of the algorithm. 

We will prove it by contraposition. Suppose \textbf{ID4IP} returns \textbf{FAIL} and, for the sake of contradiction, suppose also that there exists a graph surgery estimator $P(Y|do(\mathbf{Q}), \mathbf{W})$ for some $\mathbf{Q}\subseteq \mathbf{V}$ and some $\mathbf{W} \subseteq \mathbf{V} \setminus \mathbf{Q}$.

We will consider the following cases: $i.)$  $\mathbf{Q} = \emptyset, \mathbf{W} \ne \emptyset$; $ii.)$  $\mathbf{Q} \ne \emptyset, \mathbf{W} = \emptyset$; $iii.)$  $\mathbf{Q} \ne \emptyset, \mathbf{W} \ne \emptyset$.

\emph{$\mathbf{i.) Q = \emptyset, W \ne \emptyset}$}. Given $\mathbf{Q = \emptyset, W \ne \emptyset}$, it must be the case that there exists some $W \in \mathbf{W}$ that has a d-connecting path to $Y$. By Lemma \ref{lem:existence-of-parent}, $Y$ has no parents in $G$. It implies that any such path must end at a child of $Y$ or a bidirected neighbor of $Y$. If such path ends at a child of $Y$, for any such child $K$ of $Y$, there exists an inducing path from $S$ to $K$ since $S \in Pa(T_{K})$ and $T_{K}$ is a $K$-rooted C-tree. It implies that there exists no subset in $\mathbf{V}$ such that $Y$ and $S$ can be d-separated by Theorem 4.2 in \cite{richardson2002ancestral}. Since $\mathbf{Q}=\emptyset$, we have $(Y\not \indep S|\mathbf{W})_{G}$, which is a contradiction. Similarly, if such path ends at a bidirected neighbor of $Y$ that is not a child of $Y$, for any such bidirected neighbor $Z$ of $Y$, $S \in Pa(T_{Y\cup Z})$, which implies $(Y \not \indep S | \mathbf{W})_{G}$. It is because \textbf{ID4IP} returns \textbf{FAIL} implies that $S$ is a parent of the MACS of $\{Y, Z\}$ for any bidirected neighbor $Z$ of $Y$ that is not a child of $Y$. Observe that any node $U$ in $T_{Y\cup Z}$ for any $Z$, $U$ has a bidirected path to $Z$. By the definition of $T_{Y\cup Z}$, any node along this bidirected path must be an ancestor of $Z$. Therefore, there is a d-connecting path from $U$ to $Z$ that starts with an arrow into $U$. Note that further conditioning cannot break this path since the path only consists of colliders. As $\mathbf{W} \ne \emptyset$, if $Z$ is not in $\mathbf{W}$, then any such member will be d-separated from $Y$ by concatenating any d-connecting path from $U$ to $Z$ with $Y \leftrightarrow Z$ such that $P(Y|\mathbf{W}) = P(Y)$. If $Z$ is in $\mathbf{W}$, then $(Y \not \indep S|\mathbf{W})_{G}$. Therefore, we have reached a contradiction for the case when $\mathbf{Q} = \emptyset, \mathbf{W} \ne \emptyset$. 



\emph{$\mathbf{ii.) Q \ne \emptyset, W = \emptyset}$}. By Lemma \ref{lem:existence-of-parent}, there is no parent of $Y$ in $G$. Hence, every member of $\mathbf{Q}$ must be a non-ancestor of $Y$. Then, either $Y$ has a directed path to some $Q \in \mathbf{Q}$ or there is no directed path to any $Q\in \mathbf{Q}$; in either case, we have $(Y\indep \mathbf{Q})_{G_{\overline{\mathbf{Q}}}}$. By rule 3 of do-calculus, that implies $P(Y|do(\mathbf{Q}))= P(Y)$, which is a contradiction. 

\emph{$\mathbf{iii.) Q \ne \emptyset, W \ne \emptyset}$}. We only need to consider the case where $P(Y|do(\mathbf{Q}), \mathbf{W}) \ne P(Y| \mathbf{W})$ and $ P(Y|do(\mathbf{Q}), \mathbf{W}) \ne  P(Y|do(\mathbf{Q}))$, as we have already reached a contradiction in cases $i.)$ and $ii.)$ when either equality holds. Since there is no parent of $Y$ in $G$ by Lemma \ref{lem:existence-of-parent}, all members of $\mathbf{Q}$ and $\mathbf{W}$ must be non-ancestors of $Y$. Also, since \textbf{ID4IP} returns \textbf{FAIL}, there is no child $H$ of $Y$ such that $S \not \in Pa(T_{H})$ and there is also no bidirected neighbor $Z$ of $Y$ such that $S \not \in Pa(T_{Y \cup Z})$.

If $S$ is not connected with $Y$ and \textbf{ID4IP} returns \textbf{FAIL}, then there will be no children or bidirected neighbors of $Y$. This is because $S$ being disconnected from $Y$ implies $S\not \in Pa(T_{H})$ and $S \not \in Pa(T_{Y\cup Z})$ for any child $H$ and any bidirected neighbor $Z$ of $Y$. To see this, suppose there exist some children or bidirected neighbors of $Y$ in $G$. Then \textbf{ID4IP} will not return \textbf{FAIL}, as any of those children and bidirected neighbors will be in either $T_{H}$ or $T_{Y\cup Z}$ for some child $H$ of $Y$ and some bidirected neighbor $Z$ of $Y$, so that predictors are returned at line \textbf{\ref{lineNum:findchild}} or \textbf{\ref{lineNum:findbidirectednbr}} of Algorithm \ref{alg:id4ip}. Thus, it is a contradiction. Since there are no children, no bidirected neighbors, and no parents of $Y$, any variable in $\mathbf{V}$ will be d-separated from $Y$ in $G$, so that any query will be equal to $P(Y)$ and there is no graph surgery estimator.

Suppose $S$ is connected with $Y$. Since $S \not \in An(Y)$, for $S$ to be connected with $Y$, any path from $S$ to $Y$ must have a collider. Let $\mathbf{J}$ be the largest set such that $P(Y|do(\mathbf{Q}), \mathbf{W}) = P(Y|do(\mathbf{Q} \cup \mathbf{J}), \mathbf{W}\setminus \mathbf{J})$ by rule 2 of do-calculus. In addition, let $\mathbf{M}$ be the largest set such that $P(Y|do(\mathbf{Q}\cup\mathbf{J}), \mathbf{W}\setminus \mathbf{J})= P(Y|do((\mathbf{Q}\cup\mathbf{J})\setminus \mathbf{M}), \mathbf{W}\setminus \mathbf{J})$ by rule 3 of do-calculus. If $(\mathbf{Q\cup J}) \setminus \mathbf{M} = \emptyset$, then we reach the same contradiction as $(\mathbf{i.})$. Suppose $(\mathbf{Q\cup J}) \setminus \mathbf{M} \ne \emptyset$, it must be the case that any member in $\mathbf{(Q\cup J)} \setminus \mathbf{M}$  is d-connected with $Y$ when conditioning on $\mathbf{W}\setminus \mathbf{J}$ in $G_{\overline{(\mathbf{Q\cup J)} \setminus \mathbf{M}}}$ as implied by rule 3 of do-calculus.  

Any such d-connecting path from a member $A$ of $(\mathbf{Q\cup J}) \setminus \mathbf{M}$ to $Y$ must begin with an outgoing edge of some members $N$ in $(\mathbf{Q\cup J)} \setminus \mathbf{M}$ in $G_{\overline{(\mathbf{Q\cup J)} \setminus \mathbf{M}}}$. Since $Y$ does not have any parents, this d-connecting path must end at some children of $Y$ or some bidirected neighbors of $Y$ that are in $\mathbf{W}\setminus \mathbf{J}$. However, for any such child $H$ or bidirected neighbor $Z$ of $Y$, it must be that $S\in Pa(T_{H})$ and $S\in Pa(T_{Y\cup Z})$ as implied by the condition that \textbf{ID4IP} returns \textbf{FAIL}. 

For the case where the d-connecting path ends at $H$, without loss of generality, we consider the following three cases: $i.$  $N$ is in $T_{H}$ or   $ii.$ $N \in An(T_{H})\setminus T_{H}$ or  $iii.$ $N \in De(T_{H})\setminus T_{H}$. Note that the case $N\in T_{H}$ subsumes the case when $H$ is also a bidirected neighbor of $Y$.

\underline{$i. N \in T_{H}$}: 
If $N$ is in $T_{H}$, then $P(Y|do(\mathbf{Q}), \mathbf{W})$ is unidentifiable in $G$ by the following construction of hedge condition: for a query $P(Y|do((\mathbf{Q}\cup\mathbf{J})\setminus \mathbf{M}), \mathbf{W}\setminus \mathbf{J})$, we can let $\mathbf{R}=\{H\}$, which is a proper subset of $An(Y\cup (\mathbf{W}\setminus \mathbf{J}))$. Also, we can let $F' =\{H\}$ and $F = T_{H}$ as $T_H$ is a $H$-rooted C-tree. The result then follows Theorem \ref{thm:conditional-hedge-iff-non-id}. 

For the next two cases, we will make use of the following observation. Consider any node $U$ in $T_{H}$. By the definition of $T_{H}$, there must be a bidirected path from $U$ to $H$ that only goes through nodes in $T_{H}$, and any node along this bidirected path must be an ancestor of $H$. Therefore, there is a d-connecting path from $U$ to $H$ that starts with an arrow into $U$. Note that further conditioning cannot break this path since the path only consists of colliders.

\underline{$ii. N \in An(T_{H})\setminus T_{H}$}: If $N$ is in $An(T_{H})\setminus T_{H}$ and $H \in \mathbf{W}\setminus \mathbf{J}$, 
 then $(Y\not \indep S| \mathbf{W}\setminus \mathbf{J})_{G_{\overline{(\mathbf{Q} \cup \mathbf{J}) \setminus \mathbf{M}}}}$. This is because $S \in Pa(T_{H})$, so we can construct a d-connecting path from $S$ to $Y$ by concatenating $Y\rightarrow  H$ with the bidirected path as outlined in the observation. Note that 
 no member of $(\mathbf{Q} \cup  \mathbf{J}) \setminus \mathbf{M}$ can be in $T_{H}$ due to unidentifiability of the query as shown previously. Therefore, we have a contradiction.  If $H \not \in \mathbf{W} \setminus \mathbf{J}$, then any such path is blocked by $H$, which contradicts the fact that $(\mathbf{Q} \cup \mathbf{J}) \setminus \mathbf{M}$ is non-empty by rule 3 of do-calculus. 

\underline{$iii. N \in De(T_{H})\setminus T_{H}$}:   If $N \in De(T_{H})\setminus T_{H}$ and $H \in \mathbf{W} \setminus \mathbf{J}$, then we can obtain a d-connecting path from $S$ to $Y$ by concatenating $Y\rightarrow H$ with the bidirected path as outlined in the observation, which is a contradiction. If $N \in De(T_{H})\setminus T_{H}$  and  $H \not \in \mathbf{W} \setminus \mathbf{J}$, then any such path is blocked by $H$, which contradicts the fact that $(\mathbf{Q} \cup \mathbf{J}) \setminus \mathbf{M}$ is non-empty by rule 3 of do-calculus.

For the case where the d-connecting path ends at a bidirected neighbor $Z$ of $Y$ that is not a child of $Y$, without loss of generality, we also consider three cases: $i.$ $N$ is in $T_{Y\cup Z}$ or $ii.$ $N\in An(T_{Y\cup Z}) \setminus T_{Y\cup Z}$ or $iii.$ $N\in De(T_{Y\cup Z}) \setminus T_{Y\cup Z}$. 

\underline{$i. N \in T_{Y\cup Z}$}: If $N$ is in $T_{Y\cup Z}$, we can construct two $\mathbf{R}$-rooted C-forests $F=T_{Y\cup Z}, F'=\{Y, Z\}$ such that $F\cap N \ne \emptyset, F'\cap N = \emptyset$, where  $\mathbf{R}=\{Y,Z\}$, which is the proper subset of $An(Y\cup (\mathbf{W}\setminus \mathbf{J}))$ such that there exists a hedge for $P(Y|do((\mathbf{Q}\cup \mathbf{J})\setminus \mathbf{M}), \mathbf{W}\setminus \mathbf{J})$ by the characterization of generalized hedge condition. By Theorem \ref{thm:conditional-hedge-iff-non-id}, $P(Y|do((\mathbf{Q}\cup \mathbf{J})\setminus \mathbf{M}), \mathbf{W}\setminus \mathbf{J})$ is unidentifiable in $G$, which is a contradiction. 

For the next two cases, we will make use of the following observation. For any node $U$ in $T_{Y \cup Z}$, there must be a bidirected path from $U$ to $Z$ and $Y$ that only goes through nodes in $T_{Y\cup Z}$. By definition of $T_{Y\cup Z}$, any node along this bidirected path must be an ancestor of $Z$ or $Y$.  Therefore, there is a d-connecting path from $U$ to $Z$ or $Y$ that starts with an arrow into $U$. Note that further conditioning cannot break this path since the path only consists of colliders.

\underline{$ii. N \in An(T_{Y\cup Z}) \setminus T_{Y\cup Z}$}: If $N$ is in $An(T_{Y\cup Z}) \setminus T_{Y\cup Z}$ and $Z \in \mathbf{W} \setminus \mathbf{J}$, then $(Y\not \indep S| \mathbf{W}\setminus \mathbf{J})_{G_{\overline{(\mathbf{Q}\cup \mathbf{J}) \setminus \mathbf{M}}}}$. This is because $S \in Pa(T_{Y \cup Z})$, so we can construct a d-connecting path from $S$ to $Y$ by concatenating $Y\leftrightarrow  Z$ with the bidirected path as outlined in the observation. Also, no member of $(\mathbf{Q}\cup \mathbf{J}) \setminus \mathbf{M}$ can be in $T_{Y\cup Z}$ due to unidentifiability of the query as shown previously. Therefore, we have a contradiction. If $Z\not \in \mathbf{W} \setminus \mathbf{J}$, then any such path is blocked by $Z$, which contradicts the fact that $(\mathbf{Q}\cup \mathbf{J}) \setminus \mathbf{M}$ is non-empty by rule 3 of do-calculus.

\underline{$iii. N \in De(T_{Y\cup Z}) \setminus T_{Y\cup Z}$}: If $N\in De(T_{Y\cup Z})\setminus T_{Y\cup Z}$ and $Z\in \mathbf{W} \setminus \mathbf{J}$, then we can obtain a d-connecting path from $S$ to $Y$ by concatenating $Y\leftrightarrow Z$ with the bidirected path as outlined in the observation, which is a contradiction. If $N \in De(T_{Y\cup Z})\setminus T_{Y\cup Z}$ and $Z \not \in \mathbf{W} \setminus \mathbf{J}$, then any such path is blocked by $Z$, which contradicts the fact that $(\mathbf{Q} \cup \mathbf{J}) \setminus\mathbf{M}$ is non-empty by rule 3 of do-calculus. 

Therefore, we reach a contradiction to the case where $\mathbf{Q} \ne \emptyset$ and $\mathbf{W} \ne \emptyset$.

Thus, there exists no graph surgery estimator. By contraposition, if there exists a graph surgery estimator, \textbf{ID4IP} (Algorithm \ref{alg:id4ip}) outputs a graph surgery estimator. 
\end{proof}

\subsection{Proof of Theorem \ref{thm:soundness}}
\begin{theorem} 
    \textbf{(Soundness of Algorithm~\ref{alg:id4ip}:ID4IP)} When \textbf{Algorithm~\ref{alg:id4ip}:ID4IP} %(Algorithm \ref{alg:id4ip}) 
    returns an estimator, it is a graph surgery estimator with respect to the given target and the selection variable in $G$. 
\end{theorem}
\begin{proof}
For lines \textbf{\ref{lineNum:failure1a}--\ref{lineNum:failure1b}} in Algorithm~\ref{alg:id4ip}, the soundness follows from Theorem \ref{thm: parents-of-T_y-existence}, where we describe how the selection variable being a parent of $T_Y$ implies that there is no graph surgery estimator. Then, the soundness of line \textbf{\ref{lineNum:search T_y}} follows from Corollary \ref{cor:valid-stable-estimator-v1}, which shows how \textbf{ID4IP} can get graph surgery estimators by utilizing the parents of the MACS of $Y$.

In addition, Theorem \ref{thm:valid-stable-estimator-v2} ensures the correctness of line \textbf{\ref{lineNum:findchild}}
which utilizes the parents of the MACS of some children of the target that are not selection variables.
%
For line \textbf{\ref{lineNum:findbidirectednbr}}, the soundness follows Theorem \ref{thm:find-macs-on-bidirected-nbr-Y-and-Y} which 
shows how \textbf{ID4IP} can find graph surgery estimators if any by utilizing the parents of the MACS of the bidirected
neighbors of $Y$.
%
Lastly, line \textbf{\ref{lineNum:failure}} follows Theorem \ref{thm:gurantees-of-finding-invariant-predictor} 
which guarantees \textbf{ID4IP} to find at least one graph surgery estimator or show that there is no graph surgery estimator.
\end{proof}


\subsection{Proof of Theorem \ref{thm:graphsurgery-complexity}}
In this proof, we will make use of the following theorems.

\begin{theorem}\label{thm:bayes-ball-complexity}
\textbf{(Bayes-ball Complexity)} \cite{shachter2013bayes} Given a causal graph $G=(\mathbf{V}, \mathbf{E})$, the time complexity of Bayes-ball algorithm is $O(|\mathbf{V}|+ |\mathbf{E}_{\mathbf{V}}|)$, where $\mathbf{E}_{\mathbf{V}}$ are the edges incident to the nodes marked during the algorithm. In the worst case, it is linear time in the size of the graph.   
\end{theorem}
\begin{theorem} \label{thm:macs-Y-in-polytime}
\cite{shpitser2008dormant}
\textbf{Find-MACS-on-set}$(G, \mathbf{Y})$ outputs the MACS of $\mathbf{Y}$ in polynomial time in the size of graph. 
\end{theorem}
\begin{proof}
Let $|Ch(S)|=C$. The Graph Surgery Estimator algorithm first finds all supersets of $Ch(S)$. Getting all supersets of $Ch(S)$ takes the complexity of $O(2^{|\mathbf{V}|-C})$. Then, for each superset $\mathbf{M}$, the Graph Surgery Estimator algorithm finds the power set of $\mathbf{V}\setminus (\mathbf{M} \cup Y)$, which takes $O(2^{|\mathbf{V}\setminus (\mathbf{M}\cup Y)|})$. Asymptotically, the complexity of finding the power set for each superset becomes $O(2^{|\mathbf{V}|-(C+1)})$ as $Ch(S)$ is the smallest superset of $Ch(S)$. Then, for each member $\mathbf{Q}$ of each power set, the two major operations that contribute to the complexity of the algorithm are: 
\begin{enumerate}
    \item  Using a for-loop to search through each member in $\mathbf{Q}$. Then, it checks for the d-separation condition within each loop, resulting in the complexity of $O(|\mathbf{Q}|\times (|\mathbf{V}| + |\mathbf{E}_{\mathbf{V}}|))$ with the use of the Bayes-ball algorithm \cite{shachter2013bayes}.
    \item Calling the \textbf{ID} algorithm for checking the identifiability of the returned unconditional query, which takes the complexity of $O(B)$.
\end{enumerate}
\end{proof}
%
\begin{theorem}
\textbf{(GSE Complexity)}
Let $|Ch(S)| = C$, $\mathbf{M}= Ch(S)$, $\mathbf{Q} = \mathbf{V}\setminus (\mathbf{M}\cup Y)$. 
Given a causal graph $G=(\mathbf{V},\mathbf{E})$ and disjoint variables $\mathbf{X,Y} \subset V$, the time complexity of Graph Surgery Estimator (GSE) (Algorithm 5 in Section B.4)
% \ref{alg:graph-surgery-estimator}) 
is: $O(2^{2(|\mathbf{V}|-C)- 1} \times B)$, where $B$ represents the time complexity of \textbf{ID} algorithm. 
\end{theorem}
\begin{proof}
From Theorems~\ref{thm:macs-Y-in-polytime} and~\ref{thm:bayes-ball-complexity},
we can derive the complexity of the Graph Surgery Estimator as
$O(2^{|\mathbf{V}|-C+ |\mathbf{V}\setminus (\mathbf{M} \cup Y)|} \times |\mathbf{Q}| \times (|\mathbf{V}| + |\mathbf{E}_{\mathbf{V}}|) \times B)$, which we can simplify to $O(2^{2(|\mathbf{V}|-C)-1} \times B)$.
\end{proof}






\subsection{Proof of Theorem \ref{thm:id4ip-complexity}}
\begin{theorem}
    \textbf{(ID4IP  Complexity)} Given a causal graph $G=(\mathbf{V},\mathbf{E})$ and disjoint variables $\mathbf{X,Y} \subseteq \mathbf{V}$, the complexity of \textbf{ID4IP} (Algorithm~\ref{alg:id4ip}) is $O((|(C(Y) \cap Nbr(Y))\setminus (T_{Y}\cup T_{\mathbf{C}})| +|Ch(Y)|+1)K + (|T_{Y}| - 1 + |T_{J} | + |T_{J}^{'} | - |\mathcal{H}^{'}| - |\mathcal{H}|) B)$, where $K$ represents the time complexity of \textbf{Find-MACS-on-set}, $B$ represents the time complexity of the \textbf{ID} algorithm, $T_Y$ is the MACS of $Y$ in $G$, $T_H$ is the MACS of a child $H$ of $Y$ in $G$, and $T_{\mathbf{C}}\coloneqq \bigcup_{H\in Ch(Y)} T_{H}$.
\end{theorem}
\begin{proof} 
By Theorem \ref{thm:complexity-findMACS}, \textbf{Find-MACS-on-set} outputs the MACS of a set in polynomial time in the size of the graph. We let $O(K)$ be the complexity of \textbf{Find-MACS-on-set} so that line \textbf{\ref{lineNum:findMACs-TY}} takes $O(K)$. Let $O(B)$ be the time complexity of \textbf{ID} algorithm. At line \textbf{\ref{lineNum:search T_y}} of \textbf{ID4IP}, it takes $O(|T_{Y}|-1)$ to search through the sets $\mathbf{W}$ and each time it calls on \textbf{ID} algorithm so that each \textbf{Greedy-Eval} takes $O((|T_{Y}|-1) B)$. The line \textbf{\ref{lineNum:findchild}} takes $O(|Ch(Y)|K + (|T_{J} | - |\mathcal{H}|) B)$ because we call on \textbf{Find-MACS-on-set} $|Ch(Y)|$ many times and each time \textbf{Find-MACS-on-set} takes $O(K)$. 
Now we have,
$\mathcal{H}\coloneqq \{H: H\in Ch(Y)$, $Pa(T_H)\not\ni S\}$ and $T_J\coloneqq \bigcup_{H\in \mathbf{K}} T_H$ for any $\mathbf{K}\subseteq \mathcal{H}$, where $T_{H}$ is the MACS with respect to the variable $H$.
Therefore, after finding the MACS of each child of $Y$, we use \textbf{Greedy-Eval}, which calls on \textbf{ID} algorithm $|T_{J} | - |\mathcal{H}|$ times. 

Line \textbf{\ref{lineNum:findbidirectednbr}}, similarly to line \textbf{\ref{lineNum:findchild}}, finds the MACS of each bidirected neighbor of $Y$ that is neither a child nor a parent of $Y$, which results in $O(|(C(Y) \cap Nbr(Y))\setminus (T_{Y}\cup T_{\mathbf{C}})|K)$.
Here, we have,
$\mathcal{H}' \coloneqq \{H': H'\in (C(Y) \cap Nbr(Y))\setminus (T_{Y}\cup T_{\mathbf{C}})$,
$Pa(T_{H'})\not\ni S\}$ and $T'_J\coloneqq \bigcup_{H'\in \mathbf{K}} T_{H'}$ for any $\mathbf{K}\subseteq \mathcal{H}'$, where $T_{H'}$ is the MACS with respect to the variable $H'$.
Then, we will use \textbf{Greedy-Eval}, which calls on the \textbf{ID} algorithm $|T_{J}^{'}| - |\mathcal{H}^{'}|$ times, where $T_{J}^{'} \ne T_{J}$ and $\mathcal{H}^{'} \ne \mathcal{H}$. Therefore, line \textbf{\ref{lineNum:findbidirectednbr}} takes $O(|(C(Y) \cap Nbr(Y))\setminus (T_{Y}\cup T_{\mathbf{C}})|K + (|T_{J}^{'} | - |\mathcal{H}^{'}|) B)$. In total, \textbf{ID4IP} takes $O( K+( |T_{Y}|-1)B+|Ch(Y)|K + (|T_{J} | - |\mathcal{H}|) B + |(C(Y) \cap Nbr(Y))\setminus (T_{Y}\cup T_{\mathbf{C}})|K + (|T_{J}^{'} | - |\mathcal{H}^{'}|) B)$, which can be simplified to $O((|(C(Y) \cap Nbr(Y))\setminus (T_{Y}\cup T_{\mathbf{C}})| +|Ch(Y)|+1)K + (|T_{Y}| - 1 + |T_{J} | + |T_{J}^{'} | - |\mathcal{H}^{'}| - |\mathcal{H}|) B)$.
\end{proof}


\subsection{Proof of Lemma \ref{appex:lem:uid->hedge}}
\begin{lemma} \label{appex:lem:uid->hedge}
    If there is no hedge for $P(\mathbf{Y}|do(\mathbf{X}))$, then $P(\mathbf{Y}|do(\mathbf{X}))$ is identifiable in $G$.
\end{lemma}


\begin{proof}
    Suppose there is no hedge for $P(\Y|\DO(\X))$. Therefore, for any $\R \subseteq An(Y)_{\GbarX}$, there do not exist two \rcforests{R} $F'\subset F\subseteq V$ such that $F\cap \X \neq \emptyset$ and $F'\cap \X=\emptyset$. Equivalently, it must be the case that for any $\R\subseteq An(Y)_{\GbarX}$ and \rcforests{R} $F'\subset F\subseteq V$, either $F\cap \X= \emptyset$ or $F'\cap \X\neq \emptyset$.
    
    We consider two cases: (i) there is no bidirected path from $\mathbf{X}$ to any of its children, and (ii) there is a bidirected path from $\mathbf{X}$ to some of its children.

    \emph{\textbf{Suppose there is no bidirected path from any of the nodes in $\X$ to any of their children in $G_{An(\Y)}$.}} By Theorem $4$ of \cite{tian2002general}, the query $P(\Y|\DO(\X))$ is then identifiable. 

    \emph{\textbf{Suppose there is a bidirected path from some of the nodes in $\X$ to some of their children in $G_{An(\Y)}$.}} For any $\mathbf{R}\subseteq An(\mathbf{Y})_{G_{\overline{\mathbf{X}}}}$, either there is an $\mathbf{R}$-rooted C-forest $F'$ such that $F' \cap \mathbf{X} = \emptyset$ and $F \cap \mathbf{X} = \emptyset$ for any $\mathbf{R}$-rooted C-forest $F \supset F'$, or there is an $\mathbf{R}$-rooted C-forest $F$ such that $F \cap \mathbf{X} \ne \emptyset$ and $F' \cap \mathbf{X} \ne \emptyset$ for any $\mathbf{R}$-rooted C-forest $F' \subset F$. Given these conditions, we will show that, by the soundness of the \textbf{ID} algorithm~\cite{shpitser2008complete}, $P(\mathbf{Y}|do(\mathbf{X}))$ is identifiable in $G$.
    
    We will briefly describe the \textbf{ID} algorithm (Algorithm \ref{alg:id}) here. At step 6, the $\mathbf{ID}$ algorithm takes the induced subgraph of $G$ over $An(\mathbf{Y})$, then it partitions $G_{An(\mathbf{Y})}$ into various induced subgraphs of $G_{An(\mathbf{Y})}$ over all possible C-components at step 13 of the algorithm. Since there is a hedge for $p(\mathbf{y}|do(\mathbf{x}))$ if and only if the \textbf{ID} algorithm
    returns at step 18 of the algorithm by the soundness of \textbf{ID} algorithm, we will proceed by showing we will never run into step 18 of the algorithm given the conditions described in the previous paragraph.  
    
    When \textbf{ID} algorithm returns FAIL, the graph $G$ at step 17 may not necessarily refer to the original causal graph $G$ that has been passed into \textbf{ID}, but rather a subgraph of $G$ after taking step 13 of the algorithm along with the other potential recursive steps 6 through 11. We use $G'\subseteq G$ in the rest of the argument for the sake of clarity.  
    
    For the sake of contradiction, assume there exists a C-component in some subgraph $G' \subseteq G$ such that $C(G'\setminus\mathbf{X}) = \{S\}$ and $C(G') =\{G'\}$. By definition, we can construct an $\mathbf{R}$-rooted C-forest $F'$ as $S$ such that $F' \cap \mathbf{X} = \emptyset$. Now, given $C(G') = \{G'\}$ and by definition of hedge, $\mathbf{X} \not \in \mathbf{R}$, we can also construct another $\mathbf{R}$-rooted C-forest $F$ such that $F \cap \mathbf{X} \ne \emptyset$. Then, there exists a hedge for $P(\mathbf{Y}|do(\mathbf{X}))$, which is a contradiction. Therefore, we will never run into step 18 of the \textbf{ID} algorithm. Then, $\mathbf{ID}$ will return an identifiable query. Therefore, we have that $P(\mathbf{Y}|do(\mathbf{X}))$ is identifiable in $G$.
    \end{proof}

\subsection{Proof of Theorem \ref{appex:hedge-iff-uid}}
\begin{theorem} \label{appex:hedge-iff-uid} 
    There exists a hedge for $P(\mathbf{Y}|\DO(\mathbf{X}))$ if and only if $P(\mathbf{Y}|\DO(\mathbf{X}))$ is unidentifiable in $G$.
\end{theorem}

\begin{proof}
    By Lemma \ref{appex:lem:uid->hedge}  and Theorem 4 in \cite{shpitser2006identification}, the result follows.
\end{proof}

\subsection{Proof of Theorem \ref{appex:thm:new-hedge-criterion}}
\begin{theorem}\label{appex:thm:new-hedge-criterion}
    $P(Y|do(\mathbf{X}))$ is identifiable if and only if there is no hedge for $P(Y|do(\mathbf{X}'))$ where $\mathbf{X}'$ is the smallest subset of $\mathbf{X}$ such that $P(Y|do(\mathbf{X}'))=P(Y|do(\mathbf{X}))$. 
\end{theorem}
\begin{proof}
    $(\Leftarrow)$ Suppose there is no hedge for $P(Y|do(\mathbf{X}'))$, where $\mathbf{X}'$ is the smallest subset of $\mathbf{X}$ such that $P(Y|do(\mathbf{X}'))=P(Y|do(\mathbf{X}))$. Since $P(Y|do(\mathbf{X}'))=P(Y|do(\mathbf{X}))$, there also exists no hedge for $P(Y|do(\mathbf{X}))$. By Lemma~\ref{appex:lem:uid->hedge}, we have that $P(Y|do(\mathbf{X}))$ is identifiable in $G$.\\
    $(\Rightarrow)$ Suppose there exists a hedge for $P(Y|do(\mathbf{X}'))$, where $\mathbf{X}'$ is the smallest subset of $\mathbf{X}$ such that $P(Y|do(\mathbf{X}')) = P(Y|do(\mathbf{X}))$. Then, by Theorem 4 in \cite{shpitser2006identification}, we have that $P(Y|do(\mathbf{X}'))$ is unidentifiable so that $P(Y|do(\mathbf{X}))$ is also unidentifiable. By contraposition, $P(Y|do(\mathbf{X}))$ being identifiable implies that there is no hedge for $P(Y|do(\mathbf{X}'))$, where $\mathbf{X}'$ is the smallest subset of $\mathbf{X}$ such that $P(Y|do(\mathbf{X}'))=P(Y|do(\mathbf{X}))$.
\end{proof}

\subsection{Proof of Lemma \ref{lem:y-rooted-ctree-superset}}
\begin{lemma} \label{lem:y-rooted-ctree-superset}
     Let $F$ be a $Y$-rooted C-tree in $G=(\mathbf{V},\mathbf{E})$. For any $K \in F \setminus \{Y\}$ such that $K \subseteq \mathbf{J} \subseteq \mathbf{V} \setminus \{Y\}$ and for any $\mathbf{W} \subseteq \mathbf{V}\setminus (\mathbf{J}\cup Y)$, $P(Y|do(\mathbf{J}), \mathbf{W})$ is unidentifiable in $G$.
\end{lemma}

\begin{proof}
   We will show that there exists a hedge for $P(Y|do(\mathbf{J}), \mathbf{W})$ according to Definition~\ref{def:generalized-hedge}. By Theorem 20 in \cite{shpitser2008complete}, there exists a unique maximal set $\mathbf{Z}$ such that $P(Y|do(\mathbf{J}), \mathbf{W}) = P(Y|do(\mathbf{J, Z}), \mathbf{W}\setminus\mathbf{Z})$. Since $K \in F \setminus \{Y\}$ and $K \subseteq \mathbf{J}$, we have that $F \cap (\mathbf{J} \cup \mathbf{Z}) \ne \emptyset$. Next, we let $F' = \{Y\}$ such that $F' \cap (\mathbf{J} \cup \mathbf{Z}) = \emptyset$. By Definition~\ref{def:generalized-hedge}, there exists a hedge for $P(Y|do(\mathbf{J}),\mathbf{W})$. By Theorem~\ref{thm:conditional-hedge-iff-non-id}, $P(Y|do(\mathbf{J}),\mathbf{W})$ is unidentifiable in $G$.
\end{proof}


% \subsection{Proof of Theorem \ref{hedge-criterion}}
% \begin{proof}
% Let $\mathbf{X, Y, W}$ be sets of variables in $G$. Let $\mathbf{Z} \subseteq \mathbf{W}$ be the maximal set such that $p(\mathbf{y}|do(\mathbf{x}), \mathbf{w}) = p(\mathbf{y}|do(\mathbf{x,z}), \mathbf{w}\text{\textbackslash}\mathbf{z})$. Let $\mathbf{X}' = \mathbf{X}\cup \mathbf{Z}$ and $\mathbf{Y}' = \mathbf{Y}\cup (\mathbf{W}$\textbackslash $\mathbf{Z}$). By theorem 21 \cite{shpitser2008complete}, $p(\mathbf{y}'|do(\mathbf{x}'
%     ))$ is identifiable if and only if $p(\mathbf{y}|do(\mathbf{x}), \mathbf{w})$ is identifiable. By theorem 19 \cite{shpitser2008complete}, $p(\mathbf{y}'|do(\mathbf{x}'))$ is unidentifiable in $G$ if and only if $G$ contains a hedge for some $p(\tilde{\mathbf{y}}|do(\tilde{\mathbf{x}}))$, where $\tilde{\mathbf{y}} \subseteq \mathbf{y}', \tilde{\mathbf{x}} \subseteq \mathbf{x}'$. With theorem 19 and 21 together, the result follows. 
% \end{proof}


% \subsection{Proof of Theorem \ref{sufficiency-of-intervening-m-specific-set}}
% \begin{proof}
% Suppose there exists a hedge for $p(\mathbf{y}|do(\mathbf{m}))$ for the maximal subset  $\mathbf{M} \subseteq ((An(\mathbf{Y})\cap De(\mathbf{S})) \text{\textbackslash} \mathbf{Y})$. By theorem \ref{alt-hedge-def}, there exists a subset $\mathbf{R} \subseteq An(\mathbf{Y})_{\mathbf{V}\mathbf{\textbackslash}\mathbf{M}} \cap C^{\text{\mathbf{M}}}$ such that the conditions in theorem \ref{alt-hedge-def} hold. Then, we have three possible choices for $\mathbf{R}$: case i.) $\mathbf{R}$ contains some non-descendants of $\mathbf{S}$ only; case ii.) $\mathbf{R}$ contains some members of $\mathbf{Y}$ only; case iii.) both case i and case ii hold. Consider case i, that implies the non-descendants of $\mathbf{S}$ must be ancestors of $ch(S)$ in order to form an inducing path from $F_{ch(\mathbf{S})}$ to $\mathbf{R}$ for all members in $ch(\mathbf{S})$ in the induced subgraph of $G$ over $C^{\mathbf{M}}$, which is a contradiction as $\mathbf{R}$ cannot have any child in the induced subgraph by definition of $\mathbf{R}$-rooted C-forests. Thus, only case ii remains true. By theorem \ref{alt-hedge-def}, there exists a subset $\mathbf{Q}$ of $An(\mathbf{Y}$) that contains $\mathbf{M}$ and and an inducing path from $F_{\mathbf{Q}}$ to $\mathbf{R}$ for all members in $\mathbf{Q}$. Thus, there is no subset of $\mathbf{V}$ that can d-separate $\mathbf{Y}$ and $\mathbf{S}$. Therefore,  there does not exist an identifiable query  $p(\mathbf{y}|do(\mathbf{x}),\mathbf{k})$ such that $\mathbf{K}$ d-separates $\mathbf{Y}$ and $\mathbf{S}$ in $G_{\overline{\mathbf{X}}}$, where $\mathbf{X}\subseteq \mathbf{V}, \mathbf{K}\subseteq \mathbf{V}$\textbackslash $\mathbf{X}$
% \end{proof}

% \subsection{Proof of Lemma \ref{unconditional-emptyset-id}}
% \begin{proof}
%     Suppose $An(Y)_{\mathbf{V}\text{\textbackslash} X} $ contains some children of $X$, if the largest member of the power set $\mathcal{P}(An(Y)_{\mathbf{V} \text{\textbackslash} X})\text{\textbackslash} \emptyset \cap S^{X} = \emptyset$, that implies there is no bidirected path from $X$ to any of its children in $G_{An(Y)}$, then $p(y|do(x))$ is identifiable by Theorem 4 in \cite{tian2002general}. If $An(Y)_{\mathbf{V}\text{\textbackslash}} X$ does not contain any child of $X$, then the same argument follows.
% \end{proof}

% \subsection{Proof of Theorem \ref{thm:conditional-hedge-iff-non-id}}
% \begin{proof}
%      Let $\mathbf{Z} \subseteq \mathbf{W}$ be the maximal set such that $p(y|do(\mathbf{\mathbf{x}}), \mathbf{w}) = p(y|do(\mathbf{\mathbf{x}, \mathbf{z}}), \mathbf{w}\text{\textbackslash} \mathbf{z})$. By Theorem 21 in \cite{shpitser2008complete}, $p(\mathbf{y}|do(\mathbf{x}), \mathbf{w})$ is identifiable in $G$ if and only if $p(y, \mathbf{w}\text{\textbackslash} \mathbf{z}|do(\mathbf{x,z}))$ is identifiable in $G$. By Theorem \ref{thm:hedge-iff-uid}, there exists a hedge for $p(y, \mathbf{w}\text{\textbackslash} \mathbf{z}|do(\mathbf{x,z}))$ if and only if $p(y, \mathbf{w}\text{\textbackslash} \mathbf{z}|do(\mathbf{x,z}))$ is unidentifiable in $G$. Therefore, we can apply the definition of hedge for $p(y, \mathbf{w} \setminus \mathbf{z}| do(\mathbf{x, z}))$ to formulate definition \ref{def:generalized-hedge} such that there exists a hedge for $p(\mathbf{y}|do(\mathbf{x}),\mathbf{w})$ according to definition \ref{def:generalized-hedge} if and only if $p(\mathbf{y}|do(\mathbf{x}),\mathbf{w})$ is unidentifiable in $G$.
% \end{proof}

% Algorithm 3 


% \section{Supplemental Details for Theorem \ref{thm:graphsurgery-complexity}}

% \subsection{Proof of Theorem \ref{baseline-complexity}}
% \begin{proof}
% The NaiveHedgeSearch iterates through all subsets of the power set of $An(\mathbf{Y})_{\mathbf{V} \text{\textbackslash} \mathbf{X}}$ except for the empty set, which results in $O(2^{|\mathcal{P}(An(\mathbf{Y})_{\mathbf{V}\text{\textbackslash}\mathbf{X}})\text{\textbackslash} \{\emptyset\}|})$ time complexity. For each possible $R$, the algorithm searches for $An(R)_{\mathbf{V}\text{\textbackslash}\mathbf{X}}$ if $R$
% \end{proof}

% \subsection{Proof of Theorem \ref{refinedhedge-complexity}}
% \begin{proof}
%     Needs work.
% \end{proof}
% \subsection{Proof of Theorem \ref{id-complexity}}
% \begin{proof}
% We will list out the time complexity of each step in one single call of the ID algorithm as follows:
%     \begin{itemize}
%         \item Step 1: $O(1)$
%         \item Step 2: it only works with vertices in $An(Y)_{G}$. Finding $An(Y)$ takes $O(|\mathbf{V}|+|E|)$, then it pass to ID again.
%         \item Step 3: $W$ is the set of $An(Y)_{\mathbf{V}\text{\textbackslash}X}$, this takes another $O(|\mathbf{V}|+|E|)$
%         \item Step 4: this step first starts with c-component factorization, which takes $O(|\mathbf{V}\text{\textbackslash}X|+|E_{\mathbf{V} \text{\textbackslash}X}|)$. Next, it calls on ID on each c-component when the returned factorization gives more than one c-component. Next, if we get $F'$, and get another single c-component when $X$ is included, then we have $F$ to form a hedge condition. But, if we still get back the same c-component as $F'$, then, we don't have a hedge and we continue to another ID call.
%     \end{itemize}
%     Each recursive call takes $O(|\mathbf{V}|+|E|)$ at most and it will search through all subsets of $An(Y)_{\mathbf{V}\text{\textbackslash}X}$ for $R$.
% \end{proof}


% \subsection{Proof of Lemma \ref{id-targets-root}}
% \begin{proof}
%     Suppose there exists a hedge for $p(\mathbf{y}|do(\mathbf{x}))$ and there exists some $Y\in \mathbf{Y}$ and $\mathbf{R} \in \mathcal{R}$ such that $Y\in \mathbf{R}$. By theorem \ref{alt-hedge-def}, there exists an inducing path from $F_{\mathbf{X}}$ to $\mathbf{Y}$ in the induced subgraph of $G$ over $C^{\mathbf{X}}$. Then, any variable on that inducing path is in $An(\mathbf{Y})$ and every non-endpoint vertex on that path is a collider. That implies there exist an inducing path from $\mathbf{X}$ to $\mathbf{Y}$ in the induced subgraph of $G$ over $C^{\mathbf{X}}$. Thus, there exists no subsets $\mathbf{W}\subseteq \mathbf{V}$ such that $\mathbf{W}$ can d-separate $\mathbf{X}$ and $\mathbf{Y}$ in $G_{\overline{\mathbf{X}}}$ and since the rules of do-calculus are complete for identifying the causal effects of the form $p(\mathbf{y}|do(\mathbf{x}), \mathbf{w})$, we have that $p(\mathbf{y}|do(\mathbf{x}), \mathbf{w})$ is unidentifiable.
% \end{proof}

% \subsection{Proof of Theorem \ref{id-c-components}}
% \begin{proof}
% $(\Rightarrow)$ Suppose $p(\mathbf{y}|do(\mathbf{x}))$ is identifiable. By the soundness of the ID algorithm \cite{shpitser2008complete} for $p(\mathbf{y}|do(\mathbf{x}))$ to be identifiable, ID algorithm will not be able to execute step 5 of the algorithm. It implies that there is no hedge for each $p(\mathbf{y}|do(\mathbf{c}))$ for any $\mathbf{C} \in C_{\mathbf{X}}$ in order to be able to return an identifiable query in step 4 of the algorithm for $p(\mathbf{y}|do(\mathbf{x}))$.
% \\
% $(\Leftarrow)$Similarly, suppose there is no hedge for $p(\mathbf{y}|do(\mathbf{c}))$ for any $\mathbf{C}\in C_{\mathbf{X}}$ Then, the step 5 of ID algorithm will not be executed for any c-component in $G$ and ID algorithm will return an identifiable query such that $p(\mathbf{y}|do(\mathbf{x}))$ is identifiable by the soundness of the ID algorithm.
% \end{proof}
% \subsection{Proof of Lemma \ref{hedge-non-id-lemma1}}
% \begin{proof}
%     Needs work.
% \end{proof}

\section{Algorithms}
In this section, we provide the pseudocode of the algorithms that are called as subroutines by the algorithms in the main paper.
\subsection{computeLoss}
\begin{algorithm}[H]
     \caption{computeLoss($\Y, \X$)}
     \label{alg:evaluate}
     \begin{algorithmic}[1]
      \STATE \textbf{Input:}  A set of targets $\Y$, an intervention set $\X$
        \STATE \textbf{Output:}  the validation loss $L$ of the normalized estimate of $P(\Y|do(\X))$

            \STATE $P =$ \textbf{ID}$(\Y, \X,G)$ \COMMENT{Algorithm \ref{alg:id}}
                     \STATE $P_{s} = P / \sum_{Y} P$

            \STATE $L = $ Compute validation loss $l(P_{s})$
            \STATE \textbf{Return}
            $L$
     \end{algorithmic}
\end{algorithm}

\subsection{Find-MACS-on-set}
\label{sec-alg:find-macs-on-set}
\begin{algorithm}[H]
     \caption{Find-MACS-on-set($G, \mathbf{Y}$)\cite{shpitser2008dormant}}
     \label{alg:find-macs-on-set}
     \begin{algorithmic}[1]
      \STATE \textbf{Input:}  A causal graph $G$ ,  an AC-component $\mathbf{Y}$   in $G$ 
        \STATE \textbf{Output:} $T_\mathbf{Y}$, a subgraph of $G$, the maximal ancestral confounded set for $\mathbf{Y}$ in $G$.
        \IF{$\exists X \not \in An(\mathbf{Y})_{G}$}
         \STATE \textbf{Return Find-MACS-on-set}$(G_{An(\mathbf{Y})}, \mathbf{Y})$  
        \ENDIF
        \IF{$\exists Y \in \mathbf{Y}, \exists X \not \in C(Y)_{G}$}
        \STATE \textbf{Return Find-MACS-on-set}$(G_{C(Y)}, \mathbf{Y})$  
        \ELSE
            \STATE \textbf{Return} $G$
        \ENDIF
     \end{algorithmic}
\end{algorithm}

\subsection{ID Algorithm}\label{sec:id}
\begin{algorithm}[H]
     \caption{ID($\mathbf{Y}, \mathbf{X}, P, G$) \cite{shpitser2008complete}}
     \label{alg:id}
     \begin{algorithmic}[1]
      \STATE \textbf{Input:} a set of target variables $\mathbf{Y}$, a set of random variables for intervention $\mathbf{X}$ , a probability distribution $P$, a causal graph $G$
      \STATE \textbf{Output:} Expression for $P(\mathbf{Y}|do(\mathbf{X}))$ in terms of $P$ or \textbf{FAIL}$(F, F')$
      \IF{$\mathbf{X} = \emptyset$}
        \STATE \textbf{Return} $\sum_{\mathbf{V}\setminus \mathbf{Y}} P(\mathbf{v})$
      \ENDIF
      \IF{$\mathbf{V}\setminus An(\mathbf{Y})_{G} \ne \emptyset$} 
        \STATE \textbf{Return ID}$(\mathbf{Y},\mathbf{X}\cap An(\mathbf{Y})_{G}, \sum_{\mathbf{V}\setminus An(\mathbf{Y})_{G}} P, G_{An(\mathbf{Y})})$
      \ENDIF
      \STATE Let $\mathbf{W} = (\mathbf{V} \setminus \mathbf{X}) \setminus An(\mathbf{Y})_{G_{\overline{\mathbf{X}}}}$
    \IF{$\mathbf{W} \ne \emptyset$}
        \STATE \textbf{Return ID}$(\mathbf{Y}, \mathbf{X}\cup \mathbf{W}, P, G)$
    \ENDIF
    \IF{$C(G \setminus \mathbf{X}) = \{S_1, \ldots, S_{k}\}$}
        \STATE \textbf{Return} $\sum_{\mathbf{V} \setminus (\mathbf{Y} \cup \mathbf{X})} \prod_{i}$ \textbf{ID}$(S_{i}, \mathbf{V}\setminus S_{i}, P, G)$
    \ENDIF
    \IF{$C(G \setminus \mathbf{X}) = \{S\}$}
        \IF{$C(G) = \{G\}$}
            \STATE \textbf{Return FAIL}$(G, G \cap S)$
        \ENDIF
        \IF{$S \in C(G)$}
            \STATE \textbf{Return} $\sum_{S\setminus \mathbf{Y}}\prod_{\{i | V_{i} \in S\}} P(V_{i} | V_{\pi}^{i-1})$
        \ENDIF
        \IF{$\exists S'$ s.t. $S\subset S'\in C(G)$}
            \STATE \textbf{Return ID}$(\mathbf{Y}, \mathbf{X}\cap S', \prod_{\{i | V_{i} \in S'\}}P(V_{i}| V_{\pi}^{(i-1)}\cap S', v_{\pi}^{(i-1)}\setminus S'), G_{S'})$
        \ENDIF
    \ENDIF
    \end{algorithmic}
\end{algorithm}
% \section{Find-MACS algorithm}

% \begin{algorithm}[H]
%      \caption{Find-MACS($G, Y$)\cite{shpitser2008dormant}}
%      \label{alg:find-macs}
%      \begin{algorithmic}[1]
%       \STATE \textbf{Input:}  A causal graph $G$, a target $Y$
%         \STATE \textbf{Output:} $T_Y$, a subgraph of $G$, the maximal ancestral confounded set for $Y$ in $G$.
%         \IF{$\exists X \not \in An(Y)_{G}$}
%          \STATE \textbf{Return Find-MACS}$(G_{An(Y)}, Y)$  
%         \ENDIF
%         \IF{$\exists X \not \in C(Y)_{G}$}
%         \STATE \textbf{Return Find-MACS}$(G_{C(Y)}, Y)$  
%         \ELSE
%             \STATE \textbf{Return} $G$
%         \ENDIF
%      \end{algorithmic}
% \end{algorithm}

\subsection{Graph Surgery Estimator Algorithm}\label{sec:graph-surgery-estimator}
In this section, we present the main algorithms in \cite{subbaswamy2019preventing}.

\begin{algorithm}[H]
     \caption{Unconditional Query: UQ($\X,  \Y, \Z; G$)\cite{subbaswamy2019preventing}}
     \label{alg:unconditional-query}
     \begin{algorithmic}[1]
      \STATE \textbf{Input:} Acyclic Directed Mixed Graph (ADMG) $G=(\mathbf{V}, \mathbf{E})$, disjoint variable sets $\X, \Y, \Z \subset \mathbf{V}$
        \STATE \textbf{Output:} Unconditional query $ P(\Y|do(\X), \Z)$
        \STATE $\X' = \X$
        \STATE $\Y' = \Y$
        \STATE $\Z' = \Z$
        \WHILE{$\exists Z \in \Z' \text{ s.t. } (\Y \indep Z | \X', \Z' \setminus \{Z\})_{G_{\overline{\X'}, \underline{Z}}}$ }
            \STATE $\X' = \X' \cup \{Z\}$
            \STATE $\Z' = \Z' \setminus \{Z\}$
        \ENDWHILE
        \STATE $\Y' = \Y \cup \Z'$
        \STATE \textbf{Return} $\X'$, $\Y'$ of unconditional query $P(\Y'|do(\X'))$
     \end{algorithmic}
\end{algorithm}
%
\begin{algorithm}[H]
     \caption{Graph Surgery Estimator ($G, \mathbf{M}, T$)~\cite{subbaswamy2019preventing}}
     \label{alg:graph-surgery-estimator}
     \begin{algorithmic}[1]
      \STATE \textbf{Input:}  ADMG $G$, mutable variables $\mathbf{M}$, target $T$
      \STATE \textbf{Output:} Expression for the surgery estimator or \textbf{FAIL} if there is no stable estimator.
      \STATE $S_{ID} = \emptyset$
      \STATE $Loss = \emptyset$
      \FOR{$\mathbf{Z} \in \mathcal{P}(\mathbf{V} \setminus (\mathbf{M} \cup \{T\}))$}
      \IF{$T \not \in \mathbf{M}$}
        \STATE $\mathbf{X}, \mathbf{Y} = $ \textbf{UQ}$(\mathbf{M}, \{T\}, \mathbf{Z}; G)$
        \STATE \textbf{try}
                 \STATE \text{       }\text{       } \text{   } $P =$ \textbf{ID}$(\mathbf{X}, \mathbf{Y}, G)$
                 \STATE \text{       }\text{       } \text{   } $P_{s} = P / \sum_{Y} P$
                  \STATE \text{       }\text{       } \text{   } Compute the validation loss $l(P_{s})$
                  \STATE \text{       }\text{       } \text{   } $S_{ID}.$append$(P_{s})$; $Loss$.append$(l(P_{s}))$
         \STATE \textbf{catch}
         \STATE \text{       }\text{       } \text{   } \textbf{continue}
      \ENDIF
      \STATE $\X, \Y = $ \textbf{UQ}$(\mathbf{M}, \{T\}, \mathbf{Z}; G_{\overline{T}})$
      \STATE $\X = \X \cup \{T\}$
      \STATE $\Y = \Y \setminus \{T\}$
      \IF{$\Y \cap (\{T\} \cup Ch(T)) = \emptyset$}
            \STATE \textbf{continue}
      \ENDIF
      \STATE \textbf{try}
                 \STATE \text{       }\text{       } \text{   } $P =$ \textbf{ID}$(\mathbf{X}, \mathbf{Y}, G)$
                 \STATE \text{       }\text{       } \text{   } $P_{s} = P / \sum_{Y} P$
                  \STATE \text{       }\text{       } \text{   } Compute the validation loss $l(P_{s})$
                  \STATE \text{       }\text{       } \text{   } $S_{ID}.$append$(P_{s})$; $Loss$.append$(l(P_{s}))$
         \STATE \textbf{catch}
         \STATE \text{       }\text{       } \text{   } \textbf{continue}
      \ENDFOR
     \IF{$S_{ID} = \emptyset$}
        \STATE \textbf{Return FAIL}
     \ENDIF
     \STATE \textbf{Return} $P_{s} \in S_{ID}$ with lowest corresponding $Loss$
     \end{algorithmic}
\end{algorithm}
%
%
\section{Semi-Synthetic Causal graphs}

\subsection{Sachs causal graph}

\begin{figure}[H]
\centering
 \subfigure[Original Sachs causal graph with the target variable (yellow)]{\includegraphics[scale=0.20]{Figures/original_sach.pdf}}
 \subfigure[Modified Sachs causal graph with the selection variable (green)]{
\includegraphics[scale=0.20]{Figures/modified_sachs.pdf}
 }
\caption{Semi-synthetic causal graph: Sachs }
\end{figure}



\subsection{Alarm causal graph}
%
\begin{figure}[H]
\centering
%
 \subfigure[Original Alarm causal graph with the target variable (yellow)]{\includegraphics[scale=0.40]{Figures/original_alarm.pdf}}
 %
 \subfigure[Modified Alarm causal graph with the selection variable (green)]{
\includegraphics[scale=0.40]{Figures/modified_alarm.pdf}
 }
 %
\caption{Semi-synthetic causal graph: Alarm }
\end{figure}





\bibliographystyle{plain}
\bibliography{lee_773-supp}

% \bibliography{references}

\end{document}
