% \appendix
% \begin{center}
%     \bfseries\Large Appendix
% \end{center}
\documentclass[accepted]{uai2022}

\usepackage[american]{babel}
\usepackage{natbib} 
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating 


\usepackage{amsmath,amsfonts,amssymb,mathtools, amsthm, dsfont}
\usepackage{subfiles}
\usepackage{tikz, subcaption}
\usepackage[noend]{algorithmic}
\usepackage[ruled,vlined,linesnumbered]{algorithm2e}
\usepackage{bbm}


% Demo macro: typesets mandatory argument #3, then the optional separator #1
% (default "-"), then mandatory argument #2.
\newcommand{\swap}[3][-]{#3#1#2} % just an example

% Theorem-like environments (amsthm). Every unstarred \newtheorem below is
% declared without a shared-counter argument, so each environment is numbered
% independently; the commented-out [section]/[theorem] suffixes are disabled.
\newtheorem{theorem}{Theorem}%[section]
% customthm{<label>}: a theorem whose printed number is replaced by <label>.
\newenvironment{customthm}[1]{\renewcommand\thetheorem{#1}\theorem}{\endtheorem}
\newtheorem{corollary}{Corollary}%[theorem]
\newtheorem{lemma}{Lemma}
\newtheorem{assumption}{Assumption}
% customlem{<label>}: a lemma whose printed number is replaced by <label>.
\newenvironment{customlem}[1]{\renewcommand\thelemma{#1}\lemma}{\endlemma}
  
% customtheorem has the same definition as customthm above.
\newenvironment{customtheorem}[1]{\renewcommand\thetheorem{#1}\theorem}{\endtheorem}
  
\newtheorem{proposition}{Proposition}
% customprp{<label>}: a proposition whose printed number is replaced by <label>;
% used in this file as \begin{customprp}{\ref{...}} to restate main-text results.
\newenvironment{customprp}[1]{\renewcommand\theproposition{#1}\proposition}{\endproposition}
\newtheorem{definition}{Definition}
\newtheorem{remark}{Remark}
% Unnumbered lemma, used for quoting a lemma from another paper.
\newtheorem*{lemma*}{Lemma}
\newtheorem{conjecture}{Conjecture}
% myproof[<heading>]: wraps amsthm's proof environment (heading defaults to
% \proofname) and inserts an empty inline formula followed by \nobreak right
% after the heading.
% NOTE(review): presumably the empty formula keeps a proof that opens with a
% display or list from colliding with the heading -- TODO confirm.
\newenvironment{myproof}[1][\proofname]{%
  \begin{proof}[#1]$ $\nobreak\ignorespaces
}{%
  \end{proof}
}

\usetikzlibrary{positioning}


% Graph-relation operators. Usage: \Pa{<node(s)>}{<graph>}; the second argument
% is typeset as a subscript, the first in parentheses (same for \Ch and \Anc).
\newcommand{\Pa}[2]{\textit{Pa}_{#2}(#1)}
\newcommand{\Ch}[2]{\textit{Ch}_{#2}(#1)}
\newcommand{\Anc}[2]{\textit{Anc}_{#2}(#1)}
% Bold upright shorthands; upper-case ones are used for sets of variables in
% the text, lower-case ones for realizations.
\newcommand{\V}[0]{\mathbf{V}}
\newcommand{\C}[0]{\mathbf{C}}
\newcommand{\W}[0]{\mathbf{W}}
\newcommand{\U}[0]{\mathbf{U}}
\newcommand{\E}[0]{\mathbf{E}}
\newcommand{\X}[0]{\mathbf{X}}
\newcommand{\R}[0]{\mathbf{R}}
\newcommand{\Y}[0]{\mathbf{Y}}
\newcommand{\Z}[0]{\mathbf{Z}}
\newcommand{\A}[0]{\mathbf{A}}
\newcommand{\B}[0]{\mathbf{B}}
\newcommand{\T}[0]{\mathbf{T}}
\newcommand{\x}[0]{\mathbf{x}}
\newcommand{\y}[0]{\mathbf{y}}
\newcommand{\z}[0]{\mathbf{z}}
% Calligraphic shorthands: \G is used for causal graphs, \M for models,
% \F for the c-forests of a hedge.
\newcommand{\G}[0]{\mathcal{G}}
\newcommand{\M}[0]{\mathcal{M}}
\newcommand{\F}[0]{\mathcal{F}}
% \dom{<variable(s)>}: the domain of the given variable(s), typeset as a
% subscripted fraktur X.
\newcommand{\dom}[1]{\mathfrak{X}_{#1}}

% Independence symbol: two \perp glyphs pulled together by a negative kern.
\newcommand{\independent}{\perp\mkern-9.5mu\perp}
% NOTE(review): \centernot comes from the centernot package and \nb is a
% reviewer-note helper; neither is loaded or defined in the visible preamble.
% Both macros below would raise "Undefined control sequence" on first use
% unless the class or another file provides them -- TODO confirm.
\newcommand{\notindependent}{\centernot{\independent}}
\newcommand{\jalal}[1]{\nb{jalal}{green}{#1}}


%%% HELPER CODE FOR DEALING WITH EXTERNAL REFERENCES
\usepackage{xr}
\makeatletter
% \addFileDependency{<file>}: writes "(<file>)" to the log/terminal, records
% <file> via the kernel's \@addtofilelist, and prints "No file <file>." when
% the file does not exist.
% NOTE(review): presumably this lets build tools such as latexmk detect the
% dependency and rebuild it -- TODO confirm.
\newcommand*{\addFileDependency}[1]{
  \typeout{(#1)}
  \@addtofilelist{#1}
  \IfFileExists{#1}{}{\typeout{No file #1.}}
}
\makeatother

% \myexternaldocument{<basename>}: imports the labels of <basename> through
% xr's \externaldocument and registers both <basename>.tex and <basename>.aux
% as file dependencies.
\newcommand*{\myexternaldocument}[1]{
    \externaldocument{#1}
    \addFileDependency{#1.tex}
    \addFileDependency{#1.aux}
}
% Labels such as thm: main or prp: 3 referenced in this appendix are not
% defined in the visible part of this file; they are expected to come from
% this external document.
\myexternaldocument{kivva_503}

\title{Revisiting the General Identifiability Problem \\ Appendix}

\author[1]{Yaroslav Kivva}
\author[1]{Ehsan Mokhtarian}
\author[1]{Jalal Etesami}
\author[1,2]{Negar Kiyavash}
\affil[1]{%
    School of Computer and Communication Sciences\\
    EPFL\\
    Lausanne, Switzerland
}
\affil[2]{%
    College of Management of Technology\\
    EPFL\\
    Lausanne, Switzerland
}

% \pagestyle{numbered}

\begin{document}
\onecolumn
\maketitle




\begin{figure}[b]
    \centering
    \begin{subfigure}[b]{0.3\linewidth}
            \centering
            \begin{tikzpicture}[
            roundnode/.style={circle, draw=black!60,, fill=white, thick, inner sep=1pt},
            dashednode/.style = {circle, draw=black!60, dashed, fill=white, thick, inner sep=1pt},
            ]
            % Nodes
            \node[roundnode]        (t1)        at (0, 0)                   {$T_1$};
            \node[roundnode]        (t2)        at (0, 1.5)                 {$T_2$};
            \node[roundnode]        (t3)        at (0, 3)                   {$T_3$};
            \node[roundnode]        (r)         at (0, -1.5)                {$R$};
            \node[dashednode]       (u1)        at (0.75, 0)                {$U_1$};
            \node[dashednode]       (u2)        at (-0.75, 1.75)            {$U_2$};
            \node[dashednode]       (u3)        at (-1.25, 1)               {$U_3$};
            
            %Edges
            \draw[-latex] (t1.south) -- (r.north) ;
            \draw[-latex] (t2.south) -- (t1.north);
            \draw[-latex] (t3.south) -- (t2.north);
            \draw[latex-, dashed] (t1.west) -- (u2.south);
            \draw[-latex, dashed] (u2.north) -- (t3.west);
            \draw[latex-, dashed] (r.east) .. controls +(right:5mm) and +(up:2mm) .. (u1.south);
            \draw[latex-, dashed] (t2.east) .. controls +(right:5mm) and +(down:2mm) .. (u1.north);
            \draw[latex-, dashed] (r.west) .. controls +(left:10mm) and +(up:2mm) .. (u3.south);
            \draw[latex-, dashed] (t3.west) .. controls +(left:10mm) and +(down:2mm) .. (u3.north);
                
            \end{tikzpicture}
            \caption{Thicket $\mathcal{J}$}
            \label{subfig: thicket exmple 2 }
    \end{subfigure}
    \begin{subfigure}[b]{0.3\linewidth}
            \centering
            \begin{tikzpicture}[
            roundnode/.style={circle, draw=black!60,, fill=white, thick, inner sep=1pt},
            dashednode/.style = {circle, draw=black!60, dashed, fill=white, thick, inner sep=1pt},
            ]
            % Nodes
            \node[roundnode]        (t1)        at (0, 0)                   {$T_1$};
            \node[roundnode]        (t2)        at (0, 1.5)                 {$T_2$};
            \node[roundnode]        (t3)        at (0, 3)                   {$T_3$};
            \node[roundnode]        (r)         at (0, -1.5)                {$R$};
            \node[dashednode]       (u2)        at (-0.75, 1.75)            {$U_2$};
            \node[dashednode]       (u3)        at (-1.25, 1)               {$U_3$};
            
            %Edges
            \draw[-latex] (t1.south) -- (r.north) ;
            \draw[-latex] (t2.south) -- (t1.north);
            \draw[-latex] (t3.south) -- (t2.north);
            \draw[latex-, dashed] (t1.west) -- (u2.south);
            \draw[-latex, dashed] (u2.north) -- (t3.west);
            \draw[latex-, dashed] (r.west) .. controls +(left:10mm) and +(up:2mm) .. (u3.south);
            \draw[latex-, dashed] (t3.west) .. controls +(left:10mm) and +(down:2mm) .. (u3.north);
                
            \end{tikzpicture}
            \caption{Hedgelet $\mathcal{H}_1$}
            \label{subfig: hedgelet 1 exmpl 2}
    \end{subfigure}
    \begin{subfigure}[b]{0.3\linewidth}
            \centering
            \begin{tikzpicture}[
            roundnode/.style={circle, draw=black!60,, fill=white, thick, inner sep=1pt},
            dashednode/.style = {circle, draw=black!60, dashed, fill=white, thick, inner sep=1pt},
            ]
            % Nodes
            \node[roundnode]        (t1)        at (0, 0)                   {$T_1$};
            \node[roundnode]        (t2)        at (0, 1.5)                 {$T_2$};
            \node[roundnode]        (r)         at (0, -1.5)                {$R$};
            \node[dashednode]       (u1)        at (0.75, 0)                {$U_1$};
            
            %Edges
            \draw[-latex] (t1.south) -- (r.north) ;
            \draw[-latex] (t2.south) -- (t1.north);
            \draw[latex-, dashed] (r.east) .. controls +(right:5mm) and +(up:2mm) .. (u1.south);
            \draw[latex-, dashed] (t2.east) .. controls +(right:5mm) and +(down:2mm) .. (u1.north);
    
                
            \end{tikzpicture}
            \caption{Hedgelet $\mathcal{H}_2$}
            \label{subfig: hedgelet 2 exmpl 2}
    \end{subfigure}
    \caption{(a) The thicket $\mathcal{J}$ formed for the causal effect of $\{T_1, T_2, T_3\}$ on $\{R\}$ in Example 2; (b) and (c) are the hedgelets formed by the thicket $\mathcal{J}$.}
    \label{fig:exmpl 2}
\end{figure}

\section{On the positivity assumption} \label{sec: apd_pos}
    We first present some definitions and notation from \citet{lee2019general}, and illustrate them using the causal graph $\G$ from Example 2 in the main text.
    
    \subsection{Notation} \label{apn: A notation}
        \begin{definition}[\citep{lee2019general}]
            Assume that $\mathbf{R}$ is a subset of observed variables $\V$. A hedge is a pair of $\mathbf{R}$-rooted c-forests $\langle \F, \F' \rangle$ such that $\F'$ is a subgraph of $\F$.
        \end{definition}
        \textbf{In Figure \ref{fig:countr_exmpl_complex} of the main text:} Subgraphs $\F = \G[\{R, T_1, T_2, T_3\}]$ and $\F' = \G[\{R\}]$ form a hedge $\langle \F, \F' \rangle$.
        
        Denote by $\mathcal{C}(\G)=\{\W_i\}_{i=1}^{k}$ the set of c-components that partition the observed variables in $\G$, where each $\W_i$ is a maximal c-component. Here, maximal is meant with respect to set inclusion, that is, there is no $\W \subseteq \V$ such that $\W_i \subsetneq \W$ and $\W$ is a c-component in $\G$. 
        Assume that $\T$ is the set of all observed variables in $\F$ but not in $\F'$. 
        We define $\F'':=\F[\textbf{T}]$. 
        
        \textbf{In Figure \ref{fig:countr_exmpl_complex}  of the main text:} $\mathcal{C}(\G[\{T_1, T_2, T_3\}]) = \{ \{T_1, T_3\}, \{T_2\}\}$. Additionally, $\F'' = \G[\{T_1, T_2, T_3\}]$ for the hedge constructed before.

        \begin{definition}[\citep{lee2019general}]
            Given a hedge $\langle \F, \F' \rangle$, denote by $\V'$ the set of all observed variables of $\F'$. The hedgelet decomposition of the hedge $\langle \F, \F' \rangle$ is a collection of hedgelets $\{\F(\W)\}_{\W \in \mathcal{C}(\F'')}$, where each hedgelet $\F(\W)$ is a subgraph of $\F$ made of (i) $\F[\W\cup \V']$ and (ii) $\F[De_{\F}(\W)]$ without bidirected edges, that is, all observed descendants of $\W$ and all directed edges between them. 
            Let $\mathbb{H}_{\F} := \{\F(\W)\}_{\W \in \mathcal{C}(\F'')}$ be the set of hedgelets of $\langle \F, \F' \rangle$.
        \end{definition}

        \textbf{In Figure \ref{fig:countr_exmpl_complex} of the main text:} For the hedge $\langle \F, \F' \rangle$, where $\F = \G[\{R, T_1, T_2, T_3\}]$ and $\F' = \G[\{R\}]$, there are two hedgelets $\mathcal{H}_1, \mathcal{H}_2$ displayed in Figures (\ref{subfig: hedgelet 1 exmpl 2})-(\ref{subfig: hedgelet 2 exmpl 2}). Moreover, we have $\mathbb{H}_{\F} = \{\mathcal{H}_1, \mathcal{H}_2\}$.
        
        \begin{definition}[\citep{lee2019general}]
            Let $\mathbf{R}$ be a non-empty set of variables and $\mathbb{Z}$ be a collection of sets of variables in $\G$. A thicket $\mathcal{J}$ is a subgraph of $\G$ which is an $\mathbf{R}$-rooted c-component consisting of a minimal c-component over $\mathbf{R}$ and hedges
            \begin{equation*}
                \mathbb{F}_{\mathcal{J}} := \{\langle \F_{\Z}, \mathcal{J}[\R]\rangle \mid \F_{\Z} \subseteq \G[\V\setminus\Z], \Z\cap \R=\varnothing\}_{\Z\in \mathbb{Z}}.
            \end{equation*}
        \end{definition}
        Let $\X$ and $\Y$ be disjoint sets of observed variables in $\G$. A thicket $\mathcal{J}$ is said to be formed for $P_{\mathbf{x}}(\mathbf{y})$ in $\G$ with respect to $\mathbb{Z}$ if $\R \subseteq \Anc{\Y}{\G[\V \setminus \X]}$ and every hedgelet of each hedge $\langle \F_{\Z}, \mathcal{J}[\R]\rangle$ intersects with $\X$.
        
        \textbf{In Figure \ref{fig:countr_exmpl_complex} of the main text:} This graph is a thicket, also displayed in Figure \ref{subfig: thicket exmple 2 }.  Let $\mathbb{F}_{\mathcal{J}}$ be
        \begin{equation*}
            \mathbb{F}_{\mathcal{J}} = \{\langle \F, \F'\rangle \},
        \end{equation*}
        where $\F = \G[\{R, T_1, T_2, T_3\}]$ and $\F' = \G[\{R\}]$.
        One can observe that thicket $\mathcal{J}$ is formed for the causal effect $\X = \{T_1, T_2, T_3\}$ on $\Y = \{R\}$.
        
        Denote by $\T$ the set of all observed variables in the thicket $\mathcal{J}$ outside of the subgraph $\mathcal{J}[\R]$. Let $\mathbb{H} = \bigcup_{\langle \F, \F'\rangle \in \mathbb{F}_{\mathcal{J}}}\mathbb{H}_{\F}$, that is, the collection of all hedgelets induced by the hedges of $\mathcal{J}$.
        
        \textbf{In Figure \ref{fig:countr_exmpl_complex} of the main text:} $\T = \{T_1, T_2, T_3\}$ and $\mathbb{H} = \{\mathcal{H}_1, \mathcal{H}_2\}$.

    \subsection{On the positivity assumption} \label{apn: pos assumption simple}
        Given the above definitions, we can state Lemma 3 of \citet{lee2019general}.
        \begin{lemma*}
            Let $\T'\subsetneq \T$ such that there exists a hedgelet $\mathcal{H} \in \mathbb{H}\setminus\mathbb{H}(\T')$, where $\mathbb{H}(\T')$ is a set of hedgelets from $\mathbb{H}$ which contain at least one variable from $\textbf{T}'$. Then, under the intervention $do(\mathbf{t}')$, there exists $R \in \R$, for any instantiation of $\U$, such that $r=0$ in both models.
        \end{lemma*}

        Note that by the construction in \citet{lee2019general}, $R$ in the above lemma is a binary random variable. 
        In the above lemma, let $\T' = \varnothing$. 
        Based on this lemma, for any instantiation of the unobserved variables $\U$, we have $P(\V=\mathbf{v})=0$, where $\mathbf{v}$ is any realization of the observed variables in which $r=1$. 
        This clearly shows that the models constructed by \citet{lee2019general} violate the positivity assumption.

    \subsection{On the relaxed positivity assumption }
%$P(\X \mid \Pa{\X}{\G} \setminus \X)>0$

        Herein, we study Figure \ref{fig:countr_exmpl_complex} of the main text in more detail and show that the models of \citet{lee2019general} violate the relaxed positivity assumption. 
        To this end, we present the models $\M_1$ and $\M_2$ constructed by \citet{lee2019general} for the thicket $\mathcal{J}$, which is defined for this case in Appendix \ref{apn: A notation}. 
        By construction, each variable in $\{U_1, U_2, U_3, T_3\}$ is binary, i.e., takes values in $\{0, 1\}$, and each variable in $\{T_1, T_2\}$ is a vector of length two, because each variable in $\{U_1, U_2, U_3, T_3\}$ appears in only one hedgelet, whereas each variable in $\{T_1, T_2\}$ appears in exactly two different hedgelets. 
        Thus, $T_1=(T_{1, 1}, T_{1, 2})$ and $T_2 = (T_{2, 1}, T_{2, 2})$, where $T_{1, 1}, T_{1, 2}, T_{2, 1}, T_{2, 2}$ are binary numbers. The first coordinate captures some properties of the hedgelet $\mathcal{H}_1$ while the second coordinate captures some properties of the hedgelet $\mathcal{H}_2$. 
        \cite{lee2019general} define both models $\M_1, \M_2$ for the hedgelet $\mathcal{H}_1$ as
        \begin{align*}
            & T_3 = U_2 \oplus U_3, \quad T_{2, 1} = T_3, \quad T_{1, 1} = T_{2, 1} \oplus U_2,
        \end{align*}
        and for the hedgelet $\mathcal{H}_2$ as
        \begin{align*}
            & T_{2, 2} = U_1, \quad T_{1, 2} = T_{2, 2}.
        \end{align*}
        Additionally, in model $\M_1$, variable $R$ is defined by
        \begin{equation*}
            R = \mathds{1}_{T_{1, 1} = 0} \wedge \mathds{1}_{T_{1, 2}=0} \wedge \mathds{1}_{U_3=1} \wedge \mathds{1}_{U_{1} = 1},
        \end{equation*}
        and in model $\M_2$, it is defined to be zero, i.e., $R = 0$.

\section{Technical proofs} \label{sec: apd_proof}

    \begin{figure}[t]
        \centering
        \begin{tikzpicture}[block/.style={rounded corners, minimum width=2cm, minimum height=1cm, draw}]
            \node[block] (1) {Theorem \ref{thm: main}};
            \node[block, right=1 of 1] (3) {Lemma \ref{lemma: prp3 - 1}};
            \node[block, below=1 of 3] (2) {Proposition \ref{prp: 4}};
            \node[block, right=1 of 3] (4) {Lemma \ref{lemma: prp3 - 3}};
            \node[block, below=1 of 4] (5) {Lemma \ref{lemma: prp3 - 2}};
            \node[block, right=1 of 4] (6) {Proposition \ref{prp: 3}};
            \node[block, above=1 of 1] (7) {Lemma \ref{lem: simplify}};
            \node[block, left=1.5 of 1] (9) {Lemma \ref{lemma: valid model}};
            \node[block, above=1 of 9] (8) {Lemma \ref{lem: lin indep}};
            \node[block, below=1 of 9] (10) {Lemma \ref{lem: theta index}};
            \node[block, below=1 of 1] (11) {Lemma \ref{lem: eta index}};
            \node[block, left=1 of 8] (12) {Lemma \ref{lemma: lin indep formal}};
        \begin{scope}[->, shorten >=1mm, shorten <=1mm]
            \draw (1) -- (3);
            \draw (2) -- (3);
            \draw (3) -- (4);
            \draw (5) -- (4);
            \draw (4) -- (6);
            \draw (7) -- (1);
            \draw (8) to[out=0,in=165]([yshift=1mm]1.west);
            \draw (9) -- (1);
            \draw (10) to[out=0,in=195]([yshift=-1mm]1.west);
            \draw (11) -- (1);
            \draw (12) -- (8);
        \end{scope}
        \end{tikzpicture}
        \caption{Logical order of our proofs.}
        \label{fig: logic order}
    \end{figure}

    In this section, we first present some technical lemmas which we use throughout our proofs.
    The proofs of the lemmas and propositions within the main text are provided in Subsections \ref{sec: proof sec algorithm} and  \ref{sec: proof sec main}. 
    
    The logical order of our proofs is depicted in Figure \ref{fig: logic order}. For instance, we use Theorem \ref{thm: main} to prove Lemma \ref{lemma: prp3 - 1}. 
    Also note that the proof of Theorem \ref{thm: main} is provided in the main text using Lemmas \ref{lem: simplify}, \ref{lem: lin indep}, \ref{lemma: valid model}, \ref{lem: theta index}, and \ref{lem: eta index}. 
    
    \begin{definition}[Ancestral]
        We say a subset $\X$ of observed variables $\V$ is ancestral in $\G$, if $\X = \Anc{\X}{\G_{\V}}$.
    \end{definition}

    
    \subsection{Technical Lemmas}
        \begin{lemma}[\citep{tian2003ID}]\label{lemma: Q-marginal}
            Let $\W \subseteq \C \subseteq \V$, $\T = \C \setminus \W$, $\mathbf{S}=\V \setminus \T$. 
            If $\W$ is an ancestral set in $\G[\C]$, then:
            \begin{equation*}
                Q[\W] = \sum_{\C \setminus \W}Q[\C].
            \end{equation*}
        \end{lemma}
        
        \begin{lemma} \label{lemma: prp3 - 1}
        Consider a causal graph $\G$ with observed variables $\V$.
            Suppose $\X \subseteq \V$ and $e:=(X_1,Z)$ is a directed edge such that $X_1\in\X$.
            %from a variable in $\X$ to a variable in $\V\setminus \X$ in $\G$. 
            $Q[\X]$ is g-identifiable from $(\mathbb{A},\G)$ if and only if $Q[\X]$ is g-identifiable from $(\mathbb{A}, \mathcal{H})$, where $\mathcal{H}$ is the graph obtained by deleting $e$ from $\G$.
        \end{lemma}
        \begin{proof}
            $\X$ has the same c-components in $\G$ and $\mathcal{H}$ since $\G_{\V}$ and $\mathcal{H}_{\V}$ have the same undirected edges. 
            Let $\X_1,\cdots,\X_l$ be the c-components of $\X$. 
            For any $i\in [1:l]$ and $\A \in \mathbb{A}$ such that $\X_i \subseteq \A$, \cite{huang2008completeness} showed that $Q[\X_i]$ is identifiable from $\G[\A]$ if and only if $Q[\X_i]$ is identifiable from $\mathcal{H}[\A]$. 
            Hence, Theorem \ref{thm: main} implies that $Q[\X_i]$ is g-identifiable from $(\mathbb{A},\G)$ if and only if $Q[\X_i]$ is g-identifiable from $(\mathbb{A},\mathcal{H})$.
            In this case, Proposition \ref{prp: 4} implies that $Q[\X]$ is g-identifiable from $(\mathbb{A},\G)$ if and only if $Q[\X]$ is g-identifiable from $(\mathbb{A},\mathcal{H})$.
        \end{proof}
    
        \begin{lemma}\label{lemma: prp3 - 2}
            Suppose that $\X$ and $\Y$ are disjoint subsets of $\V$. Let $(Y_1,Y_2)$ (i.e., $Y_1\rightarrow Y_2$) denote a directed edge in $\G$, where $Y_1,Y_2 \in \Y$. Let $\G'$ denote the graph obtained by removing the edge $(Y_1,Y_2)$ from $\G$. If the causal effect of $\X$ on $\Y$ is not g-identifiable from $(\mathbb{A}, \G')$, then the causal effect of $\X$ on $\Y \setminus \{Y_1\}$ is not g-identifiable from $(\mathbb{A}, \G)$.
        \end{lemma}
        \begin{proof}
            Herein, we provide a proof that is similar to one of the proofs in \citet{huang2008completeness}.
            
            Using Markov factorization property in graph $\G'$, $P_{\mathbf{x}}(\mathbf{y})$ is given by
            \begin{equation*}
                P_{\x}(\y) = 
                \sum_{\V\setminus(\X\cup \Y)} \sum_{\U} \prod_{W \in \V\setminus \X}P(w \mid \Pa{W}{\G'}) \prod_{U \in \U} P(u).
            \end{equation*}
            Similarly, in graph $\G$ we have
            \begin{equation*}
                P_{\x}(\y \setminus\{Y_1\}) = 
                \sum_{\{Y_1\} \cup (\V \setminus (\X \cup \Y))} \sum_{\U} \prod_{W \in \V \setminus \X} P(w \mid \Pa{W}{\G}) \prod_{U \in \U} P(u).
            \end{equation*}
            Since the causal effect of $\X$ on $\Y$ is not g-identifiable from $(\mathbb{A}, \G')$, there exist $\M_1$ and $\M_2$ in $\mathbb{M}^+(\G')$ such that:
            \begin{equation*}
                Q^{\M_1}[\A_i](\mathbf{v}) = Q^{\M_2}[\A_i](\mathbf{v}),\; \forall \mathbf{v}\in \dom{\V},\; \forall i \in [0: m],
            \end{equation*}
            \begin{equation*}
                P_{\x}^{\M_1}(\y)\neq P_{\x}^{\M_2}(\y),\; \exists \x \in \dom{\X}, \; \exists \y \in \dom{\Y}.
            \end{equation*}
            Using $\M_1$ and $\M_2$, we construct two SEMs $\M_1'$ and $\M_2'$ in $\mathbb{M}^+(\G)$. 
            Define a surjective function $F\!:\: \dom{Y_1}\rightarrow \{0, 1\}$ and a function $\Psi \!:\: \{0, 1\}\times \dom{Y_1} \rightarrow (0,1)$ such that $\Psi(0, y_1)+\Psi(1, y_1)=1$ for each $y_1 \in \dom{Y_1}$.
            We will later impose some constraints on these functions, but for now let us assume they are arbitrary. 
            
            For any node $S$ which is either unobserved or in $\V \setminus (\{Y_2\}\cup \Ch{Y_2}{\G})$, we define
            \begin{equation*}
                P^{\M_i'}(s|\Pa{S}{\G}) = P^{\M_i}(s|\Pa{S}{\G'}),
            \end{equation*}
            where $i \in \{1, 2\}$.
            The domain of $Y_2$ in $\M_i'$ is defined as $\dom{Y_2}^{\M}\times \{0, 1\}$, where $\dom{Y_2}^{\M}$ is the domain of $Y_2$ in $\M_i$. 
            For $y_2 \in \dom{Y_2}^\M$, $i\in \{1,2\}$, and $k\in \{0, 1\}$ we define:
            \begin{equation*}
                P^{\M_i'}((y_2, k) 
                \mid \Pa{Y_2}{\G'}, y_1) =
                P^{\M_i}(y_2 \mid \Pa{Y_2}{\G'}) \Psi(F(y_1)\oplus k, y_1).
            \end{equation*}
            Note that $\Pa{Y_2}{\G'}\cup \{Y_1\}= \Pa{Y_2}{\G}$.
            Moreover, for a fixed realization $(\Pa{Y_2}{\G'}, y_1)$, we have:
            \begin{equation*}
                \sum_{k\in \{0,1\}} \sum_{y_2\in \dom{Y_2}^{\M}} P^{\M_i'}((y_2, k) \mid \Pa{Y_2}{\G'}, y_1) = 1.
            \end{equation*}
            
            For each $S \in \Ch{Y_2}{\G}$, we define:
            \begin{equation*}
                P^{\M_i'}(s \mid \Pa{S}{\G}\setminus \{Y_2\}, (y_2, k)) =
                P^{\M_i}(s \mid \Pa{S}{\G}\setminus \{Y_2\}, y_2).
            \end{equation*}
            
            Next, we show that $Q^{\M_1'}[\A_i](\mathbf{v}) = Q^{\M_2'}[\A_i](\mathbf{v})$ for each $\mathbf{v}\in \dom{\V}$ and $i \in [0:m]$.
            Suppose $\mathbf{v}$ is a realization of $\V$ in $\M'_1$ with realizations $y_1$ and $(y_2, k)$ for $Y_1$ and $Y_2$, respectively. 
            Consider two cases: 
            \begin{itemize}
                \item If $Y_2 \notin \A_i$:
                \begin{align*}
                    Q^{\M_1'}[\A_i](\mathbf{v}) 
                    &= \sum_{\U}\prod_{A\in \A_i} P^{\M_1'}(a \mid \Pa{A}{\G})\prod_{U\in \U}P^{\M_1'}(u) \\
                    & = \sum_{\U} \prod_{A\in \A_i}P^{\M_1}(a \mid \Pa{A}{\G'})\prod_{U\in \U} P^{\M_1}(u) 
                    = Q^{\M_1}[\A_i](\mathbf{v}) 
                    = Q^{\M_2}[\A_i](\mathbf{v}) \\
                    & = \sum_{\U} \prod_{A\in \A_i} P^{\M_2}(a \mid \Pa{A}{\G'}) \prod_{U\in \U} P^{\M_2}(u) \\
                    &= \sum_{\U} \prod_{A\in \A_i} P^{\M_2'}(a \mid \Pa{A}{\G}) \prod_{U\in \U} P^{\M_2'}(u) \\ 
                    &= Q^{\M_2'}[\A_i](\mathbf{v}).
                \end{align*}
                \item If $Y_2 \in \mathbf{A}_i$:
                \begin{align*}
                    Q^{\M_1'}[\A_i](\mathbf{v}) 
                    &= \sum_{\U} \prod_{A\in \A_i} P^{\M_1'} (a \mid \Pa{A}{\G})\prod_{U\in \U} P^{\M_1'}(u) \\
                    & = \Psi\left(F(y_1)\oplus k, y_1\right) \sum_{\U} \prod_{A\in \A_i} P^{\M_1}(a \mid  \Pa{A}{\G'}) \prod_{U\in \U}P^{\M_1}(u) \\
                    &= \Psi(F(y_1)\oplus k, y_1) Q^{\M_1}[\A_i](\mathbf{v}) = \Psi(F(y_1)\oplus k, y_1) Q^{\M_2}[\A_i](\mathbf{v}) \\
                    &= \Psi(F(y_1)\oplus k, y_1) \sum_{\U} \prod_{A\in \A_i} P^{\M_2}(a \mid \Pa{A}{\G'}) \prod_{U\in \U} P^{\M_2}(u) \\
                    &= \sum_{\U}\prod_{A\in \A_i} P^{\M_2'}(a \mid \Pa{A}{\G}) \prod_{U\in \U}P^{\M_2'}(u) \\
                    &= Q^{\M_2'}[\A_i](\mathbf{v}).
                \end{align*}
            \end{itemize}
            Therefore, $Q^{\M_1'}[\A_i](\mathbf{v}) = Q^{\M_2'}[\A_i](\mathbf{v})$ for each $\mathbf{v}\in \dom{\V}$ and $i \in [0:m]$.
            
            We know that there exists $\hat{\x} \in \dom{\X}^{\M}$ and $\hat{\y} \in \dom{\Y}^{\M}$ such that $P^{\M_1}_{\hat{\x}}(\hat{\y})\neq P^{\M_2}_{\hat{\x}}(\hat{\y})$. 
            Denote by $\hat{y}_1$ and $\hat{y}_2$ the realizations of $Y_1$ and $Y_2$ in the realization $\hat{\y}$, respectively. 
            Assume that $P^{\M_1}_{\hat{\x}}(\hat{\y}) = d_1 > P^{\M_2}_{\hat{\x}}(\hat{\y}) = d_2$. 
            Assume that $\Psi(F(\hat{y}_1)\oplus 0, \hat{y}_1)=0.5$ and $\Psi(F(y)\oplus 0, y) = \frac{d_1-d_2}{4}$ for all $y \in \dom{Y_1}\setminus \{\hat{y}_1\}$. 
            Then we have:
            \begin{align*}
                P_{\hat{\x}}^{\M_1'}(\hat{\y} \setminus \{\hat{y}_1\})
                &=\sum_{y_1 \in \dom{Y_1}} \sum_{\V \setminus (\X\cup\Y)} \sum_{\U} \prod_{Z\in \V\setminus \X} P^{\M_1'}(z \mid \Pa{Z}{\G}) \prod_{U\in \U} P(u) \\
                & >\sum_{y_1 = \hat{y}_1} \sum_{\V\setminus (\X\cup\Y)} \sum_{\U} \prod_{Z\in \V\setminus \X} P^{\M_1'}(z \mid \Pa{Z}{\G}) \prod_{U\in \U} P(u) \\
                & =P_{\hat{\x}}^{\M_1}(\hat{\y}) \Psi(F(\hat{y}_1)\oplus 0, \hat{y}_1) = 0.5 d_1.
            \end{align*}
            but, 
            \begin{align*}
                P_{\hat{\x}}^{\M_2'}(\hat{\y} \setminus \{\hat{y}_1\})
                &=\sum_{y_1 \in \dom{Y_1}} \sum_{\V \setminus (\X \cup \Y)} \sum_{\U}  \prod_{Z\in \V \setminus \X} P^{\M_2'}(z \mid \Pa{Z}{\G}) \prod_{U\in \U} P(u)\\
                & =\sum_{y_1 = \hat{y}_1} \sum_{\V \setminus (\X \cup \Y )} \sum_{\U} \prod_{Z\in \V \setminus \X} P^{\M_2'}(z \mid \Pa{Z}{\G}) \prod_{U\in \U} P(u) \\
                & + \sum_{y_1 \in \dom{Y_1}\setminus \{\hat{y}_1\}} \sum_{\V \setminus (\X\cup\Y)} \sum_{\U} \prod_{Z\in \V \setminus \X} P^{\M_2'}(z \mid \Pa{Z}{\G}) \prod_{U \in \U} P(u) \\
                &\leq P_{\hat{\x}}^{\M_2}(\hat{\y}) \Psi(F(\hat{y}_1)\oplus 0, \hat{y}_1) + P_{\hat{\x}}^{\M_2}(\hat{\y} \setminus \{\hat{y}_1\}) \max_{y \in \dom{Y_1}\setminus \{\hat{y}_1\}} \Psi(F(y)\oplus 0, y)\\
                & \leq 0.5d_2 + \frac{d_1-d_2}{4} < 0.5d_1.
            \end{align*}
            This implies that $P_{\hat{\x}}^{\M_1'}(\hat{\y} \setminus \{\hat{y}_1\})\neq P_{\hat{\x}}^{\M_2'}(\hat{\y} \setminus \{\hat{y}_1\})$ which concludes the proof.
        \end{proof}
      
        \begin{lemma}\label{lemma: prp3 - 3}
            Assume $\Y \subset \W \subset \V$ such that for each $W \in \W \setminus \Y$, there exists a directed path in $\G[\W]$ from $W$ to a variable in $\Y$. 
            Then, the causal effect of $\V \setminus \W$ on $\Y$ is g-identifiable from $(\mathbb{A}, \G)$ if and only if $Q[\W]$ is g-identifiable from $(\mathbb{A}, \G)$. 
        \end{lemma}
        \begin{proof}
            Let $\X:=\V \setminus \W$. 
            
            \textit{Sufficient part}:
            Suppose $Q[\W]$ is g-identifiable from $(\mathbb{A}, \G)$.
            Since $Q[\W] = P_{\x}(\W)$, we have
            \begin{equation*}
                P_{\x}(\y) = \sum_{\W \setminus \Y} Q[\W].
            \end{equation*}
            Hence, $P_{\x}(\y)$ is uniquely computed and the causal effect of $\X$ on $\Y$ is g-identifiable from $(\mathbb{A}, \G)$. 
            
            \textit{Necessary part:} 
            Suppose $Q[\W]$ is not g-identifiable from $(\mathbb{A}, \G)$, we will show that $P_{\x}(\y)$ is also not g-identifiable. 
            To this end, first, we order the nodes in $\W \setminus \Y$, say $(W_1, W_2, \cdots, W_n)$, such that for each $1\leq i \leq n$, $W_i$ is a parent of at least one node in $\Y\cup \{W_1, W_2, \dots, W_{i-1}\}$. 
            Assume that $e_i$ is the directed edge from $W_i$ to its child in $\Y\cup \{W_1, W_2, \dots, W_{i-1}\}$.
            We also define $\G'$ to be the graph obtained by deleting all the edges $\{e_i\}_{i=1}^n$ from $\G$. 
            Applying Lemma \ref{lemma: prp3 - 1} repeatedly $n$ times implies that $Q[\W]$ is not g-identifiable from $(\mathbb{A},  \G')$.
            
            Let $\G_n:=\G$ and for $0\leq i\leq n-1$, we define $\G_i$ to be the graph obtained by removing $e_{i+1}$ from $\G_{i+1}$.
            From Lemma \ref{lemma: prp3 - 2}, we know that if $Q[\W]$ is not g-identifiable from $(\mathbb{A}, \G')$, then adding edge $e_1$ will make the causal effect of $\X$ on $\W \setminus \{W_1\}$ not g-identifiable from $(\mathbb{A}, \G_1)$.
            Note that $\G_1$ is obtained from $\G'$ by adding edge $e_1$. 
            Using this lemma again implies that the causal effect of $\X$ on $\W \setminus \{W_1, W_2\}$ is not g-identifiable from $(\mathbb{A}, \G_2)$. 
            Repeating this procedure yields that the causal effect of $\X$ on $\W \setminus \{W_1, \dots, W_n\}=\Y$ is not g-identifiable from $(\mathbb{A}, \G_n)$. 
            Since $\G_n=\G$, the causal effect of $\X$ on $\Y$ is not g-identifiable from $(\mathbb{A}, \G)$. 
        \end{proof}
    
       \begin{lemma}\label{lemma: lin indep formal}
            Consider a set of vectors $\{c_i\}_{i=1}^{n}$, where $c_i \in \mathbb{R}^d$. Assume $c\in \mathbb{R}^d$ is a vector that is linearly independent of $\{c_i\}_{i=1}^{n}$, then there is a vector $b\in \mathbb{R}^d$ such that
            \begin{align*}
                & \langle c_i, b \rangle = 0, \quad \forall i \in [1:n],\\
                & \langle c, b \rangle \neq 0.
            \end{align*}
        \end{lemma}
        \begin{proof}
            Denote by $\{\phi_i\}_{i=1}^{l}$ a subset of $\{c_i\}_{i=1}^{n}$ which forms a basis for the vectors in $\{c_i\}_{i=1}^{n}$. Clearly, we have $l < d$. Now, consider the following system of linear equations with respect to $b$:
            \begin{equation} \label{eq: lem11}
            \begin{split}
                & \langle \phi_i, b \rangle = 0, \quad \forall i\in[1:l], \\
                & \langle c, b \rangle = 13\neq0.
            \end{split}
            \end{equation}
            By the assumption, vectors in $\{\phi_i\}_{i=1}^{l}\cup\{c\}$ are linearly independent, thus there exists a solution to \eqref{eq: lem11}.
        \end{proof}


    \subsection{Proofs of Section \ref{sec: algorithm}} \label{sec: proof sec algorithm}
        \begin{customprp}{\ref{prp: 3}}
            Let $\X$ and $\Y$ be two disjoint subsets of $\V$.
            The causal effect of $\X$ on $\Y$ is g-identifiable from $(\mathbb{A},\G)$ if and only if $Q[\Anc{\Y}{\G_{\V \setminus \X}}]$ is g-identifiable from $(\mathbb{A},\G)$.
        \end{customprp}
        \begin{myproof}[Proof]
            Let $\mathbf{W}:=\Anc{\Y}{\G_{\V \setminus \X}}$.
            Since $Q[\V \setminus \X] = P_{\x}(\V \setminus \X)$, using marginalization, we obtain
            \begin{equation}\label{eq:app_prp3}
                P_{\mathbf{x}}(\mathbf{y}) = \sum_{\V \setminus (\X \cup \Y)}Q[\V \setminus \X] = \sum_{\W \setminus \Y} \sum_{\V \setminus (\W \cup \X)} Q[\V \setminus \X].
            \end{equation}
            Since $\W$ is an ancestral set in $\G[\V \setminus \X]$, Lemma \ref{lemma: Q-marginal} implies
            \begin{equation*}
                \sum_{\V \setminus (\W \cup \X)} Q[\V \setminus \X] = Q[\W].
            \end{equation*}
            Substituting the above equation into \eqref{eq:app_prp3} implies
            \begin{equation} \label{eq: prp 3 proof}
                P_{\mathbf{x}}(\mathbf{y}) = \sum_{\W \setminus \Y} Q[\W] = P_{\mathbf{v}\setminus \mathbf{w}}(\y).
            \end{equation}
            \textit{Sufficient part:} 
            Suppose $Q[\W]$ is g-identifiable from $(\mathbb{A},\G)$.
            Equation \eqref{eq: prp 3 proof} implies that $P_{\mathbf{x}}(\mathbf{y})$ is uniquely computable from $Q[\W]$, and therefore, the causal effect of $\X$ on $\Y$ is g-identifiable from $(\mathbb{A},\G)$.
            
            \textit{Necessary part:}
            Suppose $Q[\W]$ is not g-identifiable from $(\mathbb{A}, \G)$. 
            For each $W \in \W \setminus \Y$, there exists a directed path in $\G[\W]$ from $W$ to a variable in $\Y$. 
            Hence, Lemma \ref{lemma: prp3 - 3} implies that the causal effect of $\V \setminus \W$ on $\Y$ is not g-identifiable from $(\mathbb{A}, \G)$. 
            Hence, Equation \eqref{eq: prp 3 proof} implies that $ P_{\mathbf{x}}(\mathbf{y})$ cannot be uniquely computed and the causal effect of $\X$ on $\Y$ is not g-identifiable from $(\mathbb{A},\G)$.
        \end{myproof}
        
        \begin{customprp}{\ref{prp: 4}}
            Suppose $\mathbf{S}\subseteq \V$ and $\mathbf{S}_1,\cdots,\mathbf{S}_l$ are the c-components of $\mathbf{S}$. 
            $Q[\mathbf{S}]$ is g-identifiable from $(\mathbb{A}, \G)$ if and only if $Q[\mathbf{S}_i]$ is g-identifiable from $(\mathbb{A}, \G)$ for each $i\in [1:l]$.
        \end{customprp}
        \begin{myproof}[Proof]
            \textit{Sufficient part}: 
            Suppose $Q[\mathbf{S}_i]$ is g-identifiable from $(\mathbb{A}, \G)$ for each $i\in [1:l]$.
            \cite{tian2003ID} showed that 
            \[Q[\mathbf{S}] = \prod_{i=1}^{l}Q[\mathbf{S}_i].\]
            Hence, $Q[\mathbf{S}]$ is uniquely computable and therefore, g-identifiable from $(\mathbb{A}, \G)$. 
            
            \textit{Necessary part}: 
            Suppose $Q[\mathbf{S}]$ is g-identifiable from $(\mathbb{A}, \G)$. 
            For $i\in [1:l]$, \cite{tian2003ID} provided a formula for computing $Q[\mathbf{S}_i]$ from $Q[\mathbf{S}]$ (Lemma 4, Equations (71) and (72) in \citep{tian2003ID}). 
            Hence, for each $i\in [1:l]$, $Q[\mathbf{S}_i]$ is uniquely computable and therefore, g-identifiable from $(\mathbb{A}, \G)$.
        \end{myproof}
    
\subsection{Proofs of Section \ref{sec: main}} \label{sec: proof sec main}
    \begin{customlem}{\ref{lem: simplify}}
        If $Q[\mathbf{S}]$ is not g-identifiable from $(\mathbb{A}', \G')$, then $Q[\mathbf{S}]$ is not g-identifiable from $(\mathbb{A}, \G)$.
    \end{customlem}
    \begin{proof}
        If $Q[\mathbf{S}]$ is not g-identifiable from $(\mathbb{A}', \G')$, then there exist two models $\M_1'$ and $\M_2'$ in $\mathbb{M}^+(\G')$ such that for each $i\in [0:m]$ and any $\mathbf{v} \in \dom{\V'}$,
        \begin{equation*}
            Q^{\M_1'}[\mathbf{A}_i'](\mathbf{v}) = Q^{\M_2'}[\mathbf{A}_i'](\mathbf{v}), 
        \end{equation*}
        and there exists $\mathbf{v}_0 \in \dom{\V'}$ such that 
        \begin{equation*}
            Q^{\M_1'}[\mathbf{S}](\mathbf{v}_0) \neq Q^{\M_2'}[\mathbf{S}](\mathbf{v}_0).
        \end{equation*}
        
        Next, we will construct two models $\M_1$ and $\M_2$ in $\mathbb{M}^+(\G)$ to prove that $Q[\mathbf{S}]$ is not g-identifiable from $(\mathbb{A}, \G)$. 
        We define the domains of variables in $\V'$ in the model $\M_i$ similar to model $\M_i'$, for $i \in \{1, 2\}$. 
        Since for each node $V\in\V'$, we have $\Pa{V}{\G'}\subseteq\Pa{V}{\G}$, then for all $V\in \V'$ and $i \in \{1, 2\}$, we can define:
        \begin{equation*}
            P^{\M_i}(V \mid \Pa{V}{\G}) := P^{\M_i'}(V \mid \Pa{V}{\G'}).
        \end{equation*}
        And for $V \in \V \setminus \V'$, we define:
        \begin{equation*}
            \dom{V} = \{0\}, \quad P(V=0)=1.
        \end{equation*}
        Because variable $V \in \V \setminus \V'$ can only take value $0$ with probability one, then $Q^{\M_j}[\mathbf{A}_i](\mathbf{v})=Q^{\M'_j}[\mathbf{A}'_i](\mathbf{v})$ for all $i$ and $Q^{\M_j}[\mathbf{S}](\mathbf{v}_0)=Q^{\M_j'}[\mathbf{S}](\mathbf{v}_0)$ for $j\in\{1,2\}$. 
        Thus, we have
        \begin{align*}
            &Q^{\M_1}[\mathbf{A}_i](\mathbf{v}) = Q^{\M_2}[\mathbf{A}_i](\mathbf{v}), \quad i\in[0:m],\\
            &Q^{\M_1}[\mathbf{S}](\mathbf{v}_0) \neq Q^{\M_2}[\mathbf{S}](\mathbf{v}_0).
        \end{align*}
        This shows that $Q[\mathbf{S}]$ is not g-identifiable from $(\mathbb{A}, \G)$.
    \end{proof}
 
    \begin{customlem}{\ref{lem: lin indep}}
        Consider the following set of vectors in $\mathbb{R}^d$
         \begin{equation} \label{eq: lin vectors apd}
            \mathbf{\Omega}:=\{\theta_{i}(\mathbf{v}):\ i\in [0:m], \mathbf{v}\in \dom{\V}\} \cup \{\mathds{1}_d\},
        \end{equation}
        where $\mathds{1}_d$ denotes the all-ones vector in $\mathbb{R}^d$. 
        If there exists $\mathbf{v}_0 \in \dom{\V}$ such that $\eta(\mathbf{v}_0)$ is linearly independent from all the vectors in $\mathbf{\Omega}$, then the system of linear equations in \eqref{eq: linear system 2} admits a solution.
    \end{customlem}
    \begin{myproof}[Proof]
        This is a direct consequence of Lemma \ref{lemma: lin indep formal}, taking $\{c_i\}$ to be the vectors in $\mathbf{\Omega}$ and $c$ to be $\eta(\mathbf{v}_0)$.
    \end{myproof}
    
    \begin{customlem}{\ref{lemma: valid model}}
        The SEM constructed above belongs to $\mathbb{M}^+(\G')$. 
    \end{customlem}
    \begin{proof}
        By the construction, it is clear that the model belongs to $\mathbb{M}(\G')$. 
        Hence, we need to show that $P(\mathbf{v})>0$ for any $\mathbf{v}\in \dom{\V'}$. 
        To this end, it is enough to show that for any realization $\mathbf{v} \in \dom{\V'}$, there exists a realization $\hat{\mathbf{u}}\in \dom{\U'}$ such that $P(\mathbf{v}, \hat{\mathbf{u}})>0$, because in this case we have
        \begin{equation*}
            P(\mathbf{v}) = \sum_{\mathbf{u}\in\dom{\U'}} P(\mathbf{v}, \mathbf{u}) \geq P(\mathbf{v}, \hat{\mathbf{u}}) > 0.
        \end{equation*}
        Let $\mathbf{v}$ be a fixed realization in $\dom{\V'}$. 
        For the rest of the proof, we assume all the realizations for $\V'$ are consistent with $\mathbf{v}$.
        
        By Markov factorization property,  for any $\mathbf{u}\in \dom{\U'}$ we have 
        \begin{equation}
            P(\mathbf{v}, \mathbf{u}) = \prod_{V\in \V'}P(v\mid \Pa{V}{\G'})\prod_{U\in \U'}P(u).
        \end{equation}
        By the construction of our model, we have $P(u)>0$ for any $U\in \U'$ and $u\in \dom{U}$. 
        Moreover, for any $X\in \mathbf{S}$ and any realization for $\Pa{X}{\G'}\cap \U'$ we have $P(x \mid \Pa{X}{\G'})>0$. 
        Hence, it is enough to show that there exists $\hat{\mathbf{u}}\in \dom{\U'}$ such that $P(x \mid \Pa{X}{\G'})>0$ for each $X\in \T$.
        
        Recall that for each $X\in \T$, we have $X=(X[i_1],\cdots, X[i_{\alpha(X)}])$, where $X$ belongs to $\F_{i_1}, \cdots, \F_{i_{\alpha(X)}}$ and 
        \begin{equation*}
            X[i_j] \equiv \left(\sum_{Y \in \Pa{X}{\F_{i_j}}} Y[i_j]\right) \pmod{2}.
        \end{equation*}
        
        By the construction, we define the entries corresponding to each $\F_i$ separately. 
        For each $i\in [0:k]$, let $\U_i$ be the set of unobserved variables in $\U^{\T}$ that are in $\F_i$. 
        
        Let us fix an $i\in [0:k]$. 
        To finish the proof, we will introduce a method to determine $\hat{u}[i]$ for each $U\in \U_i$ such that 
        \begin{equation} \label{eq: eq for T}
            x[i] \equiv \left(\sum_{Y \in \Pa{X}{\F_{i}}} y[i]\right) \pmod{2},
        \end{equation}
        for each $X\in \T \cap \B_i$. 
        
        Let us start with an arbitrary set of values for $\{\hat{u}[i]\!:\: U\in \U_i\}$ which are either $0$ or $1$. 
        Suppose $X\in \T \cap \B_i$. 
        We introduce a trick such that $x[i]$ will be replaced by $1- x[i]$ while for all $Y\in \T \cap \B_i$, $y[i]$ remains the same: \\
        By the construction of $\F_i$, there exists a path $(X=X_1, U_1, X_2, \cdots, X_l,U_l,Z = X_{l+1} )$ from $X$ to a variable $Z \in \mathbf{S}$ such that $\{U_1, \cdots, U_l\} \subseteq \U_i$, $\{X_1,\cdots,X_l\} \subseteq \B_i \cap \T$, and $\Ch{U_j}{\F_i} = \{X_j, X_{j+1}\}$ for each $j\in [1:l]$. 
        Now for each $j \in [1:l]$, we replace $\hat{u}_j[i]$ by $1-\hat{u}_j[i]$. 
        Since Equation \eqref{eq: eq for T} is in mod $2$, the value of $x_j[i]$ will be the same for each $j\in [2:l]$ while $x[i]$ will be replaced by $1-x[i]$. 
        Note that $X_{l+1}=Z \notin \T$. 
        
        With the trick described above, we can construct any realization for the $i$-th bit of the variables in $\T \cap \B_i$. 
        Hence, we can construct $\hat{\mathbf{u}}\in \dom{\U'}$ such that $P(x \mid \Pa{X}{\G'})>0$ for each $X\in \T$.
    \end{proof}
    
   \begin{customlem}{\ref{lem: theta index}}
        For any $\mathbf{v} \in \dom{\V'}$ and $i\in[0:m]$,
        \begin{equation*}
            \theta_{i,j_1}(\mathbf{v}) = \theta_{i,j_2}(\mathbf{v}) = \cdots= \theta_{i,j_{\frac{\kappa+1}{2}}}(\mathbf{v}).
        \end{equation*}
    \end{customlem}
    \begin{proof}
        Let us fix a realization $\mathbf{v}$ for the observed variables $\V'$.
        Suppose that $l_1$ and $l_2$ are two integers such that
        \begin{equation*}
        \begin{split}
            & \gamma_{l_1} = (2x, 0, \dots, 0),\\
            & \gamma_{l_2} = (2x+2 \pmod{\kappa+1}, 0, \dots, 0),
        \end{split}
        \end{equation*}
        where $x$ is any fixed integer in $[0 : \frac{\kappa-1}{2}]$.
        To show the result, we will prove that $\theta_{i, l_1}(\mathbf{v})=\theta_{i, l_2}(\mathbf{v})$. Let
        \begin{align*}
            &f_{i, j}(\mathbf{v}, \mathbf{u}^{\mathbf{T}}) := \sum_{\mathbf{u}\in\U^{\mathbf{S}}} \prod_{V \in \A'_i} P(v \mid \Pa{V}{\G'})\! \prod_{U\in \U' \setminus \{U_0\}}\! P(u)\\
            & = \prod_{V \in \A'_i\setminus\B_i} P(v \mid \Pa{V}{\G'})\! \prod_{V \in \B_i\setminus\mathbf{S}} P(v \mid \Pa{V}{\G'})\!\sum_{\mathbf{u}\in\U^{\mathbf{S}}}\prod_{V \in \mathbf{S}} P(v \mid \Pa{V}{\G'}) \prod_{U\in \U' \setminus \{U_0\}}\! P(u). 
        \end{align*}
        where index $j$ indicates $U_0=\gamma_j$. Note that variable $U_0$ may appear in the parent set of some observed variables. 
        Using the above definition, we have
        \begin{equation*}
            \theta_{i, j}(\mathbf{v}) = \sum_{\mathbf{u}^{\mathbf{T}}\in\U^{\T}} f_{i, j}(\mathbf{v}, \mathbf{u}^{\mathbf{T}}).
        \end{equation*}
        Hence, if we show $f_{i, l_1}(\mathbf{v}, \mathbf{u}^{\mathbf{T}})=f_{i, l_2}(\mathbf{v}, \mathbf{u}^{\mathbf{T}})$ for any fixed realization $\mathbf{u}^{\T}$, the above equation implies $\theta_{i, l_1}(\mathbf{v})=\theta_{i, l_2}(\mathbf{v})$. 
        
        When $T\in \A'_i\setminus\B_i$, then for fixed realizations of $\textbf{u}^{\T}$,  $P(t|\Pa{T}{\G'})$ is the same for both realizations $\gamma_{l_1}$ and $\gamma_{l_2}$ since $\gamma_{l_1}\equiv\gamma_{l_2}$ mod 2.
        
        When $T\in \B_i\setminus\mathbf{S}$, unobserved variables in $\Pa{T}{\G'}$ are a subset of $\U^{\T}\cup\{U_0\}$. Note that in the definition of $f_{i, j}(\mathbf{v}, \mathbf{u}^{\mathbf{T}})$, all such unobserved variables are fixed. 
        Thus, if there exists $T\in \B_i\setminus\mathbf{S}$, such that $P(t|\Pa{T}{\G'})=0$, then 
        \begin{equation*}
        f_{i, l_1}(\mathbf{v}, \mathbf{u}^{\mathbf{T}})=f_{i, l_2}(\mathbf{v}, \mathbf{u}^{\mathbf{T}})=0.
        \end{equation*}
        When $P(t|\Pa{T}{\G'})=1$ for all $T\in \B_i\setminus\mathbf{S}$,
        to prove $f_{i, l_1}(\mathbf{v}, \mathbf{u}^{\mathbf{T}})=f_{i, l_2}(\mathbf{v}, \mathbf{u}^{\mathbf{T}})$, we show that for any realization $(\textbf{u}_1,\gamma_{l_1})$ of $(\U^{\mathbf{S}},U_0)$, there is a realization $(\textbf{u}_2,\gamma_{l_2})$  of $(\U^{\mathbf{S}},U_0)$ such that 
        \begin{equation*}
            \prod_{V \in \mathbf{S}} P(v \mid \Pa{V}{\G'})\Big|_{(\U^{\mathbf{S}},U_0)=(\textbf{u}_1,\gamma_{l_1})}=\prod_{V \in \mathbf{S}} P(v \mid \Pa{V}{\G'})\Big|_{(\U^{\mathbf{S}},U_0)=(\textbf{u}_2,\gamma_{l_2})},
        \end{equation*}
        where $P(v \mid \Pa{V}{\G'})\Big|_{(\U^{\mathbf{S}},U_0)=(\textbf{u}_1,\gamma_{l_1})}$ denotes the conditional probability of $v$ given its parents in which the unobserved variables $(\U^{\mathbf{S}},U_0)$ are fixed to be  $(\textbf{u}_1,\gamma_{l_1})$.
        To this end, we consider two cases depending on $i$. 
        
        \textbf{First case, when $i \in [0:k]$:} 
        In this case, we have
        \begin{equation}\label{eq: tmp1}
            t[i] = \left( \sum_{Y \in \Pa{T}{\F_i}} y[i] \right) \pmod{2}.
        \end{equation}
        
        Consider the set $\mathbf{\Lambda}:=\Pa{\mathbf{S}}{\F_i}\setminus \Pa{\mathbf{S}}{\F_i[\mathbf{S}]}$, that is the set of all parents of nodes in $\textbf{S}$ that are outside of $\textbf{S}$. By the construction of our models, summation of the values of the observed and unobserved nodes in $\mathbf{\Lambda}$ are the same, i.e.,  
        \begin{equation*}
            \sum_{W \in \mathbf{\Lambda}\cap \B_i} w[i] \equiv \sum_{W \in \mathbf{\Lambda}\cap \U'} w[i] \pmod{2},
        \end{equation*}
        or equivalently
        \begin{equation}\label{eq: tmp2}
            \sum_{W \in \mathbf{\Lambda}} w[i] \equiv 0 \pmod{2}.
        \end{equation}
        This is because, in graph $\F_i$, each observed variable outside of $\mathbf{S}$ has at most one child outside of $\mathbf{S}$, and each unobserved node has either one or two children outside of $\mathbf{S}$. 
        According to (\ref{eq: tmp1}), those unobserved nodes with two children outside of $\mathbf{S}$ do not belong to $\mathbf{\Lambda}\cap \U'$. 
        Such unobserved nodes have exactly two observed descendants in $\mathbf{\Lambda}\cap \B_i$, and because both descendants appear in \eqref{eq: tmp2}, their summation is zero mod 2. 
        On the other hand, the unobserved nodes with only one child outside of $\mathbf{S}$ belong to $\mathbf{\Lambda}\cap \U'$ and have exactly one observed descendant in $\mathbf{\Lambda}\cap \B_i$. Thus, the summation of such unobserved variables and their observed descendant is again zero mod 2 in \eqref{eq: tmp2}.
 
        If $\mathbb{I}(S)=0$ for all $S \in \mathbf{S}$, then by our model construction, for any variable $W \in \mathbf{\Lambda}\setminus \{T_i\}$, $w[i]$ is an even number but $T_i$ takes value 1 with probability one. 
        Hence, the summation in \eqref{eq: tmp2} cannot be an even number.
        Therefore, there exists at least a variable $S\in \mathbf{S}$ such that $\mathbb{I}(S)=1$. 
        In this case, the value of $P(S|\Pa{S}{\G'})$ does not depend on the realizations of variables in $\U^{\mathbf{S}}$. 
        Next, we show that for any realization $\mathbf{u}_{1}$ of $\U^{\mathbf{S}}$, there is a realization $\mathbf{u}_{2}$ such that 
        \begin{equation}\label{eq:app_prop_3}
        P(s|\Pa{S}{\G'})\Big|_{(\U^{\mathbf{S}},U_0)=(\textbf{u}_1,\gamma_{l_1})}=P(s|\Pa{S}{\G'})\Big|_{(\U^{\mathbf{S}},U_0)=(\textbf{u}_2,\gamma_{l_2})}.
        \end{equation}
        Since $\G'_{\mathbf{S}}$ is a c-component, there exists a sequence of variables $U_0, \hat{S}_1, \hat{U}_1, \hat{S}_2, \hat{U}_2, \dots, \hat{U}_l, S$, such that $U_0$ is a parent of $\hat{S}_1$, $S$ is a child of $\hat{U}_l$ and $\hat{U}_j$ is a parent of variables $\hat{S}_j$ and $\hat{S}_{j+1}$ for $j \in [1:l-1]$. 
        Let $\hat{\U}:=\{\hat{U}_1, \dots, \hat{U}_l\}$. 
        For realization $\mathbf{u}_{1}$, we define $\mathbf{u}_{2}$ by
        \begin{equation}
            \begin{split}
                & u_{2,\hat{U}_j} := u_{1,\hat{U}_j} + 2(-1)^{j} \pmod{\kappa+1}, \quad j\in[1:l],\\
                & u_{2,U} := u_{1,U}, \quad \forall U\in \U' \setminus (\hat{\U}\cup \{U_0\}),
            \end{split}
        \end{equation}
        where $u_{2,U}$ denotes the realization for variable $U$ in $\textbf{u}_2$.
        It is straightforward to see that this mapping is a bijection between $\mathbf{u}_{1}$ and $\mathbf{u}_{2}$ and \eqref{eq:app_prop_3} holds.
        
        \textbf{Second case, when $i \in [k+1:m]$:} 
        In this case, $\mathbf{S} \setminus \A_i'\neq \varnothing$.  
        Since $\G'_{\mathbf{S}}$ is a c-component, there exists a sequence of variables $U_0, \hat{S}_1, \hat{U}_1, \hat{S}_2, \hat{U}_2, \dots, \hat{U}_l, S$, such that $U_0$ is a parent of $\hat{S}_1$, $S\in \mathbf{S}\setminus \A_i'$ is a child of $\hat{U}_l$ and $\hat{U}_j$ is a parent of variables $\hat{S}_j$ and  $\hat{S}_{j+1}$ for $j \in [1:l-1]$. 
        Let $\hat{\U}:=\{\hat{U}_1, \dots, \hat{U}_l\}$. 
        Similar to the previous case, for a given realization $\mathbf{u}_{1}$ of $\U^{\mathbf{S}}$,  we define $\mathbf{u}_{2} \in \dom{\U^{\mathbf{S}}}$ by
        \begin{equation}
            \begin{split}
                & u_{2,\hat{U}_j} := u_{1,\hat{U}_j} + 2(-1)^{j} \pmod{\kappa+1}, \quad j\in[1:l],\\
                & u_{2,U} := u_{1,U}, \quad \forall U\in \U' \setminus (\hat{\U}\cup \{U_0\}),
            \end{split}
        \end{equation}
        where $u_{2,U}$ denotes the realization for variable $U$ in $\textbf{u}_2$. Analogous to the previous setting, we have \eqref{eq:app_prop_3}.
        
        Herein, we proved that $\theta_{i, l_1}(\mathbf{v})=\theta_{i, l_2}(\mathbf{v})$. By varying $x$ within $[0 : \frac{\kappa-1}{2}]$ in the definition of $\gamma_{l_1}$ and $\gamma_{l_2}$, we  conclude the lemma.
    \end{proof}

   \begin{customlem}{\ref{lem: eta index}}
        There exists $0<\epsilon<\frac{1}{\kappa}$ such that there exists $\mathbf{v}_0 \in \dom{\V'}$ and $1\leq r <t\leq \frac{\kappa+1}{2}$ such that 
        \begin{equation*}
            \eta_{j_r}(\mathbf{v}_0) \neq \eta_{j_t}(\mathbf{v}_0).
        \end{equation*}
    \end{customlem}
    \begin{proof}
        Let us consider $r$ and $t$ such that $\gamma_r = (0, 0, \dots, 0)$ and $\gamma_t = (2, 0, \dots, 0)$. Recall that:
        \begin{align}
            \label{eq: tmp4.1}
            & \eta_r(\mathbf{v}) :=\! \sum_{\U'\setminus \{U_0\}} \prod_{X \in \mathbf{S}} P(x \mid \Pa{X}{\G'})\Big|_{U_0=\gamma_r}\! \prod_{U\in \U'\setminus \{U_0\}}\! P(u), \\
            \label{eq: tmp4.2}
            & \eta_t(\mathbf{v}) :=\! \sum_{\U'\setminus \{U_0\}} \prod_{X \in \mathbf{S}} P(x \mid \Pa{X}{\G'})\Big|_{U_0=\gamma_t}\! \prod_{U\in \U'\setminus \{U_0\}}\! P(u).
        \end{align}
        
        We choose $\textbf{v}_0$ as follows: set all variables in $\mathbf{S}$ to be zero and select a realization for variables in $\V' \setminus \mathbf{S}$ such that $\mathbb{I}(S)=0$ for all $S \in \mathbf{S}$. 
        Denote by $S_0$ a child of $U_0$ in $\mathbf{S}$.
        
        Note that there is a term in the summation of the right side of equation (\ref{eq: tmp4.1}) that is $(1-\kappa\epsilon)^{|\mathbf{S}|}$. For instance, this occurs when all realizations of unobserved variables in $\U^{\mathbf{S}}$ are zero.
         
        Next, we prove that there is no realization of unobserved variables $\U^{\mathbf{S}}$ such that $P(S|\Pa{S}{\G'})=1-\epsilon\kappa$ for all $S\in \mathbf{S}$ and $U_0=\gamma_t$.
        In other words, each term in the summation of \eqref{eq: tmp4.2} has at least a term $\epsilon$. 
        To do so, it suffices to show that there is no realization of $\U^{\mathbf{S}}$ such that:
        \begin{equation*}
            \begin{split}
                & s = \sum_{W \in \Pa{S}{\G'[\mathbf{S}]}} w, \quad S\in \mathbf{S}\setminus \{S_0\},\\
                & s_0 = u_0[0] + \sum_{W \in \Pa{S_0}{\G'[\mathbf{S}]}} w.
            \end{split}
        \end{equation*}
        Suppose there is a realization of $\U^{\mathbf{S}}$ such that the above equations hold. In this case, since $\G'_{\mathbf{S}}$ is a tree, we can color its nodes with two colors, red and black, such that nodes connected by bidirected edges have different colors. 
        Suppose that $\mathbf{S}_1$ is the set of black variables, which (without loss of generality) contains $S_0$, and $\mathbf{S}_2$ is the set of red variables.
        Then:
        \begin{equation*}
            \begin{split}
                & \sum_{W\in \mathbf{S}_1} w \equiv u_0[0] + \sum_{U\in \U^{\mathbf{S}}} u \pmod{\kappa+1},\\
                & \sum_{W\in \mathbf{S}_2} w \equiv  \sum_{U\in \U^{\mathbf{S}}} u \pmod{\kappa+1}.
            \end{split}
        \end{equation*}
        The left-hand sides of both above equations are zero because of our choice of $\textbf{v}_0$.
        However, the right-hand sides cannot be the same since $u_0[0]=2$.
        Hence, in Equation \eqref{eq: tmp4.2}, every term in the summation contains at least one factor $\epsilon$.
        Therefore, in the extreme case when $\epsilon=0$, $\eta_{t}(\mathbf{v}_0)=0$. However, 
        $\eta_{r}(\mathbf{v}_0)\geq (1-\kappa\epsilon)^{|\mathbf{S}|}\prod_{U\in \U'\setminus \{U_0\}}\! P(u)>0$. 
        Since $\eta_{r}(\mathbf{v}_0)$ and $\eta_{t}(\mathbf{v}_0)$ are polynomial functions of $\epsilon$ and they are not equal at $\epsilon=0$, there exists a small enough $0<\epsilon<\frac{1}{\kappa}$ such that $\eta_{r}(\mathbf{v}_0) \neq \eta_{t}(\mathbf{v}_0)$.
    \end{proof}
    
\section{A special case in the proof of Theorem \ref{thm: main}} \label{sec: apd_second case}
    In this section, we provide our proof for the necessary part of Theorem \ref{thm: main} when $\mathbf{S}\nsubseteq \A_i'$ for all $i\in [0:m]$. 
     
    We define $\F^{\mathbf{S}}$ to be a minimal (in terms of edges) spanning subgraph of $\G[\mathbf{S}]$ such that $\F^{\mathbf{S}}_{\mathbf{S}}$ is a single c-component. 
    In this case, we can assume $\V' = \mathbf{S}$, $\G'$ is $\F^{\mathbf{S}}$, and $\mathbb{A}'= \{\A_i':=\A_i \cap \V'\}_{i=0}^m$. 
    For each $i\in [0:m]$, we have $\A_i' \subsetneq \V'$. %since $\V' \nsubseteq \A_i$.
    Note that Lemma \ref{lem: simplify} holds for this case. 
    Hence, it is enough to show that $Q[\mathbf{S}]$ is not g-identifiable from $(\mathbb{A}', \G')$. 
    
    Recall that our assumptions and goal in this section are as follows:\\
    $\G'$ is a DAG with observed variables $\V'$ and unobserved variables $\U'$ such that $\G'_{\V'}$ has no directed edges and its bidirected edges form a spanning tree over $\V'$. 
    $\mathbb{A}'= \{\A_i'\}_{i=0}^m$ is a collection of subsets such that $\A_i' \subsetneq \V'$. 
    The goal is to show that $Q[\V']$ is not g-identifiable from $(\mathbb{A}',\G')$.

    For this case we will define two models $\M_1$ and $\M_2$ such that for each $i\in [0:m]$ and any $\mathbf{v}\in \dom{\V'}$,
    \begin{equation*}
        Q^{\M_1}[\A_i'](\mathbf{v}) = Q^{\M_2}[\A_i'](\mathbf{v}),
    \end{equation*}
    but there exists $\mathbf{v}_0\in \dom{\V'}$ such that
    \begin{equation*}
        Q^{\M_1}[\mathbf{S}](\mathbf{v}_0) \neq Q^{\M_2}[\mathbf{S}](\mathbf{v}_0).
    \end{equation*}
    
    For both models $\M_1$ and $\M_2$ we define each observed and unobserved variable to be binary, i.e., $\dom{W}=\{0, 1\}$ for all $W \in \V' \cup \U'$.
    Next, we define the equation of the variables in each model. 
    
    \textbf{Model 1}: For $V \in \V'$:
    \begin{align}
        V = 
        \begin{cases}
            \bigoplus \Pa{V}{\G'}, \quad \text{with probability } 1-\epsilon,\\
            1, \quad \text{with probability } \frac{\epsilon}{2},\\
            0, \quad \text{with probability } \frac{\epsilon}{2},
        \end{cases}
    \end{align}
    and for $U \in \U'$:
    \begin{equation*}
        P(U=0)= P(U=1) = 0.5.
    \end{equation*}
    
    \textbf{Model 2}: Suppose $V_1$ is a fixed observed variable in $\V'$. 
    Then, for all $V$ in $\V'\setminus \{V_1\}$ we define:
    \begin{align}
        V = 
        \begin{cases}
            \bigoplus \Pa{V}{\G'}, \quad \text{with probability } 1-\epsilon \\
            1, \quad \text{with probability } \frac{\epsilon}{2},\\
            0, \quad \text{with probability } \frac{\epsilon}{2},
        \end{cases}
    \end{align}
    and for $V_1$:
    \begin{align}
        V_1 = 
        \begin{cases}
            \urcorner\bigoplus \Pa{V_1}{\G'}, \quad \text{with probability } 1-\epsilon \\
            1, \quad \text{with probability } \frac{\epsilon}{2},\\
            0, \quad \text{with probability } \frac{\epsilon}{2},
        \end{cases}
    \end{align}
    where $\urcorner$ denotes the logical not. 
    Similar to the first model, for each unobserved variable $U\in\U'$, 
    \begin{equation*}
        P(U=0)= P(U=1) = 0.5.
    \end{equation*}
    
    \begin{lemma}
        Let $i \in [0:m]$ and denote the cardinality of $\A_i'$ by $n$, i.e., $|\A_i'|=n$. 
        Then for any realization $\mathbf{v}\in \dom{\V'}$:
        \[
        Q^{\M_1}[\A_i'](\mathbf{v}) = Q^{\M_2}[\A_i'](\mathbf{v}) = \frac{1}{2^n}.
        \]
    \end{lemma}
    \begin{proof}
        Suppose $\A_i':= \{A_1, A_2, \dots, A_n\}$. Since $\A_i' \subsetneq \V'$, there are distinct unobserved variables $U_1, U_2, \dots, U_n$, such that $U_j$ is a parent of $A_j$ for $j \in [1: n]$. Denote by $\M$ either of the models $\M_1$ or $\M_2$. 
        
        Assume that for some realization of observed and unobserved variables, exactly $t\in [0:n]$ variables in $\A'_i$ are defined by the $XOR$ or $\urcorner XOR$ of their parents. Without loss of generality, assume that these variables are $\{A_1, A_2, \dots, A_t\}$. If we know all unobserved variables $\U'$ except $\{U_1, U_2, \dots, U_t\}$, then we can determine uniquely the values of $\{U_1, U_2, \dots, U_t\}$ from the following equations:
        \begin{equation*}
            A_j = \bigotimes \Pa{A_j}{\G'},\quad j\in [1:t],
        \end{equation*}
        where $\bigotimes$ denotes the corresponding equation, either $XOR$ or $\urcorner XOR$, for variable $A_j$ in model $\M$. 
        Thus, by considering all possible realizations of unobserved variables that lead to a realization $\mathbf{v} \in \dom{\V'}$, we obtain
        \begin{equation*}
            Q^{\M}[\A_i'](\mathbf{v}) = \sum_{j=0}^n C_n^j(1-\epsilon)^j\left( \frac{\epsilon}{2} \right)^{n-j} \left(\frac{1}{2}\right)^j = \left(\frac{1}{2}\right)^n,
        \end{equation*}
        where $C_n^j$ is the number of different ways to choose $j$ variables out of $n$, such that with probability $(1-\epsilon)$ their values are determined by either $XOR$ or $\urcorner XOR$ equation. All other $n-j$ variables are equal to either $0$ or $1$ with probability $\frac{\epsilon}{2}$.
    \end{proof} 
    
    \begin{lemma}
        Let $\mathbf{v}=\mathbf{0}$ be the realization of $\V'$ such that all observed variables are equal to $0$. 
        Then $Q^{\M_1}[\V'](\mathbf{v}) \neq Q^{\M_2}[\V'](\mathbf{v})$.
    \end{lemma}
    \begin{proof}
        Define  $n=|\V'|$ and $\V' = \{V_1, V_2, \dots, V_n\}$. Firstly, we will prove that for any $\mathbf{v} \in \dom{\V'}$, the value of $Q^{\M_2}[\V'](\mathbf{v})$ does not depend on the position of $V_1$ in graph $\G'$. 
        Denote by $V_2$ an observed variable which is connected to $V_1$ by a bidirected edge in $\G'_{\V'}$.
        Let $U$ denote the unobserved variable (corresponding to the bidirected edge) which is a parent of $V_1$ and $V_2$. 
        Next, we define a new model $\M_2'$ in which all variables in $\V'$ are defined similarly as they are defined in model $\M_2$ except for variables $V_1$ and $V_2$. 
        In $\M_2'$, we define $V_2$ in the same way as $V_1$ is defined in  $\M_2$.
        We also define $V_1$ in $\M_2'$ in the same way as $V_2$ is defined in $\M_2$.
        Then, we have
        \begin{align*}
            \prod_{i=1}^n P^{\M_2}(v_i|\Pa{V_i}{\G'}) = P^{\M_2}(v_1|\Pa{V_1}{\G'})P^{\M_2}(v_2|\Pa{V_2}{\G'})\prod_{i=3}^{n}P(v_i|\Pa{V_i}{\G'}) \\
            = P^{\M_2'}(v_1|\Pa{V_1}{\G'}\setminus\{U\}, u\oplus 1)P^{\M_2'}(v_2|\Pa{V_2}{\G'}\setminus\{U\}, u\oplus 1)\prod_{i=3}^{n}P(v_i|\Pa{V_i}{\G'}).
        \end{align*}
        This implies that substituting $V_1$ by $V_2$ does not change the value of $Q^{\M_2}[\V'](\mathbf{v})$.
        
        Without loss of generality, suppose that $V_1$ is a leaf in $\G'$ and $U_1$ is a parent of $V_1$. Note that there are exactly $n-1$ unobserved variables in graph $\G'$.
        This is because $\G'_{\V'}$ is a tree with bidirected edges over $\V'$. Therefore, we have
        \begin{align*}
            & 2^{n-1}Q^{\M_1}[\V'](\mathbf{0}) = P^{\M_1}(V_1=0|U_1=0)\!\! \sum_{\U' \setminus \{ U_1\}} \prod_{j>1}P(v_j|\Pa{V_j}{\G'}) + P^{\M_1}(V_1=0|U_1=1)\!\! \sum_{\U' \setminus \{ U_1\}} \prod_{j>1}P(v_j|\Pa{V_j}{\G'}), \\
            & 2^{n-1}Q^{\M_2}[\V'](\mathbf{0}) = P^{\M_2}(V_1=0|U_1=0)\!\! \sum_{\U' \setminus \{ U_1\}} \prod_{j>1}P(v_j|\Pa{V_j}{\G'}) + P^{\M_2}(V_1=0|U_1=1)\!\! \sum_{\U' \setminus \{ U_1\}} \prod_{j>1}P(v_j|\Pa{V_j}{\G'}).
        \end{align*}
        Note that:
        \begin{align*}
            & P^{\M_1}(V_1=0|U_1=0) = 1 -\frac{\epsilon}{2}\\
            & P^{\M_1}(V_1=0|U_1=1) = \frac{\epsilon}{2}\\
            & P^{\M_2}(V_1=0|U_1=0) = \frac{\epsilon}{2}\\
            & P^{\M_2}(V_1=0|U_1=1) = 1 -\frac{\epsilon}{2}
        \end{align*}
        Moreover, we have
        \begin{equation*}
            \sum_{U_1=0, \U' \setminus \{ U_1\}} \prod_{j>1}P(v_j|\Pa{V_j}{\G'}) + \sum_{U_1=1, \U' \setminus \{ U_1\}} \prod_{j>1}P(v_j|\Pa{V_j}{\G'}) = 2^{n-1}Q[\V'\setminus\{V_1\}] = 1.
        \end{equation*}
        This yields
        \begin{align*}
            & 2^{n-1}Q^{\M_1}[\V'](\mathbf{0}) = \left( 1 - \frac{\epsilon}{2} \right)a + \frac{\epsilon}{2} b, \\
            & 2^{n-1}Q^{\M_2}[\V'](\mathbf{0}) = \left( 1 - \frac{\epsilon}{2} \right)b + \frac{\epsilon}{2} a,
        \end{align*}
        where 
        \begin{align*}
            & a = \sum_{U_1=0, \U' \setminus \{ U_1\}} \prod_{j>1}P(V_j=0|\Pa{V_j}{\G'}), \\
            & b = \sum_{U_1=1, \U' \setminus \{ U_1\}} \prod_{j>1}P(V_j=0|\Pa{V_j}{\G'}).
        \end{align*}
        To prove that $Q^{\M_1}[\V'](\mathbf{0}) \neq Q^{\M_2}[\V'](\mathbf{0})$, it is enough to show that $a\neq b$.
        
        Denote by $S_n$ the observed variable connected to $V_1$ by a bidirected edge in $\G'_{\V'}$.
        We define $\V'_{n-1} := \V'\setminus \{V_1\}$, $\U'_{n-1} := \U'\setminus \{U_1\}$ and $\G_{n-1}:=\G'[\V' \setminus \{V_1\}]$.
        We also define models $\M_1^{(n-1)}$ and $\M_2^{(n-1)}$ as follows:
        
        \textbf{New model $\M_1^{(n-1)}$}: For $V \in \V_{n-1}'$:
        \begin{align}
            V = 
            \begin{cases}
                \bigoplus \Pa{V}{\G_{n-1}}, \quad \text{with probability } 1-\epsilon,\\
                1, \quad \text{with probability } \frac{\epsilon}{2},\\
                0, \quad \text{with probability } \frac{\epsilon}{2},
            \end{cases}
        \end{align}
        and for $U \in \U'_{n-1}$:
        \begin{equation*}
            P(U=0)= P(U=1) = 0.5.
        \end{equation*}
        
        \textbf{Model $\M_2^{(n-1)}$}:
        For all $V$ in $\V'_{n-1}\setminus \{S_{n}\}$:
        \begin{align}
            V = 
            \begin{cases}
                \bigoplus \Pa{V}{\G_{n-1}}, \quad \text{with probability } 1-\epsilon \\
                1, \quad \text{with probability } \frac{\epsilon}{2},\\
                0, \quad \text{with probability } \frac{\epsilon}{2},
            \end{cases}
        \end{align}
        and for $S_{n}$:
        \begin{align}
            S_n = 
            \begin{cases}
                \urcorner\bigoplus \Pa{S_{n}}{\G_{n-1}}, \quad \text{with probability } 1-\epsilon \\
                1, \quad \text{with probability } \frac{\epsilon}{2},\\
                0, \quad \text{with probability } \frac{\epsilon}{2}.
            \end{cases}
        \end{align}
        Similar to the first model, for each unobserved variable $U\in\U'_{n-1}$, we define
        \begin{equation*}
            P(U=0)= P(U=1) = 0.5.
        \end{equation*}
        Note that:
        \begin{align*}
            & \left( \frac{1}{2} \right)^{n-2}\sum_{U_1=0, \U'_{n-1}} \prod_{j>1}P(V_j|\Pa{V_j}{\G'}) =   \left( \frac{1}{2} \right)^{n-2}a = Q^{\M_1^{(n-1)}}[\V'_{n-1}](\mathbf{0}),\\
            & \left( \frac{1}{2} \right)^{n-2}\sum_{U_1=1, \U'_{n-1}} \prod_{j>1}P(V_j|\Pa{V_j}{\G'}) =   \left( \frac{1}{2} \right)^{n-2}b =   Q^{\M_2^{(n-1)}}[\V'_{n-1}](\mathbf{0}).
        \end{align*}
        It remains to show  $Q^{\M_1^{(n-1)}}[\V'_{n-1}](\mathbf{0})\neq Q^{\M_2^{(n-1)}}[\V'_{n-1}](\mathbf{0})$. 
        Note that if this holds, then by our construction, $Q^{\M_1}[\V'](\mathbf{0})\neq Q^{\M_2}[\V'](\mathbf{0})$.
        In other words, we could reduce the size of the graph while keeping the same problem. Thus, by continuing this procedure, we eventually reach graph $\G_2$ that consists of only two observed nodes and showing $Q^{\M_1^{(2)}}[\V'_{2}](\mathbf{0})\neq Q^{\M_2^{(2)}}[\V'_{2}](\mathbf{0})$ in that graph will conclude the result.
        For graph $\G_2$, we have
        \begin{align*}
            & Q^{\M_1^{(2)}}[\V'_{2}](\mathbf{0}) =\left( \frac{\epsilon}{2}\right)^2 + 2\frac{\epsilon}{2}(1-\epsilon)\frac{1}{2}+(1-\epsilon)^2\frac{1}{2}, \\
            & Q^{\M_2^{(2)}}[\V'_{2}](\mathbf{0}) = \left( \frac{\epsilon}{2}\right)^2 + 2\frac{\epsilon}{2}(1-\epsilon)\frac{1}{2}.
        \end{align*}
        This clearly shows that $Q^{\M_1^{(2)}}[\V'_{2}](\mathbf{0})\neq Q^{\M_2^{(2)}}[\V'_{2}](\mathbf{0})$.
    \end{proof}
    
\end{document}