% \documentclass{uai2023} % for initial submission
\documentclass[accepted]{uai2023} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2023} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2023} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}
\usepackage{dsfont, amsfonts}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams
\usepackage{enumitem}
\usepackage{mathabx}

\usepackage{amssymb}
\usepackage{amsthm}
\usepackage[capitalize,noabbrev]{cleveref}


\usepackage{subfiles}
\usepackage{tikz, subcaption}
\usetikzlibrary{positioning}
\usepackage{algorithm}
\usepackage{algpseudocode}
% \usepackage{algorithmic}
% \usepackage[ruled,vlined,linesnumbered]{algorithm2e}
\usepackage{todonotes}
\usepackage{bbm}
\usepackage{centernot}


\newtheorem{theorem}{Theorem}%[section]
\newenvironment{customthm}[1]{\renewcommand\thetheorem{#1}\theorem}{\endtheorem}    
\newtheorem{corollary}{Corollary}%[theorem]
\newtheorem{lemma}{Lemma}
\newtheorem{assumption}{Assumption}
\newenvironment{customlem}[1]{\renewcommand\thelemma{#1}\lemma}{\endlemma}
  
\newenvironment{customtheorem}[1]{\renewcommand\thetheorem{#1}\theorem}{\endtheorem}
  
\newtheorem{proposition}{Proposition}
\newenvironment{customprp}[1]{\renewcommand\theproposition{#1}\proposition}{\endproposition}
\newtheorem{definition}{Definition}
\newtheorem{remark}{Remark}
\newtheorem{claim}{Claim}
\newtheorem{conjecture}{Conjecture}
\newenvironment{myproof}[1][\proofname]{%
  \begin{proof}[#1]$ $\nobreak\ignorespaces
}{%
  \end{proof}
}
\newcommand{\V}[0]{\mathbf{V}}
\newcommand{\C}[0]{\mathbf{C}}
\newcommand{\W}[0]{\mathbf{W}}
\newcommand{\U}[0]{\mathbf{U}}
\newcommand{\E}[0]{\mathbf{E}}
\newcommand{\X}[0]{\mathbf{X}}
\newcommand{\R}[0]{\mathbf{R}}
\newcommand{\Y}[0]{\mathbf{Y}}
\newcommand{\Z}[0]{\mathbf{Z}}
\newcommand{\OO}[0]{\mathbf{O}}
\newcommand{\A}[0]{\mathbf{A}}
\newcommand{\B}[0]{\mathbf{B}}
\newcommand{\T}[0]{\mathbf{T}}
\newcommand{\D}[0]{\mathbf{D}}
\newcommand{\Pa}[2]{\textit{Pa}_{#2}(#1)}
\newcommand{\Ch}[2]{\textit{Ch}_{#2}(#1)}
\newcommand{\Anc}[2]{\textit{Anc}_{#2}(#1)}
\newcommand{\De}[2]{\textit{De}_{#2}(#1)}

\newcommand{\x}[0]{\mathbf{x}}
\newcommand{\y}[0]{\mathbf{y}}
\newcommand{\z}[0]{\mathbf{z}}
\newcommand{\w}[0]{\mathbf{w}}
\newcommand{\dd}[0]{\mathbf{d}}

\newcommand{\G}[0]{\mathcal{G}}
\newcommand{\M}[0]{\mathcal{M}}
\newcommand{\F}[0]{\mathcal{F}}
\newcommand{\dom}[2]{\mathfrak{X}_{#2}(#1)}


\newcommand{\independent}{\perp\!\!\!\perp}
\newcommand{\notindependent}{\centernot{\independent}}

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{On Identifiability of Conditional Causal Effects}

% The standard author block has changed for UAI 2023 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:<yaroslav.kivva@epfl.ch>?Subject=Your UAI 2023 paper}{Yaroslav Kivva}{}}
\author[1, 3]{Jalal Etesami}
\author[1,2]{Negar Kiyavash}
% % Add affiliations after the authors
\affil[1]{%
    School of Computer and Communication Sciences\\
    EPFL\\
    Lausanne, Switzerland
}
\affil[2]{%
     College of Management of Technology\\
    EPFL\\
    Lausanne, Switzerland
}
\affil[3]{%
   TUM School of Computation, Information and Technology\\
   Technical University of Munich
}
\begin{document}
\maketitle
\begin{abstract}
We address the problem of identifiability of an arbitrary \textit{conditional causal effect} given both the causal graph and a set of any observational and/or interventional distributions of the form $Q[S]:=P(S|do(V\setminus S))$, where $V$ denotes the set of all observed variables and $S\subseteq V$. 
    We call this problem conditional generalized identifiability (\mbox{\textbf{c-gID}} in short) and prove the completeness of Pearl's $do$-calculus for the \mbox{c-gID} problem by providing a sound and complete algorithm for the c-gID problem.  
    This work revisits the c-gID problem in \cite{lee2020general, correa2021nested} by explicitly adding the positivity assumption which is crucial for identifiability. It extends the results of \citep{lee2019general, kivva2022revisiting} on general identifiability (gID) which studied the problem for  \mbox{\textit{unconditional}} causal effects and \cite{shpitser2012identification} on identifiability of conditional causal effects given \textit{merely} the observational distribution $P(\mathbf{V})$ as our algorithm generalizes the algorithms proposed in \citep{kivva2022revisiting} and \citep{shpitser2012identification}.
\end{abstract}


\section{Introduction}

This paper addresses the problem of identification of a conditional post-interventional distribution from the combination of observational and/or interventional distributions. Formally, the relationships between the variables of interest are established by a directed acyclic graph (DAG) \cite{pearl1995causal}. Each node in the causal graph represents some random variable that may simulate real-life measurements, and each directed edge encodes a possible causal relationship between the variables. 
In general, a subset of the nodes in the DAG are observed and others may be hidden. 
The hidden nodes could result in spurious correlations between observed variables and complicate the question of identifiability. On the other hand, when all the variables in the system are observable and the distribution over them is known then any conditional causal effect is identifiable.

%Studying these causal relationships is very important for applied science such as health and the social sciences, artificial intelligence, machine learning, etc.  When someone performs an experiment or forces somebody to follow some treatment then this action is represented by a $do(\cdot)$ operation in the causal inference literature. Practically an action $do(\x)$ changes the distribution over the observed variables $P(\cdot)$ into a new one $P_{\x}(\cdot)$ which is called post-interventional distribution. The ability to compute a causal effect that was not observed before may help in constructing explanations and making conclusions about the phenomenon under investigation. \cite{shpitser2012identification} provides an example of the important relationship between the identifiability of a conditional causal distribution and sequential decision problem that arise in many domains \citep{pearl1995probabilistic}.

The question of identification of the causal effect has been one of the central focuses of research in the causal inference literature. The classical setting of the problem asks whether the causal effect $P_{\x}(\y)$\footnote{This notation indicates the causal effect on $\textbf{y}$ after the intervention $do(\X=\x)$, that is, $P(\textbf{y}| do(\X=\x))$ shortened to $P_{\x}(\textbf{y})$.} is identifiable in a given graph $\G$ from the observational distribution $P(\V)$ ($\V$ is the set of all observed nodes in the graph $\G$). The problem was solved in \cite{shpitser2006identification, huang2006identifiability} and later \cite{shpitser2012identification} extended the result by answering the question of when a conditional causal effect $P_{\x}(\y|\z)$ is identifiable in a given graph $\G$.
The work of \cite{bareinboim2012causal, lee2019general, kivva2022revisiting} solved a generalization of the classical identifiability problem, namely identifiability of unconditional causal effect $P_\x(\y)$ from a specific mix of observational and interventional distributions.  It is noteworthy that all aforementioned works proved that the rules of do-calculus are sound and complete for the identification of the causal effect in their settings. 
Furthermore, the work of \cite{tikka2019causal, mokhtarian2022causal, bareinboim2014transportability, bareinboim2015recovering} considers the problem of identifiability in the presence of additional information to observational/interventional distributions and the causal graph $\G$. More specifically, \cite{mokhtarian2022causal} considers the identifiability problem in the presence of additional knowledge in the form of context-specific independence for some variables.
\cite{tikka2019causal} assumes that they have access to multiple incomplete data sources and \cite{bareinboim2015recovering} studies the identifiability problem under a selection bias.
%and \cite{bareinboim2014transportability} solves the problem of identification of the query in a target domain using the observations from other source domains

%{\color{red}
%Briefly discuss \cite{lee2020general,correa2021nested} that consider similar problem but their completeness proof is not valid because of the positivity assumption.}

{

In this paper, we extend both the general identifiability (gID) result of \cite{kivva2022revisiting} and the conditional identifiability result of \cite{shpitser2012identification}. 
%the ideas of \cite{kivva2022revisiting} on general identifiability problem (gID) and \cite{shpitser2012identification} on identifiability of conditional causal effect from merely observational distribution $P(\V)$.  
More specifically, our work answers the question of identifiability of an \textit{arbitrary} conditional causal effect $P_{\x}(\y|\z)$ under the same set of assumptions as in gID problem. 
We call this problem \textit{conditional general identifiability}, for short \textbf{c-gID}.
This problem has been studied in \cite{lee2020general, correa2021nested}.
 %The c-gID problem can be considered as a corollary of the works \cite{lee2020general, correa2021nested}. 
 The authors of \cite{lee2020general} generalize the problem of c-gID by assuming that observable data is available from multiple domains and \cite{correa2021nested} considers the c-gID problem as an identifiability problem of counterfactual quantities. 
 However, both of the aforementioned works are based on causal models that violate the positivity assumption (see Appendix B) which is crucial for identification as discussed in \cite{kivva2022revisiting}. 
 %More precisely, their proofs of completeness rely on causal models that ignores a positivity assumption. 
Since they did not discuss whether their proposed models can be modified such that the positivity assumption holds and it is not straightforward whether such modifications exist, herein we present an alternative proof for the c-gID problem including its soundness and completeness.
The causal models developed here for proving the completeness of our algorithm are novel and satisfy the positivity assumption. 
%Furthermore our proof of the completeness part of the c-gID problem is novel and we believe that they  important itself for the further research in generalization of identifiability results.
}
%\color{red}In this paper, we extend the ideas of \cite{kivva2022revisiting} on general identifiability problem (gID) and \cite{shpitser2012identification} on identifiability of conditional causal effect from merely observational distribution $P(\V)$.  More specifically, our work answers the question of the identification of an \textit{arbitrary} causal effect $P_{\x}(\y|\z)$ under assumptions similar to the ones in gID problem. We call this problem \textit{conditional general identifiability}, for short \textbf{c-gID}.}
%Additionally, we provide the sound and complete algorithm that generalizes both algorithms of \cite{kivva2022revisiting} and \cite{shpitser2012identification}. 
% A nice corollary of our results is that the rules of do-calculus remain sound and complete for the c-gID problem.

\section{Preliminaries}
\subsection{Notation and definitions}
 We denote random variables by capital letters and their realization by their lower-case version. 
 Similarly, a set of random variables and their realizations are denoted by bold capital and bold lower-case letters, respectively. 
 For two integers $a\leq b$, we define $[a:b]:=\{a, a+1, \dots, b\}$.
 For any random variable $X$, we denote its domain set by $\dom{X}{}$ and for any set of random variables $\X$, we denote by $\dom{\X}{}$, the Cartesian product of the domains of the variables in $\X$. 
 Suppose that $\X$ and $\Y$ are arbitrary sets of random variables, then we say that realizations $\x$ and $\y$ are \textit{consistent}, if the values of $\X\cap\Y$ in $\x$ and $\y$ are the same. 
 Also, we use $\dom{\X}{\y}$ to denote a set of realizations of $\X$ that are consistent with $\y$.
  %the values of variables $\X \cap \Y$ in the realization of $\y$. 
 Suppose that $\X'\subseteq \X$ and $\x$ is a realization of $\X$. Then, we use $\x[\X']$ to denote the realization of $\X'$ that is consistent with $\x$.
 %with the same values of $\X'$ as in the realization $\x$. 
 When it is clear from the context, we write $\x'$ instead of $\x[\X']$. 

\textbf{Causal Graph:}
Consider a directed graph $\G:=(\V\cup \U, \E)$ over nodes $\V\cup\U$ in which $\V$ and $\U$ denote the sets of observed and hidden variables, respectively, and $\E\subseteq (\V\cup \U)\times(\V\cup \U)$ denotes the set of directed edges. 
A causal graph $\G$ is a directed acyclic\footnote{It contains no directed cycle. } graph (DAG).
%over a set of vertices $\V\cup\U$ with directed edges from $\E$, where $\V$ is responsible for a set of observed random variables and $\U$ is responsible for a set of latent random variables. 
We say that node $X$ is a parent of another node $Y$ (subsequently, $Y$ is a child of $X$) if and only if there exists a directed edge from $X$ to $Y$ in $\G$, i.e., $(X, Y) \in \E$. 
Similarly, $X$ is said to be an ancestor of $Y$ (subsequently, $Y$ is a descendant of $X$) if and only if there is a directed path from $X$ to $Y$ in $\G$. 
We denote the set of parents, children, ancestors, and descendants of $X$ by $\Pa{X}{\G}$, $\Ch{X}{\G}$, $\Anc{X}{\G}, \De{X}{\G}$ respectively. We assume that $X$ belongs to all the aforementioned sets. 
%Further, we suppose that any node $X \in \V\cup \U$ is a child\textbackslash parent\textbackslash ancestor\textbackslash descendant of itself and we denote a set of parents, children, ancestors, and descendants of $X$ by $\Pa{X}{\G}$, $\Ch{X}{\G}$, $\Anc{X}{\G}, \De{X}{\G}$ respectively. 
Additionally, for a subset of nodes $\X$, we define $\Pa{\X}{\G}:=\bigcup_{X\in \X}\Pa{X}{\G}$ and analogously, define $\Ch{\X}{\G}$, $\Anc{\X}{\G}$ and $\De{\X}{\G}$. 

A causal graph $\G$ is called semi-Markovian if every node in $\U$ has exactly two children and no parents. 
Suppose that $\G$ is a semi-Markovian graph and $\X\subseteq\V$. 
In this case, we use $\G[\X]$ to denote the induced subgraph of $\G$ over variables in $\X$ including all unobserved variables that have both children in $\X$. 
We also use $\widehat{\G}[\X]$ to denote the dual graph of $\G[\X]$ that is a mixed\footnote{It contains both directed and bidirected edges.} graph and it is constructed from $\G[\X]$ by replacing unobserved variables and their outgoing arrows with bidirected edges.
By the abuse of notation, we use $\G[\X]$ and $\widehat{\G}[\X]$ interchangeably.% to talk about the same subgraph but refer to different properties. 

%Other important structures that we need to define are c-component and c-forest. These structures play a key role in the identifiability problems.
\begin{definition}[c-component]
    C-components of a subset $\X$ in $\G$ are the connected components in $\widehat{\G}[\X]$ after removing all directed edges, i.e., nodes in each c-component are connected via bidirected edges. 
    $\X$ is called a single c-component if $\X$ has only one c-component, i.e., $\widehat{\G}[\X]$ is a connected graph after removing all directed edges.
\end{definition}

\begin{figure}
\hspace{1cm}
        \begin{tikzpicture}[
                roundnode/.style={circle, draw=black!60,, fill=white, thick, inner sep=1pt},
                dashednode/.style = {circle, draw=black!60, dashed, fill=white, thick, inner sep=1pt},
                ]
                    % Nodes
                    \node[roundnode]        (X1)        at (-1.5, 0)                   {$X_1$};
                    \node[roundnode]        (X2)        at (1.5, 0)                   {$X_2$};
                    \node[roundnode]        (Y1)        at (-1.5, -1.6)                  {$Y_1$};
                    \node[roundnode]        (Y2)        at (1.5, -1.6)                  {$Y_2$};
                    \node[dashednode]       (U1)        at (0, 0)                   {$U_1$};
                    \node[dashednode]       (U2)        at (-2.75, -.8)                   {$U_2$};
                    
                    %Edges
                    \draw[-latex] (X1.south) -- (Y1.north) ;
                    \draw[-latex] (X2) -- (Y2);
                    \draw[latex-, dashed] (X1) -- (U1);
                    \draw[-latex, dashed] (U1) -- (X2);
                    \draw[latex-, dashed] (Y1) -- (U2);
                    \draw[-latex, dashed] (U2) -- (X1);
                \end{tikzpicture}
\caption{A semi-Markovian DAG  over the set of observed variables $\V=\{X_1,X_2,Y_1,Y_2\}$ and the set of hidden variables $\U=\{U_1,U_2\}$. } 
    \label{fig: example}
\end{figure}

For instance in Figure \ref{fig: example}, c-components of $\{X_1,X_2,Y_2\}$ are $\{X_1,X_2\}$ and $\{Y_2\}$. In this DAG, $\{X_1,X_2,Y_1\}$ and $\{Y_2\}$ are each single c-components. 


\begin{definition}[c-forest]
    Let $\F$ be a subgraph of $\G$ over a set of nodes $\X$. The maximal subset of $\X$ with no children in $\F$ is called the root set and denoted by $\R$. 
    $\mathcal{F}$ is an $\R$-rooted c-forest if $\X$ is a single c-component with root set $\R$, and all observable nodes in $\X$ have at most one child in $\F$.
\end{definition}

In Figure \ref{fig: example}, $\G[\{X_1,X_2,Y_1\}]$ is a $\{Y_1,X_2\}$-rooted c-forest. 


\textbf{Causal Model:}
A causal model $\M$ is defined over a set of random variables $\V\cup \U$ via Structural Equation Model (SEM) \citep{pearl2009causality} with a causal graph $\G$. 
In a SEM with a causal graph $\G$, each variable $X \in \V\cup \U$ is determined by its parents and an exogenous variable $\epsilon_X$, i.e. $X=f_{X}(\Pa{X}{\G}, \epsilon_X)$. 
It is assumed that the set of exogenous variables, $\{ \epsilon_X | X \in \V \cup \U\}$, are mutually independent.
If graph $\G$ is semi-Markovian, then $\M$ is said to be a semi-Markovian causal model. 
Because the problem of identifiability in a DAG is equivalent to a relative identifiability problem in a semi-Markovian DAG \citep{huang2006identifiability}, in this work, we only consider the problem of identifiability in semi-Markovian models.

In a semi-Markovian causal model, by Markov property \citep{pearl2009causality}, the induced joint distribution can be factorized as follows
\begin{equation*}
    P^{\M}(\mathbf{v}) = \sum_{\textbf{u}}\prod_{X\in \V}P^{\M}(x|\Pa{X}{\G})\prod_{U \in \U}P^{\M}(u),
\end{equation*}
where the summation is over latent variables in $\U$. 
We use $\mathbb{M}(\G)$ to denote the set of causal models with graph $\G$ such that for any $\M \in \mathbb{M}(\G)$ and any realization $\mathbf{v} \in \dom{\mathbf{V}}{}$,  $P^{\M}(\mathbf{v})>0$.
In the remainder of this work, we assume that all causal models belong to $\mathbb{M}(\G)$.
This is known as the positivity assumption in the causality literature and, as discussed in \cite{kivva2022revisiting}, it is crucial for developing sound identification algorithms. 

 In a causal model $\M$, a post-interventional distribution is defined using the $do$-operation. 
 An intervention $do(X=x)$ modifies the corresponding SEM by replacing the equation of $X=f_{X}(\Pa{X}{\G}, \epsilon_X)$ by $X=x$.
The conditional post-interventional distribution of $\textbf{y}$ given $\textbf{s}$ after the intervention $do(\X=\x)$ is denoted by $P(\textbf{y}| do(\X=\x), \textbf{s}):= P_{\x}(\textbf{y}|\textbf{s})$.

 Suppose that $\mathbf{S} = \mathbf{S}' \cup \mathbf{S}''$, where $\mathbf{S}'$ and $\mathbf{S}''$ are two disjoint subsets of observed variables $\V$. 
 We define $Q$-notations, $Q[\mathbf{S}](\cdot)$ and $Q[\mathbf{S}'|\mathbf{S}''](\cdot)$ as follows:
 \begin{align}
     & Q[\mathbf{S}](\mathbf{v}) := P_{\mathbf{v}\setminus \mathbf{s}}(\mathbf{s}), \\
     & Q[\mathbf{S}'|\mathbf{S}''](\mathbf{v}) := P_{\mathbf{v}\setminus \mathbf{s}}(\mathbf{s}'|\mathbf{s}''),
 \end{align}
 where $\mathbf{s} = \mathbf{v}[\mathbf{S}]$, $\mathbf{v}\setminus \mathbf{s} = \mathbf{v}[\V \setminus \mathbf{S}]$, $\mathbf{s}' = \mathbf{v}[\mathbf{S}']$, and $\mathbf{s}'' = \mathbf{v}[\mathbf{S}'']$.
% \begin{equation*}
 %    \begin{split}
  %       & \mathbf{s} = \mathbf{v}[\mathbf{S}], \quad \mathbf{v}\setminus \mathbf{s} = \mathbf{v}[\V \setminus \mathbf{S}], \\
   %      & \mathbf{s}' = \mathbf{v}[\mathbf{S}'], \quad \mathbf{s}'' = \mathbf{v}[\mathbf{S}''].
    % \end{split}
% \end{equation*}
 Note that $Q[\V](\textbf{v})=P(\V=\textbf{v})$.
 By Markov property and basic probabilistic manipulation, we have
 \begin{align}
    \label{eq: Q}
     & Q[\mathbf{S}](\mathbf{v}) = \sum_{\U}\prod_{S \in \mathbf{S}}P(s|\Pa{S}{\G})\prod_{U \in \U}P(u),\\
     \label{eq: conditional Q}
     & Q[\mathbf{S}'|\mathbf{S}''](\mathbf{v}) = \frac{Q[\mathbf{S}](\mathbf{v})}{\sum_{\mathbf{v}' \in \dom{\V}{\mathbf{v}\setminus\mathbf{s}'}} Q[\mathbf{S}](\mathbf{v}')}.
 \end{align}

%To link the independence properties of the random variables in graph $\G$ we use the well-known notion of d-separation \citep{pearl2009causality}.
\begin{definition}[Blocked path]
A path in $\G$ is a non-repeated sequence of connected nodes. 
A path $p$ in $\G$ is said to be blocked by a set of nodes $\Z$ if and only if
    \begin{itemize}[leftmargin=*]
        \item $p$ contains a chain $X\rightarrow W \rightarrow Y$ or fork $X\leftarrow W \rightarrow Y$, such that $W \in \Z$, or
        \item $p$ contains a collider $X \rightarrow W \leftarrow Y$ (node $W$ is called a collider), such that $\Z \cap \De{W}{\G}=\emptyset$.
    \end{itemize}
\end{definition}
Two disjoint sets of nodes $\X$ and $\Y$ are d-separated by $\Z$ in $\G$ if every path between $\X$ and $\Y$ is blocked by $\Z$; we denote this by $(\X \independent \Y|\Z)_{\G}$. 
Using d-separation, we introduce rules of do-calculus \citep{pearl2009causality} as the main tools for causal effect identification.
\begin{itemize}[leftmargin=*]
    \item Rule 1: $P_{\x}(\y|\z,\w)=P_{\x}(\y|\w)$ if $(\Z \independent \Y|\X,\W)_{\G_{\overline{\X}}}$.
    
    \item Rule 2: $P_{\x,\z}(\y|\w)=P_{\x}(\y|\z,\w)$ if $(\Z \independent \Y|\X,\W)_{\G_{\overline{\X},\underline{\Z}}}$.
    
    \item Rule 3: $P_{\x,\z}(\y|\w)=P_{\x}(\y|\w)$ if $(\Z \independent \Y|\X,\W)_{\G_{\overline{\X},\overline{\Z_{\W}}}}$,
\end{itemize}
where $\G_{\overline{\X},\underline{\Y}}$ denotes an edge subgraph of $\G$ where all incoming arrows into $\X$ and all outgoing arrows from $\Y$ are deleted and $\Z_{\W}:=\Z\setminus \Anc{\W}{\G_{\overline{\X}}}$.


\subsection{Classical identifiability (ID) }


\begin{table*}[th]
\caption{Different types of identifiability problems.} \label{id problems}
\begin{center}
\begin{tabular}{|c|c|c|c|}
\hline
\textbf{Problem}  &\textbf{Target}  & \textbf{Input} & \textbf{Solved}\\
\hline
\hline
Causal effect identifiability (ID)   &   &   &\\
\color{blue}\cite{shpitser2006identification}   & $P_{\x}(\y)$   & $\G, P(\V)$   &\checkmark\\
\color{blue}\cite{huang2006identifiability}   &   &   &\\
\hline
Conditional causal effect identifiability (c-ID) & $P_{\x}(\y|\z)$ & $\G, P(\mathbf{V})$ & \checkmark\\
\color{blue}\cite{shpitser2012identification}   &   &   &\\
\hline
z-identifiability (zID) & $P_{\x}(\y)$ & $\G, P(\V), \{P_{\V \setminus \A'}(\A')| \forall \A' \subset \A\}$ & \checkmark\\
\color{blue}\cite{bareinboim2012causal}   &   &   &\\
\hline
g-identifiability (gID) &    &   &\\
\color{blue}\cite{lee2019general,kivva2022revisiting}   & $P_{\x}(\y)$   & $\G, \{P(\mathbf{A}_i|do(\mathbf{V}\setminus\mathbf{A}_i))\}_{i=0}^m$   & \checkmark\\
\hline
\textit{Conditional general identifiability (c-gID)}  & $P_{\x}(\y|\z)$ & $\G, \{P(\mathbf{A}_i|do(\mathbf{V}\setminus\mathbf{A}_i))\}_{i=0}^m$ & \checkmark our work \\
 \color{blue}\cite{lee2020general,correa2021nested} &   &   &\\
\hline
Generalized identifiability  & $P_{\x}(\y|\z)$ & $\G, \{P(\mathbf{A}_i|do(\mathbf{B}_i), \mathbf{C}_i)\}_{i=0}^m$ & ?\\
\hline
\end{tabular}
\end{center}
\end{table*}
The classical identifiability problem refers to computing a causal effect $P_{\x}(\y)$ from a given joint distribution $P(\V)$ in a causal graph $\G$. 
This problem was solved independently by \cite{shpitser2006identification} and \cite{huang2006identifiability}.  
\cite{shpitser2012identification} extended this result to identifiability of a conditional causal effect, i.e., $P_{\x}(\y|\z)$.

%{\color{red} Here, you can add a remark and say about the positivity violations of \cite{lee2020general,correa2021nested} and refer to the appendix for further details on how the positivity assumptions are ignored there and say that correcting their proofs are not straightforward and hence we provide an alternative proof. }
\begin{definition}[conditional ID]
    Suppose $\X$, $\Y$, and $\Z$ are three disjoint subsets of $\V$.
    The causal effect $P_{\x}(\y|\z)$ is said to be conditional ID in $\G$ if for any $\x \in \dom{\X}{}$, $\y \in \dom{\Y}{}$, and $\z \in \dom{\Z}{}$, $P^{\M}_{\x}(\y|\z)$ is uniquely computable from $P^{\M}(\mathbf{V})$ in any causal model $\mathcal{M} \in \mathbb{M}(\G)$.
    %consistent with the given observational distribution $P(\V)$.
\end{definition}
Knowing $P_{\x}(\y, \z)$, it is straightforward to uniquely compute $P_{\x}(\y|\z)$ from
$
    P_{\x}(\y|\z) = {P_{\x}(\y, \z)}/{\sum_{\y' \in \dom{\Y}{}}P_{\x}(\y', \z)}.
$
On the other hand, \cite{tian2004identifying} showed that $P_{\x}(\y|\z)$ might be identifiable in $\G$ even if $P_{\x}(\y, \z)$ is not identifiable. 
This happens when the ``non-identifiable parts'' of $P_{\x}(\y, \z)$ in the numerator cancel out with the non-identifiable parts of $P_{\x}(\z)$ in the denominator. The next example demonstrates such a scenario.

\begin{figure}
\centering
\begin{tikzpicture}[
        roundnode/.style={circle, draw=black!60,, fill=white, thick, inner sep=1pt, minimum size=0.65cm},
        dashednode/.style = {circle, draw=black!60, dashed, fill=white, thick, inner sep=1pt, minimum size=0.65cm},
        ]
            % Nodes
            \node[roundnode]        (Z1)        at (-1.5, 0)                 {$Z_1$};
            \node[roundnode]        (W1)        at (1.5, 0)                  {$W_1$};
            \node[roundnode]        (X1)        at (-1.5, 1.3)               {$X_1$};
            \node[roundnode]        (Y1)        at (1.5, 1.3)                {$Y_1$};
            \node[roundnode]        (Z2)        at (0, 0)                    {$Z_2$};
            
            %Edges
            \draw[-latex] (X1.south) -- (Z1.north);
            \draw[latex-] (Z1.east) -- (Z2.west);
            \draw[latex-] (Z2.east) -- (W1.west);
            \draw[latex-] (W1.north) -- (Y1.south);
            \draw[latex-latex, dashed] (X1) .. controls +(left:10mm ) and +(left:10mm) .. (Z1);
            % \draw[latex-latex, dashed] (Z1.north east) .. controls +(up:5mm) and +(up:5mm) .. (W1.north west);
            \draw[latex-latex, dashed] (W1) .. controls +(right:10mm ) and +(right:10mm) .. (Y1);
        \end{tikzpicture}
\caption{A semi-Markovian DAG over \mbox{$\V=\{X_1,Y_1,Z_1,Z_2, W_1\}$}} 
    \label{fig: example c-ID}
\end{figure}
\textbf{Example:}
Consider the causal graph $\G$ shown in Figure \ref{fig: example c-ID}. Assume that one wants to compute the causal effect $P_{\x}(\y|\z)$, where $\X = \{X_1\}$, $\Y = \{Y_1\}$ and $\Z = \{Z_1, Z_2\}$. Then,
\begin{equation*}
     P_{x_1}(y_1| z_1, z_2) = \frac{P_{x_1}(y_1, z_1, z_2)}{P_{x_1}(z_1, z_2)},
\end{equation*}
where 
$$P_{x_1}(y_1, z_1, z_2) = \sum_{w_1 \in \dom{W_1}{}}P_{x_1}(y_1, w_1, z_1, z_2),$$
 and 
$$P_{x_1}(z_1, z_2) = \sum_{\substack{w_1 \in \dom{W_1}{}\\ y_1 \in \dom{Y_1}{}}}P_{x_1}(y_1, w_1, z_1, z_2).$$

%\begin{align*}
 %    P_{x_1}(y_1, z_1, z_2) =& \sum_{w_1 \in \dom{W_1}{}}P_{x_1}(y_1, w_1, z_1, z_2),\\
 %    P_{x_1}(z_1, z_2) =& \sum_{\substack{w_1 \in \dom{W_1}{}\\ y_1 \in \dom{Y_1}{}}}P_{x_1}(y_1, w_1, z_1, z_2).
%\end{align*}
In terms of $Q$-notation, we have
\begin{align*}
     P_{x_1}(y_1, z_1, z_2) =& \sum_{W_1}Q[Y_1, W_1, Z_1, Z_2],\\
     P_{x_1}(z_1, z_2) =& \sum_{W_1, Y_1}Q[Y_1, W_1, Z_1, Z_2].
\end{align*}
Using results of \cite{huang2006identifiability}, the above equations can be simplified as follows,
\begin{align*}
     P_{x_1}(y_1, z_1, z_2) =& Q[Z_1] \sum_{W_1} Q[Y_1, W_1, Z_2],\\
     P_{x_1}(z_1, z_2) = & Q[Z_1] \sum_{W_1, Y_1}Q[Y_1, W_1, Z_2].
\end{align*}
Results of \cite{huang2006identifiability, shpitser2006identification} imply that $Q[Z_1]$ is not ID from $\G$, however $Q[Y_1, W_1, Z_2]$ is ID in $\G$. Therefore, both causal effects $P_{\x}(\y, \z)$ and $P_{\x}(\z)$ are not ID in $\G$, but clearly 
$$
P_{\x}(\y|\z) = \frac{\sum_{W_1} Q[Y_1, W_1, Z_2]}{\sum_{W_1, Y_1}Q[Y_1, W_1, Z_2]}
$$
is identifiable in $\G$.


%The last theorem links the problem of causal effect identifiability to the problem of conditional causal effect identifiability.


\subsection{Generalized identifiability (gID)}

In this problem, the goal is to identify a causal effect in a given graph $\G$ from a set of observational and/or interventional distributions instead of only observational distribution $P(\V)$. 
This problem, to the best of our knowledge, remains open when the set of given distributions is arbitrary. In the special case when the given distributions are in the form of $Q$-notations, the problem is called generalized identifiability (gID) (see below for a formal definition) and was solved by \cite{lee2019general,kivva2022revisiting}.
See Table \ref{id problems} for a summary of solved and unsolved problems in the causal identifiability context. 



%Assume that instead of observational distribution $P(\V)$ are given interventional distributions (?). Then one can verify that the causal effect from \textbf{Example} become identifiable, although $P_{\x}(\y, \z)$ still non-identifiable. In general, the question of which observational and interventional distributions are enough to know in order to identify a causal effect $P_{\x}(\y|\z)$ is very complicated and remains open, i.e. "Generalized identifiability" in Table \ref{id problems}.
 
%\cite{lee2019general} and \cite{kivva2022revisiting} considered a relaxation of "Generalized identifiability" problem which is called by gID problem.

\begin{definition}[gID]
Suppose $\X$ and $\Y$ are two disjoint subsets of $\V$ and $\mathbb{A} := \{\A_i\}_{i=0}^m$ is a collection of subsets of $\V$, i.e., $\A_i \subseteq \V$ for all \mbox{$i \in [0:m]$}. 
The causal effect $P_{\x}(\y)$ is said to be gID from $(\mathbb{A}, \G)$ if for any $\x\in \dom{\X}{}$ and $\y\in \dom{\Y}{}$, $P_{\x}^{\M}(\y)$ is uniquely computable from $\{Q^{\M}[\A_i]\}_{i=0}^{m}$ in any causal model $\M \in \mathbb{M}(\G)$.
%consistent with the set of given distributions $\{Q[\A_i]\}_{i=0}^{m}$. 
%$Q[\Y]$ is said to be gID from $(\mathbb{A}, \G)$ if the the causal effect $P_{\mathbf{v}\setminus\y}(\y)$ is gID from $(\mathbb{A}, \G)$.
\end{definition}

Note that the classical ID problem is a special case of the gID problem when $\mathbb{A}=\{\V\}$. 
More than a decade after \cite{shpitser2012identification} proposed a sound and complete algorithm for ID, \cite{kivva2022revisiting} solved the gID problem by showing that gID problem can be reduced to a series of separated ID problems. Formally, they showed the following result. % that can be solved separately.
\begin{theorem}[\cite{kivva2022revisiting}] \label{th: gID main}
    Suppose that $\mathbf{S}\subseteq \V$ is a single c-component in $\G$. Then, $Q[\mathbf{S}]$ is gID from $(\mathbb{A}, \G)$ if and only if there exists $\A \in \mathbb{A}$, such that $\mathbf{S} \subseteq \A$ and $Q[\mathbf{S}]$ is identifiable from $\G[\A]$.
\end{theorem}
%Intuitively this theorem says that if we want to identify something in one c-component then information regarding distributions outside of this c-component may not help. 


\subsection{Conditional generalized identifiability (c-gID) }

In this work, we address an extension of both the conditional ID and gID problems, in which the goal is to identify a conditional causal effect from a set of observational and/or interventional distributions.
\begin{definition}[c-gID]
    Suppose $\X$, $\Y$ and $\Z$ are three disjoint subsets of $\V$ and $\mathbb{A} := \{\A_i\}_{i=0}^m$ is a collection of subsets of $\V$, i.e., $\A_i \subseteq \V$ for all \mbox{$i \in [0:m]$}. 
    The causal effect $P_{\x}(\y|\z)$ is said to be c-gID from $(\mathbb{A}, \G)$ if for any $\x\in \dom{\X}{}$, $\y\in \dom{\Y}{}$, and $\z\in \dom{\Z}{}$, $P_{\x}^{\M}(\y|\z)$ is uniquely computable from $\{Q^{\M}[\A_i]\}_{i=0}^{m}$ in any causal model $\M \in \mathbb{M}(\G)$.
    %consistent with the set of given distributions $\{Q[\A_i]\}_{i=0}^{m}$. 
    %$Q[\Y]$ is said to be c-gID from $(\mathbb{A}, \G)$ if the the causal effect $P_{\mathbf{v}\setminus\y}(\y)$ is c-gID from $(\mathbb{A}, \G)$.
\end{definition}

From this definition, it is clear that c-gID covers both conditional ID and gID. 
Namely, when $\Z=\emptyset$, then c-gID reduces to the gID problem, studied by \cite{lee2019general,kivva2022revisiting}. 
When $\mathbb{A}=\{\V\}$, c-gID becomes the conditional ID problem studied by \cite{shpitser2012identification}. 
{
Both \cite{lee2020general} and \cite{correa2021nested} proposed algorithms for identification problems that can also be used for solving c-gID problem. 
However, the completeness of their algorithms relies on causal models that violate the positivity assumption. For more details, see Appendix B. 
Additionally, they miss discussions on whether this issue in their proofs can be resolved. 
%they do not specify the positivity assumption in their work explicitly and the proof of completeness misses an explanation whether the issues with a positivity assumption can be resolved. For details look into Appendix \ref{sec: app B}.

Next we propose an alternative solution for the c-gID problem under the positivity assumption. 
The soundness and completeness of our solution are based on novel techniques that we believe are important for further generalizations of identifiability problems. 
%allows us to prove the completeness part of the general identifiability of the conditional causal queries. Note that our results is important by itself, since they contain a new ideas that also might be used for the further generalizations of identifiability problems.
}

\section{Main result}

%In the previous section, we recap the main results of the conditional identifiability and g-identifiability problems. Unfortunately, the conditional ID problem is limited to the assumption that only the observation distribution $P(\V)$ is given when a relaxation of this condition is of much interest and importance. As we discussed earlier, \cite{lee2019general} and \cite{kivva2022revisiting} relax this assumption by giving access to multiple distributions of a specific type, but solve the problem only for the unconditional causal effects $P_{\x}(\y)$. In this paper we extend results of \citep{lee2019general, kivva2022revisiting} by solving the problem for any causal effect $P_{\x}(\y|\z)$.
% \begin{definition}(c-gID)
%     Suppose $\X$, $\Y$ and $\Z$ are disjoint subsets of $\V$ and $\mathbb{A} := \{\A_i\}_{i=0}^m$ such that $\A_i \subseteq \V$ for all \mbox{$i \in [0:m]$}. The causal effect $P_{\x}(\y|\z)$ is said to be c-gID from $(\mathbb{A}, \G)$ if for any $\x\in \dom{\X}{}$ and any  $\y\in \dom{\Y}{}$ one can uniquely compute $P_{\x}^{\M}(\y|\z)$ from the set of known distributions $\{Q[\A_i]\}_{i=0}^{m}$ in any causal model $\M \in \mathbb{M}(\G)$ consistent with observations. $Q[\Y]$ is said to be c-gID from $(\mathbb{A}, \G)$ if the the causal effect $P_{\mathbf{v}\setminus\y}(\y)$ is c-gID from $(\mathbb{A}, \G)$.
% \end{definition}
%Note that in case $\Z=\emptyset$ the c-gID definition is exactly the same as the gID definition. Therefore we may use c-gID and gID notation interchangeably for this setting.

The main idea presented in this work for solving the c-gID problem is to construct an equivalent gID problem and then use the results of  \citep{lee2019general,kivva2022revisiting} to solve the equivalent gID problem. 

Suppose $\X, \Y$ and $\Z$ are three disjoint subsets of $\V$ and $\mathbb{A}$ is a collection of subsets of $\V$. 
We are interested in identifying $P_{\x}(\y|\z)$ from $(\G,\mathbb{A})$.
To this end, we define $\W$ to be the maximal subset of $\Z$, such that $P_{\x}(\y|\z)=P_{\x, \w}(\y|\z\setminus\mathbf{w})$. 
 \cite{shpitser2012identification} proved that such a maximal set is unique and it is given by
 \begin{align}\label{eq: wset}
     \W=\bigcup_{W'\in\Z}\big\{W'|\ P_{\x}(\y|\z)=P_{\x, w'}(\y|\z\setminus \{w'\}) \big\}.
 \end{align}
 More precisely, they showed the following result. 
\begin{theorem}[\cite{shpitser2012identification}]\label{th: ships1}
    For a given graph $\G$ and any conditional effect $P_{\x}(\y|\z)$, there exists a unique maximal set $\W=\{ W \in \Z | P_{\x}(\y|\z)=P_{\x, w}(\y|\z\setminus \{w\}) \}$ such that rule 2 of do-calculus applies to $\W$ in $\G$ for $P_{\x}(\y|\z)$.
\end{theorem}

In a special case when $\W=\Z$, it is trivial that the equivalent gID problem boils down to identifying $P_{\x, \z}(\y)$ from $(\G,\mathbb{A})$.
In the next result, we present the form of an equivalent gID problem for a general c-gID problem.

\begin{theorem} \label{th: main}
    Let $\W$ be the maximal subset of $\Z$, such that $P_{\x}(\y|\z)=P_{\x, \w}(\y|\z\setminus\mathbf{w})$. Then, $P_{\x}(\y|\z)$ is c-gID from $(\mathbb{A}, \G)$ if and only if $P_{\x, \mathbf{w}}(\y, \z\setminus\mathbf{w})$ is gID from $(\mathbb{A},\G)$.
\end{theorem}
A sketch of the proof of this Theorem appears in Section \ref{sec:proof}.
This result extends the result of \cite{shpitser2012identification} for conditional ID to c-gID.
Furthermore, Theorem \ref{th: main} allows us to develop an algorithm for solving the c-gID problem.  
Algorithm \ref{algo: c-gID} summarizes the steps of the proposed algorithm.
The algorithm consists of  two main steps:
\\
    \textbf{1.} Find the maximal set $\W\subseteq \Z$ in $\G$, such that $P_{\x}(\y|\z)=P_{\x, \w}(\y|\z\setminus\mathbf{w})$. 
    For this part, we propose function \textbf{MaxBI} presented in Algorithm \ref{algo: c-gID} that is based on Equation \eqref{eq: wset}.
    \\
    \textbf{2.} Run any sound and complete gID algorithm (e.g., the proposed algorithm by \cite{kivva2022revisiting}) for checking the gID of $P_{\x, \mathbf{w}}(\y, \z\setminus\mathbf{w})$ from $(\mathbb{A}, \G)$.

%\cite{shpitser2012identification} presented the following important result that established a bridge between the conditional ID and classical ID problem.


%\begin{theorem}[\cite{shpitser2012identification}]
%    \label{th: c-ID main}
%    Let $\W \subset \Z$ be the maximal set such that $P_{\x}(\y|\z)=P_{\x, \w}(\y|\z\setminus\mathbf{w})$. Then, $P_{\x}(\y|\z)$ is identifiable in $\G$ if and only if $P_{\x, \mathbf{w}}(\y, \z\setminus\mathbf{w})$ is identifiable in $\G$.
%\end{theorem}

%The following theorem links the c-gID problem for the conditional causal effect to the  gID problem for the unconditional causal effect.



%Note that c-gID problem for the unconditional causal effect is equivalent to the gID problem. Therefore the algorithm which determines whether the causal effect $P_{\x}(\y|\z)$ is c-gID follows immediately from the Theorem \ref{th: main}. An algorithm \textbf{C-GID} has a straightforward structure:


\begin{theorem}
    Algorithm \ref{algo: c-gID} is sound and complete. 
\end{theorem}
\begin{proof}
    The result immediately follows from Theorem \ref{th: main} since the gID algorithm is sound and complete.
\end{proof}
\begin{corollary}
    Rules of do-calculus are sound and complete for the c-gID problems.
\end{corollary}

\begin{algorithm}[t]
    \caption{c-gID}
    \label{algo: c-gID}
    \begin{algorithmic}[1]
        \State \textbf{Function C-GID}($\X,\Y, \Z, \mathbb{A}=\{\A_i\}_{i=0}^m ,\G$)
        \State \textbf{Output:} True, if $P_{\x}(\y|\z)$ is c-gID from $(\mathbb{A},\G)$.
        \State $\W \leftarrow \textbf{MaxBI}(\X, \Y, \Z, \G)$
        \State \textbf{Return} $\textbf{GID}(\X\cup\W, \Y \cup (\Z\setminus\W), \mathbb{A}, \G)$
    \end{algorithmic}
    \hrulefill
    \begin{algorithmic}[1]
        \State \textbf{Function MaxBI}($\X, \Y, \Z, \G$)
        \State \textbf{Output:} set $\W$
        \State $\W \leftarrow \emptyset$
        \For{$Z$ in $\Z$}
            \If{$(\Y \independent Z|\X, \Z\setminus\{Z\})_{\G_{\overline{\X}, \underline{\{Z\}}}}$}
                \State $\W \leftarrow \W \cup \{Z\}$
            \EndIf
        \EndFor
        \State \textbf{Return} $\W$
    \end{algorithmic}
\end{algorithm}

\begin{remark}
    Algorithm \ref{algo: c-gID} is polynomial time in the input size.
\end{remark}
In subroutine \textbf{MaxBI}, a conditional independence test is performed for each variable in $\mathbf{Z}$. Subsequently, the problem is reduced to the gID problem, which can be solved in polynomial number of steps by using any of the algorithms proposed in \cite{lee2019general, kivva2022revisiting}.


\section{Proof of Theorem \ref{th: main}}\label{sec:proof}
In this section, we present the main steps of proof of Theorem \ref{th: main}. Further details can be found in Appendix A.
Before going into the details and purely for simpler representation, we define the following notations, $\X' := \X \cup \W$, $\Y' := \Y$, and $\Z' := \Z\setminus\W$.
Note that by the definition of $\W$ and Theorem \ref{th: ships1}, we have
$P_{\x}(\y|\z)=P_{\x'}(\y'|\z') $.


The proof consists of two main parts: sufficiency and necessity. 
In the sufficiency part, which is more straightforward, we show that if $P_{\x'}(\y', \z')$ is gID from $(\mathbb{A}, \G)$, then $P_{\x}(\y|\z)$ is c-gID.
For the reverse, which is much more involved, we prove the contrapositive. 
That is, we show that if $P_{\x'}(\y', \z')$ is not gID from $(\mathbb{A}, \G)$, then $P_{\x}(\y|\z)$ is also not c-gID.

\textbf{Sufficiency:} Suppose $P_{\x'}(\y', \z')$ is gID from $(\mathbb{A}, \G)$, then  the result follows immediately from the Bayes rule and the fact that $P_{\x'}(\y'| \z') = P_{\x}(\y|\z)$, i.e., 
\begin{equation}\label{eq: bayes}
    P_{\x}(\y|\z) = \frac{P_{\x'}(\y', \z')}{\sum_{\y'' \in \dom{\Y}{}}P_{\x'}(\y'', \z')}.
\end{equation}

\textbf{Necessity:} Suppose that $P_{\x'}(\y', \z')$ is not gID from $(\mathbb{A}, \G)$.
To show the non-identifiability of $P_{\x}(\y|\z)=P_{\x'}(\y'|\z')$ from $(\mathbb{A}, \G)$, we construct two causal models $\M_1$ and $\M_2$ from $\mathbb{M}(\G)$, such that for each $i\in [0:m]$ and any $\mathbf{v} \in \dom{\V}{}$,
    \begin{equation*}
        Q^{\M_1}[\mathbf{A}_i](\mathbf{v}) = Q^{\M_2}[\mathbf{A}_i](\mathbf{v}), 
    \end{equation*}
but there exists a triple $(\mathbf{x}',\mathbf{y}',\mathbf{z}') \in \dom{\X'}{}\times \dom{\Y'}{}\times \dom{\Z'}{}$, such that $P_{\x'}^{\M_1}(\y'|\z') \neq P_{\x'}^{\M_2}(\y'|\z').$
      %  \begin{equation*}
     %   P_{\x'}^{\M_1}(\y'|\z') \neq P_{\x'}^{\M_2}(\y'|\z').
    %\end{equation*}

\cite{huang2006identifiability} showed that $P_{\x'}(\y', \z')$ can be written as follows
\begin{equation*}
    P_{\x'}(\y', \z') = \sum_{\mathbf{S} \setminus (\Y' \cup \Z')}Q[\mathbf{S}](\mathbf{v}),
\end{equation*}
where $\mathbf{S} := \Anc{\Y'\cup \Z'}{\G[\V \setminus \X']}$ and the marginalization is over all variables in set $\textbf{S}\setminus (\Y' \cup \Z')$.
Suppose that $\mathbf{S}_1, \mathbf{S}_2, \dots, \mathbf{S}_n$ are the c-components of $\mathbf{S}$ in a graph $\G[\mathbf{S}]$. 
It is known by \cite{huang2006identifiability} that 
$$
Q[\mathbf{S}](\mathbf{v}) =\prod_{i=1}^n Q[\mathbf{S}_i](\mathbf{v}).
$$
Since $P_{\x'}(\y', \z')$ is not gID from $(\mathbb{A}, \G)$, using Proposition 4 and Theorem 1 in \cite{kivva2022revisiting}, we conclude that there exists $i \in [1:n]$, such that for any $j\in[0:m]$, the causal effect $Q[\mathbf{S}_i]$ is not ID from $\G[\A_j]$. 
 %Using the well-known properties of $Q[\cdot]$ function we have 

Analogously, let $\mathbf{S}':=\Anc{\Z'}{\G[\V \setminus \X']}$ and assume $\mathbf{S}'_1, \mathbf{S}'_2, \dots, \mathbf{S}'_{n'}$ are the c-components of $\mathbf{S}'$ in graph $\G[\mathbf{S}']$. 
Then, we have 
\begin{align}\label{eq: denom}
   P_{\x'}(\z') =\sum_{\mathbf{S}'\setminus \Z'} \prod_{i=1}^{n'} Q[\mathbf{S}'_i](\mathbf{v}).
\end{align}
 Consequently, we obtain the following expression
\begin{equation*}
    P_{\x'}(\y'|\z') = \frac{
    \sum_{\mathbf{S}\setminus (\Y'\cup \Z')} \prod_{i=1}^n Q[\mathbf{S}_i](\mathbf{v})
    }{\sum_{\mathbf{S}'\setminus \Z'} \prod_{i=1}^{n'} Q[\mathbf{S}'_i](\mathbf{v})
    }.
\end{equation*}

Note that $\mathbf{S}' \subseteq \mathbf{S}$ and for any $i \in [1:n]$ and $j \in [1:n']$ either $\mathbf{S}'_j$ and $\mathbf{S}_i$ are disjoint or $\mathbf{S}'_j\subseteq\mathbf{S}_i$.

Depending on the relationships between $\{Q[\mathbf{S}_i]\}_{i=1}^n$ and $\{Q[\mathbf{S}'_j]\}_{j=1}^{n'}$ and which parts are gID, in the remainder, we consider two different cases and study each one separately.

\subsection{First case}\label{sec: first case}

In this case, we assume that there exists an index $i\in[1:n]$, such that both $Q[\mathbf{S}_i]$ is not gID from $(\mathbb{A},\G)$ and  $\mathbf{S}_i\neq\mathbf{S}'_j$ for all $j\in [1:n']$. 

If we show that $P_{\x'}(\y'|\z')$ remains not c-gID even after adding additional knowledge about the distributions $\{Q[\mathbf{S}'_j]\}_{j=1}^{n'}$ to $\{Q[\mathbf{A}_k]\}_{k=0}^m$, then we can conclude that $P_{\x'}(\y'|\z')$ is also not c-gID from $(\mathbb{A},\G)$. 
To do so, let \mbox{$\mathbb{A}':=\mathbb{A}\cup(\bigcup_{j=1}^{n'}\{\mathbf{S}'_j\})$}.

%is not c-gID from $(\mathbb{A}', \G)$, where \mbox{$\mathbb{A}'=\mathbb{A}\cup(\bigcup_{j=1}^{n'}\{\mathbf{S}'_i\})$}, then we can conclude the result. 
%Let's consider whether $P_{\x'}(\y'|\z')$ is c-gID from $(\mathbb{A}', \G)$, where \mbox{$\mathbb{A}'=\mathbb{A}\cup(\bigcup_{j=1}^{n'}\{\mathbf{S}'_i\})$}, i.e.  distributions from a set $\{Q[\mathbf{S}'_j]\}_{j \in [1:n']}$ become known too. 

Clearly, $P_{\x'}(\z')$ is gID from $(\mathbb{A}', \G)$ as all the terms in \eqref{eq: denom} are given in $\mathbb{A}'$. 
On the other hand, $Q[\mathbf{S}_i]$ is not gID from $(\mathbb{A}', \G)$. 
This is due to the assumptions of this case, namely that $Q[\mathbf{S}_i]$ is not gID from $(\mathbb{A}, \G)$ and $\mathbf{S}_i\neq \mathbf{S}'_j$ for all $j\in [1:n']$. Since each $\mathbf{S}'_j$ is either disjoint from $\mathbf{S}_i$ or contained in it, the latter assumption gives $\mathbf{S}_i\not\subseteq \mathbf{S}'_j$ for all $j\in [1:n']$, which implies that none of the additional distributions $\{Q[\mathbf{S}'_j]\}_{j=1}^{n'}$ can be used to identify $Q[\mathbf{S}_i]$. 
Since we have established that $Q[\mathbf{S}_i]$ and consequently $P_{\x'}(\y', \z')$ are not gID from $(\mathbb{A}', \G)$, there exist two models $\M_1,\M_2\in\mathbb{M}(\G)$, such that for any $\mathbf{v} \in \dom{\V}{}$,
\begin{align*}
    & Q^{\M_1}[\mathbf{A}_j](\mathbf{v}) = Q^{\M_2}[\mathbf{A}_j](\mathbf{v}),\quad  j\in [0:m],\\
    & Q^{\M_1}[\mathbf{S}'_{j'}](\mathbf{v}) = Q^{\M_2}[\mathbf{S}'_{j'}](\mathbf{v}), \quad j'\in [1:n'],
\end{align*}
and there exists $(\widehat{\mathbf{x}}',\widehat{\mathbf{y}}',\widehat{\mathbf{z}}' ) \in \dom{\X'}{}\times\dom{\Y'}{}\times \dom{\Z'}{}$, such that 
\begin{equation*}
    P_{\widehat{\x}'}^{\M_1}(\widehat{\y}', \widehat{\z}') \neq P_{\widehat{\x}'}^{\M_2}(\widehat{\y}', \widehat{\z}').
\end{equation*}
Because $P_{\x'}(\z')$ is gID from $(\mathbb{A}', \G)$ and from \eqref{eq: bayes}, we have
\begin{equation*}
    P_{\widehat{\x}'}^{\M_1}(\widehat{\y}'|\widehat{\z}') \neq P_{\widehat{\x}'}^{\M_2}(\widehat{\y}'|\widehat{\z}').
\end{equation*}
This implies that $P_{\x'}(\y'|\z')$ is not c-gID from $(\mathbb{A}', \G)$.

\subsection{Second case} \label{sec: second case}
Suppose that there is no $i\in[1:n]$, such that both $Q[\mathbf{S}_i]$ is not gID from $(\mathbb{A}, \G)$ and $\mathbf{S}_i \neq \mathbf{S}'_j$ for all $j \in [1:n']$. 

Without loss of generality, suppose that for some $k\leq n$, all $Q[\mathbf{S}_1], Q[\mathbf{S}_2], \dots, Q[\mathbf{S}_k]$ are not gID from $(\mathbb{A}, \G)$ and the remaining $Q[\mathbf{S}_{k+1}], \dots, Q[\mathbf{S}_n]$ are gID from $(\mathbb{A}, \G)$. 
By the assumption of this case, for each $i\in [1:k]$, there exists $j_i\in[1:n']$ such that $\mathbf{S}_i=\mathbf{S}'_{j_i}$. 
Without loss of generality, suppose that $j_i=i$ for all $i\in [1:k]$, i.e., $\mathbf{S}_1 = \mathbf{S}'_1$, $\dots$, $\mathbf{S}_k = \mathbf{S}'_k$. 
Therefore, $\mathbf{S}_i \subset \mathbf{S}'=\Anc{\Z'}{\G[\V\setminus\X']}$, for all $i \in [1:k]$.

To establish the result, we further consider three different sub-cases:\\
%\begin{enumerate}
 %   \item $\Y' \cap \mathbf{S}_1 \neq \emptyset$. 
    %, w.l.g $i=1$;
    %\item  $\mathbf{S}_1 \subseteq \Z'$.
    %, w.l.g. $i=1$;
   % \item $\mathbf{S}_1 \setminus (\Z'\cup \Y')\neq \emptyset$.
    %, w.l.g. $i=1$.
%\end{enumerate}
1:  $\Y' \cap \mathbf{S}_1 \neq \emptyset$, \
2:  $\mathbf{S}_1 \subseteq \Z'$, and \
3:  $\mathbf{S}_1 \setminus (\Z'\cup \Y')\neq \emptyset$.

\begin{remark}
    Although the above sub-cases may have non-empty intersections, it is easy to see that their union covers all possible scenarios of the second case.
\end{remark}

\subsubsection{Sub-case 1: $\Y'\cap \mathbf{S}_1 \neq \emptyset$} \label{sec: first subcase}
Let $Y$ denote a random variable in $\Y' \cap \mathbf{S}_1$. 
Since $Y$ belongs to $\mathbf{S}_1=\mathbf{S}'_1$,  $Y$ is an ancestor of a variable in $\Z'$ in a graph $\G[\V\setminus \X']$, i.e. $Y \in \Anc{\Z'}{\G[\V \setminus \X']}=\mathbf{S}'$. 
This implies that
\begin{equation}\label{eq: sub_case1}
    \hspace{-.2cm} P_{\x'}(y|\z')\!\!=\! \frac{
        P_{\x'}(y, \z')
    }{
        P_{\x'}(\z')
    } \!=\! \frac{
        \sum_{\mathbf{S}'\setminus (\Z'\cup \{Y\})} \prod_{i=1}^{n'} Q[\mathbf{S}'_i](\mathbf{v})
    }{
        \sum_{\mathbf{S}'\setminus \Z'} \prod_{i=1}^{n'} Q[\mathbf{S}'_i](\mathbf{v})
    }
\end{equation}
We prove this sub-case by showing that $P_{\x'}(y|\z')$ is not c-gID from $(\mathbb{A}, \G)$ and subsequently $P_{\x'}(\y'|\z')$ is not c-gID from $(\mathbb{A}, \G)$. 
To this end, first, we prove \textbf{I:} $Q[\{Y\}| \mathbf{S}'_1\setminus \{Y\}]$ is not c-gID from $(\mathbb{A}, \G_{\underline{\{Y\}}})$, and then use it to show \textbf{II:} $Q[\{Y\}|\mathbf{S}'_1\setminus \{Y\}]$ is not c-gID from $(\mathbb{A}, \G)$. Finally, we show \textbf{III:} $P_{\x'}(y|\z')$ is not c-gID from $(\mathbb{A}, \G)$.


\textbf{I:} 
In graph $\G_{\underline{\{Y\}}}$ and using \eqref{eq: sub_case1}, we obtain
\begin{equation*}
\begin{split}
     Q\big[\{Y\}| \mathbf{S}'\setminus \{Y\}\big] &= \frac{
        \prod_{i=1}^{n'} Q[\mathbf{S}'_i]
    }{
        \sum_{Y}\prod_{i=1}^{n'} Q[\mathbf{S}'_i]
    }  \\
    & = \frac{
        Q[\mathbf{S}_1]
    }{
        \sum_{Y}Q[\mathbf{S}_1] 
    } = Q\big[\{Y\}|\mathbf{S}_1\setminus \{Y\}\big].
\end{split}
\end{equation*}
Recall that $\textbf{S}_1=\textbf{S}'_1$.
The next result shows that $Q[\{Y\}|\mathbf{S}_1\setminus \{Y\}]$ is not c-gID from $(\mathbb{A}, \G_{\underline{\{Y\}}})$ because $Q[\mathbf{S}_1]$ is not gID from $(\mathbb{A}, \G_{\underline{\{Y\}}})$. A proof is presented in Appendix A.

\begin{lemma} \label{lemma: construct models subcase 1}
    Suppose $\mathbf{L}\subseteq \V$ is a single c-component, such that $\mathbf{L} = \mathbf{L}'\cup\mathbf{L}''$ for some disjoint sets $\mathbf{L}'$ and $\mathbf{L}''$. 
    $Q[\mathbf{L}'|\mathbf{L}'']$ is c-gID from $(\mathbb{A}, \G)$ if and only if $Q[\mathbf{L}'\cup \mathbf{L}'']$ is gID from $(\mathbb{A}, \G)$.
\end{lemma}


\textbf{II:}
\cite{shpitser2006identification} showed the following result for a non-identifiable causal effect. 
\begin{lemma}[\cite{shpitser2006identification}]\label{lemma: Q ID}
    Suppose \mbox{$\mathbf{L} \subseteq\A \subseteq \V$}. 
    $Q[\mathbf{L}]$ is not identifiable from $\G[\A]$ if and only if there exists at least one $\mathbf{L}$-rooted c-forest  $\mathcal{F}$ with the set of observed variables $\B$ such that $\mathbf{L}\subsetneq \B \subseteq \A$, the bidirected edges of $\widehat{\F}[\B]$ form a spanning tree, and $\widehat{\mathcal{F}}[\mathbf{L}]$ is a connected graph with respect to the bidirected edges.
\end{lemma}


On the other hand, because $Q[\mathbf{S}_1]$ is not gID from $(\mathbb{A}, \G)$, by the results of \cite{kivva2022revisiting},  $Q[\mathbf{S}_1]$ is not ID from $\G[\A_i]$ for all $i\in [0:m]$. 
Lemma \ref{lemma: Q ID} implies that adding or removing outgoing edges from $Y \in \mathbf{S}_1$ will not affect the non-identifiability of $Q[\mathbf{S}_1]$ from $\G[\A_i]$ for all $i\in [0:m]$. Thus, $Q[\mathbf{S}_1]$ is not gID from $(\mathbb{A}, \G_{\underline{\{Y\}}})$. This means that there exist two causal models $\M_1$ and $\M_2$ from $\mathbb{M}(\G_{\underline{\{Y\}}})$ which are consistent with all known distributions but disagree on the causal effect $Q[\mathbf{S}_1]$, i.e., there exists $\widetilde{\textbf{v}}\in\dom{\V}{}$ such that
$$
Q^{\mathcal{M}_1}[\mathbf{S}_1](\widetilde{\textbf{v}})\neq Q^{\mathcal{M}_2}[\mathbf{S}_1](\widetilde{\textbf{v}}).
$$
Note that $\mathbb{M}(\G_{\underline{\{Y\}}}) \subset \mathbb{M}(\G)$, which in combination with the above result yields that $Q[\{Y\}|\mathbf{S}'\setminus \{Y\}]$ is not c-gID from $(\mathbb{A}, \G)$.

\textbf{III:}
To prove this part, we first present the following result. A proof is provided in Appendix A.
%\begin{itemize}
%    \item $Q[\{Y\}|\mathbf{S}'\setminus \{Y\}]$ is not c-gID from $(\mathbb{A}, \G)$.
%    \item $P_{\x'}(y|\z')$ is not c-gID from $(\mathbb{A}, \G)$.
%\end{itemize}
\begin{lemma}\label{lemma: eliminate var in cond}
    Suppose that $\X$, $\Y$ and $\Z$ are disjoint subsets of $\V$ in graph $\G$ and variables $Z_1 \in \Z$, $Z_2 \in \Y \cup \Z$, such that there is a directed edge from $Z_1$ to $Z_2$ in $\G$. If the causal effect $P_{\x}(\y|\z)$ is not c-gID from $(\mathbb{A}, \G)$, then the causal effect $P_{\x}(\y|\z\setminus\{z_1\})$ is also not c-gID from $(\mathbb{A}, \G)$. 
\end{lemma}

Note that $P_{\x'}(\mathbf{s}') = Q[\mathbf{S}']$ since $\mathbf{S}' = \Anc{\mathbf{S}'}{\G[\V \setminus \X']}$.  Therefore, by the definition of $Q$-notation, we have
\begin{equation*}
    Q\big[\{Y\}|\mathbf{S}'\setminus \{Y\}\big] = P_{\x'}(y|\mathbf{s}'\setminus\{y\}), 
\end{equation*}
which is shown to be not c-gID from $(\mathbb{A}, \G)$ in part \textbf{II}.
In the remainder of this part of our proof, we introduce a set of nodes in $\textbf{S}'$ that satisfy the condition in Lemma \ref{lemma: eliminate var in cond} and thus can be eliminated without affecting the non-identifiability. Below, we show that the nodes in $\mathbf{S}'\setminus (\Z' \cup \{Y\})$ satisfy Lemma \ref{lemma: eliminate var in cond}'s condition and by deleting them, we conclude that $P_{\x'}(y|\z')$ is not c-gID from $(\mathbb{A}, \G)$.

Recall that $\mathbf{S}'=\Anc{\Z'}{\G[\V \setminus \X']}$ which means that from any node in $\mathbf{S}'\setminus (\Z' \cup \{Y\})$, there exists a directed path to a node in $\Z'$ in graph $\G[\V \setminus \X']$. 
We assign a real number to each node in $\mathbf{S}'\setminus (\Z' \cup \{Y\})$, namely, the length of its shortest directed path to the set $\Z'$.
Let $(W_1, W_2, \dots, W_\eta)$ denote the nodes in $\mathbf{S}'\setminus (\Z' \cup \{Y\})$ sorted in a descending order using their assigned numbers. 
Observe that for any $i \in [1:\eta]$, there is a directed edge from $W_i$ to a node in $\{Y\}\cup \Z' \cup \bigcup_{j=i+1}^\eta \{W_j\}$. 
In other words, Lemma \ref{lemma: eliminate var in cond} allows us to delete $W_{i}$ from $\textbf{S}'\setminus\big(\{Y\}\cup\bigcup_{j=1}^{i-1}\{W_j\}\big)$ without violating the non-identifiability. 
%that is why we can apply Lemma \ref{lemma: eliminate var in cond} to the $P_{\x'}(y|\mathbf{s}'\setminus\{y\})$ and eliminate variables $W_1, W_2, \dots, W_\eta$ one by one from $\mathbf{s}'\setminus\{y\}$. The latter proofs that $P_{\x'}(y|\z')$ is not c-gID from $(\mathbb{A}, \G)$ and thus $P_{\x'}(\y'|\z')$ is not c-gID from $(\mathbb{A}, \G)$ as well.

\subsubsection{Sub-case 2: $\mathbf{S}_1 \subseteq \Z'$} \label{sec: second subcase}

In this sub-case, we prove non-identifiability of $P_{\x'}(\y'|\z')$  from $(\mathbb{A}, \G)$ in two steps: \textbf{I:} we introduce a conditional causal effect that is not c-gID from $(\mathbb{A}, \G)$. \textbf{II:} Analogous to the previous sub-case, we apply Lemma \ref{lemma: eliminate var in cond} to prune this causal effect and conclude the result.

\textbf{I:} 
Let $Z'$ be a node in $\mathbf{S}_1$. 
Recall that $\W$ is the maximal set such that $P_{\x,\w}(\y|\z\setminus\w)=P_{\x}(\y|\z)$, which means that we can not apply the second rule of do-calculus to $Z'$ in $\G$ for $P_{\x'}(\y'|\z')$, i.e.,
$$
(\Y' \notindependent Z'|\X',\Z'\setminus\{Z'\})_{\G_{\overline{\X'},\underline{\{Z'\}}}}.
$$
This implies that there exists at least one unblocked backdoor path from $Z'$ to $\Y'$ given $\X'\cup \Z'\setminus\{Z'\}$.
We use $p$ to denote an unblocked path from $Z'$ to $\Y'$ with the least number of colliders. 
Path $p$ satisfies the following properties:\\
    1. If path $p$ contains a chain $W'\rightarrow W \rightarrow W''$ or a fork $W' \leftarrow W \rightarrow W''$, then node $W$ does not belong to any of the sets $\X'$, $\Z'$ or $\Y'$.\\
    2. If path $p$ contains a collider $W'\rightarrow W \leftarrow W''$, then there is a directed path $p_W$ from $W$ to a node in $\Z'$. 
    Moreover, none of the intermediate nodes in the path $p_W$ belong to the set $\X'\cup\Z'\cup\Y'$.\\
    3. Path $p$ does not contain any node from the set $\X'$.

Proofs of the above statements are provided in Appendix A.
Suppose $\mathbf{F}$ is a set of all colliders on the path $p$. 
We use $\mathcal{P}$ to denote a collection of paths $\{p\} \cup \{p_W|W \in \mathbf{F}\}$ and use $\D$ to denote the set of all nodes on the paths in $\mathcal{P}$ excluding the ones in $\Z'$.
Given the above definitions, we are ready to introduce the non-identifiable conditional causal effect in the next result.

\begin{lemma}\label{lemma: construct models subcase 2}
    Let $\mathbf{S}: = \Anc{\Y'\cup \Z'}{\G[\V \setminus \X']}$ and $\D$ denote the set defined above. Then,
    % {
    % \color{red}
    % \begin{equation*} 
    %     P_{\x'}(\dd\cap \y'|\mathbf{s}\setminus \dd)=\frac{
    %     \sum_{\D \setminus \Y'} Q[\mathbf{S}]
    %     }{
    %     \sum_{\D} Q[\mathbf{S}]
    %     }
    % \end{equation*}
    % }
    % {
    % \color{teal}
    \begin{equation}\label{eq: lemma-con}
        P_{\x'}(\dd|\mathbf{s}\setminus \dd)=\frac{ 
        Q[\mathbf{S}]
        }{
        \sum_{\D} Q[\mathbf{S}]
        } = Q[\D|\mathbf{S}\setminus \D]
    \end{equation}
    % }
    is not c-gID from $(\mathbb{A}, \G)$.
\end{lemma}
Proof of this lemma is presented in Appendix A.

\textbf{II:}
 In order to complete the proof of this part, besides Lemma \ref{lemma: eliminate var in cond}, we require the following technical lemmas. 
\begin{lemma} \label{lemma: move cond}
    Suppose that $\X$, $\Y$ and $\Z$ are disjoint subsets of $\V$ and $Z \in \Z$.
    If the conditional causal effect $P_{\x}(\y|\z)$ is not c-gID from $(\mathbb{A}, \G)$, the conditional causal effect 
    $P_{\x}(\y, z|\z\setminus\{z\})$ is not c-gID from $(\mathbb{A}, \G)$ as well. 
\end{lemma}
\begin{proof}
    Proof is by contradiction. Suppose that $P_{\x}(\y, z|\z\setminus\{z\})$ is c-gID from $(\mathbb{A}, \G)$. This implies that $P_{\x}( z|\z\setminus\{z\})$ is also c-gID from $(\mathbb{A}, \G)$. Applying Bayes rule yields
    \begin{equation*}
        P_{\x}(\y|\z) = \frac{P_{\x}(\y, z|\z\setminus\{z\})}{P_{\x}( z|\z\setminus\{z\})},
    \end{equation*}
    which results in c-gID of $P_{\x}(\y|\z)$ from $(\mathbb{A}, \G)$. This contradicts the non-identifiability assumption on $P_{\x}(\y|\z)$.
\end{proof}
% {
% \color{teal}
\begin{lemma}\label{lemma: for_the_main}
Suppose that $\X$, $\Y$ and $\Z$ are disjoint subsets of $\mathbf{V}$ in graph $\G$ and variables $Y_1 \in \Y$, $Y_2 \in \Y \cup \Z$, such that there is a directed edge from $Y_1$ to $Y_2$ in $\G$. If the causal effect $P_{\x}(\y|\z)$ is not c-gID from $(\mathbb{A}, \G)$, then the causal effect $P_{\x}(\y\setminus\{y_1\}|\z)$ is also not c-gID from $(\mathbb{A}, \G)$. 
\end{lemma}
Proof of this lemma is presented in Appendix A.
% }

Recall that the goal is to prune the conditional causal effect in \eqref{eq: lemma-con} to get $P_{\x'}(\y'|\z')$. 
We do this in two pruning steps: first using Lemma \ref{lemma: move cond} and then via Lemmas \ref{lemma: eliminate var in cond}, {\ref{lemma: for_the_main}}.
Let $\Y'' := \Y' \setminus \D$. 
Recall that $\mathbf{S}=\Anc{\Y', \Z'}{\G[\V\setminus\X']}$. 
It is easy to see that $\Y''$ is a subset of $\mathbf{S}\setminus \D$ and thus we can apply Lemma \ref{lemma: move cond} to the causal effect 
% {\color{red}$P_{\x'}(\dd\cap \y'|\mathbf{s}\setminus \{\dd\})$}
% {\color{teal}
$P_{\x'}(\dd|\mathbf{s}\setminus \dd)$
% } 
and conclude that 
% {\color{red}$P_{\x'}(\y'|\mathbf{s}\setminus(\dd \cup \y'))$}{
% \color{teal}
$P_{\x'}(\dd\cup\y'|\mathbf{s}\setminus(\dd \cup \y'))$
% }
is not c-gID from $(\mathbb{A}, \G)$.


To use Lemmas \ref{lemma: eliminate var in cond} and \ref{lemma: for_the_main} for the second pruning step, we use a similar type of argument as in the first sub-case. 
More precisely, using the fact that there exists a directed path from each node in $\mathbf{S} \setminus (\Z' \cup \Y')$ to a node in $\Z' \cup \Y'$, we sort the nodes in
% {
% \color{red}
% $$
% \W':=\mathbf{S} \setminus (\Z' \cup \Y' \cup \D)
% $$
% }
% {
% \color{teal}
$$
\W':=\mathbf{S} \setminus (\Z' \cup \Y')
$$
% }
in a descending order based on the length of their corresponding shortest directed path to the set $\Z' \cup \Y'$.
We denote these sorted nodes by $(W'_1, W'_2, \dots, W'_{\eta'})$. 
Note that for any $i \in [1:\eta']$, there exists a directed edge from $W'_i$ to a node in $\Y' \cup \Z' \cup \{W'_j\}_{j=i+1}^{\eta'}$. 

Since $\W'$ is a subset of $\mathbf{S}\setminus (\Z' \cup \Y')$, similar to the first sub-case, we apply Lemmas \ref{lemma: eliminate var in cond} and \ref{lemma: for_the_main} to the causal effect 
% {\color{red}$P_{\x'}(\y'|\mathbf{s}\setminus(\dd \cup \y'))$}
% {\color{teal}
$P_{\x'}(\dd\cup\y'|\mathbf{s}\setminus(\dd \cup \y'))$
% }
and remove variables $(W'_1, \dots, W'_{\eta'})$ one by one from the $P_{\x'}(\dd\cup\y'|\mathbf{s}\setminus(\dd \cup \y'))$. 
From definitions of $\D$ and $\Z'$, we have $\D\cap\Z'=\emptyset$, which means 
$$
\mathbf{S}\setminus(\W' \cup \Y'\cup \D) = \Z'.
$$
Therefore, after removing all nodes of $\W'$ from the set $\mathbf{S}\setminus(\D \cup \Y')$ without affecting the non-identifiability of 
% {\color{red}$P_{\x'}(\y'|\mathbf{s}\setminus(\dd \cup \y'))$} 
% {\color{teal}
$P_{\x'}(\dd\cup\y'|\mathbf{s}\setminus(\dd \cup \y'))$,
% }
we can claim that $P_{\x'}(\y'|\z')$ is not c-gID from $(\mathbb{A}, \G)$.



\subsubsection{Sub-case 3: $\mathbf{S}_1 \setminus (\Z'\cup\Y')\neq \emptyset$} \label{sec: third subcase}

The proof of this sub-case is quite similar to the second sub-case with a few twists. 
Let $T$ be an arbitrary node in $\mathbf{S}_1 \setminus (\Z'\cup\Y')$. Since $\mathbf{S}_1$ is a subset of the ancestors of $\Z'$, there exists a directed path from $T$ to the set $\Z'$. 
Let $p_T$ denote the shortest directed path from node $T$ to a node $Z'$ in the set $\Z'$. Analogous to the second sub-case, we define $\widetilde{p}$ to be an unblocked backdoor path from $Z'$ to $\Y'$ given $\X', 
\Z'\setminus \{Z'\}$ with the least number of colliders. 
Path $\widetilde{p}$ satisfies the following properties:
\begin{enumerate}
    \item If path $\widetilde{p}$ contains a chain $W'\rightarrow W \rightarrow W''$ or a fork $W' \leftarrow W \rightarrow W''$, then $W$ does not belong to any of the sets $\X'$, $\Z'$, or $\Y'$.
    \item If path $\widetilde{p}$ contains an inverted fork $W'\rightarrow W \leftarrow W''$, then there is a directed path $\widetilde{p}_W$ from the node $W$ to a node in the set $\Z'$. Moreover, none of the intermediate nodes on the path $\widetilde{p}_W$ belongs to the set $\X'\cup\Z'\cup\Y'$.
    \item Path $\widetilde{p}$ does not contain any node from the set $\X'$.
\end{enumerate}

Proofs of the above statements are provided in Appendix A.
Let $\widetilde{\mathbf{F}}$ be the set of all colliders on the path $\widetilde{p}$. 
Define $\widetilde{\mathcal{P}}:= \{\widetilde{p}\}\cup\{p_T\}\cup \{\widetilde{p}_W \mid W \in \widetilde{\mathbf{F}}\}$ and let $\widetilde{\D}$ be the set of all nodes on the paths in $\widetilde{\mathcal{P}}$, excluding the nodes in $\Z'$.

\begin{lemma} \label{lemma: construct models subcase 3}
    Let $\mathbf{S}:=\Anc{\Y', \Z'}{\G[\V \setminus \X']}$ and $\widetilde{\D}$ denote the set defined above. Then,
    % { 
    % \color{red}
    % \begin{equation*}
    %     P_{\x'}(\widetilde{\dd}\cap \y'|\mathbf{s}\setminus \widetilde{\dd})=\frac{
    %     \sum_{\widetilde{\D} \setminus \Y'} Q[\mathbf{S}]
    %     }{
    %     \sum_{\widetilde{\D}} Q[\mathbf{S}]
    %     }
    % \end{equation*}
    % }
    % {
    % \color{teal}
    \begin{equation*}
        P_{\x'}(\widetilde{\dd}|\mathbf{s}\setminus \widetilde{\dd})=\frac{
        Q[\mathbf{S}]
        }{
        \sum_{\widetilde{\D}} Q[\mathbf{S}]
        } = Q[\widetilde{\D}|\mathbf{S}\setminus\widetilde{\D}]
    \end{equation*}
    % }
    is not c-gID from $(\mathbb{A}, \G)$.
\end{lemma}
A proof for this lemma is presented in Appendix A.
The remainder of the proof of this sub-case is identical to the proof of the second sub-case. 
 %Further we use the arguments completely identical as in the Section \ref{sec: second subcase}, and finally we obtain that $P_{\x'}(\y'|\z')$ is not c-gID from $(\mathbb{A}, \G)$. 

In both cases considered in Sections~\ref{sec: first case} and~\ref{sec: second case}, we proved that $P_{\x'}(\y'|\z')$ is not c-gID from $(\mathbb{A}, \G)$. 
Recall that $P_{\x}(\y|\z)=P_{\x'}(\y'|\z')$. 
This concludes the proof of the necessity part of \mbox{Theorem \ref{th: main}}.

\textbf{Summing up:}
Recall that the necessity part required us to show that $P_{\x}(\y|\z)$ is not c-gID from $(\A, \G)$ whenever $P_{\x'}(\y', \z')$ is not gID from $(\A, \G)$.
In the sufficiency part, we had to show that $P_{\x}(\y|\z)$ is c-gID from $(\A, \G)$ whenever $P_{\x'}(\y', \z')$ is gID from $(\A, \G)$. 
These two results together conclude the proof of Theorem \ref{th: main}.

\section{Conclusion}
We considered the problem of identifying a conditional causal effect from a causal graph $\G$ and a particular set of known observational/interventional distributions in the form of $Q$-notations. 
We called this problem c-gID and showed that any c-gID problem has an equivalent gID problem. 
%There exists sound and complete algorithm for solving the latter problem in literature. 
%We proved that an c-gID problem is equivalent to the specific g-ID problem. 
Using this equivalence, we proposed the first sound and complete algorithm for solving the c-gID problem.
%that generalizes both algorithms of \cite{kivva2022revisiting} and \cite{shpitser2012identification}. 

% References
\bibliography{kivva_47}
\end{document}