\label{sec4}
In this section, we extend PoC to vectors of continuous or discrete variables $\boldsymbol{Y}$ and $\boldsymbol{X}$,  % under a totally ordered vector structural causal model.
and we consider PoC for a sub-population with specific covariate information. 
%We additionally consider the subject's covariates in this section, and there are two merits 
Considering the subject's covariates has two benefits: (i) it reveals the heterogeneity of causal effects; and (ii) it weakens the identification assumptions.

%\yuta{There exists many datasets with vector treatment and outcome as the example of  \citep{Hannart2018}.}


\begin{comment}
\begin{figure}[tb]
%\vspace{-0.5cm}
   % \hspace{0.3cm}
    \centering
    \scalebox{1}{
\begin{tikzpicture}
    % x node set with absolute coordinates
    \node[mynode] (x) at (0,0) {$\bf{X}$};
    \node[mynode] (y) at (3,0) {$\bf{Y}$};
    \node[mynode] (u) at (1.5,1) {$\bf{C}$};

    % Directed edge
    \path (x) edge[->] (y);
    \path (x) edge[dotted,<->,bend right] (y);
%    \path (z) edge[->] (x);
    \path (u) edge[->] (y);
    \path (u) edge[dotted,<->,bend left] (y);
    \path (u) edge[->]  (x);
\path (x) edge[dotted,<->,bend left] (u);
\end{tikzpicture}
}
\vspace{-0cm}
    \caption{A causal graph representing ${\cal M}_{T}$.}% Causal graph and two types of non-separability in the IV setting, ${\cal M}_{Z}^{IV}$.}
    \label{DAG1}
    \end{figure}
    \end{comment}

\subsection{Problem Setup}

\begin{comment}
{\bf Total Order.} First, we introduce total order as below.
\begin{definition}[Total Order]
   We call the partial order $\leq_{\text{p}}$ which satisfies the following property total order
    %\begin{center}
%    (Total.) 
    \begin{equation}
        \mathbb{P}({\boldsymbol A}\leq_{\text{p}} {\boldsymbol a}\vee{\boldsymbol a}\leq_{\text{p}} {\boldsymbol A})=1,
    \end{equation}
    for a random variable ${\boldsymbol A}$ and any value ${\boldsymbol a} \in \Omega$,
   % \end{center}
     and denote it simply ``$\preceq$'',and denote totally ordered set $(\Omega,\preceq)$.
\end{definition}
totally ordered set means $\Omega$ is totally ordered set almost surely w.r.t. ${\boldsymbol A}$.
Total orders are also total orders, and the converse does not hold.
\begin{lemma}
    $\mathbb{P}({\boldsymbol A}\preceq {\boldsymbol a})=1-\mathbb{P}({\boldsymbol a}\prec {\boldsymbol A})$
\end{lemma}
\begin{proof}
    Since $1=\mathbb{P}({\boldsymbol A}\preceq{\boldsymbol a}\vee{\boldsymbol a}\preceq{\boldsymbol A})=\mathbb{P}({\boldsymbol A}\preceq{\boldsymbol a})+\mathbb{P}({\boldsymbol a}\preceq{\boldsymbol A})-\mathbb{P}({\boldsymbol a}= {\boldsymbol A})=\mathbb{P}({\boldsymbol A}\preceq{\boldsymbol a})+\mathbb{P}({\boldsymbol a}\prec {\boldsymbol A})$, we have $1-\mathbb{P}({\boldsymbol a}\prec {\boldsymbol A})=\mathbb{P}({\boldsymbol A}\preceq{\boldsymbol a})$.
\end{proof}
\end{comment}

\subsection{Conditional PoC Definition} 
%{\bf Totally ordered vector structural causal model.}  
Let ${\boldsymbol X}$, ${\boldsymbol Y}$, and ${\boldsymbol C}$ be sets of continuous or discrete  treatment variables, %${\boldsymbol Y}$ be a set of %totally ordered
%continuous or discrete  
outcome variables, 
%with $\preceq$
 and %${\boldsymbol C}$ be a set of continuous or discrete  %subject's 
covariates, respectively.  
We assume the following SCM ${\cal M}_{T}$:
%represented by the causal graph in Fig \ref{DAG1}: 
%\jin{I don't think some of the bidirected edges are allowed, maybe we don't draw the causal graph? Or draw a graph satisfying Assumption 5.1 after Assumption 5.1.}\yuta{[Deleted]}
\begin{equation}
\begin{aligned}
    {\boldsymbol Y}:= f_{\boldsymbol Y}({\boldsymbol X},{\boldsymbol C},{\boldsymbol U}), \quad {\boldsymbol X}:= f_{\boldsymbol X}({\boldsymbol C},{\boldsymbol \epsilon}_{\boldsymbol X}), \quad {\boldsymbol C}:= f_{\boldsymbol C}({\boldsymbol \epsilon}_{\boldsymbol C}).
    %& {\text{where $\boldsymbol{U}$, $\boldsymbol{\epsilon}_{\boldsymbol{X}}$, and $\boldsymbol{\epsilon}_{\boldsymbol{X}}$ are mutually independent}.}
    \end{aligned}
\end{equation}
The functions $f_{\boldsymbol Y}$, $f_{\boldsymbol X}$, and $f_{\boldsymbol C}$ are vector-valued functions. 
${\boldsymbol \epsilon}_{\boldsymbol X}$, ${\boldsymbol \epsilon}_{\boldsymbol C}$, and ${\boldsymbol U}$ are latent exogenous variables.
%, and ${\boldsymbol U}$ are totally ordered with $\preceq$. 
We assume that the domains $\Omega_{\boldsymbol Y}$ and $\Omega_{\boldsymbol U}$ are totally ordered sets with $\preceq$. %, or any two elements of $\Omega_{\boldsymbol Y}$ and $\Omega_{\boldsymbol U}$ are comparable with $\preceq$, respectively.}
%\jin{It's unclear what it means to say $\boldsymbol{Y}$ (or $\boldsymbol{U}$) are totally ordered. What does it mean to say a set of random variables $Y_1, Y_2, ...$ are totally ordered?}
%We name ${\cal M}_{T}$ \emph{totally ordered vector SCM}.
%The scalar structural causal model ${\cal M}_S$ is one example of the totally ordered vector SCM ${\cal M}_T$.
%
%We make the following assumption.
%\begin{assumption}[Boundness of PDF]
%\label{TOT2}
%The conditional PDF of ${\boldsymbol U}$ satisfies $\sup_{{\boldsymbol u}\in \Omega_{\boldsymbol U}}\mathfrak{p}({\boldsymbol u}|{\boldsymbol C}={\boldsymbol c})<\infty$ for any ${\boldsymbol c} \in \Omega_{\boldsymbol C}$.
%\end{assumption}
%\begin{assumption}[Absolute Completeness]
%\label{TOT2}
%The probability distribution of $U$ given ${\boldsymbol C}={\boldsymbol c}$ is absolutely continuous for any ${\boldsymbol c} \in \Omega_{\boldsymbol C}$.
    %$\Omega_U$ is totally ordered set, and 
%    $\sup_{u \in \Omega_U}\mathfrak{p}(u)<0$.
%\end{assumption}
\begin{comment}
Potential outcome ${\boldsymbol Y}_{\boldsymbol x}({\boldsymbol C},{\boldsymbol U})$ is defined by $f_{\boldsymbol Y}({\boldsymbol x},{\boldsymbol C},{\boldsymbol U})$ for any ${\boldsymbol x} \in \Omega_{\boldsymbol X}$, and 
exogenous variables ${\boldsymbol \epsilon}_{\boldsymbol X}$ and ${\boldsymbol \epsilon}_{\boldsymbol C}$ are irrelevant to potential outcome ${\boldsymbol Y}_{\boldsymbol x}({\boldsymbol C},{\boldsymbol U})$.
The domains of ${\boldsymbol X}, {\boldsymbol Y}, {\boldsymbol C}, {\boldsymbol U}$, i.e., $\Omega_{\boldsymbol X}, \Omega_{\boldsymbol Y},\Omega_{\boldsymbol C}, \Omega_{\boldsymbol U}$, are subsets of $\mathbb{N}^{d}, \mathbb{Z}^{d}$, or $\mathbb{R}^{d}$ for $d=d_X,d_Y,d_C,d_U$ respectively, where $d_X$, $d_Y$, $d_C$, $d_U$ are the dimensions of ${\boldsymbol X}, {\boldsymbol Y}, {\boldsymbol C}, {\boldsymbol U}$.
\end{comment}
Let the dimensions of ${\boldsymbol X}, {\boldsymbol Y}, {\boldsymbol C}, {\boldsymbol U}$ be $d_X$, $d_Y$, $d_C$, $d_U$, respectively.
%We also assume:
%\begin{assumption}[Conditional exogeneity]
%\label{EXO2}
%    ${\boldsymbol X}$ are independent of ${\boldsymbol U}$ given ${\boldsymbol C}={\boldsymbol c}$ for all ${\boldsymbol c} \in \Omega_{\boldsymbol C}$.
%\end{assumption}

We make the following assumption.
\begin{assumption}[Conditional exogeneity]
\label{ASEXO2}
   %Under SCM ${\cal M}_{T}$, 
   %${\boldsymbol X}$ are independent of ${\boldsymbol U}$ given ${\boldsymbol C}$.
${\boldsymbol Y}_{\boldsymbol x}\indep {\boldsymbol X} | {\boldsymbol C}$ for all $\boldsymbol{x} \in \Omega_{\boldsymbol X}$.   
%   ${\boldsymbol Y}_{\boldsymbol X}$ are independent of ${\boldsymbol X}$ given ${\boldsymbol C}$.
\end{assumption}
Conditional exogeneity implies $\mathbb{P}({\boldsymbol Y}_{\boldsymbol x} \prec {\boldsymbol y}|{\boldsymbol C}={\boldsymbol c})=\mathbb{P}({\boldsymbol Y} \prec {\boldsymbol y}|{\boldsymbol X}={\boldsymbol x},{\boldsymbol C}={\boldsymbol c})$ for any ${\boldsymbol x} \in \Omega_{\boldsymbol X}$, ${\boldsymbol y} \in \Omega_{\boldsymbol Y}$, and ${\boldsymbol c} \in \Omega_{\boldsymbol C}$.


%All proofs in this section can be given by substituting $\leq$ in the previous section to $\preceq$.
We define the multivariate  conditional PoC %for the totally ordered vector SCM 
as below:
\begin{definition}[Conditional PoC]
\label{def41}
For any ${\boldsymbol x}_0,{\boldsymbol x}_1 \in \Omega_{\boldsymbol X}$, ${\boldsymbol y} \in \Omega_{\boldsymbol Y}$, and ${\boldsymbol c} \in \Omega_{\boldsymbol C}$, we define conditional PoC by 
\begin{equation}
\begin{aligned}
    &\text{PNS}({\boldsymbol y};{\boldsymbol x}_0,{\boldsymbol x}_1,{\boldsymbol c})\defeq\mathbb{P}({\boldsymbol Y}_{{\boldsymbol x}_0} \prec {\boldsymbol y} \preceq {\boldsymbol Y}_{{\boldsymbol x}_1}|{\boldsymbol C}={\boldsymbol c}),\\
    &\text{PN}({\boldsymbol y};{\boldsymbol x}_0,{\boldsymbol x}_1,{\boldsymbol c})\defeq\mathbb{P}({\boldsymbol Y}_{{\boldsymbol x}_0} \prec {\boldsymbol y} |{\boldsymbol y} \preceq {\boldsymbol Y},{\boldsymbol X}={\boldsymbol x}_1,{\boldsymbol C}={\boldsymbol c}),\\
    &\text{PS}({\boldsymbol y};{\boldsymbol x}_0,{\boldsymbol x}_1,{\boldsymbol c})\defeq\mathbb{P}({\boldsymbol y} \preceq {\boldsymbol Y}_{{\boldsymbol x}_1} |{\boldsymbol Y} \prec {\boldsymbol y},{\boldsymbol X}={\boldsymbol x}_0,{\boldsymbol C}={\boldsymbol c}).
\end{aligned}
\end{equation}
\end{definition}
$\text{PNS}({\boldsymbol y};{\boldsymbol x}_0,{\boldsymbol x}_1,{\boldsymbol c})$ provides a measure of the sufficiency and necessity of ${\boldsymbol x}_1$ w.r.t. ${\boldsymbol x}_0$ to produce ${\boldsymbol Y}\succeq {\boldsymbol y}$ given ${\boldsymbol C}={\boldsymbol c}$.
$\text{PN}({\boldsymbol y};{\boldsymbol x}_0,{\boldsymbol x}_1,{\boldsymbol c})$ provides a measure of the necessity of ${\boldsymbol x}_1$ w.r.t. ${\boldsymbol x}_0$ to produce ${\boldsymbol Y}\succeq {\boldsymbol y}$ given ${\boldsymbol C}={\boldsymbol c}$.
$\text{PS}({\boldsymbol y};{\boldsymbol x}_0,{\boldsymbol x}_1,{\boldsymbol c})$ provides a measure of the sufficiency of ${\boldsymbol x}_1$ w.r.t. ${\boldsymbol x}_0$ to produce ${\boldsymbol Y}\succeq {\boldsymbol y}$ given ${\boldsymbol C}={\boldsymbol c}$.

%We note that the above definition of PoC depends on the choice of total order. 
%\jin{Again, any prior work on defining multivariate or conditional PoC?}
%\yuta{
\citet{Hannart2018} studied multivariate PNS where the outcomes are the space-time vectorial random variables of the Earth's surface temperatures. 
\citet{Li2019,Li2022,Li2022b} considered conditional PNS over discrete variables in their benefit function and called it z-specific PNS, but their definition of PNS is different from ours and is not suitable for continuous variables.   %in their objective function, called benefit function, and \citet{Li2019} call it z-specific PNS. They give bound and identification theorem of conditional PNS for binary or discrete treatment and outcome.





\begin{comment}
{\bf Lexicographical Order.}
We have the following useful lemmas for calculating conditional CDF, especially of discrete variables, with lexicographical order $\preceq_{\text{lexi}}$.
\begin{lemma}
    $\mathbb{P}({\boldsymbol Y} \succ_{\text{lexi}} {\boldsymbol y}|{\boldsymbol X}={\boldsymbol x})$ is equal to
    \begin{equation}
    \label{LEX1}
    \begin{aligned}
        &\mathbb{P}(Y^1 > y^1|{\boldsymbol X}={\boldsymbol x})\\
        &\hspace{0.25cm}+\mathbb{P}(Y^1=y^1,Y^2 > y^2|{\boldsymbol X}={\boldsymbol x})+\ldots\\
        &\hspace{0.5cm}+\mathbb{P}(Y^1=y^1,\ldots,Y^{d_Y} >y^{d_Y}|{\boldsymbol X}={\boldsymbol x}),
        \end{aligned}
    \end{equation}
    or
    \begin{equation}
    \label{LEX2}
    \begin{aligned}
        &\mathbb{P}(Y^1 > y^1|{\boldsymbol X}={\boldsymbol x})\\
        &\hspace{0.25cm}+\mathbb{P}(Y^2 > y^2|Y^1=y^1,{\boldsymbol X}={\boldsymbol x})\\
        &\hspace{0.5cm}\times \mathbb{P}(Y^1=y^1|{\boldsymbol X}={\boldsymbol x})+\ldots\\
        &+\mathbb{P}(Y^{d_Y} >y^{d_Y}|Y^1=y^1,\ldots,Y^{d_Y-1} =y^{d_Y-1},{\boldsymbol X}={\boldsymbol x})\\
        &\hspace{0.5cm}\times\mathbb{P}(Y^1=y^1,\ldots,Y^{d_Y-1} =y^{d_Y-1}|{\boldsymbol X}={\boldsymbol x}).
        \end{aligned}
    \end{equation}
\end{lemma}

\begin{proof}
    From the definition of lexicographical order, $\mathbb{P}({\boldsymbol Y} \succ_{\text{lexi}} {\boldsymbol y}|{\boldsymbol X}={\boldsymbol x})=\mathbb{P}(Y^1>y^1|{\boldsymbol X}={\boldsymbol x})+\mathbb{P}(Y^1=y^1,\overline{{\boldsymbol Y}^{2}} \succ_{\text{lexi}} \overline{{\boldsymbol y}^{2}} |{\boldsymbol X}={\boldsymbol x})$, where $\overline{{\boldsymbol Y}^{2}}=(Y^2,\ldots,Y^{d_Y})$ and $\overline{{\boldsymbol y}^{2}}=(y^2,\ldots,y^{d_Y})$. 
    Recursively, Eq. (\ref{LEX1}) holds.
    Eq. (\ref{LEX2}) holds from the Bayesian formula.
\end{proof}

\begin{lemma}
    If the $d$-th outcome $Y^d$ satisfies $\mathfrak{p}(Y^d=y^d|{\boldsymbol X}={\boldsymbol x})<\infty$ for all $y^d \in \Omega_{Y^d}$,
   then
    \begin{equation}
    \begin{aligned}
    &\mathbb{P}({\boldsymbol Y} \succ_{\text{lexi}} {\boldsymbol y}|{\boldsymbol X}={\boldsymbol x})\\
        &=\mathbb{P}(Y^1 > y^1|{\boldsymbol X}={\boldsymbol x})\\
        &\hspace{0.25cm}+\mathbb{P}(Y^1=y^1,Y^2 > y^2|{\boldsymbol X}={\boldsymbol x})+\ldots\\
        &\hspace{0.5cm}+\mathbb{P}(Y^1=y^1,\ldots,Y^{d} >y^{d}|{\boldsymbol X}={\boldsymbol x})
        \end{aligned}
    \end{equation}
    holds.
\end{lemma}

\begin{proof}
    This is because the probability $\mathbb{P}(Y^d=y^d|{\boldsymbol X}={\boldsymbol x})$ is equal to $0$.
\end{proof}
\end{comment}



\subsection{Identification Assumptions and Theorem}




We generalize Assumptions \ref{MONO_A}, \ref{AS1}, and \ref{SAS1}
%and \ref{RP1}  
to multivariate outcomes and treatments with covariates as below, respectively.
%\yuta{We denote the structural function from ${\boldsymbol X}$ to ${\boldsymbol Y}$ by $f_{\boldsymbol Y}({\boldsymbol X},{\boldsymbol U})$.}
\begin{assumption}[Conditional monotonicity over ${\boldsymbol Y}_{{\boldsymbol x}}$]
\label{MONO2}
    The potential outcomes ${\boldsymbol Y}_{{\boldsymbol x}}$ satisfy:  for any ${\boldsymbol x}_0,{\boldsymbol x}_1 \in \Omega_{\boldsymbol X}$, ${\boldsymbol y} \in \Omega_{\boldsymbol Y}$, and ${\boldsymbol c} \in \Omega_{\boldsymbol C}$, either $\mathbb{P}({\boldsymbol Y}_{{\boldsymbol x}_0}\prec {\boldsymbol y} \preceq {\boldsymbol Y}_{{\boldsymbol x}_1}|{\boldsymbol C}={\boldsymbol c})=0$ or $\mathbb{P}({\boldsymbol Y}_{{\boldsymbol x}_1}\prec {\boldsymbol y} \preceq {\boldsymbol Y}_{{\boldsymbol x}_0}|{\boldsymbol C}={\boldsymbol c})=0$.
\end{assumption}
This assumption extends Assumption~\ref{MONO_A} to totally ordered vector variables.
%\yuta{This assumption represents the monotonicity of vector variables with the total order $\preceq$ on $\Omega_{\boldsymbol Y}$ given ${\boldsymbol C}={\boldsymbol c}$.}

\begin{assumption}[Conditional monotonicity over  $f_{\boldsymbol Y}$]
\label{AS2}
{The function $f_{\boldsymbol Y}({\boldsymbol x},{\boldsymbol c},{\boldsymbol U})$ is either (i) monotonic increasing on ${\boldsymbol U}$ with $\preceq$ for all ${\boldsymbol x} \in \Omega_{\boldsymbol X}$ and ${\boldsymbol c} \in \Omega_{\boldsymbol C}$ almost surely w.r.t. $\mathbb{P}_{\boldsymbol U}$, or (ii) monotonic decreasing on ${\boldsymbol U}$ with $\preceq$ for all ${\boldsymbol x} \in \Omega_{\boldsymbol X}$ and ${\boldsymbol c} \in \Omega_{\boldsymbol C}$ 
almost surely w.r.t. $\mathbb{P}_{\boldsymbol U}$.} 
\end{assumption}
{This assumption says that the function $f_{\boldsymbol Y}$ either preserves or reverses the total order from $\Omega_{\boldsymbol U}$ to $\Omega_{\boldsymbol Y}$ given ${\boldsymbol X}={\boldsymbol x}, {\boldsymbol C}={\boldsymbol c}$.}

\begin{assumption}[Strict conditional  monotonicity over $f_{\boldsymbol Y}$]
\label{SAS2}
{The function $f_{\boldsymbol Y}({\boldsymbol x},{\boldsymbol c},{\boldsymbol U})$ is either (i) strictly monotonic increasing on ${\boldsymbol U}$ with $\preceq$ for all ${\boldsymbol x} \in \Omega_{\boldsymbol X}$ and ${\boldsymbol c} \in \Omega_{\boldsymbol C}$
almost surely w.r.t. $\mathbb{P}_{\boldsymbol U}$ with $\sup_{{\boldsymbol u} \in \Omega_{\boldsymbol U}}\mathfrak{p}({\boldsymbol u}|{\boldsymbol C}={\boldsymbol c})<\infty$ for all ${\boldsymbol c} \in \Omega_{\boldsymbol C}$, or (ii) strictly monotonic decreasing on ${\boldsymbol U}$ with $\preceq$ for all ${\boldsymbol x} \in \Omega_{\boldsymbol X}$ and ${\boldsymbol c} \in \Omega_{\boldsymbol C}$
almost surely w.r.t. $\mathbb{P}_{\boldsymbol U}$ with $\sup_{{\boldsymbol u} \in \Omega_{\boldsymbol U}}\mathfrak{p}({\boldsymbol u}|{\boldsymbol C}={\boldsymbol c})<\infty$ for all ${\boldsymbol c} \in \Omega_{\boldsymbol C}$.} 
\end{assumption}
This assumption implies that there exists a one-to-one mapping from $\Omega_{\boldsymbol U}$ to $\Omega_{\boldsymbol Y}$ given ${\boldsymbol X}={\boldsymbol x}, {\boldsymbol C}={\boldsymbol c}$.

Assumptions \ref{MONO2}, \ref{AS2}, and \ref{SAS2} reduce to Assumptions \ref{MONO_A}, \ref{AS1}, and \ref{SAS1} under SCM ${\cal M}_{S}$, respectively.
\begin{comment}
\begin{assumption}[Conditional rank preservation]
\label{RP2}
The potential outcomes ${\boldsymbol Y}_{{\boldsymbol x}}$ satisfy (i) the uniqueness of ranking, i.e, ${\boldsymbol Y}_{\boldsymbol x}({\boldsymbol c},{\boldsymbol u}_0) \ne {\boldsymbol Y}_{\boldsymbol x}({\boldsymbol c},{\boldsymbol u}_1)$ for all ${\boldsymbol x} \in \Omega_{\boldsymbol X}$, ${\boldsymbol c}\in \Omega_{\boldsymbol C}$, and $\mathbb{P}_{\boldsymbol U}$-almost every ${\boldsymbol u}_0,{\boldsymbol u}_1 \in \Omega_{\boldsymbol U}$ such that ${\boldsymbol u}_0 \ne {\boldsymbol u}_1$, and (ii) the rank preservation, i.e,  ${\boldsymbol Y}_{{\boldsymbol x}_0}({\boldsymbol c},{\boldsymbol u}_0) \prec {\boldsymbol Y}_{{\boldsymbol x}_0}({\boldsymbol c},{\boldsymbol u}_1) \Rightarrow  {\boldsymbol Y}_{{\boldsymbol x}_1}({\boldsymbol c},{\boldsymbol u}_0) \prec {\boldsymbol Y}_{{\boldsymbol x}_1}({\boldsymbol c},{\boldsymbol u}_1)$
and
${\boldsymbol Y}_{{\boldsymbol x}_0}({\boldsymbol c},{\boldsymbol u}_0) \succ {\boldsymbol Y}_{{\boldsymbol x}_0}({\boldsymbol c},{\boldsymbol u}_1) \Rightarrow  {\boldsymbol Y}_{{\boldsymbol x}_1}({\boldsymbol c},{\boldsymbol u}_0) \succ {\boldsymbol Y}_{{\boldsymbol x}_1}({\boldsymbol c},{\boldsymbol u}_1)$
hold for all ${\boldsymbol x}_0,{\boldsymbol x}_1 \in \Omega_{\boldsymbol X}$, ${\boldsymbol c}\in \Omega_{\boldsymbol C}$, and $\mathbb{P}_{\boldsymbol U}$-almost every ${\boldsymbol u}_0,{\boldsymbol u}_1 \in \Omega_{\boldsymbol U}$ such that ${\boldsymbol u}_0 \ne {\boldsymbol u}_1$. %\jin{"either or" means you just need one of the conditions holds. I'd think you need both to hold? These are two independent conditions. Same question for Assumption 4.6.} \yuta{I require both, and fixed it.}
\end{assumption}
\end{comment}
We establish the relationships between Assumptions \ref{MONO2}, \ref{AS2}, and \ref{SAS2} under the following assumption: 
\begin{assumption}
\label{SUP2}
Potential outcome ${\boldsymbol Y}_{\boldsymbol x}$ has conditional PDF $p_{{\boldsymbol Y}_{\boldsymbol x}|{\boldsymbol C}={\boldsymbol c}}$ given ${\boldsymbol C}={\boldsymbol c}$ for each ${\boldsymbol x} \in \Omega_{\boldsymbol X}$ and ${\boldsymbol c} \in \Omega_{\boldsymbol C}$, and its support $\{{\boldsymbol y} \in \Omega_{\boldsymbol Y}: p_{{\boldsymbol Y}_{\boldsymbol x}|{\boldsymbol C}={\boldsymbol c}}({\boldsymbol y}) \ne0 \}$ is the same
%$[-\infty,\infty]^{d_Y}$ 
for each ${\boldsymbol x} \in \Omega_{\boldsymbol X}$ and ${\boldsymbol c} \in \Omega_{\boldsymbol C}$.
\end{assumption}
This assumption is similar to Assumption~\ref{SUP1} and is reasonable for continuous variables.
For example, the multivariate linear regression model with Gaussian noise in \citet{Hannart2018} satisfies this assumption.
%We have the following results: 
\begin{theorem}
\label{prop1}
Under SCM ${\cal M}_{T}$ and Assumption \ref{SUP2}, 
Assumptions \ref{MONO2} and \ref{AS2} are equivalent, and 
%Assumptions \ref{SAS2} and \ref{RP2} are equivalent. 
{Assumption~\ref{SAS2} is a strictly stronger requirement than Assumption~\ref{AS2}.}
\end{theorem}
%This is the same relationship of Assumption \ref{MONO_A}, \ref{AS1}, \ref{SAS1}, and \ref{RP1}.
For example, %if $d_U=d_Y$ with the same total order, then 
the additive noise model ${\boldsymbol Y}:=f_{\boldsymbol Y}({\boldsymbol X},{\boldsymbol C})+{\boldsymbol U}$ satisfies all Assumptions \ref{MONO2}, \ref{AS2}, and \ref{SAS2}. %and \ref{RP2}.



We denote the conditional CDF by
\begin{equation}
\begin{aligned}
\rho({\boldsymbol y};{\boldsymbol x},{\boldsymbol c})\defeq\mathbb{P}({\boldsymbol Y}\prec {\boldsymbol y}|{\boldsymbol X}={\boldsymbol x},{\boldsymbol C}={\boldsymbol c})%\\
%&=\mathbb{P}({\boldsymbol Y}\preceq {\boldsymbol y}|{\boldsymbol X}={\boldsymbol x},{\boldsymbol C}={\boldsymbol c})
\end{aligned}
\end{equation}
for all ${\boldsymbol y} \in \Omega_{\boldsymbol Y}$, ${\boldsymbol x} \in \Omega_{\boldsymbol X}$, and ${\boldsymbol c} \in \Omega_{\boldsymbol C}$.
%We have $\rho({\boldsymbol y};{\boldsymbol x},{\boldsymbol c})=\mathbb{P}({\boldsymbol Y}\prec {\boldsymbol y}|{\boldsymbol X}={\boldsymbol x},{\boldsymbol C}={\boldsymbol c})$ for all ${\boldsymbol y} \in \Omega_{\boldsymbol Y}$, ${\boldsymbol x} \in \Omega_{\boldsymbol X}$, and ${\boldsymbol c} \in \Omega_{\boldsymbol C}$. 
%We assume
%\begin{assumption}[Positivity]
%\label{POS2}
%    $\rho({\boldsymbol y};{\boldsymbol x}_1,{\boldsymbol c})<1$ and $0<\rho({\boldsymbol y};{\boldsymbol x}_0,{\boldsymbol c})$ for all ${\boldsymbol x}_0, {\boldsymbol x}_1 \in \Omega_{\boldsymbol X}$, ${\boldsymbol c} \in \Omega_{\boldsymbol C}$ and ${\boldsymbol y} \in \Omega_{\boldsymbol Y}$ such that.
%\end{assumption}
%An order topology is a certain topology that can be defined on any totally ordered set.
Then, we have the following theorem: 
\begin{theorem}[Identification of conditional PoC]
\label{THEO41}
{Under SCM ${\cal M}_{T}$ and}  
Assumptions \ref{ASEXO2}, \ref{MONO2} (or \ref{AS2}, \ref{SAS2}), and \ref{SUP2}, 
PNS, PN, and PS are  identifiable by
\begin{equation}
    \begin{aligned}
    &\text{PNS}({\boldsymbol y};{\boldsymbol x}_0,{\boldsymbol x}_1,{\boldsymbol c})=\max\{\rho({\boldsymbol y};{\boldsymbol x}_0,{\boldsymbol c})-\rho({\boldsymbol y};{\boldsymbol x}_1,{\boldsymbol c}),0\},\\
    &\text{PN}({\boldsymbol y};{\boldsymbol x}_0,{\boldsymbol x}_1,{\boldsymbol c})=\max\left\{\frac{\rho({\boldsymbol y};{\boldsymbol x}_0,{\boldsymbol c})-\rho({\boldsymbol y};{\boldsymbol x}_1,{\boldsymbol c})}{1-\rho({\boldsymbol y};{\boldsymbol x}_1,{\boldsymbol c})},0\right\},\\
    &\text{PS}({\boldsymbol y};{\boldsymbol x}_0,{\boldsymbol x}_1,{\boldsymbol c})=\max\left\{\frac{\rho({\boldsymbol y};{\boldsymbol x}_0,{\boldsymbol c})-\rho({\boldsymbol y};{\boldsymbol x}_1,{\boldsymbol c})}{\rho({\boldsymbol y};{\boldsymbol x}_0,{\boldsymbol c})},0\right\}
    \end{aligned}
\end{equation}
for any ${\boldsymbol x}_0,{\boldsymbol x}_1 \in \Omega_{\boldsymbol X}$, ${\boldsymbol c} \in \Omega_{\boldsymbol C}$, and ${\boldsymbol y} \in \Omega_{\boldsymbol Y}$ such that $\rho({\boldsymbol y};{\boldsymbol x}_1,{\boldsymbol c})<1$ and $\rho({\boldsymbol y};{\boldsymbol x}_0,{\boldsymbol c})>0$.
\end{theorem}

%This theorem also consists of conditional CDF.

{\bf Remark.}
An unconditional PoC, such as $\text{PNS}({\boldsymbol y};{\boldsymbol x}_0,{\boldsymbol x}_1)$, can be computed by marginalizing the conditional PoC over ${\boldsymbol C}$:
\begin{equation}
    \text{PNS}({\boldsymbol y};{\boldsymbol x}_0,{\boldsymbol x}_1)=\int_{{\boldsymbol c} \in \Omega_{\boldsymbol C}} \text{PNS}({\boldsymbol y};{\boldsymbol x}_0,{\boldsymbol x}_1,{\boldsymbol c}) \mathfrak{p}({\boldsymbol c})\,d{\boldsymbol c},
\end{equation}
where $\mathfrak{p}({\boldsymbol c})$ is the PDF of ${\boldsymbol C}$.
Then, we can estimate it under weaker conditions than required by Theorem \ref{THEO1} % using observational data, e.g., the backdoor criterion \citep{Pearl09}.  \jin{Please provide justification/proof for this claim. You are essentially claiming Assumption 4.1 is weaker than Assumption 3.1.}
since the conditional versions of the assumptions required by Theorem~\ref{THEO41} are weaker. 

%\yuta{Exogeneity for vector treatment and outcome is "${\boldsymbol Y}_{\boldsymbol x}$ is independent of ${\boldsymbol X}$ for all ${\boldsymbol x} \in \Omega_{\boldsymbol X}$". Exogeneity implies conditional exogeneity, Assumption \ref{ASEXO2}, and vice versa does not hold.}