
%First, we introduce a new concept \emph{conditional average partial causal effect (CAPCE)} to capture the heterogeneous causal effects of a continuous treatment.
First, we formally define \emph{conditional average partial causal effect (CAPCE)} to capture the heterogeneous causal effects of a continuous treatment. 
Then we present a theorem for identifying CAPCE under the IV model. 
\begin{definition}[CAPCE]
$\displaystyle \mathbb{E}[\partial_x Y_x|{\boldsymbol w}]:=\mathbb{E}_{\boldsymbol U}\left[\frac{\partial}{\partial x} Y_{x}({\boldsymbol U})\Big|{\boldsymbol W}={\boldsymbol w}\right]$.
\end{definition}
%{We also denote $\mathbb{E}[Y_x|{\boldsymbol w}]:=\mathbb{E}_{\boldsymbol U}[ Y_{x}({\boldsymbol U})|{\boldsymbol W}={\boldsymbol w}]$.}
CAPCE is a real-valued function of $x \in \Omega_X$ and ${\boldsymbol w} \in \Omega_{\boldsymbol W}$.
It is a generalization of CACE for continuous treatment. %conditional average causal effect $\mathbb{E}[Y_1-Y_0|{\boldsymbol W}={\boldsymbol w}]$ \citep{Athey2016,Kunzel2019}.
It is also a generalization of APCE $\mathbb{E}[\partial_x Y_x]$ 
 %$:=\mathbb{E}_{\boldsymbol U}[\frac{\partial}{\partial x}Y_x({\boldsymbol U})]$ have been introduced by 
 %\citep{Chamberlain1984,Wooldridge2005,Graham2012} 
 to represent heterogeneous causal effects. 
%The quantity represented by CAPCE has been implicitly studied in the literature (e.g. \citep{Galagate2016}) but is never formally defined to the best of our knowledge, and it is not tied to the IV analysis. Still most existing works have focused on $\mathbb{E}[Y_x|{\boldsymbol W}={\boldsymbol w}]$. One contribution of this work is showing that under the IV model, CAPCE is identifiable under a weaker separability assumption than required by $\mathbb{E}[Y_x|{\boldsymbol w}]$. We present theoretical and empirical results to show the usefulness of formally defining and investigating CAPCE and the merits of estimating CAPCE on behalf of $\mathbb{E}[Y_x|{\boldsymbol w}]$. Granted, given an estimated $\mathbb{E}[Y_x|{\boldsymbol w}]$, one can compute its derivative to obtain CAPCE, but not the other way around. However, in practice,  the causal effect from a reference point (e.g., CACE) is often the main interest, and CAPCE is enough to compute causal effects from a reference point: $\mathbb{E}[Y_{x''}-Y_{x'}|{\boldsymbol w}]=\int_{x'}^{x''} \mathbb{E}[\partial_x Y_x|{\boldsymbol w}]$.
%
%\begin{definition}[Potential APCE]
%$\mathbb{E}_{\boldsymbol U}[\partial_x Y_{x,{\boldsymbol w}}]$. \jin{this definition is not needed, delete}
%\end{definition}
%
%We can obtain the conditional average causal effects of two treatments by \yuta{$\mathbb{E}[Y_{x''}-Y_{x'}|{\boldsymbol W}={\boldsymbol w}]=\int_{x'}^{x''}\mathbb{E}[\partial_x Y_{x}|{\boldsymbol W}={\boldsymbol w}]dx$}. 
%Potential APCE relates to the multiple treatments effect \citep{Imai2004,Wang2019}.
%We can obtain $\mathbb{E}[Y_{x'',{\boldsymbol w}''}-Y_{x',{\boldsymbol w}''}]-\mathbb{E}[Y_{x'',{\boldsymbol w}'}-Y_{x',{\boldsymbol w}'}]$, which is called additive interaction effect between two treatments (${\boldsymbol x''}$, ${\boldsymbol x'}$) and other factor of outcome $({\boldsymbol w''},{\boldsymbol w'})$ \citep{VanderWeele2014}, through potential APCE.
%
%{\bf Identification theorems of CAPCE.} 
Next, we present conditions for identifying CAPCE under the IV model. %using IVs from distributions $\mathbb{P}(X, {\boldsymbol W}|Z)$ and $\mathbb{P}( Y |Z)$.
%$\mathbb{P}(X, Y ,{\boldsymbol W}|Z)$.
%We assume the following conditions:
\begin{assumption}
Under the SCM ${\cal M}_{IV}$, given $ {\boldsymbol W}={\boldsymbol w}$, 
\label{AS1}
\begin{enumerate}
\vspace{-0cm}
  \setlength{\parskip}{0.cm}
  \setlength{\itemsep}{0.25cm}
   \item Instrument relevance: IV $Z$ has a causal effect on $X$, i.e., $\mathbb{E}[X_{z}]$ is not a constant function of $z$.
   %the instrument $Z$ has a causal effect on $X$, i.e., $X_{z}$ is not a constant function by varying $z$ for each subject.}
    \item %the potential outcome 
    $Y_{x}$ is differentiable and bounded in $x \in \Omega_X$.
    \item %the potential outcome 
    %$X_{z}$ is not a zero function \yuta{by varying $z$ for each subjects}, and 
$\displaystyle \sup_{x,z,{\boldsymbol w}}\mathfrak{p}(X_{z}=x|{\boldsymbol W}={\boldsymbol w}) < \infty$. 
    %, where $p$ denotes the density function.
    \item {The set of distributions $\mathbb{P}(X|Z=z,{\boldsymbol W}={\boldsymbol w})$} induced by varying $z$ is a complete set.
\vspace{-0.2cm}
\end{enumerate}
\end{assumption}
%These assumptions are related to the conditions for identifying APCE \citep{Wong2022}, and 
%These assumptions are needed just to set up the model and are not restrictive. 
The first assumption is standard for the IV setting.
The second assumption ensures that the partial causal effect $\partial_x Y_x$ is defined for every subject, so that CAPCE exists for all $x \in \Omega_X$ and ${\boldsymbol w} \in \Omega_{\boldsymbol W}$. 
The third assumption means that the conditional density function of $X_{z}$ given ${\boldsymbol W}={\boldsymbol w}$ is bounded.
The fourth assumption implies that $h$ is a zero function if $\mathbb{E}[h(X)|Z=z,{\boldsymbol W}={\boldsymbol w}]$ does not depend on $z$ for all ${\boldsymbol w} \in \Omega_{\boldsymbol W}$; this completeness condition is also assumed by \citet{Whitney2003} for identifying $\mathbb{E}[Y_{x}|{\boldsymbol w}]$.

%Next, we assume the following condition:
\begin{assumption}[Separability on $X$]
\label{AS2}
$f_Y(X,{\boldsymbol W},{\boldsymbol H},{\boldsymbol u}_Y)$ is in the form of a summation of two functions over $X$ and ${\boldsymbol H}$ separately, i.e., 
    $f_Y(X,{\boldsymbol W},{\boldsymbol H},{\boldsymbol u}_Y)=f_Y^1(X,{\boldsymbol W},{\boldsymbol u}_Y)+f_Y^2({\boldsymbol W},{\boldsymbol H},{\boldsymbol u}_Y)$.
\end{assumption}
%Assumption \ref{AS2} states that there cannot be any interactions between the unmeasured confounders (${\boldsymbol H}$) and treatment ($X$) unless they are fully mediated by the observed covariates (${\boldsymbol W}$). 
%This assumption is weaker than the assumptions in (\ref{eq-sep}) needed by existing work sieve NTSLS \citep{Whitney2003}, PTSLS \citep{Wooldridge2010}, and Kernel IV  \citep{Singh2019} for identifying $\mathbb{E}[Y_{x}|{\boldsymbol w}]$, which require both covariates $\boldsymbol{W}$ and the treatment $X$ to be separable from the unmeasured confounders $\boldsymbol{H}$. 
%It is sufficient that only the treatment variable satisfies separability with all unmeasured confounders. 
%\jin{Will NTSLS/PTSLS work also under Assumption 1? or Is the stronger separability assumption necessary for NTSLS? Is it because  identifying $E[Y_x|W]$ needs stronger assumption than identifying $E[\partial_x Y_x|E]$?} 
%NTSLS, PTSLS and Kernel IV work under stricter separability, $f_Y(X,{\boldsymbol W},{\boldsymbol H},{\boldsymbol u}_Y)=f_Y^1(X,{\boldsymbol W},{\boldsymbol u}_Y)+f_Y^2({\boldsymbol H},{\boldsymbol u}_Y)$ and $\mathbb{E}[f_Y^2({\boldsymbol H},{\boldsymbol u}_Y)|Z]=0$. since all covariates and the treatment must be separable from all unmeasured confounders.
%The following theorem holds:
%\begin{proposition}
%\jin{Give SCM ${\cal M}_{IV}$ -add this to all the theorems?}    
%\yuta{Give SCM ${\cal M}_{IV}$ and Assumption \ref{AS2}}, conditional APCE is equal to the potential APCE.
%     $\mathbb{E}[\partial_x Y_{x}|{\boldsymbol W}={\boldsymbol w}]=\mathbb{E}[\partial_x Y_{x,{\boldsymbol w}}]$.
%\end{proposition}
%Thus, we call both conditional APCE and potential APCE CAPCE (conditional average partial causal effect).
%Thereafter, in this paper, we use the notation $\mathbb{E}[\partial_x Y_{x,{\boldsymbol w}}]$ to represent CAPCE.
%In contrast to most existing work, this paper does not identify the effect $\mathbb{E}[Y_{x}|{\boldsymbol W}={\boldsymbol w}]$, but the CAPCE $\mathbb{E}[\partial_x Y_{x}|{\boldsymbol W}={\boldsymbol w}]$. However, the main interest in causal inference is to infer the effects of the treatment under changing conditions \citep{Pearl2010}; thus, the CAPCE is often sufficient to reveal causal relationships.
We obtain the following result. 
\begin{theorem}[Identification of CAPCE]
\label{TEO2}
Under SCM ${\cal M}_{IV}$ and Assumptions \ref{AS1} and \ref{AS2}, CAPCE $\mathbb{E}[\partial_x Y_{x}|{\boldsymbol w}]$ is identifiable from distributions $\mathbb{P}(X, {\boldsymbol W}|Z)$ and $\mathbb{P}( Y |Z)$ via the  integral equation:
%\vspace{-0.6cm}
\begin{equation}
\label{IE6}
\mu(z)=\int_{\Omega_{\boldsymbol W}}\int_{\Omega_X} k(z,x,{\boldsymbol w})\,\mathbb{E}[\partial_x Y_{x}|{\boldsymbol w}]\,dx\,d{\boldsymbol w},
\end{equation}
where $\mu(z)=\mathbb{E}[Y|Z=z_0]-\mathbb{E}[Y|Z=z]$, $k(z,x,{\boldsymbol w})=\mathfrak{p}(X\leq x,{\boldsymbol W}={\boldsymbol w}|Z=z)-\mathfrak{p}(X\leq x,{\boldsymbol W}={\boldsymbol w}|Z=z_0)$, and $z_0$ is an arbitrary fixed value.
%\begin{eqnarray}
%\label{EQ3}
%\begin{array}{l}
%    \mu(z)=\mathbb{E}[Y|Z=z_0]-\mathbb{E}[Y|Z=z],\\
%    k(z,x,{\boldsymbol w})=\mathfrak{p}(X\leq x,{\boldsymbol W}={\boldsymbol w}|Z=z)-\mathfrak{p}(X\leq x,{\boldsymbol W}={\boldsymbol w}|Z=z_0).
%    \end{array}
%\end{eqnarray}
\end{theorem}
\textbf{Remark:} Assumption~\ref{AS2} is weaker than the assumption (\ref{eq-sep}) needed by the existing methods sieve NTSLS \citep{Whitney2003}, PTSLS \citep{Wooldridge2010}, and Kernel IV \citep{Singh2019} for identifying $\mathbb{E}[Y_{x}|{\boldsymbol w}]$, which require both the covariates $\boldsymbol{W}$ and the treatment $X$ to be separable from the unmeasured confounders $\boldsymbol{H}$. Assumption~\ref{AS2} is less restrictive, particularly when there are many covariates. Theorem~\ref{TEO2} states that \emph{CAPCE $\mathbb{E}[\partial_x Y_{x}|{\boldsymbol w}]$ is identifiable under a weaker assumption than that required for identifying $\mathbb{E}[Y_x|{\boldsymbol w}]$.} The result enables us to compute causal effects in IV models where Assumption~\ref{AS2} holds but assumption (\ref{eq-sep}) does not, so that existing methods are not applicable. Theorem~\ref{TEO2} extends the results of \citet{Wong2022} and \citet{Kawakami2023} for identifying APCE $\mathbb{E}[\partial_x Y_{x}]$; however, it is worth noting that this important point about the weaker separability assumption does not arise in the work of \citet{Wong2022} and \citet{Kawakami2023} because they study the setting with no covariates $\boldsymbol{W}$.

%The integral equation (\ref{IE6})  is a ``Fredholm Integral Equation of the First Kind” with $k$ called an integral kernel \citep{Bocher1926}. Equation (\ref{IE6}) is ill-posed since the integral operator ${\cal K}$ is not guaranteed to be compact,  where ${\cal K}(f)(z)=\int_{\Omega_{\boldsymbol W}}\int_{\Omega_X} k(z,x,{\boldsymbol w})f(x,{\boldsymbol w}) dxd{\boldsymbol w}$.
%A well-posed problem satisfies the following three properties \citep{Tikhonov1995}: the solution's existence, uniqueness, and stability.
%Problems, where one or more of these conditions do not hold, are called ill-posed problems.
