


\subsection{When $X\sim\text{Bern}(0.5)$}

{\bf Simulation for the moments of causal effects.}
%Next, we perform experiments to illustrate finite-sample properties of the estimator.
We assume the following SCM:
\begin{equation}
Y:=-(X+1)U, \quad X \sim \text{Bern}(0.5), \quad U\sim \text{Unif}(0,1),
\end{equation}
where $\text{Bern}(0.5)$ is the Bernoulli distribution with success probability $0.5$, and $\text{Unif}(0,1)$ is the uniform distribution on $[0,1]$.
This setting satisfies Assumptions \ref{ASEXO2} and \ref{MONO2}.
{This setting satisfies the rank invariance assumption.}
The domain of $Y$ is bounded within $[-2,0]$.
%The central moments of the causal effects $\overline{\mu}^{(m)}$ are equal to $\mathbb{E}[(-U+\mathbb{E}[U])^m]$ for $m=1,\dots$.
We run 1000 simulations for each sample size $N \in \{20, 100, 1000\}$.
We set $N_1$, $N_2$, $N_3$, and $N_4$ all to 1000.



{\bf Results (Ours).}
We present the estimates obtained using our proposed method.
The ground truth of the second moment of $Y_1-Y_0$ is $0.333$, and the estimates of the second moment are
\begin{center}
\textbf{$N=20$}:\, \, \,  $0.406$ (95\%CI: $[0.084,0.883]$),\\\vspace{0.1cm}
\textbf{$N=100$}:\, \,  $0.332$ (95\%CI: $[0.156,0.562]$),\\\vspace{0.1cm}
\textbf{$N=1000$}:  $0.331$ (95\%CI: $[0.272,0.406]$).
\end{center}
The ground truth of the third moment of $Y_1-Y_0$ is $-0.25$, and the estimates of the third moment are
\begin{center}
\textbf{$N=20$}:\, \, \, $-0.350$ (95\%CI: $[-0.958,-0.055]$),\\\vspace{0.1cm}
\textbf{$N=100$}:\, \, $-0.284$ (95\%CI: $[-0.557,-0.106]$),\\\vspace{0.1cm}
\textbf{$N=1000$}: $-0.254$ (95\%CI: $[-0.320,-0.198]$).
\end{center}
The ground truth of the fourth moment of $Y_1-Y_0$ is $0.2$, and the estimates of the fourth moment are
\begin{center}
\textbf{$N=20$}:\, \, \, $0.334$ (95\%CI: $[0.020,1.016]$),\\\vspace{0.1cm}
\textbf{$N=100$}:\, \,  $0.207$ (95\%CI: $[0.079,0.419]$),\\\vspace{0.1cm}
\textbf{$N=1000$}: $0.206$ (95\%CI: $[0.149,0.293]$).
\end{center}
All means of the estimators are close to the ground truth. 
However, estimators for small sample sizes have wide 95\% CIs, and, judging by the 95\% CIs, they converge slowly to the ground truth, especially for higher-order moments.


We present the estimated bounds on the moments of causal effects.
The bounds are estimated with $N=1000$.
\begin{center}
\textbf{Upper bound of the second moment}:
$1.003$ (95\%CI: $[0.915,1.085]$),\\\vspace{0.1cm}
\textbf{Lower bound of the second moment}:
$0.000$ (95\%CI: $[0.000,0.001]$).
\end{center}
\begin{center}
\textbf{Upper bound of the third moment}:
$0.082$ (95\%CI: $[0.057,0.107]$),\\\vspace{0.1cm}
\textbf{Lower bound of the third moment}:
$-1.320$ (95\%CI: $[-1.471,-1.175]$).
\end{center}
\begin{center}
\textbf{Upper bound of the fourth moment}:
$2.190$ (95\%CI: $[1.945,2.537]$),\\\vspace{0.1cm}
\textbf{Lower bound of the fourth moment}:
$0.000$ (95\%CI: $[0.000,0.000]$).
\end{center}
All ground truth values lie within the computed bounds.


{\bf Results (\citealp{Heckman1997}).}
We present the estimates obtained using the method of \citet{Heckman1997}.
%The ground truth of the variance of $Y_1-Y_0$ is $0.083$, and 
The estimates of the second moment are
\begin{center}
\textbf{$N=20$}:\, \, \, $0.392$ (95\%CI: $[0.090,1.127]$),\\\vspace{0.1cm}
\textbf{$N=100$}:\, \,  $0.347$ (95\%CI: $[0.186,0.577]$),\\\vspace{0.1cm}
\textbf{$N=1000$}: $0.334$ (95\%CI: $[0.279,0.397]$).
\end{center}
%The ground truth of the skewness of $Y_1-Y_0$ is $0$, and 
The estimates of the third moment are
\begin{center}
\textbf{$N=20$}:\, \,   $-0.349$ (95\%CI: $[-1.096,-0.068]$),\\\vspace{0.1cm}
\textbf{$N=100$}:\, \,  $-0.259$ (95\%CI: $[-0.474,-0.129]$),\\\vspace{0.1cm}
\textbf{$N=1000$}: $-0.254$ (95\%CI: $[-0.352,-0.194]$).
\end{center}
%The ground truth of the kurtosis of $Y_1-Y_0$ is $1.8$, and 
The estimates of the fourth moment are
\begin{center}
\textbf{$N=20$}:\,  \,  \, $0.309$ (95\%CI: $[0.017,1.120]$),\\\vspace{0.1cm}
\textbf{$N=100$}:\, \,  $0.220$ (95\%CI: $[0.084,0.402]$),\\\vspace{0.1cm}
\textbf{$N=1000$}: $0.205$ (95\%CI: $[0.159,0.267]$).
\end{center}



\subsection{Continuous Case}





{\bf Simulation for the moments of causal effects.}
%Next, we perform experiments to illustrate finite-sample properties of the estimator.
We assume the following SCM:
\begin{equation}
Y:=-(X+1)U, \quad X \sim \text{Bern}(0.8), \quad U\sim \text{Unif}(0,1),
\end{equation}
where $\text{Bern}(p)$ is the Bernoulli distribution with success probability $p$, and $\text{Unif}(0,1)$ is the uniform distribution on $[0,1]$.
This setting satisfies Assumptions \ref{ASEXO2} and \ref{MONO2}.
{This setting satisfies the rank invariance assumption.}
The domain of $Y$ is bounded within $[-2,0]$.
%The central moments of the causal effects $\overline{\mu}^{(m)}$ are equal to $\mathbb{E}[(-U+\mathbb{E}[U])^m]$ for $m=1,\dots$.
We run 1000 simulations for each sample size $N \in \{20, 100, 1000\}$.
We set $N_1$, $N_2$, $N_3$, and $N_4$ all to 1000.






{\bf Results (Ours).}
We present the estimates obtained using our proposed method.
The ground truth of the second moment of $Y_1-Y_0$ is $0.333$, and the estimates of the second moment are
\begin{center}
\textbf{$N=20$}:\, \, \,  $0.457$ (95\%CI: $[0.180,1.124]$),\\\vspace{0.1cm}
\textbf{$N=100$}:\, \,  $0.359$ (95\%CI: $[0.206,0.575]$),\\\vspace{0.1cm}
\textbf{$N=1000$}:  $0.339$ (95\%CI: $[0.270,0.409]$).
\end{center}
The ground truth of the third moment of $Y_1-Y_0$ is $-0.25$, and the estimates of the third moment are
\begin{center}
\textbf{$N=20$}:\, \, \, $-0.458$ (95\%CI: $[-1.433,-0.052]$),\\\vspace{0.1cm}
\textbf{$N=100$}:\, \, $-0.279$ (95\%CI: $[-0.508,-0.137]$),\\\vspace{0.1cm}
\textbf{$N=1000$}: $-0.251$ (95\%CI: $[-0.322,-0.196]$).
\end{center}
The ground truth of the fourth moment of $Y_1-Y_0$ is $0.2$, and the estimates of the fourth moment are
\begin{center}
\textbf{$N=20$}:\, \, \, $0.390$ (95\%CI: $[0.048,1.390]$),\\\vspace{0.1cm}
\textbf{$N=100$}:\, \,  $0.254$ (95\%CI: $[0.118,0.524]$),\\\vspace{0.1cm}
\textbf{$N=1000$}: $0.209$ (95\%CI: $[0.137,0.281]$).
\end{center}
All means of the estimators are close to the ground truth. 
However, estimators for small sample sizes have wide 95\% CIs, and, judging by the 95\% CIs, they converge slowly to the ground truth, especially for higher-order moments.


We present the estimated bounds on the moments of causal effects.
The bounds are estimated with $N=1000$.
\begin{center}
\textbf{Upper bound of the second moment}:\\
$1.000$ (95\%CI: $[0.931,1.078]$),\\\vspace{0.1cm}
\textbf{Lower bound of the second moment}:\\
$0.000$ (95\%CI: $[0.000,0.001]$).
\end{center}
\begin{center}
\textbf{Upper bound of the third moment}:\\
$0.083$ (95\%CI: $[0.065,0.101]$),\\\vspace{0.1cm}
\textbf{Lower bound of the third moment}:\\
$-1.323$ (95\%CI: $[-1.455,-1.192]$).
\end{center}
\begin{center}
\textbf{Upper bound of the fourth moment}:\\
$2.211$ (95\%CI: $[1.965,2.502]$),\\\vspace{0.1cm}
\textbf{Lower bound of the fourth moment}:\\
$0.000$ (95\%CI: $[0.000,0.000]$).
\end{center}
All ground truth values lie within the computed bounds.


{\bf Results (\citealp{Heckman1997}).}
We present the estimates obtained using the method of \citet{Heckman1997}.
%The ground truth of the variance of $Y_1-Y_0$ is $0.083$, and 
The estimates of the second moment are
\begin{center}
\textbf{$N=20$}:\, \, \, $0.727$ (95\%CI: $[0.209,2.074]$),\\\vspace{0.1cm}
\textbf{$N=100$}:\, \,  $0.424$ (95\%CI: $[0.262,0.799]$),\\\vspace{0.1cm}
\textbf{$N=1000$}: $0.344$ (95\%CI: $[0.279,0.403]$).
\end{center}
%The ground truth of the skewness of $Y_1-Y_0$ is $0$, and 
The estimates of the third moment are
\begin{center}
\textbf{$N=20$}:\, \,   $-0.898$ (95\%CI: $[-3.714,-0.187]$),\\\vspace{0.1cm}
\textbf{$N=100$}:\, \,  $-0.328$ (95\%CI: $[-0.567,-0.152]$),\\\vspace{0.1cm}
\textbf{$N=1000$}: $-0.259$ (95\%CI: $[-0.323,-0.215]$).
\end{center}
%The ground truth of the kurtosis of $Y_1-Y_0$ is $1.8$, and 
The estimates of the fourth moment are
\begin{center}
\textbf{$N=20$}:\,  \,  \, $1.216$ (95\%CI: $[0.098,5.245]$),\\\vspace{0.1cm}
\textbf{$N=100$}:\, \,  $0.282$ (95\%CI: $[0.129,0.602]$),\\\vspace{0.1cm}
\textbf{$N=1000$}: $0.207$ (95\%CI: $[0.159,0.273]$).
\end{center}





Our estimator is more efficient than the estimators proposed by \citet{Heckman1997}, particularly when $N=20$ and $N=100$.






\subsection{Discrete Case}


{\bf Simulation for the product moments of causal effects.}
We assume the following SCM:
\begin{equation}
Y:=X^2U, \quad U\sim \text{Unif}(\{0,0.5,1\}),
\end{equation}
where $U$ takes values in $\{0, 0.5, 1\}$ with the probabilities $\mathbb{P}(U=0)=\mathbb{P}(U=0.5)=\mathbb{P}(U=1)=1/3$.
%The covariance of the causal effect $\overline{\rho}_{i,j;k,h}$ is equal to $\mathbb{E}[-(U-\mathbb{E}[U])^2]$.
%The variances of the causal effects $\mathbb{E}[\{(Y_1-Y_0)-(\mathbb{E}[Y_1]-\mathbb{E}[Y_0])\}^2]$ and $\mathbb{E}[\{(Y_0-Y_{-1})-(\mathbb{E}[Y_0]-\mathbb{E}[Y_{-1}])\}^2]$ are equal to $\mathbb{E}[(-U+\mathbb{E}[U])^2]$.
This setting satisfies Assumptions \ref{ASEXO2} and \ref{MONO2}.
This setting does not satisfy the rank invariance assumption.
We run 1000 simulations for each sample size $N \in \{20, 100, 1000\}$.
We set $N_1$ and $N_2$ both to 1000.
\citet{Heckman1997} did not study the product moments of causal effects.


{\bf Results (Ours).}
The ground truth of the product moment $\mathbb{E}[(Y_1-Y_0)(Y_0-Y_{-1})]$ is $-0.417$, and the estimates of the product moment are
\begin{center}
\textbf{$N=20$}:\, \, \,  $-0.364$ (95\%CI: $[-0.602,-0.143]$),\\\vspace{0.1cm}
\textbf{$N=100$}:\, \,   $-0.375$ (95\%CI: $[-0.502,-0.248]$),\\\vspace{0.1cm}
\textbf{$N=1000$}: $-0.415$ (95\%CI: $[-0.514,-0.327]$).
\end{center}
All means of the estimators are close to the ground truth. 
However, estimators for small sample sizes have wide 95\% CIs.



We present the estimated bounds on the product moment of causal effects.
The bounds are estimated with $N=1000$.
\begin{center}
\textbf{Upper bound of the product moment}:
$-0.091$ (95\%CI: $[-0.147,-0.054]$),\\\vspace{0.1cm}
\textbf{Lower bound of the product moment}:
$-0.418$ (95\%CI: $[-0.496,-0.332]$).
\end{center}
The ground truth value lies within the computed bounds.






%We present additional experiments for a discrete outcome in Appendix \ref{appF2}.













%\subsection{Estimates of Bounds}
%\label{appF1}
%We present the estimated bounds on the moments of causal effects.
%We estimate bounds of variance, skewness, and kurtosis when $N=1000$.
%\begin{center}
%\textbf{Upper bound of variance}: $0.738$ (95\%CI: $[0.429,1.177]$),\\\vspace{0.1cm}
%\textbf{Lower bound of variance}: $0.000$ (95\%CI: $[0.000,0.000]$).
%\end{center}
%\begin{center}
%\textbf{Upper bound of skewness}: $431.084$ (95\%CI: $[52.761,943.139]$),\\\vspace{0.1cm}
%\textbf{Lower bound of skewness}: $-0.692$ (95\%CI: $[-1.505,-0.179]$).
%\end{center}
%\begin{center}
%\textbf{Upper bound of kurtosis}: $1243.341$ (95\%CI: $[0.000,3859.094]$),\\\vspace{0.1cm}
%\textbf{Lower bound of kurtosis}: $0.000$ (95\%CI: $[0.000,0.000]$).
%\end{center}
%All ground truth values lie within the computed bounds.




%\subsection{Discrete Outcome}
%\label{appF2}

%{\bf Simulation for the central moments of causal effects.}
%Next, we perform experiments to illustrate finite-sample properties of the estimator.
%We assume the following SCM:
%\begin{equation}
%Y:=-(X+1)U, X \sim \text{Bern}(0.5),
%\end{equation}
%where $U$ takes values in $\{0, 0.5, 1\}$ with the probabilities $\mathbb{P}(U=0)=\mathbb{P}(U=0.5)=\mathbb{P}(U=1)=1/3$.
%The central moments of the causal effects $\overline{\mu}^{(m)}$ are equal to $\mathbb{E}[(-U+\mathbb{E}[U])^m]$ for $m=1,\dots$.
%We simulate 1000 times with the sample size $N=20,100,10000$, respectively.
%We let $N_1$, $N_2$, $N_3$, and $N_4$ be all 100.




%{\bf Results (Ours).}
%The ground truth of the variance of $Y_1-Y_0$ is $0.166$, and the estimates of variance are
%\begin{center}
%\textbf{$N=20$}:\, \, \, \, $0.246$ (95\%CI: $[0.032,0.532]$),\\\vspace{0.1cm}
%\textbf{$N=100$}:\, \, \,  $0.218$ (95\%CI: $[0.053,0.523]$),\\\vspace{0.1cm}
%\textbf{$N=10000$}: $0.170$ (95\%CI: $[0.007,0.356]$).
%\end{center}
%The ground truth of the skewness of $Y_1-Y_0$ is $0$, and the estimates of skewness are
%\begin{center}
%\textbf{$N=20$}:\, \, \, \, \, \, $0.371$ (95\%CI: $[-5.269,5.730]$),\\\vspace{0.1cm}
%\textbf{$N=100$}:\, \, \,  $-0.436$ (95\%CI: $[-4.910,4.838]$),\\\vspace{0.1cm}
%\textbf{$N=10000$}: $-0.307$ (95\%CI: $[-3.403,3.575]$).
%\end{center}
%The ground truth of the kurtosis of $Y_1-Y_0$ is $1.5$, and the estimates of kurtosis are
%\begin{center}
%\textbf{$N=20$}:\, \, \, \,  $2.279$ (95\%CI: $[0.000,34.288]$),\\\vspace{0.1cm}
%\textbf{$N=100$}:\, \, \,  $2.187$ (95\%CI: $[0.000,22.153]$),\\\vspace{0.1cm}
%\textbf{$N=10000$}: $1.851$ (95\%CI: $[0.000,13.283]$).
%\end{center}
%All means of the estimators are close to the ground truth. 
%However, estimators for small sample sizes have large 95 $\%$ CIs, and they show slow convergence to the ground truth from the point of view of the 95 \% CIs, especially for high-order moments.







%{\bf Simulation for the central product moments of causal effects.}
%We assume the following SCM:
%\begin{equation}
%Y:=X^2U, X \sim \text{Bern}(0.5),
%\end{equation}
%where $U$ takes values in $\{0, 0.5, 1\}$ with the probabilities $\mathbb{P}(U=0)=\mathbb{P}(U=0.5)=\mathbb{P}(U=1)=1/3$.
%The covariance of the causal effect $\overline{\rho}_{i,j;k,h}$ is equal to $\mathbb{E}[-(U-\mathbb{E}[U])^2]$.
%The variances of the causal effects $\mathbb{E}[\{(Y_1-Y_0)-(\mathbb{E}[Y_1]-\mathbb{E}[Y_0])\}^2]$ and $\mathbb{E}[\{(Y_0-Y_{-1})-(\mathbb{E}[Y_0]-\mathbb{E}[Y_{-1}])\}^2]$ are equal to $\mathbb{E}[(-U+\mathbb{E}[U])^2]$.
%We simulate 1000 times with the sample size $N=30,100,10000$, respectively.
%We let $N_1$ and $N_2$ be all 100.



%{\bf Results (Ours).}
%The ground truth of the covariance of $Y_1-Y_0$ and $Y_0-Y_{-1}$ is $-0.180$, and the estimates of covariance are
%\begin{center}
%\textbf{$N=30$}:\, \, \, \, $-0.120$ (95\%CI: $[-0.259,-0.022]$),\\\vspace{0.1cm}
%\textbf{$N=100$}:\, \, \,  $-0.148$ (95\%CI: $[-0.308,-0.023]$),\\\vspace{0.1cm}
%\textbf{$N=10000$}: $-0.168$ (95\%CI: $[-0.301,-0.043]$).
%\end{center}
%The ground truth of the correlation of $Y_1-Y_0$ and $Y_0-Y_{-1}$ is $-1$, and the estimates of correlation are
%\begin{center}
%\textbf{$N=30$}:\, \, \, \, \, \, $-0.741$ (95\%CI: $[-1.000,0.000]$),\\\vspace{0.1cm}
%\textbf{$N=100$}:\, \, \,  $-0.867$ (95\%CI: $[-1.000,-0.600]$),\\\vspace{0.1cm}
%\textbf{$N=10000$}: $-0.986$ (95\%CI: $[-1.000,-0.911]$).
%\end{center}
%All means of the estimators are close to the ground truth. 
%However, estimators for small sample sizes have large 95 $\%$ CIs.



