\documentclass{uai2025} % for initial submission
%\documentclass[accepted]{uai2025} % after acceptance, for a revised version; 
% also before submission to see how the non-anonymous paper would look like 
       

\newcommand{\removed}[1]{}
\usepackage{times}
\usepackage{soul}
\usepackage{url}
%\usepackage{hyperref}
\usepackage[utf8]{inputenc}
%\usepackage[small]{caption}
\usepackage{graphicx}
\usepackage{amsmath}
\usepackage{amsthm}
\usepackage{booktabs}
\usepackage{algorithm}
\usepackage{algorithmic}
%\usepackage[switch]{lineno}
\usepackage{stackengine}
% \defeq: "equal by definition" relation symbol. Built with stackengine by
% stacking a \scriptscriptstyle \Delta on top of "=" with a 1pt gap; \mathrel
% gives the result relation spacing. Math mode only.
\def\defeq{\mathrel{\ensurestackMath{\stackon[1pt]{=}{\scriptscriptstyle\Delta}}}}


\usepackage{algorithm}
\usepackage{algorithmic}

% Set the typeface to Times Roman
\usepackage{times}

%\usepackage{hyperref}
\usepackage{url}

\usepackage{amsmath}
%\usepackage{wrapfig,lipsum,booktabs}

\usepackage{amssymb}
\usepackage{mathtools}
\usepackage{amsthm}


\usepackage{algorithmic}

\usepackage{lscape}
% if you use cleveref..
\usepackage[capitalize,noabbrev]{cleveref}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% THEOREMS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\theoremstyle{plain}

% Todonotes is useful during development; simply uncomment the next line
%    and comment out the line below the next line to turn off comments
%\usepackage[disable,textsize=tiny]{todonotes}
\usepackage[textsize=tiny]{todonotes}
\usepackage{multirow}

\usepackage{ascmac}
%\usepackage{fancybx}
\usepackage{float}
\usepackage{perpage}
\MakeSorted{figure}
\MakeSorted{table}

\usepackage{url}
\usepackage{natbib}
\usepackage{chapterbib}

\usepackage{color}
\usepackage{tikz}
% TikZ node styles used for graph drawings. Both styles draw a circular
% outline with a minimum width of .5ex; they differ only in the fill.
\tikzset{%
mynode/.style={circle,minimum width=.5ex, fill=none,draw}, % no filling
myfillnode/.style={circle,minimum width=.5ex, fill=lightgray,draw}, % filled with light gray
}
\usepackage{amssymb}
\usepackage{natbib}

% Roman-numeral shorthands. Each macro opens its own inline math ($...$), so
% they are meant for text mode only. The negative \hspace values pull
% neighbouring letters closer together.
% Naming caveat: \0 prints "I" (one); \2, \3 and \4 print II, III and IV.
\newcommand{\0}{$\mathrm{I}$}
\newcommand{\2}{$\mathrm{I}\hspace{-1.2pt}\mathrm{I}$}
\newcommand{\3}{$\mathrm{I}\hspace{-1.2pt}\mathrm{I}\hspace{-1.2pt}\mathrm{I}$}
\newcommand{\4}{$\mathrm{I}\hspace{-1.2pt}\mathrm{V}$}
% Disabled lowercase variants (i, ii, iii). NOTE(review): two of them reuse the
% names \3 and \4 defined above, so they cannot be enabled as written.
%\newcommand{\3}{$\mathrm{i}$}
%\newcommand{\4}{$\mathrm{i}\hspace{-0.8pt}\mathrm{i}$}
%\newcommand{\5}{$\mathrm{i}\hspace{-0.8pt}\mathrm{i}\hspace{-0.8pt}\mathrm{i}$}
% \6 prints lowercase "iv".
\newcommand{\6}{$\mathrm{i}\hspace{-0.8pt}\mathrm{v}$}
% \indep: independence symbol, two \perp glyphs overlapped by three negative
% thin spaces (\!). Math mode only.
\newcommand{\indep}{\perp \!\!\! \perp}
\usepackage{amsmath}               
\usepackage{lscape}
\usepackage{algorithm}
%\usepackage[dvipdfmx]{graphicx}
%\bibliographystyle{unsrtnat}
%\DeclareMathOperator*{\argmin}{arg\,min}
%\DeclareMathOperator*{\argmax}{arg\,max}
\usepackage{color}
\usepackage{tikz}
% The \icmltitle you define below is probably too long as a header.
% Therefore, a short form for the running title is supplied here:
\usepackage{amsmath,amsthm}
% Theorem-like environments (amsthm). Each one is declared without a
% shared-counter argument, so each environment is numbered independently
% (Theorem 1, Lemma 1, ...).
\newtheorem{theorem}{Theorem}
\newtheorem{definition}{Definition}
\newtheorem{assumption}{Assumption}
\newtheorem{lemma}{Lemma}
\newtheorem{proposition}{Proposition}
\newtheorem{corollary}{Corollary}
\usepackage{multirow}
\usepackage{comment}
\usepackage{here}
\allowdisplaybreaks[4]
%\usepackage{bbm}
\usepackage{caption}
\usepackage{bbding}
\usepackage{arydshln}
\usepackage{afterpage}

%\usepackage{algpseudocode}
\usepackage{mathrsfs}
% \plim: upright operator printed as "p-lim". The starred declaration makes
% sub/superscripts behave as limits (set below/above in display style).
\DeclareMathOperator*{\plim}{p-lim}

% Coloured annotation macros for in-draft comments and edits.
% \jin{text}   : blue text wrapped in double square brackets, [[text]]
% \jina{text}  : blue text
% \yuta{text}  : red text
% \error{text} : green text
\newcommand{\jin}[1]{\textcolor{blue}{[[#1]]}}
\newcommand{\jina}[1]{\textcolor{blue}{#1}}
\newcommand{\yuta}[1]{\textcolor{red}{#1}}
\newcommand{\error}[1]{\textcolor{green}{#1}}
\usepackage{soul}


% If accepted, instead use the following line for the camera-ready submission:
%\usepackage[accepted]{icml2024}

% For theorems and such
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{mathtools}
\usepackage{amsthm}
                 
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2024} % ptmx math instead of Computer
                                         % Modern (has noticeable issues)
% \documentclass[mathfont=newtx]{uai2024} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
% \swap[sep]{a}{b}: prints the two mandatory arguments in reverse order with
% the optional separator between them, i.e. "b sep a". The separator defaults
% to "-". (Template example macro.)
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{Moments of Causal Effects}

% The standard author block has changed for UAI 2024 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:<jj@example.edu>?Subject=Your UAI 2024 paper}{Jane~J.~von~O'L\'opez}{}}
\author[1]{Harry~Q.~Bovik}
\author[1,2]{Further~Coauthor}
\author[3]{Further~Coauthor}
\author[1]{Further~Coauthor}
\author[3]{Further~Coauthor}
\author[3,1]{Further~Coauthor}
% Add affiliations after the authors
\affil[1]{%
    Computer Science Dept.\\
    Cranberry University\\
    Pittsburgh, Pennsylvania, USA
}
\affil[2]{%
    Second Affiliation\\
    Address\\
    …
}
\affil[3]{%
    Another Affiliation\\
    Address\\
    …
  }
  
\begin{document}



Thank you for your valuable feedback. We will revise the paper accordingly. 

>Comment:
The paper addresses interesting research questions, but it is not clear why a better understanding of the causal process should be derived by knowing more about the distribution of the causal effects. Some insight is provided in the conclusion (e.g. more effective policies) but more details should be provided.
The introduction should insist more on the practical benefits of having a better understanding of the moments of the causal effect.

Our response:
Thanks for the suggestion. We will add the following text to the introduction to further clarify why understanding the shape of the distribution of causal effects is important:


``The shape of the distribution of causal effects uncovers causal effect heterogeneity, which is an actively researched topic in the fields of statistics, causal inference, and machine learning.
Causal effect heterogeneity refers to the variation in causal effects across individuals or subgroups within a population.
Existing work on causal effect heterogeneity mainly examines the conditional average causal effects (CACE), $E[Y_1-Y_0 \mid W]$, based on subjects' covariates $W$.
However, CACE captures only the heterogeneity across subpopulations specified by observed covariates $W$, not the heterogeneity across individuals.
In contrast, the shape of the distribution of causal effects reveals the heterogeneity of causal effects across individuals and provides complementary information to CACE.''

In the last paragraph of Section 3.1, we discuss how the higher-order moments of causal effects provide useful information about the distribution, as follows: ``Variance and standard deviation quantify the dispersion of a distribution.
If the variance of causal effects is large, the causal effects may deviate significantly from ACE for some subjects. Skewness is a measure of the asymmetry of a probability distribution. 
If the causal effect is positively skewed, the right tail of the distribution of the causal effect is longer.
If the causal effect is negatively skewed, the left tail of the distribution is longer.
Kurtosis is a measure of the tailedness or peakedness of a distribution.  High kurtosis values indicate the presence of outliers in causal effects.''

>Comment:
A better discussion of the impact of exogeneity/monotonicity assumption on the practical use of the results should be added.

Our response:
We will add the following discussion: ``The exogeneity and monotonicity assumptions, while common in the causal inference literature, can restrict the practical applicability of the identification results. The assumption of exogeneity, requiring the absence of unmeasured confounders, might be plausible in some applications, especially in certain controlled settings.
The monotonicity assumption can be challenging to verify in practice. In such scenarios, the bounding results that relax the monotonicity assumption and depend primarily on exogeneity provide a range of plausible causal effects, still offering valuable information for guiding decisions. In general, these assumptions require a cautious interpretation of the findings in the practical use of the results.''

>Comment:
Experimental validation is limited to a single real dataset for which the validity of the exogeneity/monotonicity assumptions is not sufficiently discussed.
P7: what about the assumptions of exogeneity and monotonicity in the real-world example?


Our response:
We will add the following discussion on the exogeneity/monotonicity assumptions in Section 6.

``We assume the exogeneity assumption holds, as Westfall et al. [2011] did not report any potential confounding factors.
We assume that the monotonicity assumption holds, meaning that increasing the formulation from 1 time to 2 times always leads to greater cholesterol reduction for all subjects, and similarly, increasing it from 2 times to 4 times consistently results in further cholesterol reduction for all subjects.''

>Comment:
The authors should make clear the bias and variance of the estimators in Table 1 and discuss which factors those two components depend on.
In particular, some graphical representation of the variance of the estimator wrt number observations is required.
Legend of Table 1 should contain more information about the table, notably about the different estimators.

Our response:
Thanks for the feedback. We will provide a graph, add a legend explaining the symbols for the different estimators, and add the following text to the caption of Table 1.

``We present the estimates of the second, third, and fourth moments of causal effects along with their respective upper and lower bounds.
Additionally, we report the means of each estimator accompanied by their 95\% confidence intervals.''

>Comment:
P8 "suggesting the presence of distinct response groups": isn't this more a role of the CACE quantity?

Our response:
CACE captures the heterogeneity across groups specified by observed covariates $W$, while the moments of causal effects can reveal the presence of distinct response groups not characterized by covariate values and provide complementary information to CACE. Perhaps rephrasing the sentence as ``suggesting the presence of individuals with distinct responses'' is more precise.


>Comment:
Questions for the authors: Have you considered extending the framework to handle instrumental variables settings where unmeasured confounding is present? 
Could the approach be adapted to analyze the moments of treatment effects in non-binary treatment settings, such as with continuous treatments?


Our response: Great questions. These are interesting directions for future work that we have not explored.


>Comment:
Are there specific application domains where you anticipate these higher-order moments to be particularly informative beyond the medical example provided?

Our response:
These higher moments of causal effects should be computed whenever there is a need to understand the heterogeneity of the causal effects, to gain a deeper understanding of how causal effects differ across individuals, in fields such as economics, business, education, and psychology, where causal inference is commonly applied in empirical research.

>Comment:
Given the computational demands of the estimators, have you considered developing more efficient implementations or approximations for large-scale datasets?

Our response: This is an interesting direction for future work that we plan to explore.

>Comment:
How might your framework connect to recent work on heterogeneous treatment effects in machine learning contexts?

Our response: 
To the best of our understanding, recent work on heterogeneous treatment effects in ML focuses on the conditional average causal effects (CACE), that is, the heterogeneity across subpopulations specified by subjects' covariates. Our work studies the heterogeneity not characterized by covariate values and provides complementary information to CACE.
It will be interesting to explore how the ML techniques developed for estimating heterogeneous treatment effects can be leveraged to obtain better estimators for the moments of causal effects in large-scale datasets.




%Please see our first response.





\end{document}

%Both the exogeneity and monotonicity assumptions are standard in causal inference, as in our application. These assumptions do not place extreme limitations on the practical applicability of our results.


%As described in the initial response, our study is closely related to the concept of CACE. Both CACE and the moments of causal effects can provide insights into the presence of distinct response groups. We will provide a more detailed explanation of the differences between CACE and the moments of causal effects after this sentence you pointed out on page 8 as follows:
%"Also, CACE can provide insights into the presence of distinct response groups. However, CACE may fail to uncover distinct response groups when researchers lack appropriate covariates to explain these subgroups. On the other hand, the moments of causal effects can reveal the presence of such groups even in the absence of appropriate covariates, although the monotonicity assumption is required."


%We anticipate that our results will be informative in fields such as economics, business, education, and psychology, where causal inference is commonly applied in empirical research.

%>Comment:
%The bounds derived without the monotonicity assumption could be quite wide in practice, potentially limiting their informativeness in some applications.






%>Comment: The estimators for higher-order moments may require relatively large sample sizes for reliable inference, as suggested by the wide confidence intervals with N=20. Additional discussion on sample size requirements for achieving desirable precision in the moment estimators would strengthen the practical applicability.


%Our response: It depends on the situation. Our simulation results indicate that the estimates are relatively reliable when N = 1000.
%\jin{A poor response. A better discussion is needed.}

%\yuta{When N = 20, the estimators exhibit wide confidence intervals (CI). In particular, the upper bound of the confidence interval for the third moment $\sigma^{(3)}$ exceeds zero, indicating potential positive skewness. When N=100 and 1000, the upper bound of the confidence interval for the third moment $\sigma^{(3)}$ does not exceeds zero. Thus, in our settnig, we can derive reliable estimates of $\sigma^{(3)}$ when N=100 and 1000.}
%\jin{I don't think this answer the comments. Perhaps we simply ignore this comment?}
%\yuta{yes, I will delete} \jin{ok}






