\documentclass{uai2025} % for initial submission
%\documentclass[accepted]{uai2025} % after acceptance, for a revised version; 
% also before submission to see how the non-anonymous paper would look like 
       

\newcommand{\removed}[1]{}
\usepackage{times}
\usepackage{soul}
\usepackage{url}
%\usepackage{hyperref}
\usepackage[utf8]{inputenc}
%\usepackage[small]{caption}
\usepackage{graphicx}
\usepackage{amsmath}
\usepackage{amsthm}
\usepackage{booktabs}
\usepackage{algorithm}
\usepackage{algorithmic}
%\usepackage[switch]{lineno}
\usepackage{stackengine}
\def\defeq{\mathrel{\ensurestackMath{\stackon[1pt]{=}{\scriptscriptstyle\Delta}}}}


\usepackage{algorithm}
\usepackage{algorithmic}

% Set the typeface to Times Roman
\usepackage{times}

%\usepackage{hyperref}
\usepackage{url}

\usepackage{amsmath}
%\usepackage{wrapfig,lipsum,booktabs}

\usepackage{amssymb}
\usepackage{mathtools}
\usepackage{amsthm}


\usepackage{algorithmic}

\usepackage{lscape}
% if you use cleveref..
\usepackage[capitalize,noabbrev]{cleveref}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% THEOREMS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\theoremstyle{plain}

% Todonotes is useful during development; simply uncomment the next line
%    and comment out the line below the next line to turn off comments
%\usepackage[disable,textsize=tiny]{todonotes}
\usepackage[textsize=tiny]{todonotes}
\usepackage{multirow}

\usepackage{ascmac}
%\usepackage{fancybx}
\usepackage{float}
\usepackage{perpage}
\MakeSorted{figure}
\MakeSorted{table}

\usepackage{url}
\usepackage{natbib}
\usepackage{chapterbib}

\usepackage{color}
\usepackage{tikz}
\tikzset{%
mynode/.style={circle,minimum width=.5ex, fill=none,draw}, % no filling
myfillnode/.style={circle,minimum width=.5ex, fill=lightgray,draw}, % fill with black
}
\usepackage{amssymb}
\usepackage{natbib}

\newcommand{\0}{$\mathrm{I}$}
\newcommand{\2}{$\mathrm{I}\hspace{-1.2pt}\mathrm{I}$}
\newcommand{\3}{$\mathrm{I}\hspace{-1.2pt}\mathrm{I}\hspace{-1.2pt}\mathrm{I}$}
\newcommand{\4}{$\mathrm{I}\hspace{-1.2pt}\mathrm{V}$}
%\newcommand{\3}{$\mathrm{i}$}
%\newcommand{\4}{$\mathrm{i}\hspace{-0.8pt}\mathrm{i}$}
%\newcommand{\5}{$\mathrm{i}\hspace{-0.8pt}\mathrm{i}\hspace{-0.8pt}\mathrm{i}$}
\newcommand{\6}{$\mathrm{i}\hspace{-0.8pt}\mathrm{v}$}
\newcommand{\indep}{\perp \!\!\! \perp}
\usepackage{amsmath}               
\usepackage{lscape}
\usepackage{algorithm}
%\usepackage[dvipdfmx]{graphicx}
%\bibliographystyle{unsrtnat}
%\DeclareMathOperator*{\argmin}{arg\,min}
%\DeclareMathOperator*{\argmax}{arg\,max}
\usepackage{color}
\usepackage{tikz}
% The \icmltitle you define below is probably too long as a header.
% Therefore, a short form for the running title is supplied here:
\usepackage{amsmath,amsthm}
\newtheorem{theorem}{Theorem}
\newtheorem{definition}{Definition}
\newtheorem{assumption}{Assumption}
\newtheorem{lemma}{Lemma}
\newtheorem{proposition}{Proposition}
\newtheorem{corollary}{Corollary}
\usepackage{multirow}
\usepackage{comment}
\usepackage{here}
\allowdisplaybreaks[4]
%\usepackage{bbm}
\usepackage{caption}
\usepackage{bbding}
\usepackage{arydshln}
\usepackage{afterpage}

%\usepackage{algpseudocode}
\usepackage{mathrsfs}
\DeclareMathOperator*{\plim}{p-lim}

\newcommand{\jin}[1]{\textcolor{blue}{[[#1]]}}
\newcommand{\jina}[1]{\textcolor{blue}{#1}}
\newcommand{\yuta}[1]{\textcolor{red}{#1}}
\newcommand{\error}[1]{\textcolor{green}{#1}}
\usepackage{soul}


% If accepted, instead use the following line for the camera-ready submission:
%\usepackage[accepted]{icml2024}

% For theorems and such
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{mathtools}
\usepackage{amsthm}
                 
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2024} % ptmx math instead of Computer
                                         % Modern (has noticeable issues)
% \documentclass[mathfont=newtx]{uai2024} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{Moments of Causal Effects}

% The standard author block has changed for UAI 2024 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:<jj@example.edu>?Subject=Your UAI 2024 paper}{Jane~J.~von~O'L\'opez}{}}
\author[1]{Harry~Q.~Bovik}
\author[1,2]{Further~Coauthor}
\author[3]{Further~Coauthor}
\author[1]{Further~Coauthor}
\author[3]{Further~Coauthor}
\author[3,1]{Further~Coauthor}
% Add affiliations after the authors
\affil[1]{%
    Computer Science Dept.\\
    Cranberry University\\
    Pittsburgh, Pennsylvania, USA
}
\affil[2]{%
    Second Affiliation\\
    Address\\
    …
}
\affil[3]{%
    Another Affiliation\\
    Address\\
    …
  }
  
\begin{document}



Thank you for your valuable feedback. 

>Comment:
While mathematically rigorous, the notation is dense and may be challenging for readers unfamiliar with structural causal models (SCMs). Some key definitions (e.g., Definition 3.1 on path-specific PNS) could benefit from intuitive explanations or illustrative examples.

Our response: 
We will go through the paper and add intuitive explanations wherever they are needed. For Definition 3.1 specifically, detailed explanations are provided on pages 4--5, and the application in Section 6 serves as an illustrative example.

>Comment:
The identification of path-specific PNS relies on strong monotonicity assumptions (e.g., Assumptions 4.1 and 4.3). However, the paper does not empirically test their robustness or provide alternative estimation strategies when these assumptions are violated.

Our response:
We will provide a sensitivity analysis in the numerical experiments by introducing a non-monotonic term in the SCM in Section 5, i.e., $Y:=X+M+ N+ C+\alpha U_Y+ (1-\alpha) U_Y^4$, where $\alpha \in [0,1]$ controls the degree of monotonicity.
When $\alpha = 1$, the setting corresponds to the one in Section 5.
When $\alpha = 0$, the model includes only a non-monotonic error term.

In addition, we will add the following discussion of the case where the monotonicity assumption is violated: "In settings where the monotonicity assumption does not hold, one can instead aim to derive bounds for the path-specific PNS [Tian and Pearl, 2000] [1]. One approach is to use Fréchet inequalities [2]. We leave the derivation of bounds for the path-specific PNS to future work."

[1]  Li, Ang, and Judea Pearl. "Probabilities of causation with nonbinary treatment and effect." Proceedings of the AAAI Conference on Artificial Intelligence. Vol. 38. No. 18. 2024.

[2] Fréchet, Maurice. "Sur les tableaux dont les marges et des bornes sont données." Revue de l'Institut international de statistique (1960): 10-32.

>Comment:
While the paper cites prior work on mediation analysis and probabilities of causation, it lacks a detailed comparison with alternative mediation decomposition methods in numerical experiments and application to real data.

Our response:
Existing works, such as [Daniel et al., 2015], provide a mediation decomposition of the (total) causal effect $E[Y_{x}-Y_{x'}]$. To the best of our knowledge, this paper is the first to provide a mediation decomposition of PNS over two mediators; hence, no alternative methods are available for comparison in the experiments.

>Comment:
Could you clarify the meaning of the probability of necessity and sufficiency (PNS)?

Our response:
PNS is the probability that one event is both necessary and sufficient for another event to occur. To make this explicit, we will add the following sentence after Definition 2.1.

"$\mathrm{PNS}(y;x',x,\mathcal{E},c)$ measures the necessity and sufficiency of $x$ relative to $x'$ for producing $Y \succeq y$, given $C=c$ and evidence $\mathcal{E}$; that is, it is the probability that the event $Y \succeq y$ would occur if $X$ were set to $x$ and would not occur if $X$ were set to $x'$."

>Comment:
For Definition 3.1 (Path-Specific PNS), consider adding a diagram or flowchart to illustrate the four types of path-specific PNS with two mediators.

Our response:
Figure 2 in the paper illustrates the four types of path-specific PNS. We will add further graphical illustrations in the appendix.

>Comment:
Table 1 is informative, but adding a row for bias (relative bias) and mean squared error (MSE) could improve interpretability.

Our response:
Thanks for the suggestion.

>Comment:
The assumption of strict monotonicity may not always hold in practice. It would be useful to discuss realistic scenarios where this assumption could fail and include a sensitivity analysis in the numerical experiments to test the impact of violating this assumption.

Our response:
We discuss the monotonicity assumption for the real-world application in the first paragraph on page 8 (Section 6).
In this application, the monotonicity assumption is relatively reasonable; however, it would be violated if, for example, some students performed worse despite increased study time.

We will also add a sensitivity analysis to the numerical experiments, as described in our response to the earlier comment on the monotonicity assumptions.

>Comment:
The results in 1B and 2B of Theorem 4.2 appear to be unreasonable. The terms on the left sides of Equations 19, 20, 25, and 26 can take any value between 0 and 1, whereas the terms on the right sides are restricted to either 0 or 1. This discrepancy suggests that the equalities may not always hold.

Our response:
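The results in (1B) and (2B) of Theorem 4.2 hold under the condition $\mathbb{P}(Y \prec y^l \mid X=x^e,C=c) = \mathbb{P}(Y \prec y^u \mid X=x^e,C=c)$ stated in the theorem. Under this strong constraint, the path-specific PNS can only take the values 0 or 1, so the left-hand sides of Equations 19, 20, 25, and 26 are likewise restricted to 0 or 1 and the equalities are consistent.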












%The results in 1B and 2B of Theorem 4.2 state that, "under Assumptions 2.1, 4.1, 4.2, and 4.3", the terms on the left-hand sides of Equations (19), (20), (25), and (26) can only take the values 0 or 1. If Assumptions 2.1, 4.1, 4.2, and 4.3 do not hold, the equalities may not necessarily be valid, as you have pointed out. Nevertheless, our Theorem 4.2 remains valid statements under our assumptions.
%\jin{Your answer is quite puzzling, doesn't make sense to me. The reviewer's question is why the PNS is restricted to either 0 or 1. I don't think it's because of the assumptions. In general, the PNS should take values between 0 and 1 as in (1A) and (2A) which also need the assumptions. So your answer makes no sense.}

%\yuta{The probability of the evidence ${\cal E}=(Y \in [y^l,y^u],X=x^e,C=c)$ (as shown in the Remark), given $X=x^e$ and $C=c$, is given by $\mathbb{P}(Y \preceq y^l|X=x^e,C=c) - \mathbb{P}(Y \prec y^u|X=x^e,C=c)$. Then, the conditions in (1B) and (2B) state the probability of the evidence ${\cal E}=(Y \in [y^l,y^u],X=x^e,C=c)$, given $X=x^e$ and $C=c$, is equal to zero. For example, when the evidence give a specific value of outcome $y'=y^l=y^u$ and $Y$ is continuous outcome, the evidence ${\cal E}=(Y = y',X=x^e,C=c)$ corresponds to the conditions described in (1B) and (2B). This type of evidence, which satisfies the conditions of (1B) and (2B), provides highly specific information on subjects. Thus, statements (1B) and (2B) indicate that the path-specific PNS is deterministically 0 or 1 when highly specific evidence about subject is given. These phenomena are consistent with those reported in [Kawakami and Tian, 2025].}




%\yuta{(2A) and (2B) in Theorem 4.2 state that the path-specific PNS is restricted to either 0 or 1 given the evidence ${\cal E}$ such that $\mathbb{P}(Y \prec y^l|X=x^e,C=c) = \mathbb{P}(Y \prec y^u|X=x^e,C=c)$. This evidence provides highly specific information, and the event ${\cal E}$ occurs with probability zero. \jin{Do you mean probability 1 here?} Thus, statements (1B) and (2B) indicate that the path-specific PNS is deterministically 0 or 1 under highly specific evidence. These results coincide with those reported in [Kawakami and Tian, 2025].} 
%\jin{Should (2A) be (1B) in the above??? How is $\mathbb{P}(Y \prec y^l|X=x^e,C=c) = \mathbb{P}(Y \prec y^u|X=x^e,C=c)$ anything to do with evidence ${\cal E}$ in $PNS(y;x',x,{\cal E},c)$??? Please be careful with your writing!!  }







\end{document}




%For readers unfamiliar with SCM, we will provide an explicit form of potential outcomes (nested counterfactual), e.g., $Y_{x,{M}_{x'},{N}_{x'',{M}_{x'''}}}=f_Y(x,f_{{M}}(x',C,U_{{M}}),f_{{N}}(x'',f_{{M}}(x''',C,U_{{M}}),C,U_{{N}}),C,U_Y)$.

%We provide an additional experiment where the monotonicity assumption is violated to examine the robustness of our method under such conditions.
%We convert $U_Y$ in Eq. (27) to $U_Y^4$, which is not a monotonic function on $U_Y$.
%Then, the estimates when $N=20$ are

%$\text{\normalfont T-PNS}$: 0.202 (CI: [0.000,0.414], Truth:  0.329),

%$\text{\normalfont PNS}^{X \rightarrow Y}$: 0.046 (CI: [0.000,0.196], Truth: 0.034),

%$\text{\normalfont PNS}^{X \rightarrow {N}  \rightarrow Y}$: 0.038 (CI: [0.,0.149], Truth: 0.065),

%$\text{\normalfont PNS}^{X \rightarrow {M} \rightarrow {N}  \rightarrow Y}$: 0.048 (CI: [0.000,0.186], Truth: 0.100),

%$\text{\normalfont PNS}^{X \rightarrow {M}  \rightarrow Y}$: 0.069 (CI: [0.000,0.232], Truth: 0.130).

%In this case, the bias of the estimates appears to be moderate, and the largest component of the path-specific PNS is $\text{\normalfont PNS}^{X \rightarrow {M}  \rightarrow Y}$.

%\jin{I don't think the above simple experiments can illustrate anything. Replace the above with the following:}




%\jin{Remove the following response.} 
%We will add the estimates of path-specific causal effects for the binarized outcome $\mathbb{I}(Y<10)$ based on the method proposed by [Daniel et al., 2015] in the application section.
%The estimates of path-specific causal effects by [Daniel et al., 2015] in the application are as follows:
%Total Effect: 0.082 (CI: [-0.016,0.222]),
%Path-specific Effect of $X \rightarrow Y$: -0.047 (CI: [-0.113,0.019]),
%Path-specific Effect of $X \rightarrow {N} \rightarrow Y$: 0.027 (CI: [-0.022,0.082]),
%Path-specific Effect of $X \rightarrow {M} \rightarrow {N} \rightarrow Y$: 0.000 (CI: [0.000,0.000]),
%Path-specific Effect of $X \rightarrow {M} \rightarrow Y$: 0.102 (CI: [0.023,0.204]).
%The results show that the path-specific effect of $X \rightarrow Y$ is negative, the path-specific effects of $X \rightarrow {N} \rightarrow Y$ and $X \rightarrow {M} \rightarrow Y$ are positive, and there is no path-specific effect of $X \rightarrow {M} \rightarrow {N} \rightarrow Y$.
%\jin{What is the relation between our method vs. [Daniel et al., 2015]? If [Daniel et al., 2015] provides an alternative mediation decomposition method, why is it not compared with in the paper? Not sure it is a good idea to provide this experiment here. It may raise more questions/concerns.}


%\yuta{Daniel et al. [2015]) studied causal mediation analysis in the context of the total causal effect, expressed as  $E[Y_{x}]-E[Y_{x'}]$.On the other hand, this paper focuses on PNS, expressed as  $P(Y_{x'} \prec y \preceq Y_{x})$.} \jin{Then what is the point of providing the above experiments? I'd just respond with the following:}
%\yuta{We will provide additional graphs in the appendix that highlight the specific pathways captured by each PNS measure using bold lines, as in Figure 1 in [Daniel et al., 2015].}



%The bias and uncertainty of the estimators can be interpreted from Table 1. The bias can be evaluated by comparing the mean of the estimators to the ground truth value of the parameter. The uncertainty or precision of the estimates is expressed through 95\% confidence intervals.
%We provide an additional experiment in which the monotonicity assumption is violated. (See our previous response.) \jin{That experiment is not a sensitivity analysis. Any idea on how to perform a sensitivity analysis?}



%We will provide a sensitivity analysis by introducing a non-monotonic term in the SCM in Section 5, i.e., $Y:=X+{M}+ {N}+ C+\alpha U_Y+ (1-\alpha) U_Y^4$, where $\alpha \in [0,1]$ controls the degree of monotonicity. When $\alpha = 1$, the setting corresponds to the one in Section 5. When $\alpha = 0$, the model includes only a non-monotonic error term.