% \section{Statistical Testing}\label{sec:tests}
% While fairness notions are phrased in terms of observational, interventional, counterfactual, or graphical queries on the family of causal models, in practice such conditions can often only be verified using observational data.

% When the criterion that defines a fairness notion is identifiable, a corresponding estimand can be computed on statistical data. However, identifiability is not always guaranteed. Indeed, as we shall see, the criteria that we base our notions on are not identifiable when we allow for confounding. We use the symbolic linear-programming approach that resorts to a response-function parameterization to derive bounds on these criteria, as in \cite{BalkePearl97, Bonet01}. 
% % \subsection{Identifiability and Bounds on Fairness Notions} \label{subsec:bounds}



% \subsection{Graphical Notion and the Instrumental Variable (IV) Inequalities}\label{subsec:graph_iv}

% Constraints that graphical queries, such as absence of edges, impose on the observational distribution could take the form of conditional independence, equality or inequality constraints \cite{Evans16, WolfeSF19}. For example, for $\model \in \nullgraphunconf$, by the global Markov property, the graph implies the conditional independence $\sex \indep \outcome \mid \dept$ for any distribution that is Markov with respect to $\model$ \todo{in particular, for the observational distribution of $\model$ (the other distributions are not relevant to consider here)}. However, this no longer holds for the case with confounding since the path $\sex \rightarrow \dept \leftrightarrow \outcome$ is open when conditioned on $\dept$. Our test for the graphical notion of fairness for $\modelsedge$ stems from the observation that for a model $\model \in \nullgraph$, the\todo{Marginalization in preliminaries?} causal model after marginalizing $\formsex$, $\model_{\backslash \formsex}$, lies in the instrumental variable (IV) model class $\modeliv$ where $\sex$ is treated as the instrument, $\dept$ is treated as the cause and $\outcome$ is treated as the effect. If all modeled endogenous variables are discrete-valued, a necessary condition for the observational distribution resulting from $\model \in \modeliv$ is to satisfy the IV inequalities \cite{Pearl95}, which in the context of Figure~\ref{fig:iv} is given by  

% \begin{equation}\label{eq:iv}
%     \max_{x} \sum_{y} \max_{z} P_{\model}\Paren{X=x,Y=y| Z=z} \leq 1. 
% \end{equation}

% % \st{This implies that a violation of the instrumental variable inequalities is evidence of unfairness.} \todo{Not necessarily, violations of the IV model class need not imply that there is an edge from $\sex \rightarrow \outcome$. Can we then not conclude anything from the IV inequality violation? }
% Since the IV inequalities are only necessary conditions, an arbitrary distribution on $X,Y,Z$ that satisfies them is not necessarily entailed by a model from the IV model class. Bonet~\cite{Bonet01} showed that when the instrument, treatment and effect are all binary, the IV inequalities are also sufficient. In Theorem~\ref{thm:iv_tight}, we show that when the instrument and outcome are binary and the treatment is discrete-valued with finite support, any distribution that satisfies the IV inequalities is also entailed by some causal model from the IV model class. To the best of our knowledge, Theorem~\ref{thm:iv_tight} is a novel result.

% % We define the set of conditional distributions that satisfy the instrumental-variable inequalities \eqref{eq:iv} as 
% % \begin{equation}\label{eq:distiv}
% %     \distiv \triangleq \left \lbrace P(X,Y|Z) : P(X,Y|Z)\text{ satisfies }\eqref{eq:iv} \right \rbrace.
% % \end{equation}

% % Next, we define the set of observational distributions of $\model \in \modeliv$. 

% % \begin{equation}\label{eq:dist_modeliv}
% % \distmodeliv \triangleq \left \lbrace P_{\model}(X,Y|Z) : \model \in \modeliv \right \rbrace.
% %\end{equation}


% \begin{theorem}\label{thm:iv_tight}
% Let $X,Y,Z$ be random variables defined on $\cX,\cY,\cZ$ respectively, with $|\cX| = n\geq 2, |\cY|=2, |\cZ| =2$ where $n \in \NN$. Let the set of conditional distributions that satisfy the instrumental-variable inequalities \eqref{eq:iv} be defined as $\distiv \triangleq \left \lbrace P(X,Y|Z) : P(X,Y|Z)\text{ satisfies }\eqref{eq:iv} \right \rbrace$. Define the set of conditional observational distributions of $\model \in \modeliv$ as $\distmodeliv \triangleq \left \lbrace P_{\model}(X,Y|Z) : \model \in \modeliv \right \rbrace.$ Then
% \begin{equation} \label{eq:ivsharp}
%     \distiv = \distmodeliv.
% \end{equation}
% \end{theorem}

% For the Berkeley admissions case, assuming for now that the true observational distribution over $\sex,\dept,\outcome$ is known, the observational distribution satisfying the IV inequalities implies that there exists a causal explanation (model) where the directed edge $\formsex \rightarrow \outcome$ is absent. On the other hand, the observational distribution violating the IV inequalities need not, in general, imply that the edge $\formsex \rightarrow \outcome$ is present, since the IV model class is only a subset of all the models that do not contain the edge $\formsex \rightarrow \outcome$ in the causal graph. For example, the existence of latent confounding between $\sex$ and $\outcome$ in a model $\model$ results in $\model_{\backslash \formsex} \notin \modeliv$, even though $\cg{\model}$ does not contain the directed edge $\formsex \rightarrow \outcome$. However, the causal modeling assumption that defined $\modelsedge$ rules out latent confounding between $\sex$ and $\outcome$. Therefore, given our modeling assumptions, we conclude that violating the IV inequalities implies that the edge $\formsex \rightarrow \outcome$ is present. In Section~\ref{subsec:bayesiantest} we introduce a Bayesian test for the IV inequalities.

% \subsection{Bounds on Non-Identifiable Notions of Fairness}\label{subsec:bounds}
% For $\model \in \modelsunconfedge$, the interventional notion of fairness is the CDE which is identifiable in our case. In this subsection, we focus on deriving bounds for the interventional and conditional notions of fairness for models in $\modelsedge$ defined in Section~\ref{sec:notions}. 

% We resort to a response-function parameterization of $\model \in \modelsedge$. For $\model \in \modelsedge$, the response-function parameterization yields a counterfactually equivalent\todo{Define in Preliminaries?} SCM, $\tilde{\model}$, represented by the tuple $(\enop,\tilde{\exrv},\tilde{\spc},\tilde{f},\tilde{P})$, where $ \enop = \left \lbrace \sex, \formsex, \dept, \outcome \right \rbrace, \tilde{\exrv} = \left \lbrace \response, U_{\sex}\right \rbrace, \tilde{\spc} =\spc_{\enop}\times\spc_{\tilde{\exrv}}, \tilde{f} = \left \lbrace \tilde{f}_{\sex}, \tilde{f}_{\formsex}, \tilde{f}_{\dept}, \tilde{f}_{\outcome} \right \rbrace$, and where we define $\spc_{\response}, \tilde{f},\tilde{P}$ as

% \begin{align*}
%     \spc_{\response} &\triangleq \spc_{\dept}^{\spc_{\sex}} \times \spc_{\outcome}^{\spc_{\formsex}\times \spc_{\dept}}, \\
%     \tilde{f}_{\sex}\Paren{U_{\sex}} &\triangleq f_{\sex}\Paren{U_{\sex}},\\ 
%     \tilde{f}_{\formsex}(\sex) &\triangleq  f_{\formsex}(\sex) = \sex,\\
%     \tilde{f}_{\dept}\Paren{\respfunc,\sex} &\triangleq \respfunc_1\Paren{\sex} = f_{\dept}\Paren{\sex,U,U_{\dept}}, \\
%     \tilde{f}_{\outcome}\Paren{\respfunc,\formsex,\dept} &\triangleq \respfunc_2\Paren{\formsex,\dept} = f_{\outcome}\Paren{\formsex,\dept,U,U_{\outcome}},
% \end{align*}
% where $\respfunc = \Paren{\respfunc_1,\respfunc_2}$.
% Note that $\spc_{\response}$ is a discrete space, $\response$ a discrete random variable, and $\tilde{P}$ a discrete distribution over $\spc_{\response}$. Under the response-function parameterization, only $\tilde{P}$ is a parameter. We now express the interventional queries $\Pr\Paren{\outcome|\doop{\formsex},\doop{\dept}}$ and $\Pr\Paren{\outcome|\dept, \sex, \doop{\formsex}}$ as linear functions of $\tilde{P}$.

% \begin{equation}\label{eq:inter_resp}
%         \Pr\Paren{\outcome=\loutcome\mid \doop{\formsex=\lsex'}, \doop{\dept=\ldept}} = \sum\limits_{\Paren{\respfunc_1,\respfunc_2}  \in \cX_{\response}}\bm{1}\Brack{\respfunc_2\Paren{\lsex',\ldept}=\loutcome}\tilde{P}\Paren{\respfunc_1,\respfunc_2}.
% %    \Pr\Paren{\outcome=\loutcome\mid \dept=\ldept, \sex = \lsex, \doop{\formsex=\lsex'}} &= \sum\limits_{\Paren{\respfunc_1,\respfunc_2}  \in \cX_{\response}}\bm{1}\Brack{\respfunc_2\Paren{\lsex',\ldept}=\loutcome, \respfunc_1\Paren{\lsex} = \ldept}\tilde{P}\Paren{\respfunc_1,\respfunc_2}.\label{eq:cond_resp}
% \end{equation}

% Likewise, the conditional distribution $P_{\model}\Paren{\outcome,\dept|\sex}$ can also be expressed as a linear function of $\tilde{P}$, 

% \begin{equation}\label{eq:obs_resp}
%     P_{\model}\Paren{\outcome=\loutcome,\dept=\ldept|\sex=\lsex} = \sum\limits_{\Paren{\respfunc_1,\respfunc_2}  \in \cX_{\response}}\bm{1}\Brack{\respfunc_2\Paren{\lsex,\ldept}=\loutcome, \respfunc_1\Paren{\lsex} = \ldept}\tilde{P}\Paren{\respfunc_1,\respfunc_2}.
% \end{equation}

% Similar to \cite{Balke95}, we set up a symbolic linear program that derives upper and lower bounds in terms of the observational distribution $P_{\model}\Paren{\outcome,\dept \mid \sex}$ by maximizing and minimizing \eqref{eq:inter_resp} subject to symbolic constraints obtained from \eqref{eq:obs_resp}. These bounds can be used to bound the difference between the interventional queries in \eqref{eq:interfair}. The code to obtain these bounds is in the supplementary material.

% For the conditional notion of fairness, we first rewrite $\Pr\Paren{\outcome|\dept, \sex, \doop{\formsex}} = \frac{\Pr\Paren{\outcome, \dept| \sex, \doop{\formsex}}}{\Pr\Paren{\dept| \sex, \doop{\formsex}}}$. Note that the denominator, $\Pr\Paren{\dept| \sex, \doop{\formsex}}$ is identifiable and equal to $\Pr\Paren{\dept| \sex}$. The numerator can be expressed in terms of a linear function of $\tilde{P}$, 

% \begin{equation}\label{eq:cond_resp}
%        \Pr\Paren{\outcome=\loutcome, \dept=\ldept \mid \sex = \lsex, \doop{\formsex=\lsex'}} = \sum\limits_{\Paren{\respfunc_1,\respfunc_2}  \in \cX_{\response}}\bm{1}\Brack{\respfunc_2\Paren{\lsex',\ldept}=\loutcome, \respfunc_1\Paren{\lsex} = \ldept}\tilde{P}\Paren{\respfunc_1,\respfunc_2}.
% \end{equation}

% With a similar linear program setup, we obtain bounds on the difference of the interventional queries in \eqref{eq:cndfair}. In the next section we see how the IV inequalities and the bounds obtained in this subsection can be turned into a statistical test.

% For the interventional notion of fairness we express the bounds in closed form below. Due to space constraints, for the conditional notion of fairness the bounds are in the supplementary material. The bounds on $\Pr\Paren{\outcome=\loutcome\mid \doop{\formsex=\lsex'}, \doop{\dept=\ldept}}$ are 

% \begin{equation}\label{eq:bounds_inter}
% \Pr\left(\outcome=1,\dept=\ldept|\sex=\lsex'\right)\leq \Pr\Paren{\outcome=1\mid \doop{\formsex=\lsex'}, \doop{\dept=\ldept}} \leq 1 - \Pr\left(\outcome=0,\dept=\ldept|\sex=\lsex'\right).
% \end{equation}

% Therefore, the difference $$F(\ldept) = \Pr\Paren{\outcome=1\mid \doop{\formsex=1}, \doop{\dept=\ldept}} - \Pr\Paren{\outcome=1\mid \doop{\formsex=0}, \doop{\dept=\ldept}}$$ lies in the interval

% \begin{align*}
%     &\left[ \Pr\left(\outcome=1,\dept=\ldept|\sex=1\right) + \Pr\left(\outcome=0,\dept=\ldept|\sex=0\right) - 1,\right.\\
%     & \left.1 - \Pr\left(\outcome=0,\dept=\ldept|\sex=1\right) - \Pr\left(\outcome=1,\dept=\ldept|\sex=0\right)\right].
% \end{align*}

% Note that for the interventional notion of fairness, $F(\ldept)$ must be $0$ for all $\ldept$. By the bounds obtained above, for $0$ to lie in the above interval, we must have, for all $\ldept$,

% $$\Pr\left(\outcome=1,\dept=\ldept|\sex=1\right) + \Pr\left(\outcome=0,\dept=\ldept|\sex=0\right) - 1 \leq 0,$$

% $$1 - \Pr\left(\outcome=0,\dept=\ldept|\sex=1\right) - \Pr\left(\outcome=1,\dept=\ldept|\sex=0\right) \geq 0.
% $$

% The above conditions, for all $\ldept$, are the same as the IV inequalities in \eqref{eq:iv}. As we shall see in the next subsection, this is not a coincidence.

% \subsection{A Family of Equivalent Tests}\label{subsec:equivalence}

% \todo{I'm not sure if this is the best way to present things... you spend 1.5 page on deriving bounds for the interventional H0. But then later on you see that you could have skipped that... Does the reader need to go through the same chronological order of understanding? Or is it better to take a different perspective (that would have saved us work if we would have had it initially)?}
% Different fairness notions that we introduced in Section~\ref{sec:notions} end up imposing identical constraints on the observed distribution.\todo{Is it clear they are actually different? Perhaps people would believe them to be equivalent!} Note that $\nullinter \supseteq \nullgraph$. Further, models in $\nullinter \backslash \nullgraph$ are those where the edge $\formsex \rightarrow \outcome$ exists in the causal graph and yet, the interventional queries in \eqref{eq:interfair} are equal. Therefore, models in $\nullinter \backslash \nullgraph$ violate certain faithfulness assumptions. Given that the null hypothesis of the interventional fairness notion is a strict superset of that of the graphical fairness notion, we might expect the same relation to hold in the resulting set of conditional observational distributions for these hypotheses, thus giving us different tests. We now show that, in fact, the corresponding sets of observational distributions resulting from models in $\nullinter$ and $\nullgraph$ are identical.

%While we express the null hypotheses of the tests for fairness in the space of causal models, the statistical tests ultimately only have access to observational data. 

% We represent $\model \in \modelsedge$ by the tuple $(\exip,\enop,\exrv,\spc,f,P)$, where $\exip = \left \lbrace \sex \right \rbrace, \enop = \left \lbrace \formsex, \dept, \outcome \right \rbrace, \exrv = \left \lbrace U, U_{\dept}, U_{\outcome} \right \rbrace, \spc = \spc_{\exip}\times\spc_{\enop}\times\spc_{\exrv}, f = \left \lbrace f_{\formsex}, f_{\dept}, f_{\outcome} \right \rbrace$, the individual functions are as defined in \eqref{eq:SCMfunctions}, and $P$ is an arbitrary product distribution on $\spc_{\exrv}$. Denote the null hypotheses of the graphical-notion-based test and the interventional-notion-based test as
% \begin{align*}
%     \nullgraph &\triangleq \left \lbrace \model \in \modelsedge : \formsex \notin \mathrm{Pa}_{\cg{\model}}(\outcome) \right \rbrace, \\
%     \nullinter &\triangleq \left \lbrace \model \in \modelsedge : \forall \ldept, \Pr\Paren{\outcome=1 \mid \doop{\formsex = m}, \doop{\dept = \ldept}} = \Pr\Paren{\outcome=1 \mid \doop{\formsex = f}, \doop{\dept = \ldept}} \right \rbrace.
% \end{align*}








