%\section{Proof of Equivalence}

\begin{proof}[Proof of Lemma~\ref{lem:mk_equivalence}]
% From the proof of Theorem~\ref{thm:iv_tight}, $\distiv = \distivpos \hspace{2pt} \dot{\cup} \hspace{2pt} \distivzero$ where $\distivpos$ and $\distivzero$ are as defined in \eqref{eq:distivpos} and \eqref{eq:distivzero}, respectively. Similarly, we can decompose $\distinter, \distctrf, \distgraph$ as $\distnotion = \distnotionpos \hspace{2pt} \dot{\cup} \hspace{2pt} \distnotionzero$ where 
% \begin{align}
% \distnotionpos &\triangleq \left \lbrace P_{\model}(D,A,S) : \model \in H^{0}_{\text{cf-notion}} \text{ and }\forall s, P_{\model}(S=s) > 0 \right \rbrace, \label{eq:distnotionpos} \\
% \distnotionzero &\triangleq  \left \lbrace P_{\model}(D,A,S) : \model \in H^{0}_{\text{cf-notion}} \text{ and } \exists s \text{ s.t. } P_{\model}(S=s) = 0 \right \rbrace,\label{eq:distnotionzero}
% \end{align}
% and we use ``notion'' as a placeholder for ``inter, ctrf, graph'' for clarity. For each of these notions, it is clear that if $P_{\model}(S=s) = 0$, for some $s$, then $\model \in H^{0}_{\text{cf-notion}}$ does not impose additional constraints on $P_{\model}\Paren{A,D \mid S=s'}$ where $s \neq s'$. Therefore, $\distivzero = \distnotionzero$ and it is sufficient to restrict attention to proving the equality of $\distnotioncnd$ and $\distivposcnd$ where the former is defined as 
% $$\distnotioncnd = \left \lbrace P_{\model}(D,A \mid S): \model \in H^{0}_{\text{cf-notion}} \right \rbrace.$$

%Again, like in the proof of Theorem~\ref{thm:iv_tight}, since $P(X,Y,Z) = P(Z) \otimes P(X,Y \mid Z)$ and $P(D,A,S) = P(S) \otimes P(D,A \mid S)$, 

For $\model \in \modelsedgerelax$, the response-function parameterization yields a counterfactually equivalent SCM $\tilde{\model}$, represented by the tuple $(\enop,\tilde{\exrv},\tilde{\spc},\tilde{f},\tilde{P})$, where $\enop = \left \lbrace \sex, \dept, \outcome \right \rbrace$, $\tilde{\exrv} = \left \lbrace \response, U_{\sex} \right \rbrace$, $\tilde{\spc} =\spc_{\enop}\times\spc_{\tilde{\exrv}}$, and $\tilde{f} = \Paren{\tilde{f}_{\sex}, \tilde{f}_{\dept}, \tilde{f}_{\outcome}}$. We define $\spc_{\response}$, $\tilde{f}$, and $\tilde{P}$ through the function $\Phi\colon \spc_{\exrv} \to \spc_{\tilde{\exrv}}$ as follows:
\begin{align*}
    \spc_{\response} &\triangleq \spc_{\dept}^{\spc_{\sex}} \times \spc_{\outcome}^{\spc_{\sex}\times \spc_{\dept}},\\
    \forall u_S,u_D,u_A,u, \Phi\Paren{u_S,u_D,u_A,u} &\triangleq \Paren{\Paren{s \mapsto f_D(s,u,u_D),(s,d) \mapsto f_A(s,d,u,u_A)},u_S},\\
    \forall u_{\sex}, \tilde{f}_{\sex}\Paren{u_{\sex}}&\triangleq f_{\sex}(u_{\sex}),\\
\forall \lsex, \tilde{f}_{\dept}\Paren{\respfunc,\lsex} &\triangleq \respfunc_1\Paren{\lsex}, \\
\forall \lsex, \ldept, \tilde{f}_{\outcome}\Paren{\respfunc,\lsex,\ldept} &\triangleq \respfunc_2\Paren{\lsex,\ldept},
\end{align*}
where $\respfunc = \Paren{\respfunc_1,\respfunc_2}$ and $\tilde{P}$ is the push-forward distribution $\Phi_{*}(P)$.
Note that $\spc_{\response}$ is a discrete space, $\response$ a discrete random variable, and $\tilde{P}(\response)$  a discrete distribution over $\spc_{\response}$. Under the response-function parameterization, only $\tilde{P}(\response)$ is a parameter and we will abuse notation and denote it as $\tilde{P}$ henceforth. Therefore, we can represent $\nullgraphrelax$ in the parameter space as 

% \begin{equation}\label{eq:respfunc_graph_edge}
%     \nullgraphresp 
%     \triangleq \left \lbrace \tilde{P} \in \triangle\Paren{\cX_{\response}} : \tilde{P}\Paren{\respfunc_1,\respfunc_2} = 0 \text{ where } \respfunc_2\Paren{.,.} \text{ is such that } \exists \ldept 
%     \text{ such that }\respfunc_2(m,\ldept) \neq \respfunc_2\Paren{f,\ldept} \right \rbrace.
% \end{equation}

\begin{equation}\label{eq:respfunc_graph_edge}
    \nullgraphresp 
    \triangleq \left \lbrace \tilde{P} \in \triangle\Paren{\cX_{\response}} : \tilde{P}\Paren{\respfunc_1,\respfunc_2} \neq 0 \text{ implies } \forall \ldept, \respfunc_2(0,\ldept) = \respfunc_2\Paren{1,\ldept} \right \rbrace.
\end{equation}

To express $\nullinterrelax$, we express the interventional Markov kernels $P_{\tilde{\model}}\Paren{\outcome\mid \doop{\sex}, \doop{\dept}}$ in terms of $\tilde{P}$. Since counterfactual equivalence implies interventional equivalence, for all $\lsex, \ldept$, $P_{\model}\Paren{\outcome=1\mid \doop{\sex=\lsex}, \doop{\dept=\ldept}} = P_{\tilde{\model}}\Paren{\outcome=1\mid \doop{\sex=\lsex}, \doop{\dept=\ldept}}$, where 
\begin{align}
    P_{\tilde{\model}}\Paren{\outcome=1\mid \doop{\sex=\lsex}, \doop{\dept=\ldept}} &= \sum\limits_{\Paren{\respfunc_1,\respfunc_2}  \in \cX_{\response}}\bm{1}\Brack{\respfunc_2\Paren{\lsex,\ldept}=1}\tilde{P}\Paren{\respfunc_1,\respfunc_2}, \label{eq:inter_resp}\\
    P_{\tilde{\model}}\Paren{\outcome=1\mid \doop{\dept=\ldept}} &= \sum\limits_{\lsex^*}\sum\limits_{\Paren{\respfunc_1,\respfunc_2}  \in \cX_{\response}}\bm{1}\Brack{\respfunc_2\Paren{\lsex^*,\ldept}=1}\tilde{P}\Paren{\respfunc_1,\respfunc_2}P_{\tilde{\model}}\Paren{\lsex^*} \label{eq:inter_resp_doD},
\end{align}
Subtracting \eqref{eq:inter_resp_doD} from \eqref{eq:inter_resp}, we get 
\begin{align}
    &P_{\tilde{\model}}\Paren{\outcome=1\mid \doop{\sex=\lsex}, \doop{\dept=\ldept}} - P_{\tilde{\model}}\Paren{\outcome=1\mid \doop{\dept=\ldept}} \nonumber \\
    & =\Paren{\sum\limits_{\Paren{\respfunc_1,\respfunc_2}  \in \cX_{\response}} \Paren{\bm{1}\Brack{\respfunc_2\Paren{0,\ldept}=1} - \bm{1}\Brack{\respfunc_2\Paren{1,\ldept}=1}}\tilde{P}\Paren{\respfunc_1,\respfunc_2}}P_{\tilde{\model}}\Paren{s'} = 0 \label{eq:inter_resp_s}
\end{align}
for $\model \in \nullinterrelax$, where $s' \neq s$. Similarly, 
\begin{align}
    &P_{\tilde{\model}}\Paren{\outcome=1\mid \doop{\sex=\lsex'}, \doop{\dept=\ldept}} - P_{\tilde{\model}}\Paren{\outcome=1\mid \doop{\dept=\ldept}} \nonumber\\
    & =\Paren{\sum\limits_{\Paren{\respfunc_1,\respfunc_2}  \in \cX_{\response}} \Paren{\bm{1}\Brack{\respfunc_2\Paren{0,\ldept}=1} - \bm{1}\Brack{\respfunc_2\Paren{1,\ldept}=1}}\tilde{P}\Paren{\respfunc_1,\respfunc_2}}P_{\tilde{\model}}\Paren{s} = 0 \label{eq:inter_resp_sp}.
\end{align}
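Since both \eqref{eq:inter_resp_s} and \eqref{eq:inter_resp_sp} hold, and at least one of $P_{\tilde{\model}}\Paren{s}$ and $P_{\tilde{\model}}\Paren{s'}$ is strictly positive, the common factor in parentheses must vanish for every $\ldept$. Hence,
 the response-function parameterized analogue of $\nullinterrelax$ is 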
\begin{equation}\label{eq:respfun_inter_edge}
    \nullinterresp \triangleq \left \lbrace \tilde{P} \in \triangle\Paren{\cX_{\response}} : \forall \ldept, \sum\limits_{\Paren{\respfunc_1,\respfunc_2}  \in \cX_{\response}} \Paren{\bm{1}\Brack{\respfunc_2\Paren{0,\ldept}=1} - \bm{1}\Brack{\respfunc_2\Paren{1,\ldept}=1}}\tilde{P}\Paren{\respfunc_1,\respfunc_2} = 0 \right \rbrace. 
\end{equation}

Note that both $\nullgraphresp$ and $\nullinterresp$ are polyhedra in $\triangle\Paren{\cX_{\response}}$. Further, $\nullgraphresp \subseteq \nullinterresp$. While $\nullgraphresp$ and $\nullinterresp$ are collections of distributions, we will also refer to them as collections of response-function-parameterized SCMs. 

%So far, we looked at the response-function parameterization for models in $\modelsedge$. However, the instrumental-variable inequalities arise from 
% While we have framed the hypotheses in terms of the exogenous distribution of the response-function parameterization, for a statistical test, we only have access to the observed Markov kernels $\Pr\Paren{\outcome,\dept,\formsex \mid \doop{\sex}}$. Therefore, we now characterize the sets of observed Markov kernels 
% It can be shown that the set of observed Markov kernels that are solutions of SCMs in $\nullgraph$ is the same as $\distiv$ where we define the former as 
From the interventional equivalence (which follows from counterfactual equivalence) of the response-function parameterization, we have 
\begin{align*}
    \mkgraph &= \left \lbrace P_{\tilde{\model}}\Paren{\dept,\outcome\mid \doop{\sex}} : \tilde{\model} \in \nullgraphresp \right \rbrace \\
    \mkinter &= \left \lbrace P_{\tilde{\model}}\Paren{\dept,\outcome\mid \doop{\sex}} : \tilde{\model} \in \nullinterresp \right \rbrace.
\end{align*}

% Therefore, $\distgraph = \distiv$. The set of observed Markov kernels that are solutions of SCMs in $\nullinter$ is given by 
% \begin{equation}\label{eq:distinter}
%     \distinter \triangleq \left \lbrace P_{\model}\Paren{\outcome,\dept,\formsex \mid \doop{\sex}} : \model \in \nullinter \right \rbrace =  
% \end{equation}

We now show that $\mkinter = \mkgraph = \mkiv$. First, notice that $\mkinter \supseteq \mkgraph$ since $\nullinterresp \supseteq \nullgraphresp$. It therefore suffices to show that $\mkinter \subseteq \mkiv$ and that $\mkgraph = \mkiv$. 

\bm{$\mkinter \subseteq \mkiv$}: 
The solution function of the response-function parameterized SCM, $g_{A,D}\colon \cX_{\sex} \times \cX_{\response} \to \cX_{\outcome} \times \cX_{\dept}$, induces a mapping from $\triangle\Paren{\cX_{\response}}$, which can be considered a subset of $\RR^{\# \cX_{\response}}$, to the set of Markov kernels $P_{\tilde{\model}}\Paren{\dept,\outcome \mid \doop{\sex}}$, which can be considered a subset of $\RR^{\#\Paren{\cX_{\outcome}}\times \#\Paren{\cX_{\dept}}\times \#\Paren{\cX_{\sex}}}$.
% We denote this map by $G: \RR^{\#\Paren{\cX_{\response}}} \mapsto \RR^{\#\Paren{\cX_{\outcome}}\times \#\Paren{\cX_{\dept}}\times \#\Paren{\cX_{\sex}}} $. 
% \begin{align*}
% g_{A,D}(\lsex,\respfunc) &= \Paren{\respfunc_2\Paren{\lsex,\respfunc_1\Paren{\lsex}},\respfunc_1\Paren{\lsex}}
% \end{align*}
% G\Paren{e_{\respfunc}} &= \sum\limits_{\lsex} e_{g_{A,D}\Paren{\lsex,\respfunc}}.
% First, note that for all $\tilde{\model} \in \nullinterresp$, $P_{\tilde{\model}}\Paren{\outcome,\dept,\formsex \mid \doop{\sex}} = P_{\tilde{\model}}\Paren{\outcome,\dept\mid \sex} \times\delta_{\sex}\Paren{\formsex}$. Therefore, we only restrict attention to $P_{\tilde{\model}}\Paren{\outcome,\dept\mid \sex = \formsex}$. 
The condition in \eqref{eq:respfun_inter_edge} implies that for all $\ldept$,
\begin{equation}\label{eq:constraint_outcome_one}
    \sum\limits_{\respfunc: \respfunc_2\Paren{0,\ldept}=1} \tilde{P}(\respfunc) = \sum\limits_{\respfunc: \respfunc_2\Paren{1,\ldept}=1} \tilde{P}(\respfunc). 
\end{equation}
Since $\sum\limits_{\respfunc} \tilde{P}\Paren{\respfunc} = 1$, taking complements in \eqref{eq:constraint_outcome_one} gives
\begin{equation}\label{eq:constraint_outcome_zero}
    \sum\limits_{\respfunc: \respfunc_2\Paren{0,\ldept}=0} \tilde{P}(\respfunc) = \sum\limits_{\respfunc: \respfunc_2\Paren{1,\ldept}=0} \tilde{P}(\respfunc). 
\end{equation}
Denote $P_{\tilde{\model}}\Paren{\dept = \ldept, \outcome = \loutcome \mid \doop{\sex = \lsex}} $ by $P_{\tilde{\model}}\Paren{a,d || s}$, with the outcome value listed first. For  $P_{\tilde{\model}}\Paren{a,d || s} \in \mkinter$,
\begin{equation*}
    P_{\tilde{\model}}\Paren{a,d || s} = \sum\limits_{\respfunc: \respfunc_1\Paren{\lsex}=\ldept, \respfunc_2\Paren{\lsex,\ldept} = \loutcome
    } \tilde{P}(\respfunc). 
\end{equation*}
Therefore, from \eqref{eq:constraint_outcome_one}, 
\begin{align}
    \sum\limits_{\respfunc: \respfunc_2\Paren{0,\ldept}=1} \tilde{P}(\respfunc) &= P_{\tilde{\model}}(1,\ldept || 0) + \sum\limits_{\respfunc: \respfunc_1(0) \neq \ldept, \respfunc_2\Paren{0,\ldept}=1} \tilde{P}(\respfunc) \label{eq:1d0}\\
    &= \sum\limits_{\respfunc: \respfunc_2\Paren{1,\ldept}=1} \tilde{P}(\respfunc) \nonumber \\
    &= P_{\tilde{\model}}(1,\ldept || 1) + \sum\limits_{\respfunc: \respfunc_1(1) \neq \ldept, \respfunc_2\Paren{1,\ldept}=1} \tilde{P}(\respfunc) \label{eq:1d1}.
\end{align}
From \eqref{eq:constraint_outcome_zero}, 
\begin{align}
    \sum\limits_{\respfunc: \respfunc_2\Paren{0,\ldept}=0} \tilde{P}(\respfunc) &= P_{\tilde{\model}}(0,\ldept || 0) + \sum\limits_{\respfunc: \respfunc_1(0) \neq \ldept, \respfunc_2\Paren{0,\ldept}=0} \tilde{P}(\respfunc) \label{eq:0d0} \\
    &= \sum\limits_{\respfunc: \respfunc_2\Paren{1,\ldept}=0} \tilde{P}(\respfunc) \nonumber \\
    &= P_{\tilde{\model}}(0,\ldept || 1) + \sum\limits_{\respfunc: \respfunc_1(1) \neq \ldept, \respfunc_2\Paren{1,\ldept}=0} \tilde{P}(\respfunc) \label{eq:0d1}.
\end{align}
Moreover, using \eqref{eq:constraint_outcome_one} for the second equality,
\begin{equation*}
    \sum\limits_{\respfunc} \tilde{P}\Paren{\respfunc} = \sum\limits_{\respfunc: \respfunc_2\Paren{0,\ldept}=0} \tilde{P}(\respfunc) + \sum\limits_{\respfunc: \respfunc_2\Paren{0,\ldept}=1} \tilde{P}(\respfunc) = \sum\limits_{\respfunc: \respfunc_2\Paren{0,\ldept}=0} \tilde{P}(\respfunc) + \sum\limits_{\respfunc: \respfunc_2\Paren{1,\ldept}=1} \tilde{P}(\respfunc) = 1.
\end{equation*}
Substituting from \eqref{eq:0d0} and \eqref{eq:1d1}, 
\begin{equation*}
    P_{\tilde{\model}}(0,\ldept || 0) + \sum\limits_{\respfunc: \respfunc_1(0) \neq \ldept, \respfunc_2\Paren{0,\ldept}=0} \tilde{P}(\respfunc) + P_{\tilde{\model}}(1,\ldept || 1) + \sum\limits_{\respfunc: \respfunc_1(1) \neq \ldept, \respfunc_2\Paren{1,\ldept}=1} \tilde{P}(\respfunc) =1. 
\end{equation*}
Similarly, substituting from \eqref{eq:0d1} and \eqref{eq:1d0}, 
\begin{equation*}
    P_{\tilde{\model}}(0,\ldept || 1) + \sum\limits_{\respfunc: \respfunc_1(1) \neq \ldept, \respfunc_2\Paren{1,\ldept}=0} \tilde{P}(\respfunc)+ P_{\tilde{\model}}(1,\ldept || 0) + \sum\limits_{\respfunc: \respfunc_1(0) \neq \ldept, \respfunc_2\Paren{0,\ldept}=1} \tilde{P}(\respfunc)=1. 
\end{equation*}
 Since the remaining sums over $\tilde{P}$ are nonnegative, this implies $P_{\tilde{\model}}(0,\ldept || 0) + P_{\tilde{\model}}(1,\ldept || 1) \leq 1$ and $P_{\tilde{\model}}(0,\ldept || 1) + P_{\tilde{\model}}(1,\ldept || 0) \leq 1$. These are precisely the IV inequalities, so they are satisfied. Therefore, $\mkinter \subseteq \mkiv$.

 \bm{$\mkgraph = \mkiv$} follows from Theorem~\ref{thm:iv_tight} since $\model \in \nullgraphrelax$ implies $\model \in \modelivrelax$. By Proposition~\ref{prop:cfnotions}, the lemma follows. 
 %We first show that $\distgraph$ is the same as the set of observed Markov kernels that are solutions of SCMs in $\modelsnoedge$, i.e.,

%  \begin{equation}
%      \distgraphnoedge \triangleq \left \lbrace P_{\model}\Paren{\outcome,\dept \mid \sex} : \model \in \modelsnoedge \right \rbrace = \distgraph.
%  \end{equation}
 
%  We then show that $\distgraphnoedge = \distiv$. 
 
% For $\model \in \modelsnoedge$, the response-function parameterization yields a counterfactually equivalent SCM, $\tilde{\model^*}$ represented by the tuple $(\exip,\enop,\tilde{\exrv}^*,\tilde{\spc}^*,\tilde{f}^*,\tilde{P}^*)$, where $\exip = \left \lbrace \sex \right \rbrace, \enop = \left \lbrace \formsex, \dept, \outcome \right \rbrace, \tilde{\exrv}^* = \left \lbrace \response^*\right \rbrace, \tilde{\spc} = \spc_{\exip}\times\spc_{\enop}\times\spc_{\tilde{\exrv}^*}, \tilde{f}^* = \left \lbrace \tilde{f}_{\formsex}^*, \tilde{f}_{\dept}^*, \tilde{f}_{\outcome}^* \right \rbrace$ where we define $\spc_{\response}^*, \tilde{f}^*,\tilde{P}^*$ as

% \begin{align*}
%     \spc_{\response^*} &\triangleq \spc_{\dept}^{\spc_{\sex}} \times \spc_{\outcome}^{\spc_{\dept}}, \\
%     \tilde{f}^*_{\formsex}(\sex) &\triangleq  f_{\formsex}(\sex) = \sex,\\
%     \tilde{f}^*_{\dept}\Paren{\respfunc^*,\sex} &\triangleq \respfunc_1^*\Paren{\sex} = f_{\dept}\Paren{\sex,U,U_{\dept}} = \respfunc_1(\sex), \\
%     \tilde{f}^*_{\outcome}\Paren{\respfunc^*,\dept} &\triangleq \respfunc_2^*\Paren{\dept} = f_{\outcome}\Paren{\dept,U,U_{\outcome}},
% \end{align*}
% where $\respfunc^* = \Paren{\respfunc_1^*,\respfunc_2^*}$.
% Note that $\spc_{\response^*}$ is a discrete space, $\response^*$ a discrete random variable, and $\tilde{P}^*$  a discrete distribution over $\spc_{\response^*}$. Under the response-function parameterization, only $\tilde{P}$ is a parameter. Therefore, we can represent $\modelsnoedge$ in the parameter space, $\modelsnoedgeresp \in \triangle\Paren{\spc_{\response^*}}$. By observational equivalence of the response-function parameterization,

% \begin{equation}
%     \distgraphnoedge = \left \lbrace P_{\tilde{\model}^*}\Paren{\outcome,\dept,\formsex \mid \doop{\sex}} : \tilde{\model}^* \in \modelsnoedgeresp \right \rbrace.
% \end{equation}

% Consider the set $D = \left \lbrace \respfunc \in \cX_{\response} \text{ such that } \exists \ldept \text{ where } \respfunc_2(0,\ldept) \neq \respfunc_2\Paren{1,\ldept} \right \rbrace.$ For any $\tilde{P} \in \nullgraphresp$, $\tilde{P}\Paren{D}=0$. Therefore, the function $h: \nullgraphresp \mapsto \modelsnoedgeresp$ such that $$h(\tilde{P})(\respfunc_1^*(\lsex), \respfunc_2^*(\ldept)) = \tilde{P}\Paren{\respfunc_1(\lsex),\respfunc_2(0,\ldept)=\respfunc_2(1,\ldept)}$$
% is well-defined and bijective since for any $\tilde{P}^* \in \modelsnoedgeresp$, $$h^{-1}(\tilde{P}^*)\Paren{\respfunc_1(\lsex),\respfunc_2(0,\ldept),\respfunc_2(1,\ldept)} = \bm{1}\left[ \respfunc_2(0,\ldept) = \respfunc_2(1,\ldept)\right] \tilde{P}^*\Paren{\respfunc_1(\lsex),\respfunc_2(0,d)}. $$ 

% The solution function of the response-function parameterized SCM $\tilde{\model^*}$ denoted by $g^*: \cX_{\sex} \times \cX_{\response^*} \mapsto \cX_{\outcome} \times \cX_{\dept} \times \cX_{\formsex}$ induces a mapping from $\triangle\Paren{\cX_{\response^*}}$ which can be considered as a subset of $\RR^{\# \cX_{\response^*}}$ to the set of conditional distributions $\Pr\Paren{\outcome,\dept,\formsex \mid \sex}$ which  can be considered to be a subset of $\RR^{\#\Paren{\cX_{\outcome}}\times \#\Paren{\cX_{\dept}}\times \#\Paren{\cX_{\formsex}}}$. We denote this map by $G^*: \RR^{\#\Paren{\cX_{\response^*}}} \mapsto \RR^{\#\Paren{\cX_{\outcome}}\times \#\Paren{\cX_{\dept}}\times \#\Paren{\cX_{\formsex}}} $ where 

% \begin{align*}
% g^*(\lsex,\respfunc^*) &= \Paren{\respfunc_2^*\Paren{\respfunc_1^*\Paren{\lsex}},\respfunc_1^*\Paren{\lsex},\lsex} =\Paren{\respfunc_2^*\Paren{\respfunc_1\Paren{\lsex}},\respfunc_1\Paren{\lsex},\lsex} , \\
% G^*\Paren{e_{\respfunc^*}} &= \sum\limits_{\lsex} e_{g^*\Paren{\lsex,\respfunc^*}}. 
% \end{align*}
% For $\respfunc \notin D$, note that $g(\lsex,\respfunc) = g^*(\lsex,(\respfunc_1,\respfunc_2'))$ where $\respfunc_2'(\ldept) = \respfunc_2(0,\ldept) = \respfunc_2(1,\ldept)$. Further, for all $\respfunc^* \in \cX_{\response^*}, g^*(\lsex,\respfunc^*) = g(\lsex,\respfunc')$ where $\respfunc' \notin D$ and is defined as $(\respfunc_1' = \respfunc_1^*, \respfunc_2'(0,\ldept) = \respfunc_2'(1,\ldept) =\respfunc_2^*(\ldept))$ Therefore, for any $\tilde{P} \in \nullgraphresp$, since $\tilde{P}(D) =0, G(\tilde{P}) = G^*(h(\tilde{P}))$ thus implying that $\distgraphnoedge = \distgraph$. 

% \begin{lemma}
% \begin{equation}
%     \distgraphnoedge = \distiv. 
% \end{equation}
% \end{lemma}

%\todo{Add proof from lecture notes.}

% We prove a bijection between $\modelsnoedgeresp$ and $\nullgraphresp$. For $\tilde{P} \in \nullgraphresp$, 


%  \begin{equation}\label{eq:respfunc_graph_noedge}
%     \modelsnoedgeresp
%     \triangleq \left \lbrace \tilde{P} \in \triangle\Paren{\cX_{\response}} : \tilde{P}\Paren{\respfunc_1,\respfunc_2} \neq 0 \text{ implies } \forall \ldept, \respfunc_2(0,\ldept) = \respfunc_2\Paren{1,\ldept} \right \rbrace.
% \end{equation}

\end{proof}




% For $\model \in \modelsedge$, the response-function parameterization yields a counterfactually equivalent \todo{Define in Preliminaries?}SCM, $\tilde{\model}$ represented by the tuple $(\exip,\enop,\tilde{\exrv},\tilde{\spc},\tilde{f},\tilde{P})$, where $\exip = \left \lbrace \sex \right \rbrace, \enop = \left \lbrace \formsex, \dept, \outcome \right \rbrace, \tilde{\exrv} = \left \lbrace \response\right \rbrace, \tilde{\spc} = \spc_{\exip}\times\spc_{\enop}\times\spc_{\tilde{\exrv}}, \tilde{f} = \left \lbrace \tilde{f}_{\formsex}, \tilde{f}_{\dept}, \tilde{f}_{\outcome} \right \rbrace$ where we define $\spc_{\response}, \tilde{f},\tilde{P}$ as

% \begin{align*}
%     \spc_{\response} &\triangleq \spc_{\dept}^{\spc_{\sex}} \times \spc_{\outcome}^{\spc_{\formsex}\times \spc_{\dept}}, \\
%     \tilde{f}_{\formsex}(\sex) &\triangleq  f_{\formsex}(\sex) = \sex,\\
%     \tilde{f}_{\dept}\Paren{\respfunc,\sex} &\triangleq \respfunc_1\Paren{\sex} = f_{\dept}\Paren{\sex,U,U_{\dept}}, \\
%     \tilde{f}_{\outcome}\Paren{\respfunc,\formsex,\dept} &\triangleq \respfunc_2\Paren{\formsex,\dept} = f_{\outcome}\Paren{\formsex,\dept,U,U_{\outcome}},
% \end{align*}
% where $\respfunc = \Paren{\respfunc_1,\respfunc_2}$.
% Note that $\spc_{\response}$ is a discrete space, $\response$ a discrete random variable, and $\tilde{P}$  a discrete distribution over $\spc_{\response}$. Under the response-function parameterization, only $\tilde{P}$ is a parameter. Therefore, we can represent $\nullgraph$ in the parameter space, $\nullgraphresp$, defined as \todo{Might need to add definition of parent in a casual graph, in preliminaries?} 

% % \begin{equation}\label{eq:respfunc_graph_edge}
% %     \nullgraphresp 
% %     \triangleq \left \lbrace \tilde{P} \in \triangle\Paren{\cX_{\response}} : \tilde{P}\Paren{\respfunc_1,\respfunc_2} = 0 \text{ where } \respfunc_2\Paren{.,.} \text{ is such that } \exists \ldept 
% %     \text{ such that }\respfunc_2(m,\ldept) \neq \respfunc_2\Paren{f,\ldept} \right \rbrace.
% % \end{equation}

% \begin{equation}\label{eq:respfunc_graph_edge}
%     \nullgraphresp 
%     \triangleq \left \lbrace \tilde{P} \in \triangle\Paren{\cX_{\response}} : \tilde{P}\Paren{\respfunc_1,\respfunc_2} \neq 0 \text{ implies } \forall \ldept, \respfunc_2(0,\ldept) = \respfunc_2\Paren{1,\ldept} \right \rbrace.
% \end{equation}

% To express $\nullinter$, we express the interventional Markov kernels $\Pr\Paren{\outcome\mid \doop{\formsex}, \doop{\dept}}$ in terms of $\tilde{P}$, 
% \begin{equation}\label{eq:inter_resp}
%     \Pr\Paren{\outcome=1\mid \doop{\formsex=\lsex'}, \doop{\dept=\ldept}} = \sum\limits_{\Paren{\respfunc_1,\respfunc_2}  \in \cX_{\response}}\bm{1}\Brack{\respfunc_2\Paren{\lsex',\ldept}=1}\tilde{P}\Paren{\respfunc_1,\respfunc_2}.
% \end{equation}

% Therefore the response-function parameterized analogue of $\nullinter$ is 
% \begin{equation}\label{eq:respfun_inter_edge}
%     \nullinterresp \triangleq \left \lbrace \tilde{P} \in \triangle\Paren{\cX_{\response}} : \forall \ldept, \sum\limits_{\Paren{\respfunc_1,\respfunc_2}  \in \cX_{\response}} \Paren{\bm{1}\Brack{\respfunc_2\Paren{0,\ldept}=1} - \bm{1}\Brack{\respfunc_2\Paren{1,\ldept}=1}}\tilde{P}\Paren{\respfunc_1,\respfunc_2} = 0 \right \rbrace. 
% \end{equation}

% Note that both $\nullgraphresp$ and $\nullinterresp$ are polyhedra in $\triangle\Paren{\cX_{\response}}$. Further, $\nullgraphresp \subseteq \nullinterresp$. While, $\nullgraphresp, \nullinterresp$ are collections of distributions, we will abuse notation and also refer to them as collection of response-function-parameterized SCMs. 

% %So far, we looked at the response-function parameterization for models in $\modelsedge$. However, the instrumental-variable inequalities arise from 

% % While we have framed the hypotheses in terms of the exogenous distribution of the response-function parameterization, for a statistical test, we only have access to the observed Markov kernels $\Pr\Paren{\outcome,\dept,\formsex \mid \doop{\sex}}$. Therefore, we now characterize the sets of observed Markov kernels 


% % It can be shown that the set of observed Markov kernels that are solutions of SCMs in $\nullgraph$ is the same as $\distiv$ where we define the former as 
% From the observational equivalence of the response-function-parameterization, we have \todo{Observational equivalence also in preliminaries.}
% \begin{align*}
%     \distgraph &= \left \lbrace P_{\tilde{\model}}\Paren{\outcome,\dept,\formsex \mid \doop{\sex}} : \tilde{\model} \in \nullgraphresp \right \rbrace \\
%     \distinter &= \left \lbrace P_{\tilde{\model}}\Paren{\outcome,\dept,\formsex \mid \doop{\sex}} : \tilde{\model} \in \nullinterresp \right \rbrace.
% \end{align*}

% % Therefore, $\distgraph = \distiv$. The set of observed Markov kernels that are solutions of SCMs in $\nullinter$ is given by 
% % \begin{equation}\label{eq:distinter}
% %     \distinter \triangleq \left \lbrace P_{\model}\Paren{\outcome,\dept,\formsex \mid \doop{\sex}} : \model \in \nullinter \right \rbrace =  
% % \end{equation}

% We now show that $\distinter = \distgraph = \distiv$. First, notice that $\distinter \supseteq \distgraph$ since $\nullinterresp \supseteq \nullgraphresp$. We first show that $\distinter \subseteq \distiv$ and then $\distgraph = \distiv$ which concludes the argument. 

% \bm{$\distinter \subseteq \distiv$}: 
% The solution function of the response-function parameterized SCM, $g: \cX_{\sex} \times \cX_{\response} \mapsto \cX_{\outcome} \times \cX_{\dept} \times \cX_{\formsex}$ induces a mapping from $\triangle\Paren{\cX_{\response}}$ which can be considered as a subset of $\RR^{\# \cX_{\response}}$ to the set of conditional distributions $\Pr\Paren{\outcome,\dept,\formsex \mid \sex}$ which  can be considered to be a subset of $\RR^{\#\Paren{\cX_{\outcome}}\times \#\Paren{\cX_{\dept}}\times \#\Paren{\cX_{\formsex}}}$. We denote this map by $G: \RR^{\#\Paren{\cX_{\response}}} \mapsto \RR^{\#\Paren{\cX_{\outcome}}\times \#\Paren{\cX_{\dept}}\times \#\Paren{\cX_{\formsex}}} $ where 

% \begin{align*}
% g(\lsex,\respfunc) &= \Paren{\respfunc_2\Paren{\lsex,\respfunc_1\Paren{\lsex}},\respfunc_1\Paren{\lsex},\lsex}, \\
% G\Paren{e_{\respfunc}} &= \sum\limits_{\lsex} e_{g\Paren{\lsex,\respfunc}}. 
% \end{align*}

% First, note that for all $\tilde{\model} \in \nullinterresp$, $P_{\tilde{\model}}\Paren{\outcome,\dept,\formsex \mid \doop{\sex}} = P_{\tilde{\model}}\Paren{\outcome,\dept\mid \sex} \times\delta_{\sex}\Paren{\formsex}$. Therefore, we only restrict attention to $P_{\tilde{\model}}\Paren{\outcome,\dept\mid \sex = \formsex}$. The condition in \eqref{eq:distinter} implies that for all $\ldept$,
% \begin{equation}\label{eq:constraint_outcome_one}
%     \sum\limits_{\respfunc: \respfunc_2\Paren{0,\ldept}=1} \tilde{P}(\respfunc) = \sum\limits_{\respfunc: \respfunc_2\Paren{1,\ldept}=1} \tilde{P}(\respfunc). 
% \end{equation}

% Since, $\sum\limits_{\respfunc} \tilde{P}\Paren{\respfunc} = 1$, 

% \begin{equation}\label{eq:constraint_outcome_zero}
%     \sum\limits_{\respfunc: \respfunc_2\Paren{0,\ldept}=0} \tilde{P}(\respfunc) = \sum\limits_{\respfunc: \respfunc_2\Paren{1,\ldept}=0} \tilde{P}(\respfunc). 
% \end{equation}

% Expressing the marginal over $\outcome, \dept$ 
%  of $P \in \distinter$,
% \begin{equation*}
%     p\Paren{\loutcome,\ldept \mid \lsex} = \sum\limits_{\respfunc: \respfunc_1\Paren{\lsex}=\ldept, \respfunc_2\Paren{\lsex,\ldept} = \loutcome
%     } \tilde{P}(\respfunc). 
% \end{equation*}

% Therefore, from \eqref{eq:constraint_outcome_one} and \eqref{eq:constraint_outcome_zero}, 
% \begin{align*}
%     \sum\limits_{\respfunc: \respfunc_2\Paren{0,\ldept}=1} \tilde{P}(\respfunc) &= p(1,\ldept \mid 0) + \sum\limits_{\respfunc: \respfunc_1(0) \neq \ldept, \respfunc_2\Paren{0,\ldept}=1} \tilde{P}(\respfunc) = \sum\limits_{\respfunc: \respfunc_2\Paren{1,\ldept}=1} \tilde{P}(\respfunc) = p(1,\ldept \mid 1) + \sum\limits_{\respfunc: \respfunc_1(1) \neq \ldept, \respfunc_2\Paren{1,\ldept}=1} \tilde{P}(\respfunc)\\
%     \sum\limits_{\respfunc: \respfunc_2\Paren{0,\ldept}=0} \tilde{P}(\respfunc) &= p(0,\ldept \mid 0) + \sum\limits_{\respfunc: \respfunc_1(0) \neq \ldept, \respfunc_2\Paren{0,\ldept}=0} \tilde{P}(\respfunc) = \sum\limits_{\respfunc: \respfunc_2\Paren{1,\ldept}=0} \tilde{P}(\respfunc) = p(0,\ldept \mid 1) + \sum\limits_{\respfunc: \respfunc_1(1) \neq \ldept, \respfunc_2\Paren{1,\ldept}=0} \tilde{P}(\respfunc)
% \end{align*}

% Since
% \begin{equation*}
%     \sum\limits_{\respfunc} \tilde{P}\Paren{\respfunc} = \sum\limits_{\respfunc: \respfunc_2\Paren{0,\ldept}=0} \tilde{P}(\respfunc) + \sum\limits_{\respfunc: \respfunc_2\Paren{0,\ldept}=1} \tilde{P}(\respfunc) = \sum\limits_{\respfunc: \respfunc_2\Paren{0,\ldept}=0} \tilde{P}(\respfunc) + \sum\limits_{\respfunc: \respfunc_2\Paren{1,\ldept}=1} \tilde{P}(\respfunc) = 1, 
% \end{equation*}
% we have 
% \begin{equation*}
%     p(0,\ldept \mid 0) + \sum\limits_{\respfunc: \respfunc_1(0) \neq \ldept, \respfunc_2\Paren{0,\ldept}=0} \tilde{P}(\respfunc) + p(1,\ldept \mid 1) + \sum\limits_{\respfunc: \respfunc_1(1) \neq \ldept, \respfunc_2\Paren{1,\ldept}=1} \tilde{P}(\respfunc) =1. 
% \end{equation*}

% Similarly, 
% \begin{equation*}
%     p(0,\ldept \mid 1) + \sum\limits_{\respfunc: \respfunc_1(1) \neq \ldept, \respfunc_2\Paren{1,\ldept}=0} \tilde{P}(\respfunc)+ p(1,\ldept \mid 0) + \sum\limits_{\respfunc: \respfunc_1(0) \neq \ldept, \respfunc_2\Paren{0,\ldept}=1} \tilde{P}(\respfunc)=1. 
% \end{equation*}
%  Since the IV inequalities are satisfied, $\distinter \subseteq \distiv$.

%  \bm{$\distgraph = \distiv$}: We first show that $\distgraph$ is the same as the set of observed Markov kernels that are solutions of SCMs in $\modelsnoedge$, i.e.,

%  \begin{equation}
%      \distgraphnoedge \triangleq \left \lbrace P_{\model}\Paren{\outcome,\dept,\formsex \mid \doop{\sex}} : \model \in \modelsnoedge \right \rbrace = \distgraph.
%  \end{equation}
 
%  We then show that $\distgraphnoedge = \distiv$. 
 
% For $\model \in \modelsnoedge$, the response-function parameterization yields a counterfactually equivalent SCM, $\tilde{\model^*}$ represented by the tuple $(\exip,\enop,\tilde{\exrv}^*,\tilde{\spc}^*,\tilde{f}^*,\tilde{P}^*)$, where $\exip = \left \lbrace \sex \right \rbrace, \enop = \left \lbrace \formsex, \dept, \outcome \right \rbrace, \tilde{\exrv}^* = \left \lbrace \response^*\right \rbrace, \tilde{\spc} = \spc_{\exip}\times\spc_{\enop}\times\spc_{\tilde{\exrv}^*}, \tilde{f}^* = \left \lbrace \tilde{f}_{\formsex}^*, \tilde{f}_{\dept}^*, \tilde{f}_{\outcome}^* \right \rbrace$ where we define $\spc_{\response}^*, \tilde{f}^*,\tilde{P}^*$ as

% \begin{align*}
%     \spc_{\response^*} &\triangleq \spc_{\dept}^{\spc_{\sex}} \times \spc_{\outcome}^{\spc_{\dept}}, \\
%     \tilde{f}^*_{\formsex}(\sex) &\triangleq  f_{\formsex}(\sex) = \sex,\\
%     \tilde{f}^*_{\dept}\Paren{\respfunc^*,\sex} &\triangleq \respfunc_1^*\Paren{\sex} = f_{\dept}\Paren{\sex,U,U_{\dept}} = \respfunc_1(\sex), \\
%     \tilde{f}^*_{\outcome}\Paren{\respfunc^*,\dept} &\triangleq \respfunc_2^*\Paren{\dept} = f_{\outcome}\Paren{\dept,U,U_{\outcome}},
% \end{align*}
% where $\respfunc^* = \Paren{\respfunc_1^*,\respfunc_2^*}$.
% Note that $\spc_{\response^*}$ is a discrete space, $\response^*$ a discrete random variable, and $\tilde{P}^*$  a discrete distribution over $\spc_{\response^*}$. Under the response-function parameterization, only $\tilde{P}$ is a parameter. Therefore, we can represent $\modelsnoedge$ in the parameter space, $\modelsnoedgeresp \in \triangle\Paren{\spc_{\response^*}}$. By observational equivalence of the response-function parameterization,

% \begin{equation}
%     \distgraphnoedge = \left \lbrace P_{\tilde{\model}^*}\Paren{\outcome,\dept,\formsex \mid \doop{\sex}} : \tilde{\model}^* \in \modelsnoedgeresp \right \rbrace.
% \end{equation}

% Consider the set $D = \left \lbrace \respfunc \in \cX_{\response} \text{ such that } \exists \ldept \text{ where } \respfunc_2(0,\ldept) \neq \respfunc_2\Paren{1,\ldept} \right \rbrace.$ For any $\tilde{P} \in \nullgraphresp$, $\tilde{P}\Paren{D}=0$. Therefore, the function $h: \nullgraphresp \mapsto \modelsnoedgeresp$ such that $$h(\tilde{P})(\respfunc_1^*(\lsex), \respfunc_2^*(\ldept)) = \tilde{P}\Paren{\respfunc_1(\lsex),\respfunc_2(0,\ldept)=\respfunc_2(1,\ldept)}$$
% is well-defined and bijective since for any $\tilde{P}^* \in \modelsnoedgeresp$, $$h^{-1}(\tilde{P}^*)\Paren{\respfunc_1(\lsex),\respfunc_2(0,\ldept),\respfunc_2(1,\ldept)} = \bm{1}\left[ \respfunc_2(0,\ldept) = \respfunc_2(1,\ldept)\right] \tilde{P}^*\Paren{\respfunc_1(\lsex),\respfunc_2(0,d)}. $$ 

% The solution function of the response-function parameterized SCM $\tilde{\model^*}$ denoted by $g^*: \cX_{\sex} \times \cX_{\response^*} \mapsto \cX_{\outcome} \times \cX_{\dept} \times \cX_{\formsex}$ induces a mapping from $\triangle\Paren{\cX_{\response^*}}$ which can be considered as a subset of $\RR^{\# \cX_{\response^*}}$ to the set of conditional distributions $\Pr\Paren{\outcome,\dept,\formsex \mid \sex}$ which  can be considered to be a subset of $\RR^{\#\Paren{\cX_{\outcome}}\times \#\Paren{\cX_{\dept}}\times \#\Paren{\cX_{\formsex}}}$. We denote this map by $G^*: \RR^{\#\Paren{\cX_{\response^*}}} \mapsto \RR^{\#\Paren{\cX_{\outcome}}\times \#\Paren{\cX_{\dept}}\times \#\Paren{\cX_{\formsex}}} $ where 

% \begin{align*}
% g^*(\lsex,\respfunc^*) &= \Paren{\respfunc_2^*\Paren{\respfunc_1^*\Paren{\lsex}},\respfunc_1^*\Paren{\lsex},\lsex} =\Paren{\respfunc_2^*\Paren{\respfunc_1\Paren{\lsex}},\respfunc_1\Paren{\lsex},\lsex} , \\
% G^*\Paren{e_{\respfunc^*}} &= \sum\limits_{\lsex} e_{g^*\Paren{\lsex,\respfunc^*}}. 
% \end{align*}
% For $\respfunc \notin D$, note that $g(\lsex,\respfunc) = g^*(\lsex,(\respfunc_1,\respfunc_2'))$ where $\respfunc_2'(\ldept) = \respfunc_2(0,\ldept) = \respfunc_2(1,\ldept)$. Further, for all $\respfunc^* \in \cX_{\response^*}, g^*(\lsex,\respfunc^*) = g(\lsex,\respfunc')$ where $\respfunc' \notin D$ and is defined as $(\respfunc_1' = \respfunc_1^*, \respfunc_2'(0,\ldept) = \respfunc_2'(1,\ldept) =\respfunc_2^*(\ldept))$ Therefore, for any $\tilde{P} \in \nullgraphresp$, since $\tilde{P}(D) =0, G(\tilde{P}) = G^*(h(\tilde{P}))$ thus implying that $\distgraphnoedge = \distgraph$. 

% \begin{lemma}
% \begin{equation}
%     \distgraphnoedge = \distiv. 
% \end{equation}
% \end{lemma}

% %\todo{Add proof from lecture notes.}

% % We prove a bijection between $\modelsnoedgeresp$ and $\nullgraphresp$. For $\tilde{P} \in \nullgraphresp$, 


% %  \begin{equation}\label{eq:respfunc_graph_noedge}
% %     \modelsnoedgeresp
% %     \triangleq \left \lbrace \tilde{P} \in \triangle\Paren{\cX_{\response}} : \tilde{P}\Paren{\respfunc_1,\respfunc_2} \neq 0 \text{ implies } \forall \ldept, \respfunc_2(0,\ldept) = \respfunc_2\Paren{1,\ldept} \right \rbrace.
% % \end{equation}

% \end{proof}