\section{Preliminaries} \label{sec:prelim}
We outline a few definitions that follow the formal setup of~\cite{BongersFPM21}.
\begin{definition}[Structural Causal Model (SCM)]
A \textbf{Structural Causal Model (SCM)} is a tuple $\model = \Paren{\enop,\exrv,\spc,f,P}$ where a) $\enop,\exrv$ are disjoint, finite index sets of \textbf{endogenous} and \textbf{exogenous} random variables respectively, b) $\spc = \prod_{i \in \enop \cup \exrv} \spc_i $ is the \textbf{domain} which is a product of standard measurable spaces $\spc_i$, c) for every $v \in \enop$, $f_v:\spc \to \spc_{v}$ is a measurable function, and $f = (f_v)_{v \in \enop}$ is called the \textbf{causal mechanism}; the equations $X_v = f_v(X)$ for every $v \in \enop$ are called the \textbf{structural equations}, and d) $P\Paren{\spc_{\exrv}} = \bigotimes_{w \in \exrv} P(X_w)$ is the \textbf{exogenous distribution} which is a product of probability distributions $P(X_w)$ on $\spc_{w}$.
\end{definition}

\begin{definition}[Parent]
Let $\model = \Paren{\enop,\exrv,\spc,f,P}$ be an SCM. An index $k \in \enop \cup \exrv$ is a \textbf{parent} of $v \in \enop$ if and only if it is \emph{not} the case that, for all $x_{\enop \backslash \{k\}}$, $f_v(x_{\enop},X_{\exrv})$ is constant in $x_k$ (if $k \in \enop$; resp.\ in $X_k$ if $k \in \exrv$) $P(\spc_{\exrv})$-a.s.
\end{definition}


\begin{definition}[Causal Graph]
             Let $\model = \Paren{\enop, \exrv, \spc, f,P}$ be an SCM. The \textbf{causal graph}, $\cg{\model}$, is a directed mixed graph with nodes $\enop$, directed edges $u \longrightarrow v$ if and only if $u \in \enop$ is a parent of $v \in \enop$, and bidirected edges $u \leftrightarrow v$ if and only if there exists $w \in \exrv$ that is a parent of both $u,v \in \enop$.
             %\todo{It would be good to spell out the definition of parent: $u \in \enop$ is a parent of $v \in \enop$ iff $f_v(x_V,x_W)$ is constant in $x_u$ $P$-a.s. and similarly for $w \in \exrv$ a parent of $v \in \enop$.}
\end{definition}
For simplicity of exposition, we restrict attention to acyclic SCMs, i.e.\ SCMs whose causal graph is acyclic (contains no directed cycle $v_1 \rightarrow \cdots \rightarrow v_n \rightarrow v_1$).
\begin{definition}[Observational Distribution]
Given an acyclic SCM $\model = \Paren{\enop,\exrv,\spc,f,P}$, the exogenous distribution $P$ and the causal mechanism $f$ induce a probability distribution over the endogenous variables, which is called the \textbf{observational distribution} and denoted by $P_{\model}(X_{\enop})$.
\end{definition}

\begin{definition}[Hard Intervention]
Given an acyclic SCM $\model = \Paren{\enop,\exrv,\spc,f,P}$, an intervention target $\,T \subseteq \enop$, and an intervention value $x_T \in \spc_{T}$, the \textbf{intervened SCM} is defined as $\model_{\doop{X_T=x_T}} \triangleq \Paren{\enop,\exrv,\spc,(f_{\enop \backslash T},x_T),P}$. Further, the observational distribution of the intervened SCM, $P_{\model_{\doop{X_T=x_T}}}$, is called an \textbf{interventional distribution}, and denoted by $P_{\model}\Paren{X_{\enop} \mid \doop{X_T=x_T}}$.
\end{definition}



% \begin{definition}{Observational Distribution}
% Let $\model = \Paren{\enop, \exrv, \spc, f,P}$ be an SCM. The \textbf{observational distribution} of $\model$, $P_{\model}(X_{\enop})$, is 
% \end{definition}
%The following definitions are concerned primarily with counterfactual notions of fairness that we define in the next section. 
% \begin{definition}[Solution function]
% Let $\model = \Paren{\enop, \exrv, \spc, f,P}$ be an acyclic SCM and $C \subseteq \enop$. A \textbf{solution function} of $\model$ with respect to $C$ is a measurable mapping $g_C : \cX_{\enop \backslash C} \times \spc_{\exrv} \mapsto \spc_{C}$ such that for all $x_{\enop \backslash C} \in \spc_{\enop \backslash C}$, all $P(X_{\exrv})$-a.a $x_{\exrv} \in \spc_{\exrv}$,  $g_{C}\Paren{x_{\enop\backslash C},x_{\exrv}}$ satisfies the structural equations for $C$, i.e., 
% \begin{equation*}
%     g_{C}\Paren{x_{\enop\backslash C},x_{\exrv}} = f_{C}\Paren{x_{\enop \backslash C},g_{C}\Paren{x_{\enop\backslash C},x_{\exrv}},x_{\exrv}}.
% \end{equation*}
% \end{definition}
% Acyclic SCMs have essentially unique solution functions.

% \begin{definition}[Intervened SCM]
% Let $\model = \Paren{\enop, \exrv, \spc, f,P}$ be an SCM, $T \subseteq \enop$ be an intervention target and $\eta_{T} \in \spc_{T}$ be an intervention value. The \textbf{intervened SCM} is defined as $\model_{\doop{X_{T} = \eta_T}} = \Paren{\enop,\exrv,\cX,P,\Paren{f_{V\backslash T}, \eta_T}}$. 
    
% \end{definition}
%Note that observational distributions w.r.t  intervened SCMs are also termed interventional distributions. 

\begin{definition}[Potential Outcome]
  Let $\model = \Paren{\enop, \exrv, \spc, f,P}$ be an acyclic SCM, $C \subseteq \enop$ and $x_C \in \spc_{C}$. A random variable $X^{\doop{x_C}}$ taking values in $\spc_{\enop} \times \spc_{\exrv}$ is called a \textbf{potential outcome} of $\model$ under intervention $x_C$ if a) its $\exrv$-component has the exogenous distribution specified by $\model$, i.e., $X^{\doop{x_C}}_{\exrv} \sim P\Paren{\spc_{\exrv}}$, and b) it satisfies $X^{\doop{x_C}}_C = x_C\ \mathrm{a.s.}$ and
    \begin{equation*}
        X^{\doop{x_C}}_{\enop \backslash C} = f_{\enop \backslash C}\Paren{x_C,X^{\doop{x_C}}_{\enop \backslash C},X^{\doop{x_C}}_{\exrv} } \text{ a.s.}
    \end{equation*}
If $\,C = \emptyset$, we write $X$ instead of $X^{\doop{x_{\emptyset}}}$. 
When dealing with multiple potential outcomes, we assume that they share the same $\exrv$-component:
for any $n$, for $C_1, C_2, \ldots, C_n \subseteq \enop$ and for $x_{C_1} \in \spc_{C_1}, x_{C_2} \in \spc_{C_2}, \ldots, x_{C_n} \in \spc_{C_n}$,
  \[X_{\exrv}^{\doop{x_{C_1}}} = X_{\exrv}^{\doop{x_{C_2}}}= \cdots =X_{\exrv}^{\doop{x_{C_n}}} = X_{\exrv} \text{ a.s.}\]
\end{definition}

Instrumental variables (IVs) are used to estimate the causal effect of a treatment $X$ on an outcome $Y$ in the presence of latent confounding.
\begin{definition}[Instrumental Variable (IV) Model Class]\label{def:IVmodelclass}
The \textbf{instrumental variable model class}, $\modelivrelax$, is the collection of SCMs $\model$ such that $\cg{\model}$ is a subgraph of the graph in Figure~\ref{fig:iv}, where $Z$ is the \textbf{instrument}, $X$ is the \textbf{treatment}, and $Y$ is the \textbf{outcome}.
\end{definition}
We make an explicit positivity assumption and define $\modeliv \triangleq \left \{ \model \in \modelivrelax:\forall z, P_{\model}(Z=z) > 0 \right\}$. Causal graphs of SCMs in the IV model class rule out the directed edge $Z \rightarrow Y$ and any latent confounding between $Z$ and $Y$ as well. In Section~\ref{app:SDconf} we show that our results for the IV model class hold even when confounding is allowed between $Z$ and $X$.
%However, our results pertaining to the IV model class hold for a larger class of SCMs where confounding is allowed between the instrument $Z$ and the treatment $X$. 
% To save space, we put these extensions in the appendix.

\begin{figure}[t]
     \centering
            \begin{tikzpicture}
            \tikzstyle{vertex}=[circle,fill=none,draw=black,minimum size=17pt,inner sep=0pt]
\node[vertex] (Z) at (0,0) {$Z$};
\node[vertex] (Y) at (3,0) {$Y$};
\node[vertex] (X) at (1.5,0) {$X$};
%\node[vertex] (S') at (1,-0.5) {$S'$};
\path (Z) edge (X);
\path (X) edge (Y);
\path[bidirected] (X) edge[bend left=60] (Y);
%\path[red] (S') edge (A);
%\path (S) edge (S'); 
 %\path (S) edge node[near start, below] {=} (S');
% \draw[->, line width=0.3mm]  (S)--(D);
% \draw[->, line width=0.3mm]  (D)--(A);
% \draw[->, line width=0.3mm]  (S)--(A);
% \draw[<->, line width=0.3mm]  (D)--(A);
            \end{tikzpicture}
        \caption{Causal graph of $\model \in \modelivrelax$.}
        \label{fig:iv}
        \end{figure}
