\documentclass{article}

% ---------------------------------------------------------------------------
% Packages. Each package is requested exactly once, in the same relative
% order as before (graphicx, mathtools and booktabs used to be listed
% several times, which was redundant).
% ---------------------------------------------------------------------------
\usepackage{graphicx} % Required for inserting images

\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

% Also
\usepackage[utf8]{inputenc} % allow utf-8 input
\usepackage[T1]{fontenc}    % use 8-bit T1 fonts
\usepackage{hyperref}       % hyperlinks
\usepackage{url}            % simple URL typesetting
\usepackage{amsfonts}       % blackboard math symbols
\usepackage{nicefrac}       % compact symbols for 1/2, etc.
\usepackage{microtype}      % microtypography
\usepackage{xcolor}         % colors

\usepackage{comment}
\usepackage{bm}
\usepackage{amssymb}
\usepackage{enumitem}
\usepackage{amsthm}
\usepackage{xfrac}
\usepackage{algorithm}
\usepackage{algorithmic}
\usepackage{caption}
\usepackage{subcaption}

% ---------------------------------------------------------------------------
% Theorem-like environments. `theorem' uses the default style; everything
% declared after \theoremstyle{definition} uses the definition style.
% Corollaries are numbered within theorems.
% ---------------------------------------------------------------------------
\newtheorem{theorem}{Theorem}
\theoremstyle{definition}
\newtheorem{definition}{Definition}
\newtheorem{lemma}{Lemma}
\newtheorem{corollary}{Corollary}[theorem]

% Project-wide macros. This is the single place where macros.tex is read.
\input{macros.tex}

%% For notation
\newcommand{\Scalar}[1]{#1}                 % scalars: typeset the argument unchanged
\newcommand{\Vector}[1]{\boldsymbol{#1}}    % vectors: bold symbol
\newcommand{\SSigma}{\boldsymbol{\Sigma}}   % bold capital Sigma
% \newcommand{\Matrix}[1]{\ensuremath{\mathbf{\uppercase{#1}}}}
\newcommand{\Matrix}[1]{\boldsymbol{#1}}    % matrices: bold symbol (same style as \Vector)

\newcommand{\DotProd}{\cdot}                % product symbol

% Shorthands for the kappa symbols with xx / xy / yy subscripts.
\newcommand{\KappaXX}{\kappa_{xx}}
\newcommand{\KappaXY}{\kappa_{xy}}
\newcommand{\KappaYY}{\kappa_{yy}}

% NOTE: a second `\include{macros}' used to follow here. macros.tex is
% already read by the \input above, so re-reading it would repeat every
% definition in that file; \include is also designed for chapter-level files
% in the document body rather than for preamble macro files. It has
% therefore been dropped.

\begin{document}

\begin{abstract}
    In our linear SEM, with exogenous noise terms $(\epsilon_x, \epsilon_y)$ independent of $\Vector{Z}$ (A2), conditional covariances such as $\mathrm{Cov} (X, Y \mid \Vector{Z})$ do not depend on the value of $\Vector{Z}$. This results in a significant simplification of the `law of total covariance', which means our definitions of $\kappa_{xx}$, $\kappa_{xy}$ and $\kappa_{yy}$ remain correct. For clarity, we subscript covariances and expectation values with the variables being averaged over throughout this document. The exception is when all (linearly independent) variables $(X, Y, \Vector{Z})$ are being averaged over together---then we simply write $\mathbb{E} (\cdot)$ or $\mathrm{Cov} (\cdot)$.
\end{abstract}

We begin with the general `law of total covariance', which itself is derived from the tower property of expectation values. We apply this law to the following pairs of covariates, $(X,Y)$, $(X,X)$ and $(Y,Y)$, always conditioning on $\Vector{Z}$. 

\paragraph{$(\boldsymbol{\kappa_{xy}})$} The resulting equation for the first pair of covariates is: 
\begin{align}
    \mathrm{Cov}_{XY \Vector{Z}} (X, Y) = \mathbb{E}_{\Vector{Z}} \mathrm{Cov}_{XY} (X, Y \mid \Vector{Z}) + \mathrm{Cov}_{\Vector{Z}} \left(\mathbb{E}_{XY} (X \mid \Vector{Z}), \mathbb{E}_{XY} (Y \mid \Vector{Z})\right).
\end{align}
This is equivalent, in the notation we used throughout the manuscript, to 
\begin{align}
    \Sigma_{xy} = \mathbb{E}_{\Vector{Z}} (\kappa_{xy}) + \mathrm{Cov}_{\Vector{Z}} \left( \mathbb{E}_{XY} (X \mid \Vector{Z}), \mathbb{E}_{XY} (Y \mid \Vector{Z})\right).
\end{align}
Because the model is linear in the observables and (A2) is satisfied\footnote{We can see the following result directly by noting that, once $\Vector{Z}$ is fixed, the only part of $X$ that still varies is $\epsilon_x$, and the analogous part of $Y$ is equal to $\theta \epsilon_x + \epsilon_y$. These quantities are independent of $\Vector{Z}$ by (A2). Such a result would not be true if the observables had a non-linear relationship, as we may be `carrying' a spurious dependence on $\Vector{Z}$ if we had, e.g., a term like $\Vector{Z} \cdot \Vector{Z} X$ in the definition of $Y$.}: 
\begin{align}
    \kappa_{xy} \perp\!\!\!\perp \Vector{Z},
\end{align}
so we can reduce the law of total covariance down to
\begin{align}
    \kappa_{xy} = \Sigma_{xy} - \mathrm{Cov}_{\Vector{Z}} \left( \mathbb{E}_{XY} (X \mid \Vector{Z}), \mathbb{E}_{XY} (Y \mid \Vector{Z})\right).
\end{align}
We remind ourselves of the exact structural equations,
\begin{align}
    &X = \Vector{\beta} \cdot \Vector{Z} + \epsilon_x, \\
    &Y = \Vector{\gamma} \cdot \Vector{Z} + \theta X + \epsilon_y,
\end{align}
which allow us to write the following:
\begin{align}
    & \mathbb{E}_{XY} (X \mid \Vector{Z}) = \Vector{\beta} \cdot \Vector{Z} + \mathbb{E} (\epsilon_x), \\
    & \mathbb{E}_{XY} (Y \mid \Vector{Z}) = \Vector{\gamma} \cdot \Vector{Z} + \theta \mathbb{E}_{XY} (X \mid \Vector{Z}) + \mathbb{E} (\epsilon_y).
\end{align}
Again, because (A2) is satisfied, these conditional expectations are linear in $\Vector{Z}$ up to constant offsets, which simplifies the second term of the law of total covariance: 
\begin{align}
    \mathrm{Cov}_{\Vector{Z}} \left( \mathbb{E}_{XY} (X \mid \Vector{Z}), \mathbb{E}_{XY} (Y \mid \Vector{Z})\right) = \Vector{\beta} \cdot \SSigma_{\Vector{zz}} \cdot \left( \Vector{\gamma} + \theta \Vector{\beta} \right).
\end{align}
We use the (A2) assumption once more to calculate the following expressions for $\Vector{\beta}$ and $\Vector{\gamma}$:
\begin{align}
    & \Vector{\beta} = \SSigma_{\Vector{zz}}^{-1} \cdot \SSigma_{x\Vector{z}}, \\
    & \Vector{\gamma} = \SSigma_{\Vector{zz}}^{-1} \cdot \SSigma_{y\Vector{z}} - \theta \Vector{\beta},
\end{align}
through which we see:
\begin{align}
    \mathrm{Cov}_{\Vector{Z}} \left( \mathbb{E}_{XY} (X \mid \Vector{Z}), \mathbb{E}_{XY} (Y \mid \Vector{Z})\right) = \SSigma_{x\Vector{z}} \cdot \SSigma_{\Vector{zz}}^{-1} \cdot \SSigma_{y\Vector{z}},
\end{align}
to conclude:
\begin{align}
    \kappa_{xy} = \Sigma_{xy} - \SSigma_{x\Vector{z}} \cdot \SSigma_{\Vector{zz}}^{-1} \cdot \SSigma_{y\Vector{z}},
\end{align}
as asserted in the manuscript. 

\paragraph{$(\boldsymbol{\kappa_{xx}})$} Similarly to the derivation above, we can prove the validity of the definition for the conditional covariance $\kappa_{xx}$ given in the manuscript. Following the previous discussion, we may write:
\begin{align}
    \Sigma_{xx} &= \mathbb{E}_{\Vector{Z}} (\kappa_{xx}) + \mathrm{Var}_{\Vector{Z}} \left( \mathbb{E}_{XY} (X \mid \Vector{Z})\right)\\
    &= \kappa_{xx} + \mathrm{Var}_{\Vector{Z}} \left( \mathbb{E}_{XY} (X \mid \Vector{Z})\right),
\end{align}
since $\kappa_{xx} \perp\!\!\!\perp \Vector{Z}$. Using the structural equations, we write: 
\begin{align}
    \mathbb{E}_{XY} (X \mid \Vector{Z}) = \Vector{\beta} \cdot \Vector{Z} + \mathbb{E} (\epsilon_x),
\end{align}
so that, 
\begin{align}
    \mathrm{Var}_{\Vector{Z}} \left( \mathbb{E}_{XY} (X \mid \Vector{Z})\right) &= \Vector{\beta} \cdot \SSigma_{\Vector{zz}} \cdot \Vector{\beta}\\
    &= \SSigma_{x\Vector{z}} \cdot \SSigma_{\Vector{zz}}^{-1} \cdot \SSigma_{x\Vector{z}}.
\end{align}
Therefore,
\begin{align}
    \kappa_{xx} = \Sigma_{xx} - \SSigma_{x\Vector{z}} \cdot \SSigma_{\Vector{zz}}^{-1} \cdot \SSigma_{x\Vector{z}},
\end{align}
as required. 

\paragraph{$\boldsymbol{(\kappa_{yy})}$} Finally, we can do something similar for $\kappa_{yy}$: 
\begin{align}
    \kappa_{yy} = \Sigma_{yy} - \mathrm{Var}_{\Vector{Z}} \left( \mathbb{E}_{XY} (Y \mid \Vector{Z})\right),
\end{align}
where, using the structural equations,
\begin{align}
    \mathbb{E}_{XY} (Y \mid \Vector{Z}) &= \theta \mathbb{E}_{XY} (X \mid \Vector{Z}) + \Vector{\gamma} \cdot \Vector{Z} + \mathbb{E}(\epsilon_y) \\
    &= (\theta \Vector{\beta} + \Vector{\gamma}) \cdot \Vector{Z} + \theta \mathbb{E}(\epsilon_x) + \mathbb{E}(\epsilon_y) \\
    &= \SSigma_{y\Vector{z}} \cdot \SSigma_{\Vector{zz}}^{-1} \cdot \Vector{Z} + \theta \mathbb{E}(\epsilon_x) + \mathbb{E}(\epsilon_y),
\end{align}
giving,
\begin{align}
    \kappa_{yy} = \Sigma_{yy} - \SSigma_{y\Vector{z}} \cdot \SSigma_{\Vector{zz}}^{-1} \cdot \SSigma_{y\Vector{z}},
\end{align}
as required. 
% \newpage
% 
%
%
%
%
%
%
%
%
%
%
%
%
%
%
%
%

% \section{Another derivation}

% A simple proof that the conditional (co)variances are defined properly in our linear model. 

% Recall the model,
% \begin{align}
%     & X = \Vector{\beta} \cdot \Vector{Z} + \epsilon_x, \\
%     & Y = \theta X + \Vector{\gamma} \cdot \Vector{Z} + \epsilon_y,
% \end{align}
% and just keep (A1) and (A2) in mind. 

% We'll go about computing the conditional (co)variances directly. Start with $\kappa_{xx}$:
% \begin{align}
%     \kappa_{xx} := \mathrm{Cov} (X, X \mid \Vector{Z}) = \Vector{\beta} \cdot \mathrm{Cov} (\Vector{Z}, \Vector{Z} \mid \Vector{Z}) \cdot \Vector{\beta} + \eta_x^2.
% \end{align}
% The variance of $\Vector{Z}$ given itself is, of course, zero. Thus, we recover the result as before: 
% \begin{align}
%     \kappa_{xx} = \eta_x^2,
% \end{align}
% which, after applying the equation for $\Vector{\beta}$, shows:
% \begin{align}
%     \kappa_{xx} = \Sigma_{xx} - \SSigma_{x \Vector{z}} \cdot \SSigma_{\Vector{zz}}^{-1} \cdot \SSigma_{\Vector{z}x}.
% \end{align}

% Using our linear structural equations to calculate $\kappa_{xy}$ and $\kappa_{yy}$ leads to similar results: 
% \begin{align}
%     \kappa_{xy} := \mathrm{Cov} (X, Y \mid \Vector{Z}) = \theta \kappa_{xx} + \rho \eta_x \eta_y,
% \end{align}
% since 
% \begin{align}
%     \mathrm{Cov} (X, \Vector{Z} \mid \Vector{Z}) = 0.
% \end{align}
% Finally, 
% \begin{align}
%     \kappa_{yy} := \mathrm{Cov} (Y, Y \mid \Vector{Z}) = \theta^2 \kappa_{xx} + 2 \theta \rho \eta_x \eta_y + \eta_y^2
% \end{align}
% comes directly from the structural equation for $Y$.

% We can prove these are as defined before: 

\end{document}