%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%% PACKAGING AND IMPORTS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\documentclass[accepted]{uai2022}

%% Choose your variant of English; be consistent
\usepackage[american]{babel}

% fonts and math
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{bm}
\usepackage{amsmath}
\usepackage{mathtools} % amsmath with fixes and additions
\usepackage{amssymb}
\usepackage{amsfonts}
\usepackage{xspace}
% \usepackage[colorlinks=false, pdfborder={0 0 0}]{hyperref}
\usepackage{appendix}
\DeclareMathOperator{\sign}{sign}

% theorems, lemmas, models
\usepackage{amsthm}
\usepackage{thmtools}
\input{notation}
\setcounter{theorem}{3}     % Offsetting counters not to confuse theoretical results with those in the main paper
\setcounter{lemma}{2}
\setcounter{definition}{1}

% % % citations and bibliography
\usepackage[style=authoryear-comp, natbib=true]{biblatex}
\addbibresource{warren_734.bib}
\setlength\bibitemsep{1.5\itemsep}
\AtEveryBibitem{%
    \clearfield{issn}% Remove issn
    \clearfield{doi} % Remove doi
    \clearfield{url}%
    \clearfield{urldate}%
    \clearfield{urlyear}%
    \clearfield{urlmonth}%
    \clearfield{review}%
    \clearfield{series}%
    \clearfield{note}%
    \clearfield{isbn}%
    \clearfield{urlmonth}%
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%% TITLE, AUTHORS, AND ABSTRACT %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\title{Generalized Bayesian Quadrature with Spectral Kernels - Supplementary Material}

% Add authors
\author[1]{{Houston Warren}{}}
\author[2]{{Rafael Oliveira}{}}
\author[1, 3]{{Fabio Ramos}{}}
% Add affiliations after the authors
\affil[1]{%
    School of Computer Science\\
    The University of Sydney\\
    Australia
}
\affil[2]{%
    Brain and Mind Centre\\
    The University of Sydney\\
    Australia
}
\affil[3]{%
    NVIDIA, USA
}
  
    \begin{document}
\onecolumn
\maketitle
\appendix

\section{Generalized Bayesian Quadrature Derivations}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%% GAUSSIAN GBQ DERIVATION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Preliminaries}\label{sec:prelims}

We start by deriving the general solution to indefinite integrals of the form:
\begin{equation}\label{eq:rffintegral}
    \begin{split}
        \fintreg &= \int \frac{1}{R} \sum_{r=1}^R \cos(\bm{\omega}_r^T(\x - \bm{X})) \, d\x \\
        &\stackrel{\dag}{=} \frac{1}{R} \sum_{r=1}^R \int \cos(\bm{\omega}_r^T(\x - \bm{X})) \, d\x
    \end{split}
\end{equation}
where $\bm{\omega} \in \mathcal{R}^{R \times d}$, and $\dag$ makes use of the linearity of integration, i.e., $\int (f(x) + g(x)) \, dx = \int f(x) \, dx + \int g(x) \, dx$. Equation~\eqref{eq:rffintegral} represents the integral of an RFF estimated kernel. Using u-substitution to integrate out a single $x^j$ variable from the vector-valued $\x$ results in:
\begin{equation} \label{eq:2}
     \fintreg_{x^j} = \frac{1}{R} \sum_{r=1}^R \int_{\x^i \in \mathcal{R}^{d-1}} \frac{\sin(\bm{\omega}_r^T(\x - \bm{X}))}{\omega_r^j} \, d\x^i.
\end{equation}
where 
\begin{align}
\x^i &:= \begin{bmatrix}
       x^{1} \\
       \vdots \\
       x^{j-1} \\
       x^{j+1} \\
       \vdots \\
       x^d
     \end{bmatrix}
\end{align}
If we integrate \eqref{eq:2} again over a new variable $x^t$ in $\x^i$, we start to see a general pattern emerge:
\begin{equation}
     \fintreg_{x^j x^t} = \frac{1}{R} \sum_{r=1}^R \int_{\x^{i^\prime} \in \mathcal{R}^{d-2}} \frac{-\cos(\bm{\omega}_r^T(\x - \bm{X}))}{\omega_r^j \omega_r^t} \, d\x^{i^\prime}.
\end{equation}
The repeated integral of $\cos$ follows a repeating pattern through $h = [\sin, -\cos, -\sin, \cos, \sin, \dots]$, while the integral of the u-substituted $\bm{\omega}_r^T(\x - \X)$ simply results in the multiplication of the integrand denominator by $\omega_r^j$ for each variable $x^j$ in $\x$ we integrate over. Thus, integrating over all $d$ dimensions of $\x$ will result in an indefinite integral of the form:
\begin{equation}\label{eq:kmeuni}
     \fintreg = \frac{1}{R} \sum_{r=1}^R \frac{h^d(\bm{\omega}_r^T(\x - \X))}{\prod_{j=1}^d \omega_r^j}
\end{equation}
where $h$ is defined as the repeating series above and $h^d$ represents the $d$-th index of $h$. For an RFF kernel parametrized by $\bm{\omega}$, equation \eqref{eq:kmeuni} represents the indefinite uniform expectation; in other words, the uniform kernel mean.

\subsection{RFF Kernel Means Over Gaussian Measures}\label{sec:gaussgbqmean}

Equipped with the knowledge of the anti-derivative pattern that arises in \eqref{eq:kmeuni}, we now turn our attention to indefinite integrals of the form:
\begin{equation}\label{eq:6}
    \begin{split}
        \fintreg &= \int \frac{1}{R} \sum_{r=1}^R \cos(\bm{\omega}_r^T(\x - \bm{X}))p(\x) \, d\x \\
        &= \frac{1}{R} \sum_{r=1}^R \int \cos(\bm{\omega}_r^T(\x - \bm{X}))p(\x) \, d\x \, ,
    \end{split}
\end{equation}
where $p(\x)$ is a Gaussian. Equation \ref{eq:6} represents the RFF kernel approximation expectation over a Gaussian distribution, or the Gaussian kernel mean $\mu_{\x}(\bm{X})$.

As in the main paper, we parametrize $p(\x)$ as an RFF approximation $q(\x)$ to the multivariate Gaussian, and can rewrite \eqref{eq:6} and substitute into the BQ integral mean formulation $\fint = \mu_{\x}(\bm{X})^T \bm{K}^{-1} \bm{y}$ resulting in:
\begin{equation}\label{eq:7}
    \begin{split}
    \fint &= \bm{y}^T \bm{K}^{-1} \int \frac{1}{R} \sum_{r=1}^R \cos(\bm{\omega}_r^T(\x - \bm{X}))
    \times \frac{1}{Z |(2\pi)^d \bm{\Sigma}|^{1/2}} \sum_{z=1}^Z \cos(\bm{\rho}_z^T(\x - \bm{\mu})) d\x \\
    &= \frac{\bm{y}^T \bm{K}^{-1}}{RZ |(2\pi)^d \bm{\Sigma}|^{1/2}} \sum_{r=1}^R \sum_{z=1}^Z \int \cos(\bm{\omega}_r^T(\x - \bm{X})) \cos(\bm{\rho}_z^T(\x - \bm{\mu})) d\x \, .
    \end{split}
\end{equation}

Looking at the integrand term in \eqref{eq:7}, we can apply the trigonometric identity $\cos(\alpha)\cos(\beta) = \cos(\alpha + \beta) / 2 + \cos(\alpha - \beta) / 2$ and rewrite the integrand as:
\begin{equation}
    \begin{split}
        \cos(\bm{\omega}_r^T(\x - \bm{X})) \cos(\bm{\rho}_z^T(\x - \bm{\mu})) &= \frac{\cos(\bm{\omega}_r^T(\x - \bm{X}) + \bm{\rho}_z^T(\x - \bm{\mu}))}{2} \\
        &+ \frac{\cos(\bm{\omega}_r^T(\x - \bm{X}) - \bm{\rho}_z^T(\x - \bm{\mu}))}{2} ,
    \end{split}
\end{equation}
which we can reorganize, while also moving the division by two outside the integral in \eqref{eq:7}, as:
\begin{equation}\label{eq:gbq_gauss_final_integrand}
    \cos(\x^T(\bm{\omega}_r + \bm{\rho}_z) - (\bm{\omega}_r^T\bm{X} + \bm{\rho}_z^T\bm{\mu})) + \cos(\x^T(\bm{\omega}_r - \bm{\rho}_z) - (\bm{\omega}_r^T\bm{X} - \bm{\rho}_z^T\bm{\mu})).
\end{equation}
Using the same method of u-substitution and properties of the anti-derivatives of $\cos$ and $\sin$ as in \eqref{eq:2} and \eqref{eq:kmeuni}, applying the anti-derivative over the integrand term in \eqref{eq:gbq_gauss_final_integrand} yields the indefinite form:
\begin{equation}
    \frac{h^d(\x^T(\bm{\omega}_r + \bm{\rho}_z) - (\bm{\omega}_r^T\bm{X} + \bm{\rho}_z^T\bm{\mu}))}{\prod_{j=1}^d (\omega_r^j + \rho_z^j)} +
    \frac{h^d(\x^T(\bm{\omega}_r - \bm{\rho}_z) - (\bm{\omega}_r^T\bm{X} - \bm{\rho}_z^T\bm{\mu}))}{\prod_{j=1}^d(\omega_r^j - \rho_z^j)} + c \, ,
\end{equation}
where $c$ is a constant of integration. Substituting the above into the full GBQ formulation in \eqref{eq:7} and applying over definite bounds $\bm{a} \leq \x \leq \bm{b}$ results in:
\begin{equation}\label{gbq_gauss_mean}
    \frac{\bm{y}^T \bm{K}^{-1}}{Q_{\bm{a}}^{\bm{b}} \times RZ |(2\pi)^d \bm{\Sigma}|^{1/2}} 
    \Bigg[
        \sum_{r=1}^R \sum_{z=1}^Z
        \frac{h^d(\x^T(\bm{\omega}_r + \bm{\rho}_z) - (\bm{\omega}_r^T\bm{X} + \bm{\rho}_z^T\bm{\mu}))}{\prod_{j=1}^d (\omega_r^j + \rho_z^j)} +
        \frac{h^d(\x^T(\bm{\omega}_r - \bm{\rho}_z) - (\bm{\omega}_r^T\bm{X} - \bm{\rho}_z^T\bm{\mu}))}{\prod_{j=1}^d(\omega_r^j - \rho_z^j)} 
    \Bigg]_{\bm{a}}^{\bm{b}}  \, ,
\end{equation}
which we can recognize as the same equation as definition 1 in the main paper: the GBQ mean $\fint$ of the approximation to the integral of $f$ over a Gaussian measure.

We note that in \eqref{gbq_gauss_mean} we have also added $[Q_{\bm{a}}^{\bm{b}}]^{-1}$ to the formulation. This is due to the fact that integrating over RFF approximated Gaussian measure $q(\x)$ on bounds $[\bm{a}, \bm{b}]$ will necessarily truncate $q(\x)$ such that it is no longer a proper probability density function. To account for this, we can modify $q(\x)$ to the truncated normal form $\frac{q(\x)}{Q(\bm{b}) - Q(\bm{a})}$ in equation \ref{eq:7} where $Q$ is the CDF of $q$. We can calculate $Q$ analytically as described in section \ref{rffcdf}.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ALGORITHM FOR DEFINITE BOUNDS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\subsubsection{Algorithm for Efficient Implementation over Definite Bounds}\label{implementation_algo}

Before progressing further, we will briefly describe a method by which \eqref{gbq_gauss_mean} can be applied over bounds $|_{\bm{a}}^{\bm{b}}$. If done naively, the application of the fundamental theorem of calculus to calculate the definite integral of \eqref{gbq_gauss_mean} over multidimensional bounds will add a multiplicative factor of $2^d$ to the GBQ complexity, as all possible vector permutations of limits $\{\bm{a}, \bm{b}\}$ in each dimension would need to be evaluated. However, we will briefly describe here a method that allows for the application of GBQ to bounded integrals which shares the same complexity ($\mathcal{O}(dNRZ)$) as calculating the indefinite integral \eqref{gbq_gauss_mean} at a single point.

First, we note that $h^d$ in this case can be any of $[\cos, \sin, -\cos, -\sin]$. Our notation from here on will assume $h^d = \cos$, but the transformation of the other cases to cosine form is trivial using the identity $\sin(x) = \cos(x - \frac{\pi}{2})$, the effect of which will not change the methodology.

Next, we will use the identity $\cos(\alpha - \beta) = \cos(\alpha)\cos(\beta) + \sin(\alpha)\sin(\beta)$ to further simplify the terms within the double summation in \eqref{gbq_gauss_mean} to isolate those not involving $\x$ (including the $-\frac{\pi}{2}$ if the $h^d$ transformation was necessary), resulting in:
\begin{equation}\label{eq:preharmonic}
    \begin{split}
        \Big[\prod_{j=1}^d(\omega_r^j + \rho_z^j)\Big]^{-1}
        \Big[
            \cos(\bm{\omega}_r^T\bm{X} + \bm{\rho}_z^T\bm{\mu}) \cos(\x^T(\bm{\omega}_r + \bm{\rho}_z))
            &+
            \sin(\bm{\omega}_r^T\bm{X} + \bm{\rho}_z^T\bm{\mu}) \sin(\x^T(\bm{\omega}_r + \bm{\rho}_z))
        \Big] \\
        + 
        \Big[\prod_{j=1}^d(\omega_r^j - \rho_z^j)\Big]^{-1}
        \Big[
            \cos(\bm{\omega}_r^T\bm{X} - \bm{\rho}_z^T\bm{\mu}) \cos(\x^T(\bm{\omega}_r - \bm{\rho}_z))
            &+
            \sin(\bm{\omega}_r^T\bm{X} - \bm{\rho}_z^T\bm{\mu}) \sin(\x^T(\bm{\omega}_r - \bm{\rho}_z))
        \Big] ,
    \end{split}
\end{equation}
Next, we will apply the harmonic addition theorem to further reduce terms. 
\begin{theorem}[{Harmonic addition theorem}]
    \label{thr:harmonic}
    A linear combination of sinusoids which share a frequency $x$ but have differing amplitudes $\{a, b\}$ can be reformulated as:
    \begin{equation}
        a \cos(x) + b \sin(x) = \sign(a) \sqrt{a^2 + b^2} \cos(x + \arctan(\frac{-b}{a})) \, .
    \end{equation}
\end{theorem}
Using the identities that $\cos^2(x) + \sin^2(x) = 1$ and $\frac{\sin(x)}{\cos(x)} = \tan(x)$, we can modify \eqref{eq:preharmonic} to
\begin{equation}
    \begin{split}
        \Big[\prod_{j=1}^d(\omega_r^j + \rho_z^j)\Big]^{-1}
        \Big[
            \sign \left( \cos \left(\bm{\omega}_r^T\bm{X} + \bm{\rho}_z^T\bm{\mu} \right) \right)
            & \cos \left(\x^T(\bm{\omega}_r + \bm{\rho}_z) + \arctan \left(-\tan \left(\bm{\omega}_r^T\bm{X} + \bm{\rho}_z^T\bm{\mu} \right) \right) \right)
        \Big] \\
        + 
        \Big[\prod_{j=1}^d(\omega_r^j - \rho_z^j)\Big]^{-1}
        \Big[
            \sign \left( \cos \left(\bm{\omega}_r^T\bm{X} - \bm{\rho}_z^T\bm{\mu} \right) \right)
            & \cos \left(\x^T(\bm{\omega}_r - \bm{\rho}_z) + \arctan \left(-\tan \left(\bm{\omega}_r^T\bm{X} - \bm{\rho}_z^T\bm{\mu} \right) \right) \right)
        \Big] \, ,
    \end{split}
\end{equation}
which we will then re-parametrize with $[\alpha_{rz}, \beta_{rz}, \gamma_{rz}, \delta_{rz}]$ for terms that do not involve $\x$ and substitute back into \eqref{gbq_gauss_mean} for
\begin{equation}\label{eq:gaussgbqmean_reduced}
    \frac{\bm{y}^T \bm{K}^{-1}}{Q_{\bm{a}}^{\bm{b}} \times RZ |(2\pi)^d \bm{\Sigma}|^{1/2}} 
    \Bigg[
        \sum_{r=1}^R \sum_{z=1}^Z
        \alpha_{rz} \times \cos \left( \x^T(\bm{\omega}_r + \bm{\rho}_z) + \beta_{rz} \right) + 
        \gamma_{rz} \times \cos \left( \x^T(\bm{\omega}_r - \bm{\rho}_z) + \delta_{rz} \right)
    \Bigg]_{\bm{a}}^{\bm{b}}  \, .
\end{equation}
Given that we can evaluate each dimension of the interval of integration independently, i.e.,
\begin{equation}
    f(\x) \big|_{\bm{a}}^{\bm{b}} = f(x^1, \cdot) \big|_{a^1}^{b^1} \cdots  f(x^d, \cdot) \big|_{a^d}^{b^d} \, ,
\end{equation}
we can apply the integration bounds iteratively to \eqref{eq:gaussgbqmean_reduced}. By leveraging trigonometric identities, we can reduce the resulting terms at each step to provide computational advantages.

Consider the case of evaluating \eqref{eq:gaussgbqmean_reduced} over the bounds of a single dimension $d=1$. Leveraging the identity $\cos(\alpha + \beta) = \cos(\alpha)\cos(\beta) - \sin(\alpha)\sin(\beta)$, we can then separate the terms within the double summation in \eqref{eq:gaussgbqmean_reduced} that involve $x^{d=1}$ from those that do not, resulting in
\begin{equation}
    \begin{split}
        \alpha_{rz}
        \Big[
            \cos([x^1]^T(\omega_r^1 + \rho_z^1) + \beta_{rz})
            \cos([\x^{d \neq 1}]^T(\bm{\omega}_r^{d \neq 1} &+ \bm{\rho}_z^{d \neq 1})) \\
            - 
            \sin([x^1]^T(\omega_r^1 + \rho_z^1) + \beta_{rz}) 
            \sin([\x^{d \neq 1}]^T(\bm{\omega}_r^{d \neq 1} &+ \bm{\rho}_z^{d \neq 1})) 
        \Big] 
        \\
        + \, \gamma_{rz}
        \Big[
            \cos([x^1]^T(\omega_r^1 - \rho_z^1) + \delta_{rz}) 
            \cos([\x^{d \neq 1}]^T(\bm{\omega}_r^{d \neq 1} &- \bm{\rho}_z^{d \neq 1})) \\
            - 
            \sin([x^1]^T(\omega_r^1 - \rho_z^1) + \delta_{rz})
            \sin([\x^{d \neq 1}]^T(\bm{\omega}_r^{d \neq 1} &- \bm{\rho}_z^{d \neq 1})) 
        \Big] \, .
    \end{split}
\end{equation}
If we now substitute $a^1$ and $b^1$ for $x^1$ as we apply over a single bound (i.e., $f(x)|_a^b = f(b) - f(a)$), the result (within the double summation of \eqref{eq:gaussgbqmean_reduced}) is:
\begin{equation}
    \begin{split}
        &\alpha_{rz} \Big[\cos([b^1]^T(\omega_r^1 + \rho_z^1) + \beta_{rz}) - \cos([a^1]^T(\omega_r^1 + \rho_z^1) + \beta_{rz})  \Big]
        \cos([\x^{d \neq 1}]^T(\bm{\omega}_r^{d \neq 1} + \bm{\rho}_z^{d \neq 1})) \\
        - 
        &\alpha_{rz} \Big[ \sin([b^1]^T(\omega_r^1 + \rho_z^1) + \beta_{rz}) - \sin([a^1]^T(\omega_r^1 + \rho_z^1) + \beta_{rz}) \Big]
        \sin([\x^{d \neq 1}]^T(\bm{\omega}_r^{d \neq 1} + \bm{\rho}_z^{d \neq 1})) \\
        + 
        &\gamma_{rz} \Big[\cos([b^1]^T(\omega_r^1 - \rho_z^1) + \delta_{rz}) - \cos([a^1]^T(\omega_r^1 - \rho_z^1) + \delta_{rz})  \Big]
        \cos([\x^{d \neq 1}]^T(\bm{\omega}_r^{d \neq 1} - \bm{\rho}_z^{d \neq 1})) \\
        - 
        &\gamma_{rz} \Big[ \sin([b^1]^T(\omega_r^1 - \rho_z^1) + \delta_{rz}) - \sin([a^1]^T(\omega_r^1 - \rho_z^1) + \delta_{rz}) \Big]
        \sin([\x^{d \neq 1}]^T(\bm{\omega}_r^{d \neq 1} - \bm{\rho}_z^{d \neq 1})) \, .
    \end{split}
\end{equation}
We again have a form that can leverage the harmonic addition theorem \ref{thr:harmonic} to collect the constant terms that do not involve $\x^{d \neq 1}$. Applying the theorem and reparametrizing results in
\begin{multline}\label{eq:gaussgbqmean_onedim}
    \frac{\bm{y}^T \bm{K}^{-1}}{Q_{\bm{a}}^{\bm{b}} \times RZ |(2\pi)^d \bm{\Sigma}|^{1/2}} \\
    \times 
    \Bigg[
        \sum_{r=1}^R \sum_{z=1}^Z
        \alpha_{rz}^* \times \cos \left( [\x^{d \neq 1}]^T(\bm{\omega}_r^{d \neq 1} + \bm{\rho}_z^{d \neq 1}) + \beta_{rz}^* \right) + 
        \gamma_{rz}^* \times \cos \left( [\x^{d \neq 1}]^T(\bm{\omega}_r^{d \neq 1} - \bm{\rho}_z^{d \neq 1}) + \delta_{rz}^* \right)
    \Bigg]_{\bm{a}^{d \neq 1}}^{\bm{b}^{d \neq 1}}  \, ,
\end{multline}
where
\begin{gather}
    \alpha_{rz}^* = \alpha_{rz} \times \sign \left(\cos([b^1]^T(\omega_r^1 + \rho_z^1) + \beta_{rz}) - \cos([a^1]^T(\omega_r^1 + \rho_z^1) + \beta_{rz}) \right)
    \times \sqrt{2 - 2\cos((b^1 - a^1)(\omega_r^1 + \rho_z^1))}  \label{eq:alpha_update}\\
    \beta_{rz}^* = \arctan \left( -\cot \left( \frac{(b^1 + a^1)(\omega_r^1 + \rho_z^1) + 2\beta_{rz}}{2} \right) \right) \\
    \gamma_{rz}^* = \gamma_{rz} \times \sign \left(\cos([b^1]^T(\omega_r^1 - \rho_z^1) + \delta_{rz}) - \cos([a^1]^T(\omega_r^1 - \rho_z^1) + \delta_{rz}) \right)
    \times \sqrt{2 - 2\cos((b^1 - a^1)(\omega_r^1 - \rho_z^1))} \\
    \delta_{rz}^* = \arctan \left( -\cot \left( \frac{(b^1 + a^1)(\omega_r^1 - \rho_z^1) + 2\delta_{rz}}{2} \right) \right) \, . \label{eq:delta_update}
\end{gather}

We can see that \eqref{eq:gaussgbqmean_onedim} is of the same form as \eqref{eq:gaussgbqmean_reduced}, but with updated parameters $[\alpha_{rz}^*, \beta_{rz}^*, \gamma_{rz}^*, \delta_{rz}^*]$ that rely only on the constant values $[a^1, b^1, \omega_r^1, \rho_z^1, \alpha_{rz}, \beta_{rz}, \gamma_{rz}, \delta_{rz}]$. Starting at \eqref{eq:gaussgbqmean_onedim} and repeating the steps in \eqref{eq:gaussgbqmean_reduced} through \eqref{eq:gaussgbqmean_onedim} for all $d - 1$ remaining dimensions will result in the full evaluation of the GBQ integral estimate over bounds $[\bm{a}, \bm{b}]$. Complexity is further discussed in Section~\ref{sec:complexity}.

\subsection{GBQ Variance Over Gaussian Measures}

We begin by stating the BQ formulation of the variance of the mean integration approximation $\fint$:
\begin{equation} \label{eq:bqvar}
    \mathbb{V}(\fint) =  \int \int k(\x, \bm{X}) \, p(\x) p(\bm{X}) \, d\x d\bm{X} \, .
\end{equation}

To find the variance of $\fint$ over a Gaussian measure, we substitute the expectation of the Gaussian kernel mean, which we calculate in Section~\ref{sec:gaussgbqmean}, into \eqref{eq:bqvar} for $\int k(\x, \bm{X}) \, p(\x) \, d\x$. To do so, we first rewrite the Gaussian kernel mean approximation as
\begin{equation}\label{eq:gausskmecomp}
    \begin{split}
        \bm{\mu}_{\x}(\bm{X}) &= 
        [2RZ[(2\pi)^d |\bm{\Sigma}|]^{1/2}]^{-1} \sum_{r=1}^R \sum_{z=1}^Z 
        \Bigg[
            \frac{h^d(\x^T(\bm{\omega}_r + \bm{\rho}_z) - (\bm{\omega}_r^T\bm{X} + \bm{\rho}_z^T\bm{\mu}))}{ \prod_{j=1}^d(\omega_r^j + \rho_z^j)} \\
            &+ \frac{h^d(\x^T(\bm{\omega}_r - \bm{\rho}_z) - (\bm{\omega}_r^T\bm{X} - \bm{\rho}_z^T\bm{\mu}))}{\prod_{j=1}^d(\omega_r^j - \rho_z^j)}
        \Bigg] \\
        &= \frac{1}{2RZ[(2\pi)^d |\bm{\Sigma}|]^{1/2}} 
        \sum_{r=1}^R \sum_{z=1}^Z 
        \Bigg[
            \frac{h^d(\tau - \bm{\omega}_r^T\bm{X})}{ \prod_{j=1}^d(\omega_r^j + \rho_z^j)} 
            + \frac{h^d(\nu - \bm{\omega}_r^T\bm{X})}{\prod_{j=1}^d(\omega_r^j - \rho_z^j)}
        \Bigg]
    \end{split}
\end{equation}
where we have combined the terms inside the $h^d$s not involving $\bm{X}$ into $\tau$ and $\nu$.

We next substitute \eqref{eq:gausskmecomp} into \eqref{eq:bqvar}, while also introducing the RFF estimate $q(\X)$ to $p(\X)$, to obtain the integral variance:
\begin{equation}\label{eq:11}
    \begin{split}
        \mathbb{V}(\fint) &= \int \frac{\cos(\bm{\rho}_u^T(\X - \bm{\mu}))}{RZU[(2\pi)^d |\bm{\Sigma}|]} \sum_{r=1}^R \sum_{z=1}^Z \sum_{u=1}^U 
        \Bigg[
            \frac{h^d(\tau - \bm{\omega}_r^T\bm{X})}{2 \prod_{j=1}^d(\omega_r^j + \rho_z^j)} 
            + \frac{h^d(\nu - \bm{\omega}_r^T\bm{X})}{2 \prod_{j=1}^d(\omega_r^j - \rho_z^j)}
        \Bigg] \, d\X \\
        &= 
        L^\prime \sum_{r=1}^R \sum_{z=1}^Z \sum_{u=1}^U \int \cos(\bm{\rho}_u^T(\X - \bm{\mu}))  
        \Bigg[
            \frac{h^d(\tau - \bm{\omega}_r^T\bm{X})}{2 \prod_{j=1}^d(\omega_r^j + \rho_z^j)} 
            + \frac{h^d(\nu - \bm{\omega}_r^T\bm{X})}{2 \prod_{j=1}^d(\omega_r^j - \rho_z^j)}
        \Bigg] \, d\X \, ,
    \end{split}
\end{equation}
where we have introduced another index of $\bm{\rho}$ in $u=1, \dots, U$, and substituted $L^\prime$ for $[RZU(2\pi)^d |\bm{\Sigma}|]^{-1}$.

We note here that $h^d$ could be any of $[\cos, \sin, -\cos, -\sin]$, so we cannot necessarily leverage the same identity we used previously on the products of cosines. However, the simple identity $\sin(x) = \cos(x - \frac{\pi}{2})$ can rectify this case. We will continue with the variance proof under the assumption that $h^d = \cos$, but it is straightforward to derive the variance for the alternative cases.

Using the identity $\cos(\alpha)\cos(\beta) = \cos(\alpha + \beta)/2 + \cos(\alpha - \beta)/2$, we simplify the integrand in \eqref{eq:11} to:
\begin{equation}
    \begin{split}
    \frac{\cos(\bm{\rho}_u^T(\X - \bm{\mu}) + \tau - \bm{\omega}_r^T\bm{X})}{4\prod_{j=1}^d(\omega_r^j + \rho_z^j)} 
     &+ \frac{\cos(\bm{\rho}_u^T(\X - \bm{\mu}) - \tau + \bm{\omega}_r^T\bm{X})}{4\prod_{j=1}^d(\omega_r^j + \rho_z^j)} \\
     + \frac{\cos(\bm{\rho}_u^T(\X - \bm{\mu}) + \nu - \bm{\omega}_r^T\bm{X})}{4 \prod_{j=1}^d(\omega_r^j - \rho_z^j)} 
     &+ \frac{\cos(\bm{\rho}_u^T(\X - \bm{\mu}) - \nu + \bm{\omega}_r^T\bm{X})}{4 \prod_{j=1}^d(\omega_r^j - \rho_z^j)}
    \end{split}
\end{equation}
and simplify further to
\begin{equation}\label{eq:rffint}
    \begin{split}
        \frac{\cos(\X^T(\bm{\rho}_u - \bm{\omega}_r) - \bm{\rho}_u^T\bm{\mu} + \tau)}{4\prod_{j=1}^d(\omega_r^j + \rho_z^j)}
        &+ \frac{\cos(\X^T(\bm{\rho}_u + \bm{\omega}_r) - \bm{\rho}_u^T\bm{\mu} - \tau)}{4\prod_{j=1}^d(\omega_r^j + \rho_z^j)} \\
        + \frac{\cos(\X^T(\bm{\rho}_u - \bm{\omega}_r) - \bm{\rho}_u^T\bm{\mu}  + \nu)}{4 \prod_{j=1}^d(\omega_r^j - \rho_z^j)}
        &+ \frac{\cos(\X^T(\bm{\rho}_u + \bm{\omega}_r) - \bm{\rho}_u^T\bm{\mu}  - \nu)}{4 \prod_{j=1}^d(\omega_r^j - \rho_z^j)} \, .
    \end{split}
\end{equation}

Lastly, we use the anti-derivative methods described in \ref{sec:prelims} to calculate the anti-derivative of this integrand, which represents the indefinite GBQ variance over a Gaussian measure:
\begin{equation}\label{eq:indefinite_gbq_gauss_var}
    \begin{split}
        \mathbb{V}(\fint) &= L^\prime \sum_{r=1}^R \sum_{z=1}^Z \sum_{u=1}^U 
        \Bigg[
            \frac{h^{2d}(\X^T(\bm{\rho}_u - \bm{\omega}_r) - \bm{\rho}_u^T\bm{\mu} + \tau)}{4\prod_{j=1}^d(\omega_r^j + \rho_z^j)(\rho_u^j - \omega_r^j)} 
            + \frac{h^{2d}(\X^T(\bm{\rho}_u + \bm{\omega}_r) - \bm{\rho}_u^T\bm{\mu} - \tau)}{4\prod_{j=1}^d(\omega_r^j + \rho_z^j)(\rho_u^j + \omega_r^j)} \\
            &+ \frac{h^{2d}(\X^T(\bm{\rho}_u - \bm{\omega}_r) - \bm{\rho}_u^T\bm{\mu}  + \nu)}{4 \prod_{j=1}^d(\omega_r^j - \rho_z^j)(\rho_u^j - \omega_r^j)} 
            + \frac{h^{2d}(\X^T(\bm{\rho}_u + \bm{\omega}_r) - \bm{\rho}_u^T\bm{\mu}  - \nu)}{4 \prod_{j=1}^d(\omega_r^j - \rho_z^j)(\rho_u^j + \omega_r^j)} 
        \Bigg]
    \end{split}
\end{equation}
where $h^{2d}$ is defined as the $2d$-th index of function vector $h$ as previously defined in Section~\ref{sec:prelims}. As we again have an indefinite anti-derivative form that is a linear combination of trigonometric functions evaluated over the dot products of $\x$ and $\X$ with their measure RFF frequencies $\bm{\rho}$, we can use a variant of the method outlined in Section~\ref{implementation_algo} to evaluate over definite bounds $[\bm{a}, \bm{b}]$ for both $\x$ and $\X$. We note that in this case it is again necessary to divide $L^\prime$ in \eqref{eq:indefinite_gbq_gauss_var} by the necessary truncation term $(Q_{\bm{a}}^{\bm{b}})^{2}$, as in \eqref{eq:gaussgbqmean_reduced} (where $Q_{\bm{a}}^{\bm{b}}$ is squared in this case due to integration over truncated forms of both $q(\x)$ and $q(\X)$).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%% UNIFORM GBQ DERIVATION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{GBQ Integral Mean Over Uniform Measures} \label{sec:unimean}

We showed in \ref{sec:prelims} that \eqref{eq:kmeuni} represents the RFF kernel mean over a uniform measure. Given this, it is straightforward to derive GBQ over a uniform measure. We simply need to evaluate the definite form of \eqref{eq:kmeuni} and then substitute it for $\mu_{\x}(\bm{X})$ in $\fint = \mu_{\x}(\bm{X})^T \bm{K}^{-1} \bm{y}$.

The GBQ integral mean posterior in full is
\begin{equation}\label{eq:gbqunimean}
    \fint = \frac{\bm{y}^T\bm{K}^{-1}}{R} \sum_{r=1}^R \frac{h^d(\bm{\omega}_r^T(\x - \X))}{\prod_{j=1}^d \omega_r^j} \,  \Bigg|_{\bm{a}}^{\bm{b}} \, .
\end{equation}
We note that GBQ over the uniform measure is equivalent to direct analytical integration of the RFF-parametrized GP integrand $\Bar{f}$.

\subsection{GBQ Integral Variance Over Uniform Measures}

To calculate the variance, substitute \eqref{eq:gbqunimean} for $\int k(\x, \bm{X}) \, p(\x) \, d\x$ in \eqref{eq:bqvar},
\begin{equation}
    \begin{split}
        \mathbb{V}(\fint) &= \int \frac{1}{R} \sum_{r=1}^R \frac{h^d(\bm{\omega}_r^T(\x -
        \X))}{\prod_{j=1}^d \omega_r^j} p(\X) d\X \\
        &= \frac{1}{R} \sum_{r=1}^R \int \frac{h^d(\bm{\omega}_r^T(\x - \X))}{\prod_{j=1}^d \omega_r^j} d\X
    \end{split} \, .
\end{equation}
Using the same techniques as those in \ref{sec:prelims} and \ref{sec:unimean}, we can easily arrive at the anti-derivative form of this variance estimate:
\begin{equation}
    \mathbb{V}(\fint) = \frac{1}{R} \sum_{r=1}^R \frac{(-1)^d h^{2d}(\bm{\omega}_r^T(\x - \X))}{\prod_{j=1}^d (\omega_r^j)^2}
\end{equation}
where $h^{2d}$ is the $2d$-th index of $h$ as defined in \ref{sec:prelims}, and the term $(-1)^d$ is introduced due to the fact that $\bm{X}$ enters the integrand with a negative sign.

For both the GBQ uniform mean and variance calculations, a simplified version of the algorithm described in section \ref{implementation_algo} can be used for efficient implementation.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%% MULTIVARIATE RFF CDF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\subsection{Multivariate CDF of the RFF Formulated Gaussian}\label{rffcdf}

Using the established methods from \ref{sec:prelims} and \ref{sec:unimean} on the integrals of RFF-parametrized kernels and distributions, it is trivial to show through u-substitution and trigonometric anti-derivatives that the indefinite integral of an RFF-parametrized Gaussian $q(\x)$ is:
\begin{equation}
    \begin{split}
        Q(\x) &= \int_{\x \in \mathcal{R}^d} \frac{1}{R[(2\pi)^d |\bm{\Sigma}|]^{1/2}} \sum_{r=1}^R \cos(\bm{\rho}_r^T(\x - \bm{\mu}))\, d\x \\
        &= \frac{1}{R[(2\pi)^d |\bm{\Sigma}|]^{1/2}} \sum_{r=1}^R \int_{\x \in \mathcal{R}^d} \cos(\bm{\rho}_r^T(\x - \bm{\mu})) \, d\x \\
        &= \frac{1}{R[(2\pi)^d |\bm{\Sigma}|]^{1/2}} \sum_{r=1}^R \frac{h^d(\bm{\rho}_r^T(\x - \bm{\mu}))}{\prod_{j=1}^d \rho_r^j}
    \end{split}
\end{equation}
where $h^d$ is defined as in \ref{sec:prelims}. A simplified form of the algorithm presented in \ref{implementation_algo} can be used for the application of this indefinite integral over definite bounds $[\bm{a}, \bm{b}]$, which can then be used to estimate the CDF of a multivariate Gaussian approximation over a domain.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%% COMPLEXITY %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Computational Complexity}\label{sec:complexity}
In this section we focus on deriving the complexity of the GBQ mean integral estimate $\fint$ and compare this complexity to that of traditional BQ.

\subsection{GBQ Over Gaussian Measures}
We can examine the equation of the GBQ integral estimate mean over a Gaussian measure, reproduced here, to derive the computational complexity of the mean estimation
\begin{equation}\tag{\ref{gbq_gauss_mean}}
    \frac{\bm{y}^T \bm{K}^{-1}}{Q_{\bm{a}}^{\bm{b}} \times RZ |(2\pi)^d \bm{\Sigma}|^{1/2}} 
    \Bigg[
        \sum_{r=1}^R \sum_{z=1}^Z
        \frac{h^d(\x^T(\bm{\omega}_r + \bm{\rho}_z) - (\bm{\omega}_r^T\bm{X} + \bm{\rho}_z^T\bm{\mu}))}{\prod_{j=1}^d (\omega_r^j + \rho_z^j)} +
        \frac{h^d(\x^T(\bm{\omega}_r - \bm{\rho}_z) - (\bm{\omega}_r^T\bm{X} - \bm{\rho}_z^T\bm{\mu}))}{\prod_{j=1}^d(\omega_r^j - \rho_z^j)} 
    \Bigg]_{\bm{a}}^{\bm{b}}  \, .
\end{equation}
There are two potential terms which may dominate the complexity of GBQ: (1) the inversion of $\bm{K}$, which is an operation shared by vanilla BQ and has a complexity of $\mathcal{O}(N^3)$; or (2) the kernel mean calculation inside the double summation and evaluation over bounds $[\bm{a}, \bm{b}]$.

We will focus on deriving the complexity of the bracketed double summation, which represents the RFF kernel mean $\mu_{\x}(\bm{X})$. We will first note that $\mu_{\x}(\bm{X}) = [\mu_{\x}(\bm{x}_1), \dots, \mu_{\x}(\bm{x}_N)]$, which implies a baseline complexity of $\mathcal{O}(N)$ when we evaluate $\mu_{\x}(\x_i) \, \forall \, \x_i \in \bm{X}$.

Next, the double summation over $R$ and $Z$ implies additional multiplicative complexity of $RZ$, for an aggregate complexity of $\mathcal{O}(NRZ)$. Finally, the operations within the double sum have at most complexity $d$, which results in a total complexity of $\mathcal{O}(dNRZ)$ in the indefinite form.

For application of the indefinite form over definite bounds as in \eqref{eq:gaussgbqmean_onedim}, we can derive the complexity through the individual complexities of the single-dimension parameter update equations \eqref{eq:alpha_update} through \eqref{eq:delta_update}. A single iteration of the update equations is evaluated in $\mathcal{O}(1)$ time, but we must apply them $d$ times for all dimensions of $\x$. In addition, the double summation over $R$ and $Z$, and subsequent evaluation across all $\x_i \in \bm{X}$ results in a total complexity of $\mathcal{O}(dNRZ)$, which is the same as the evaluation of the indefinite form at a single point. Considering that a naive implementation of the indefinite integral over multidimensional bounds results in a multiplicative increase to the indefinite complexity of $2^d$, the algorithm presented in Section~\ref{implementation_algo} represents a significant performance improvement.

\subsection{GBQ Over Uniform Measures}
The complexity of GBQ over the uniform measure follows a very similar derivation to that of GBQ over the Gaussian measure. The complexity can be alternatively dominated by the inversion of the kernel matrix $\bm{K}$ or calculation of the kernel mean $\mu_{\x}(\bm{X})$.

We derive here the complexity of the kernel mean over a uniform measure. By the same reasoning through which we derive the indefinite Gaussian GBQ complexity as $\mathcal{O}(dNRZ)$, and the fact that \eqref{eq:gbqunimean} only contains a single summation over one set of Fourier features $\{\bm{\omega}_r\}_{r=1}^R$ rather than two, we can easily derive that the indefinite form of uniform GBQ has complexity $\mathcal{O}(dNR)$.

Similarly, we can use the implementation in \ref{implementation_algo} when applying \eqref{eq:gbqunimean} over multidimensional bounds. As we have previously derived that the method in \ref{implementation_algo} results in the same complexity as evaluation of the indefinite anti-derivative at a single point, we can similarly reason that GBQ over a uniform measure and multidimensional bounds has complexity $\mathcal{O}(dNR)$. 

\subsection{Comparison of BQ and GBQ Complexity}
Traditional BQ scales in $\mathcal{O}(N^3)$ due to the necessary operation $\bm{K}^{-1}$, and from the previous sections we see that GBQ scales in $\mathcal{O}(N^3)$ or $\mathcal{O}(dNR)$ (uniform) / $\mathcal{O}(dNRZ)$ (Gaussian).

Eliminating common terms in $\mathcal{O}(N^3)$, $\mathcal{O}(dNR)$, and $\mathcal{O}(dNRZ)$ allows us to see that when $dR < N^2$, uniform GBQ shares the same complexity as traditional BQ in $\mathcal{O}(N^3)$. The same statement applies for Gaussian GBQ when $dRZ < N^2$. 

As the numbers of RFF parameters $R$ and $Z$ are traditionally kept well below $N$ in practice, and BQ is generally used in $d \leq 10$ dimensions, these are very reasonable conditions under which, at medium-size $N$, BQ and GBQ share the same computational complexity for evaluation of the mean of the approximated integral, $\fint$.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%% PROOFS AND THEORETICAL RESULTS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{Proofs for the theoretical results}

\subsection{Background}
We consider a standard GP posterior mean and variance, respectively, as:
\begin{align}
    \gpMean_\nObs(\location) &:= \vec k_\nObs(\location)^\transpose(\mat K_\nObs + \gpnoisefactor\eye)^{-1}\observations_\nObs
    \label{eq:gp-mean}\\
    \sigma_\nObs^2(\location) &:= k(\location,\location) - \vec k_\nObs(\location)^\transpose(\mat K_\nObs + \gpnoisefactor\eye)^{-1} \vec k_\nObs(\location)\label{eq:gp-variance}
\end{align}
where we use notation shortcuts for the vector $\vec k_\nObs(\location) := [k(\location,\location_i)]_{i=1}^\nObs\in\R^\nObs$ and the kernel matrix $\mat K_\nObs := [k(\location_i, \location_j)]_{i,j=1}^\nObs \in \R^{\nObs\times\nObs}$. Correspondingly, our method employs Fourier features to approximate a GP posterior mean as:
\begin{align}
    \sbq\gpMean_\nObs(\location) &:= \vec{\rff{k}}_\nObs(\location)^\transpose\left(\mat{K}_\nObs + \gpnoisefactor\eye\right)^{-1}\observations\,,\label{eq:ssgp-mean}
\end{align}
where $\rff k:\locDomain\times\locDomain\to\R$ is formally defined according to the next statement.

\begin{definition}
\label{def:rff}
Let $k:\locDomain\times\locDomain\to\R$ denote a translation-invariant positive-definite kernel on $\locDomain\subset\R^\locDim$, $\locDim\in\N$. The random Fourier feature approximation is defined as:
\begin{equation}
    \rff{k}(\location,\location') := \ffeature(\location)^\transpose\ffeature(\location'), \quad \location,\location' \in\locDomain\,,
\end{equation}
where:
\begin{equation}
    \ffeature(\location) := \sqrt{\frac{1}{\nFeatures}}
    \begin{bmatrix}
    \sin(\frequency_1^\transpose \location)\\
    \cos(\frequency_1^\transpose \location)\\
    \vdots\\
    \sin(\frequency_\nFeatures^\transpose \location)\\
    \cos(\frequency_\nFeatures^\transpose \location)
    \end{bmatrix}\,,
    \quad \frequency_i \overset{\iid}{\sim} \pMeasure_k\,,\quad \location\in\locDomain\,,
\end{equation}
with $\pMeasure_k$ denoting the probability distribution that corresponds to the Fourier transform of the kernel $k$. Equivalently, we can also write:
\begin{equation}
    \rff{k}(\location,\location') = \frac{1}{\nFeatures} \sum_{i=1}^\nFeatures \cos(\frequency_i^\transpose (\location - \location'))\,, \quad \location, \location'\in\locDomain\,.
\end{equation}
\end{definition}

\subsection{Auxiliary results}
We will make use of guarantees for RFFs to bound the kernel approximation error. In particular, we consider the following result from \citet{Sutherland2015}.

\begin{lemma}[{\citet[Proposition 1]{Sutherland2015}}, full version]
\label{thr:kernel-approximation}
Let $k:\locDomain\times\locDomain\to\R$ be a continuous shift-invariant positive-definite kernel with $k(\location,\location) = 1$ and such that $\nabla^2 k(\location, \location)$ exists, for all $\location\in\locDomain\subset\R^\locDim$. Suppose $\locDomain$ is compact with diameter $\ell_\locDomain < \infty$. Denote $k$'s Fourier transform as $\pMeasure_k$, which is a probability measure, and let $\sigma_k^2 := \expectation[\norm{\vec\omega}_2^2]$ for $\vec\omega\sim\pMeasure_k$. Let $\rff{k}:\locDomain\times\locDomain\to\R$ denote $k$'s RFF approximation with $\nFeatures$ frequencies according to \autoref{def:rff}. For any $\error > 0$, let:
\begin{align}
    \alpha_\error &:= \min\left(1, \sup_{\location,\location'\in\locDomain} \frac{1}{2} + \frac{1}{2} k(2\location, 2\location') - k(\location, \location')^2 + \frac{1}{3}\error \right)\,,\\
    \beta_\locDim &:= \left(\left(\frac{\locDim}{2}\right)^{-\frac{\locDim}{\locDim+2}} + \left(\frac{\locDim}{2}\right)^{\frac{2}{\locDim+2}}\right) 2^{\frac{6\locDim+2}{\locDim+2}}\,.
\end{align}
Then the following holds for any $\error > 0$:
\begin{equation}
    \begin{split}
        \prob{\sup_{\location, \location' \in\locDomain} |\rff{k}(\location,\location') - k(\location,\location')| \geq \error} &\leq \beta_\locDim \left(\frac{\sigma_k\ell_\locDomain}{\error}\right)^{\frac{2}{1+ \frac{2}{\locDim}}} \exp\left(-\frac{\nFeatures\error^2}{4(\locDim+2)\alpha_\error}\right)\\
        &\leq 66 \left(\frac{\sigma_k\ell_\locDomain}{\error}\right)^2 \exp\left(-\frac{\nFeatures\error^2}{4(\locDim+2)}\right)\,,
    \end{split}
\end{equation}
where for the second statement we assume $\error \leq \sigma_k\ell_\locDomain$.
Therefore, for any $\delta \in (0, 1)$, we can achieve pointwise approximation error less than $\error$ with probability at least $1-\delta$ if:
\begin{equation}
    \nFeatures \geq \frac{4(\locDim+2)\alpha_\error}{\error^2} \left( \frac{2}{1 + \frac{2}{\locDim}} \log\frac{\sigma_k \ell_\locDomain}{\error} + \log\frac{\beta_\locDim}{\delta} \right)\,.
    \label{eq:rff-min-features}
\end{equation}
% In particular, for any $0 < \error \leq \sigma_k \ell_\locDomain$, we have:
% \begin{equation}
%     \prob{\sup_{\location, \location' \in\locDomain} |\rff{k}(\location,\location') - k(\location,\location')| \geq \error} \leq 66 \left(\frac{\sigma_k\ell_\locDomain}{\error}\right)^2 \exp\left(-\frac{\nFeatures\error^2}{4(\locDim+2)}\right)\,.
% \end{equation}
\end{lemma}

Compared to the original statement of the result in \citet{Sutherland2015}, note that we use the number of Fourier frequencies $\nFeatures$, instead of the dimensionality of the feature vector, i.e., $D := 2\nFeatures$, so that some constants are changed. Considering the result above, as $\max_{\locDim\in\N}\beta_\locDim = 66$ \citep[see][]{Sutherland2015} and $\alpha_\error \leq 1$, we can also set the minimum number of features for a given error bound $\error >0$ and $\delta\in(0,1)$ as:
\begin{equation}
    \nFeatures(\error,\delta,\sigma_k) := \frac{4(\locDim+2)}{\error^2} \left( \frac{2}{1 + \frac{2}{\locDim}} \log\frac{\sigma_k \ell_\locDomain}{\error} + \log\frac{66}{\delta} \right)\,,
\end{equation}
though a tighter bound is available via \autoref{eq:rff-min-features}. Therefore, the restatement of the result in the main paper as Lemma 2 is still valid.

The norm of the observations vector $\observations$ in a Gaussian process can be bounded in terms of the integrand $\integrand$'s extremes and the number of data points, as in the following result.

\begin{lemma}
\label{thr:obs-norm}
Given $\delta\in(0,1)$, assuming \iid Gaussian observation noise $\obsNoise\sim\normal(0,\sigma_\obsNoise^2)$, we have that:
\begin{equation}
    \prob{\norm{\observations}_2 \leq \sqrt{\nObs}\left(\norm{\integrand}_\infty + \sigma_\obsNoise\sqrt{2\log\left(\frac{\nObs}{\delta}\right)}\right)} \geq 1 - \delta\,.
\end{equation}
\end{lemma}
\begin{proof}
    Starting from the definition of the 2-norm, we have:
    \begin{equation}
        \norm{\observations}_2^2 = \sum_{i=1}^\nObs \observation_i^2 = \sum_{i=1}^\nObs (\integrand(\location_i) + \obsNoise_i)^2 \leq \nObs \max_{i\in\{1,\dots,\nObs\}} (\integrand(\location_i) + \obsNoise_i)^2\,.
        \label{eq:obs-norm}
    \end{equation}
    Assuming \iid Gaussian observation noise $\obsNoise\sim\normal(0,\sigma_\obsNoise^2)$, the following holds:
    \begin{equation}
        \forall\beta>0, \quad \prob{|\obsNoise| \geq \beta\sigma_\obsNoise} \leq \exp\left(-\beta^2/2\right)\,.
    \end{equation}
    By applying a union bound, we have:
    \begin{equation}
        \begin{split}
            \prob{\exists i \in\{1,\dots,\nObs\}: \observation_i \geq \integrand(\location_i) + \beta\sigma_\obsNoise} &
            \leq \sum_{i=1}^\nObs \prob{\obsNoise_i \geq \beta\sigma_\obsNoise}\\
            &\leq \nObs \prob{|\obsNoise| \geq \beta\sigma_\obsNoise}\\
            &\leq\nObs\exp\left(-\beta^2/2\right)\,.
        \end{split}
    \end{equation}
    Solving for $\nObs\exp\left(-\beta^2/2\right) = \delta$ and taking the complement, we then obtain:
    \begin{equation}
        \prob{\forall i\in\{1,\dots,\nObs\}, \quad \observation_i \leq \norm{\integrand}_\infty + \sigma_\obsNoise\sqrt{2\log\left(\frac{\nObs}{\delta}\right)}} \geq 1 - \delta\,.
    \end{equation}
    The result then follows by applying the latter to \autoref{eq:obs-norm}.
\end{proof}

\subsection{The probability distribution approximation via RFF}
For the approximation of $\pDensity$ by $\rff{\pDensity}$, we use the following fact.

% \begin{lemma}
% The Fourier transform of a probability measure yields a translation-invariant positive-definite kernel.
% \end{lemma}
% \begin{proof}
% The proof follows standard results in real analysis and the theory of positive-definite functions. It does not require Bochner's theorem, which validates the converse and involves a more elaborate proof. %TODO: Complete proof.
% \end{proof}


\begin{theorem}[{Bochner's theorem \citep{Rudin1990}}]
\label{thr:bochner}
A function $\anyfunction:\locDomain\to\R$, $\locDomain\subset\R^\locDim$ is positive-definite if and only if it is the Fourier transform of a non-negative measure.
\end{theorem}

By Bochner's theorem (\autoref{thr:bochner}), as previously applied to positive-definite kernels (Theorem 1, main paper), we can also trivially conclude that any \emph{positive-definite} probability density function is by itself the Fourier transform of a probability measure, so that it admits a Fourier-feature representation of the form in \autoref{def:rff}. A probability density function $\pDensity:\R^\locDim\to[0,\infty)$ is positive-definite if, for all $\nObs\in\N$, $\{\alpha_i\}_{i=1}^\nObs \subset\R$ and all $\{\location_i\}_{i=1}^\nObs \subset \R^\locDim$ the following holds:
\begin{equation}
    \sum_{i=1}^\nObs\sum_{j=1}^\nObs \alpha_i\alpha_j \pDensity(\location_i-\location_j) \geq 0\,.
\end{equation}
Not every probability density function is positive-definite, but examples include Gaussian and Student-T distributions \citep{Rossberg1995}. In particular, we can make a kernel $k_\pDensity$ from a probability density function $\pDensity$ on $\locDomain$ by:
\begin{equation}
    \begin{split}
        k_\pDensity: \locDomain\times\locDomain &\longrightarrow \R\\
        \location,\location' &\longmapsto
        \begin{cases}
            \pDensity(\location-\location')\,, &\location-\location'\in\locDomain,\\
            0\,, &\location - \location' \notin\locDomain\,.
        \end{cases}
    \end{split}
    \label{eq:density-kernel}
\end{equation}
It is easy to verify that a kernel defined as above is positive-definite if $\pDensity$ is positive-definite. The kernel is also translation-invariant, since $k_\pDensity(\anyvector + \location, \anyvector+\location') = k_\pDensity(\location,\location')$, for any $\location,\location'\in\locDomain$ and any $\anyvector\in\R^\locDim$. Similarly, we have the equivalence $\pDensity(\location) = k_\pDensity(\location, \vec 0)$ and a corresponding $\rff\pDensity(\location) = \rff{k}_\pDensity(\location, \vec 0)$, for $\location\in\locDomain$, by applying \autoref{def:rff} to $k_\pDensity$. As a result, we can apply \autoref{thr:kernel-approximation} to $k_\pDensity$ to bound the approximation error in $|\pDensity(\location) - \rff\pDensity(\location)|$.

\begin{theorem}[Restatement of Theorem 2]
\label{thr:density-approximation}
Let $\pDensity:\locDomain\to\R$ be a positive-definite probability density function defined on $\locDomain\subset\R^\locDim$ which is such that $\nabla^2 \pDensity(\vec 0)$ exists. Assume $\locDomain$ is compact, and let $\bound_\pDensity > 0$ be any constant such that $\bound_\pDensity \geq \max_{\location\in\locDomain}\pDensity(\location)$. Let $\rff k_\pDensity$ denote an RFF approximation with $\nFeatures_\pDensity\in\N$ frequencies to $k_\pDensity$ as defined in \autoref{eq:density-kernel}, and let $\rff\pDensity:\location\mapsto\rff k_\pDensity(\location,\vec 0)$, $\location\in\locDomain$. Then, for any $\error > 0$, the following holds:
\begin{equation}
\begin{split}
        \prob{\sup_{\location\in\locDomain} |\rff\pDensity(\location) - \pDensity(\location)| \geq \bound_\pDensity\error}
        &\leq \beta_\locDim \left(\frac{\sigma_{k_\pDensity}\ell_\locDomain}{\error}\right)^{\frac{2}{1+ \frac{2}{\locDim}}} \exp\left(-\frac{\nFeatures_\pDensity\error^2}{4(\locDim+2)\alpha_\error}\right)\\
        &\leq 66\left(\frac{\sigma_{k_\pDensity}\ell_\locDomain}{\error}\right)^2 \exp\left(-\frac{\nFeatures_\pDensity\error^2}{4(\locDim+2)}\right)\,,
\end{split}
\end{equation}
where for the second statement we assume $\error \leq \sigma_{k_\pDensity}\ell_\locDomain$, and $\sigma_{k_\pDensity}$, $\ell_\locDomain$, $\alpha_\error$ and $\beta_\locDim$ are the same as defined in \autoref{thr:kernel-approximation} for $k:= \frac{1}{\bound_\pDensity}k_\pDensity$.

\end{theorem}
\begin{proof}
The result follows by applying \autoref{thr:kernel-approximation} to a normalised version $\bar{k}_\pDensity := \frac{1}{\bound_\pDensity}k_\pDensity$ of $k_\pDensity$ (\autoref{eq:density-kernel}), which is such that $\bar{k}_\pDensity(\location, \location') = 1$.
Noticing that:
\begin{equation}
    \begin{split}
        \sup_{\location, \location' \in\locDomain} |\rff{k}_\pDensity(\location,\location') - k_\pDensity(\location,\location')| &= \sup_{\location, \location' \in\locDomain} |\rff{k}_\pDensity(\location-\location', \vec 0) - k_\pDensity(\location-\location', \vec 0)|\\
        &= \sup_{\location, \location' \in\locDomain:\location-\location'\in\locDomain} |\rff\pDensity(\location-\location') - \pDensity(\location-\location')|\\
        &\leq \sup_{\location\in\locDomain} |\rff\pDensity(\location) - \pDensity(\location)|\,,
    \end{split}
\end{equation}
so that $\sup_{\location, \location' \in\locDomain} |\rff{k}_\pDensity(\location,\location') - k_\pDensity(\location,\location')| \geq \bound_\pDensity\error$ implies $\sup_{\location\in\locDomain} |\rff\pDensity(\location) - \pDensity(\location)| \geq \bound_\pDensity\error$, concludes the proof.
\end{proof}

Given $\error_\pDensity > 0$ such that $\sup_{\location\in\locDomain} |\pDensity(\location) - \rff\pDensity(\location)| \leq \bound_\pDensity\error_\pDensity$, the integration error is bounded by:
\begin{equation}
    \int_{\locDomain} |\pDensity(\location) - \rff\pDensity(\location)|\diff\location \leq \bound_\pDensity\error_\pDensity \int_{\locDomain}\diff\location \leq \bound_\pDensity\error_\pDensity v_\locDomain\,,
\end{equation}
where $v_\locDomain := \int_\locDomain\diff\location$ denotes the volume of the domain $\locDomain$. The latter can be bounded by the volume of a hyper-sphere of diameter $\ell_\locDomain$ in $\R^\locDim$, i.e.:
\begin{equation}
    v_\locDomain \leq \frac{\pi^{\locDim/2}\ell_\locDomain^\locDim}{2^\locDim\Gamma\left(\frac{\locDim}{2}+1\right)}\,,
\end{equation}
where $\Gamma$ denotes Euler's gamma function.


\subsection{Quadrature approximation error}
We now combine our results to bound the quadrature approximation error.

\begin{theorem}[Restatement of Theorem 3]
\label{thr:sbq-error-bound}
Let $\integrand\in\Hspace_k$, where $k:\locDomain\times\locDomain\to\R$ is a positive-definite, translation-invariant kernel on $\locDomain\subset\R^\locDim$. Assume that:
\begin{enumerate}
    \item $\locDomain$ is compact with diameter $\ell_\locDomain < \infty$ and volume $v_\locDomain := \int_\locDomain\diff\location < \infty$;
    \item $k(\vec 0, \vec 0) = 1$ and $\nabla^2 k(\vec 0, \vec 0)$ exists;
    \item and $\pDensity:\locDomain\to[0,\infty)$ is a positive-definite probability density function.
    % \item the number of Fourier frequencies for the kernel and the probability density functions approximation are such that $\nFeatures_k \geq \nFeatures\left(\frac{\delta}{4}, \error_k\right)$ $\nFeatures_k \geq \nFeatures\left(\frac{\delta}{4}, \error_k\right)$, respectively, where $\nFeatures(\delta,\error)$ is defined in \autoref{thr:kernel-approximation} according to $k$ or $k_\pDensity$.
\end{enumerate}
Then, given any $\delta\in(0,1)$, the following holds with probability at least $1-\delta$:
\begin{equation}
    \begin{split}
        &\left\lvert \int_\locDomain \integrand(\location)\pDensity(\location)\diff\location - \int_\locDomain \hat{\gpMean}_\nObs(\location)\rff{\pDensity}(\location)\diff\location \right\rvert\\
        &\leq \left(\frac{\nObs}{\gpnoisefactor}\beta_\obsNoise\left(\frac{\delta}{4}\right)\error_k + \beta_k\left(\frac{\delta}{4}\right)\max_{\location\in\locDomain}\sigma_\nObs(\location)\right)(1 + \bound_\pDensity\error_\pDensity v_\locDomain) + \norm{\integrand}_\infty\bound_\pDensity\error_\pDensity v_\locDomain\,,
    \end{split}
\end{equation}
for an RFF approximation to $k$ with $\nFeatures_k \geq \nFeatures\left(\error_k, \frac{\delta}{4}, \sigma_k\right)$ frequencies and an RFF approximation to $\pDensity$ with $\nFeatures_\pDensity \geq \nFeatures\left(\error_\pDensity, \frac{\delta}{4}, \sigma_{k_\pDensity}\right)$ frequencies, given $0 < \error_k \leq \sigma_k\ell_\locDomain$ and $0 < \error_\pDensity \leq \sigma_{k_\pDensity}\ell_\locDomain$, where:
\begin{align}
    \beta_\obsNoise(\delta) &:= \norm{\integrand}_\infty + \sigma_\obsNoise\sqrt{2\log\left(\frac{\nObs}{\delta}\right)}\,,\\
    \beta_k(\delta) &:= \norm{\integrand}_k + \sigma_\obsNoise\sqrt{\frac{2}{\gpnoisefactor}\log\left(\frac{\det(\eye + \gpnoisefactor^{-1}\mat K_\nObs )^{1/2}}{\delta}\right)}\,,\\
    \nFeatures(\error,\delta, \sigma_k) &:= \frac{4(\locDim+2)}{\error^2} \left( \frac{2}{1 + \frac{2}{\locDim}} \log\frac{\sigma_k \ell_\locDomain}{\error} + \log\frac{66}{\delta} \right)\,.
\end{align}
\end{theorem}
\begin{proof}
    In the spectral Bayesian quadrature formulation, we have the following approximation:
    \begin{equation}
        \int_\locDomain \integrand(\location) \pDensity(\location)\diff\location \approx \observations^\transpose (\mat K_\nObs +\gpnoisefactor\eye)^{-1} \int_\locDomain \vec{\rff{k}}_\nObs(\location)\rff{\pDensity}(\location)\diff\location = \int_\locDomain \hat\gpMean_\nObs(\location)\rff{\pDensity}(\location)\diff\location\,,
    \end{equation}
    where $\hat\gpMean_\nObs(\location) := \vec{\rff{k}}_\nObs(\location)^\transpose(\mat K_\nObs +\gpnoisefactor\eye)^{-1}\observations$.
    We will bound the approximation error by starting with the following decomposition:
    \begin{equation}
        \begin{split}
            &\left\lvert \int_\locDomain \integrand(\location)\pDensity(\location)\diff\location - \int_\locDomain \hat{\gpMean}_\nObs(\location)\rff{\pDensity}(\location)\diff\location \right\rvert\\
            &\leq \left\lvert \int_\locDomain \integrand(\location)\pDensity(\location)\diff\location - \int_\locDomain \hat{\gpMean}_\nObs(\location)\pDensity(\location)\diff\location \right\rvert
            +
            \left\lvert \int_\locDomain \hat{\gpMean}_\nObs(\location)\pDensity(\location)\diff\location - \int_\locDomain \hat{\gpMean}_\nObs(\location)\rff{\pDensity}(\location)\diff\location \right\rvert\\
            &\leq \norm{\integrand - \hat{\gpMean}_\nObs}_\infty + \norm{\hat{\gpMean}_\nObs}_\infty \int_\locDomain \lvert\pDensity(\location) - \rff{\pDensity}(\location)\rvert \diff\location\,.
        \end{split}
        \label{eq:sbq-error-decomp}
    \end{equation}
    We first observe that:
    \begin{equation}
        \forall\location\in\locDomain, \quad \lvert\integrand(\location) - \hat{\gpMean}_\nObs(\location)\rvert \leq \lvert\integrand(\location) - \gpMean_\nObs(\location)\rvert + \lvert \gpMean_\nObs(\location) - \hat\gpMean_\nObs(\location)\rvert\,.
        \label{eq:sbq-mean-error}
    \end{equation}
    Assuming $\integrand\in\Hspace_k$, given $\delta_\gpMean\in(0,1)$, we can apply Lemma 1 (main paper) to bound the first term on the right-hand side as:
    \begin{equation}
        \prob{\sup_{\location\in\locDomain} \lvert\integrand(\location) - \gpMean_\nObs(\location)\rvert \leq \sup_{\location\in\locDomain}\beta_k(\delta_\gpMean)\sigma_\nObs(\location)} \geq 1 - \delta_\gpMean\,.
    \end{equation}
    For the second term on the right-hand side of \autoref{eq:sbq-mean-error}, we have that:
    \begin{equation}
        \begin{split}
            \lvert \gpMean_\nObs(\location) - \hat\gpMean_\nObs(\location)\rvert &\leq \norm{\vec k_\nObs(\location) - \vec{\rff{k}}_\nObs(\location)}_2 \norm{(\mat K_\nObs +\gpnoisefactor\eye)^{-1}\observations}_2\\
            &\leq \norm{\vec k_\nObs(\location) - \vec{\rff{k}}_\nObs(\location)}_2 \norm{(\mat K_\nObs +\gpnoisefactor\eye)^{-1}}_2\norm{\observations}_2\\
            &\leq \frac{\norm{\observations}_2}{\gpnoisefactor}\norm{\vec k_\nObs(\location) - \vec{\rff{k}}_\nObs(\location)}_2
        \end{split}\,,
    \end{equation}
    since $\norm{(\mat K_\nObs +\gpnoisefactor\eye)^{-1}}_2 \leq \gpnoisefactor^{-1}$. Applying \autoref{thr:obs-norm}, given $\delta_\obsNoise \in (0,1)$, yields:
    \begin{equation}
        \prob{\norm{\observations}_2 \leq \sqrt{\nObs}\beta_\obsNoise(\delta_\obsNoise)} \geq 1 - \delta_\obsNoise\,,
    \end{equation}
    where $\beta_\obsNoise(\delta) := \norm{\integrand}_\infty + \sigma_\obsNoise\sqrt{2\log\left(\frac{\nObs}{\delta}\right)}$.
    In addition, the kernel approximation guarantee in \autoref{thr:kernel-approximation}, for a given number of Fourier frequencies $\nFeatures_k \geq \nFeatures(\error_k, \delta_k, \sigma_k)$ with $\delta_k\in(0,1)$, leads us to:
    \begin{equation}
        \prob{\sup_{\location\in\locDomain} \norm{\vec k_\nObs(\location) - \vec{\rff{k}}_\nObs(\location)}_2 \leq \sqrt{\nObs}\error_k} \geq 1 - \delta_k\,.
    \end{equation}
    Therefore, we have:
    \begin{equation}
        \prob{\norm{\integrand - \hat\gpMean_\nObs}_\infty \leq \beta_k(\delta_\gpMean)\max_{\location\in\locDomain}\sigma_\nObs(\location) + \frac{1}{\gpnoisefactor}\nObs\error_k\beta_\obsNoise(\delta_\obsNoise)} \geq 1 - \delta_\gpMean - \delta_\obsNoise - \delta_k\,,
        \label{eq:sbq-mean-error-bound}
    \end{equation}
    which follows by applying a union bound on the complementary events in the equations above. Lastly, note that, under the assumption that the event in \autoref{eq:sbq-mean-error-bound} holds, the following is also true:
    \begin{equation}
        \norm{\hat{\gpMean}_\nObs}_\infty \leq \norm{\integrand}_\infty + \frac{1}{\gpnoisefactor}\nObs\error_k\beta_\obsNoise(\delta_\obsNoise) + \max_{\location\in\locDomain}\beta_k(\delta_\gpMean)\sigma_\nObs(\location)\,.
        \label{eq:sbq-mean-sup-bound}
    \end{equation}
    
    Regarding the probability density approximation, let $v_\locDomain := \int_\locDomain\diff\location$ represent the volume of $\locDomain$. Assume $\nFeatures_\pDensity \geq \nFeatures(\error_\pDensity, \delta_\pDensity, \sigma_{k_\pDensity})$ Fourier frequencies for $\rff\pDensity$, for $\delta_\pDensity \in (0,1)$. Then \autoref{thr:density-approximation} tells us that:
    \begin{equation}
        \prob{\int_\locDomain \lvert\pDensity(\location) - \rff{\pDensity}(\location)\rvert \diff\location \leq \bound_\pDensity\error_\pDensity v_\locDomain} \geq 1 - \delta_\pDensity\,.
        \label{eq:sbq-density-error-bound}
    \end{equation}
    
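    The final result follows by applying a union bound to combine the events in equations \ref{eq:sbq-mean-error-bound}, \ref{eq:sbq-mean-sup-bound} and \ref{eq:sbq-density-error-bound} into \autoref{eq:sbq-error-decomp}, and setting $\delta_\gpMean = \delta_\obsNoise = \delta_k = \delta_\pDensity = \frac{\delta}{4}$, so that all events hold simultaneously with probability at least $1-\delta$.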
\end{proof}

\newpage
\section{Full Experimental Results}
\subsection{5D Continuous Equation}
\begin{table*}[h!]
    \centering
    \caption{5D Continuous Equation Integration Results (\% Error).}\label{tab:5d}
    \begin{tabular}{|c|c|c|c|c|}
        \hline
        $N$ & MC & BQ & GBQ-U RBF & GBQ-G RBF\\
        \hline
        $10$ & $\bm{9.67 \pm 8.43}$ & $20.39 \pm 3.85$ & $23.77 \pm 4.33$ & $20.35 \pm 3.99$\\
        $25$ & $9.32 \pm 7.7$ & $3.21 \pm 1.87$ & $6.02 \pm 2.46$ & $\bm{3.0 \pm 1.97}$\\
        $50$ & $5.57 \pm 4.14$ & $\bm{0.61 \pm 0.34}$ & $2.48 \pm 0.51$ & $0.88 \pm 0.42$\\
        $100$ & $3.81 \pm 2.1$ & $2.05 \pm 0.35$ & $\bm{0.89 \pm 0.44}$ & $2.25 \pm 0.4$\\
        $200$ & $2.65 \pm 2.23$ & $2.29 \pm 0.24$ & $\bm{0.52 \pm 0.26}$ & $2.45 \pm 0.27$\\
        $300$ & $3.89 \pm 2.44$ & $2.28 \pm 0.21$ & $\bm{0.4 \pm 0.17}$ & $2.44 \pm 0.19$\\
        $400$ & $2.74 \pm 1.7$ & $2.28 \pm 0.2$ & $\bm{0.33 \pm 0.14}$ & $2.44 \pm 0.2$\\
        $500$ & $1.29 \pm 1.06$ & $2.27 \pm 0.14$ & $\bm{0.22 \pm 0.12}$ & $2.43 \pm 0.16$\\
        $600$ & $1.53 \pm 0.97$ & $2.28 \pm 0.12$ & $\bm{0.21 \pm 0.09}$ & $2.44 \pm 0.17$\\
        $700$ & $2.39 \pm 2.43$ & $2.29 \pm 0.12$ & $\bm{0.16 \pm 0.1}$ & $2.43 \pm 0.16$\\
        $800$ & $1.42 \pm 1.06$ & $2.24 \pm 0.1$ & $\bm{0.17 \pm 0.12}$ & $2.38 \pm 0.16$\\
        $900$ & $1.64 \pm 1.21$ & $2.24 \pm 0.09$ & $\bm{0.15 \pm 0.09}$ & $2.38 \pm 0.12$\\
        $1000$ & $1.79 \pm 1.09$ & $2.22 \pm 0.08$ & $\bm{0.14 \pm 0.09}$ & $2.37 \pm 0.13$\\
        \hline
    \end{tabular}
\end{table*}

\subsection{5D Disjoint Equation}
\begin{table*}[h!]
    \centering
    \caption{5D Disjoint Equation Integration Results (\% Error).}\label{tab:5ddis}
    \begin{tabular}{|c|c|c|c|c|}
        \hline
        $N$ & MC & BQ & GBQ-G RBF & GBQ-G M3/2\\
        \hline
        $10$ & $\bm{23.94 \pm 13.0}$ & $33.32 \pm 3.0$ & $33.26 \pm 3.12$ & $38.11 \pm 3.78$\\
        $25$ & $\bm{16.84 \pm 20.99}$ & $18.26 \pm 0.86$ & $17.96 \pm 1.08$ & $22.17 \pm 1.22$\\
        $50$ & $\bm{7.58 \pm 5.92}$ & $15.15 \pm 0.59$ & $14.87 \pm 0.6$ & $16.69 \pm 0.7$\\
        $100$ & $5.89 \pm 3.64$ & $\bm{1.71 \pm 0.83}$ & $2.06 \pm 1.28$ & $5.53 \pm 4.55$\\
        $200$ & $4.81 \pm 4.15$ & $2.24 \pm 0.73$ & $\bm{2.17 \pm 1.44}$ & $5.58 \pm 2.91$\\
        $300$ & $6.24 \pm 4.39$ & $\bm{0.59 \pm 0.46}$ & $0.62 \pm 0.44$ & $2.07 \pm 0.67$\\
        $400$ & $3.98 \pm 2.28$ & $1.11 \pm 0.42$ & $1.7 \pm 0.55$ & $\bm{0.79 \pm 0.64}$\\
        $500$ & $4.17 \pm 4.17$ & $\bm{3.37 \pm 0.44}$ & $4.28 \pm 0.7$ & $3.55 \pm 0.84$\\
        $600$ & $3.22 \pm 2.83$ & $2.63 \pm 0.34$ & $3.18 \pm 0.68$ & $\bm{2.59 \pm 0.58}$\\
        $700$ & $3.93 \pm 2.51$ & $1.03 \pm 0.34$ & $1.31 \pm 0.61$ & $\bm{0.85 \pm 0.44}$\\
        $800$ & $3.0 \pm 1.8$ & $\bm{0.78 \pm 0.44}$ & $1.15 \pm 0.6$ & $0.89 \pm 0.51$\\
        $900$ & $3.58 \pm 2.21$ & $\bm{0.48 \pm 0.29}$ & $0.92 \pm 0.54$ & $\bm{0.48 \pm 0.42}$\\
        $1000$ & $3.24 \pm 2.15$ & $\bm{0.38 \pm 0.24}$ & $0.89 \pm 0.46$ & $0.53 \pm 0.5$\\
        \hline
    \end{tabular}
\end{table*}

\printbibliography

\end{document}
