% \renewcommand{\thefigure}{\Alph{section}-\arabic{figure}}
% \setcounter{figure}{0}
% \renewcommand{\thetable}{\Alph{section}-\arabic{table}}
% \setcounter{table}{0}
% \renewcommand{\theequation}{\Alph{section}-\arabic{equation}}
% \setcounter{equation}{0} % Reset the equation counter to zero



% Write a tocdepth change into the .toc file so that it takes effect for the
% entries recorded after this point.
\addtocontents{toc}{\protect\setcounter{tocdepth}{3}}
% Typeset the table of contents with black links. The brace group keeps the
% hyperref colour override local to the table of contents.
{%
\hypersetup{linkcolor=black}
\tableofcontents
}%
\vfill
\clearpage






\section{Implementation details}
\label{sec:app-implementation-details}




\subsection{Model and training details}

\paragraph{Gaussian process.} 
For the mean, we use a constant function.
For the covariance, we use the concatenation of a scale kernel with the respective RBF/Laplace or Mahalanobis distance kernel.
All GP-based methods are optimized with the Adam optimizer \citep{kingma2014adam} over 250 epochs with a cosine annealing learning rate schedule \citep{loshchilov2016sgdr} to a minimum learning rate of $\mathrm{lr}_{\min}=10^{-7}$ and with all data points in the training set as a mini-batch using GPyTorch \citep{gardner2018gpytorch}.



\paragraph{Deep kernel learning.}
For the mean, we use a constant function.
For the covariance, we follow the GPyTorch tutorial implementation of deep kernel learning\footnote{\url{https://docs.gpytorch.ai/en/stable/examples/06_PyTorch_NN_Integration_DKL/KISSGP_Deep_Kernel_Regression_CUDA.html}, accessed 05.02.2024.}. Our model consists of a ReLU fully-connected deep neural network with dimensions $\{d, 1000, 500, 50, 2\}$ as in \citet{wilson2016deep}.
Notably by following the GPyTorch implementation, we do not pre-train the deep neural network. Further, for a fair comparison with other GP-based methods, we did not use the ideas of KISS-GP \citep{wilson2015kernel}.
Hence, we use the same optimization scheme as for all other GPs, but we reduced the number of epochs to 100 due to stability issues for longer training schemes.


\paragraph{GP-ARD-Laplace-full.}
For the mean, we use a constant function.
For the covariance, we use the Mahalanobis distance kernel from \Cref{eq:kernel-mahalanobis-laplace}. 
To stabilize training, we decompose the Mahalanobis distance as $\mM=\mU+\mU^\top+\mD$. Here, $\mU$ is a learnable upper triangular matrix of small values to enforce symmetry and the learnable $\mD$ is a diagonal matrix to focus initialization on the diagonal, similar to the RFM.
We use the same optimization scheme as for all other GPs, but we reduced the number of epochs to 150 due to stability issues for longer training schemes.


\paragraph{NGBoost.}
We use the NGBoost regressor from the official implementations of the respective authors\footnote{\url{https://stanfordmlgroup.github.io/ngboost/}, accessed 05.02.2024.}. For this model, we use the default set of hyperparameters.

\paragraph{CatBoost-ensemble.}
We use the CatBoost regressor from the official implementation of the respective authors\footnote{\url{https://catboost.ai/}, accessed 05.02.2024.}. We choose to select 10 ensemble members each consisting of 1000 trees as done similarly in \citet{malinin2021uncertainty}.
Furthermore, we set the depth of the trees to 6 and keep the remaining default hyperparameters.








\subsection{Hyperparameter search}

For our main results, we perform a hyperparameter search for the specific hyperparameters of each method.
We run a grid search over the hyperparameters and select the best ones based on the validation set's NLL value.

\paragraph{Gaussian processes.}
The only hyperparameter is the learning rate which we optimize over the values $\mathrm{lr}=\{0.05, 0.01, 0.005, 0.001\}$.
The learning rate is decreased to a minimal value of $10^{-7}$.

\paragraph{Recursive feature machines.}
For the RFM, we optimize the hyperparameters of the GP-based methods as described above.
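% TODO(review): the grid originally listed 0.5 twice; 0.05 is assumed for the third entry -- confirm against the experiment configuration.
Additionally, we optimize the Ridge regularization for the optimization of $\alpha$ over the values $\lambda_\alpha=\{0.5, 0.1, 0.05, 0.01, 0.001, 0.0001\}$.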
Furthermore, we optimize the Ridge regularization for the optimization of $\mM$ over the values $\lambda_\mM=\{0.1, 0.01, 0.001, 5\cdot10^{-5}, 10^{-6}, 10^{-7}, 0\}$.

\paragraph{Boosting-based.}
For NGBoost, we optimize the number of estimators from $\{100, 200, 300, 400, 500\}$.
For CatBoost-ensembles, we optimize the learning rate over the values $\mathrm{lr}=\{0.05, 0.01, 0.005, 0.001\}$.









\subsection{Post-processing}
Some methods did not converge for some seeds and datasets. The metrics computed from these runs would heavily distort the actual results but are easy to detect in practice. To evaluate only successful runs, we remove outliers for each dataset and metric individually. We achieve this by removing entries with z-values $\geq3.5$. Almost exclusively the results from deep kernel learning and the GP-ARD-Laplace-full are affected by this post-processing.
















\section{Additional experimental results}
\label{sec:app-results}

\subsection{Computational infrastructure}
All experiments are run on a single NVIDIA A100 GPU with 40GB memory.
The GPU is part of an internal cluster supported by local resources.
% time to run the experiments
To run the experiments for all methods on all datasets of the OpenML benchmark sequentially, we require approximately 1 hour of computation time per seed.
For all methods on all datasets of the UCI benchmark run sequentially, we require approximately 10 minutes of computation time per seed.




\subsection{Main results}
Complementary to the main results, in \Cref{fig:main-app} we list the normalized results for the OpenML benchmark and the UCI benchmark. Qualitatively the observations from the OpenML benchmark also hold for the UCI benchmark. 

In addition to the metrics NLL, RMSE and CE, in \Cref{fig:openml-app-timing,fig:uci-app-timing} we show the combined time for training the model and prediction on the test set. We have to note that the GP-based methods utilize the GPyTorch library, which enables GPU utilization. For NGBoost and CatBoost-ensembles, the official implementations do not allow for GPU utilization. Therefore, on the one hand, the time comparison is biased because we utilize different hardware; on the other hand, it utilizes the best openly available implementations for all respective methods. Note that here we excluded the method `deep Kernel Learning'. This method reduces the GP input dimensionality to 2 dimensions because of the neural network feature extractor and is therefore the fastest method. Including it in the violin plot would distort the visualization. Detailed timing results for each method on every dataset can be found in \Cref{sec:details:tables}.






\subsection{Toy data set}

We show results from the toy dataset, which was designed to be difficult for methods which do not capture feature correlation. This includes diagonal feature weighting such as the ARD-based methods. We build the correlation to be modelled into the data by choosing $\vx\sim\gU(0_d,1_d)$ and $y=(\sum_{i=1}^{10} \vx_{[i]})^2$. In \Cref{fig:toy-data-app}, we plot the RMSE in addition to the NLL; it shows a qualitatively similar behaviour in that the GP-RFM-Laplace outperforms the other methods. Notably, the diagonal method GP-RFM-diag becomes close for high feature dimensions. We argue that this is because we fix the number of dimensions which are relevant for the prediction but grow the actual dimension. Hence, a smaller fraction of the dimensions is relevant. However, the diagonal method will never outperform the GP-RFM-Laplace on this toy dataset.
\begin{figure}[htb!]
    \centering
    \begin{subfigure}[b]{0.475\textwidth}
        \centering
        % \includegraphics{figuresTikz/toy_data_nll_app}
        % \tikzsetnextfilename{toy_data_nll_app}
        \input{tikz/appendix/toy_data_nll_app}
    \end{subfigure}
    \hfill
    \begin{subfigure}[b]{0.475\textwidth}  
        \centering
        % \includegraphics{figuresTikz/toy_data_rmse_app}
        % \tikzsetnextfilename{toy_data_rmse_app}
        \input{tikz/appendix/toy_data_rmse_app}
    \end{subfigure}
    \caption{Toy data set with varying feature dimensions. NLL (left) is a repetition of the main text figure; RMSE (right) shows a similar pattern. Note that for NLL the deep Kernel Learning blows up at $d=100$, hence these values are omitted. Similarly for RMSE, the values of the GP-ARD-Laplace-full are $>5$ and are therefore not depicted.}
    \label{fig:toy-data-app}
\end{figure}

\begin{figure}[htb]
    \centering
    \begin{subfigure}[b]{0.475\textwidth}
        \centering
        \begin{subfigure}[b]{\textwidth}
            \centering
            \includegraphics[trim=0 48 0 0, clip, width=\textwidth]{figures/tabularbenchmark_nll.pdf}
        \end{subfigure}
        % \hfill
        \begin{subfigure}[b]{\textwidth}
            \centering
            \includegraphics[trim=0 48 0 0, clip, width=\textwidth]{figures/tabularbenchmark_rmse.pdf}
        \end{subfigure}
        % \vskip\baselineskip
        \begin{subfigure}[b]{\textwidth}
            \centering
            \includegraphics[width=\textwidth]{figures/tabularbenchmark_ce.pdf}
        \end{subfigure}
        \caption{OpenML benchmark datasets results.}
        \label{fig:openml-app}
    \end{subfigure}
    \hfill
    \begin{subfigure}[b]{0.475\textwidth}  
        \centering 
        \begin{subfigure}[b]{\textwidth}
            \centering
            \includegraphics[trim=0 48 0 0, clip, width=\textwidth]{figures/uci_nll.pdf}
        \end{subfigure}
        % \hfill
        \begin{subfigure}[b]{\textwidth}
            \centering
            \includegraphics[trim=0 48 0 0, clip, width=\textwidth]{figures/uci_rmse.pdf}
        \end{subfigure}
        % \vskip\baselineskip
        \begin{subfigure}[b]{\textwidth}
            \centering
            \includegraphics[width=\textwidth]{figures/uci_ce.pdf}
        \end{subfigure}
        \caption{UCI benchmark results.}    
    \end{subfigure}
    \vskip\baselineskip
    \begin{subfigure}{0.475\textwidth}
        \centering
        \includegraphics[width=\textwidth]{figures/tabularbenchmark_time.pdf}
        \caption{OpenML benchmark results: Timing.}
        \label{fig:openml-app-timing}
    \end{subfigure}
    \hfill
    \begin{subfigure}{0.475\textwidth}
        \centering
        \includegraphics[width=\textwidth]{figures/uci_time.pdf}
        \caption{UCI benchmark results: Timing.}
        \label{fig:uci-app-timing}
    \end{subfigure}
    \caption{Violin plot results on OpenML benchmark datasets and UCI benchmark. 
    Note that \Cref{fig:openml-app} is a repetition of \Cref{fig:main-tabular-benchmark} from the main text.
    Note also that in \Cref{fig:openml-app-timing,fig:uci-app-timing} we excluded the method `deep Kernel Learning' since it is the fastest and distorts the visualization in the violin plots. %For details see the tables in \Cref{sec:details:tabular} and \Cref{sec:details:uci}.
    } 
    \label{fig:main-app}
\end{figure}






\clearpage
\subsection{Visualizing feature matrices}
In \Cref{fig:feature-matrix-app}, we compare the learnt feature matrices $\mM$ of the RFM-based methods with the ones from kernels with ARD on the toy dataset. The only method which can capture the necessary feature correlation is the GP-RFM-Laplace. Notably, the GP-ARD-Laplace-full is not able to learn the correlation despite its structural ability to do so through the parameterization with a full feature matrix $\mM$. This might be explained by the more complicated optimization problem, which results in poor performance, as \Cref{fig:toy-data-app} indicates for this method.

For the diagonal methods, the GP-RFM-Laplace-diag captures the exact dimensionality of the problem by weighting all irrelevant dimensions in the toy problem with zeros. In contrast, the GP-ARD-Laplace also captures the necessary dimensions but does not suppress irrelevant dimensions to zero. We experimented with increasing the compute budget for this method from 250 epochs up to 2,000 epochs, which reduces the weighting of the irrelevant dimensions but does not get close to zero weighting.

Similar conclusions can be drawn about the bottom row for real data. Again, we observe that the GP-ARD-Laplace-full struggles with the task. The remaining three methods learn similar features on the diagonal but only the GP-RFM-Laplace can model the necessary correlation to achieve the best performance.


\begin{figure}[htb]
    \centering
    % \includegraphics{figuresTikz/plot_feature_matrix_toydata_ardfull_app}
    % \tikzsetnextfilename{plot_feature_matrix_toydata_ardfull_app}
    \input{tikz/appendix/plot_feature_matrix_toydata_ardfull_app}
    % \includegraphics{figuresTikz/plot_feature_matrix_toydata_ard_app}
    % \tikzsetnextfilename{plot_feature_matrix_toydata_ard_app}
    \input{tikz/appendix/plot_feature_matrix_toydata_ard_app}
    % \includegraphics{figuresTikz/plot_feature_matrix_toydata_rfm_app}
    % \tikzsetnextfilename{plot_feature_matrix_toydata_rfm_app}
    \input{tikz/appendix/plot_feature_matrix_toydata_rfm_app}
    % \includegraphics{figuresTikz/plot_feature_matrix_toydata_rfm_diag_app}
    % \tikzsetnextfilename{plot_feature_matrix_toydata_rfm_diag_app}
    \input{tikz/appendix/plot_feature_matrix_toydata_rfm_diag_app}
    % \input{tikz/appendix/plot_feature_matrix_toydata_app}


    % \includegraphics{figuresTikz/plot_feature_matrix_uci_ardfull_app}
    % \tikzsetnextfilename{plot_feature_matrix_uci_ardfull_app}
    \input{tikz/appendix/plot_feature_matrix_uci_ardfull_app}
    % \includegraphics{figuresTikz/plot_feature_matrix_uci_ard_app}
    % \tikzsetnextfilename{plot_feature_matrix_uci_ard_app}
    \input{tikz/appendix/plot_feature_matrix_uci_ard_app}
    % \includegraphics{figuresTikz/plot_feature_matrix_uci_rfm_app}
    % \tikzsetnextfilename{plot_feature_matrix_uci_rfm_app}
    \input{tikz/appendix/plot_feature_matrix_uci_rfm_app}
    % \includegraphics{figuresTikz/plot_feature_matrix_uci_rfm_diag_app}
    % \tikzsetnextfilename{plot_feature_matrix_uci_rfm_diag_app}
    \input{tikz/appendix/plot_feature_matrix_uci_rfm_diag_app}
    \caption{Normalized feature matrices for toy data (top) and Kin8nm dataset from UCI benchmark (bottom). Note that the two right-most columns are a repetition of \Cref{fig:feature-matrix}.}
    \label{fig:feature-matrix-app}
\end{figure}


    






% \clearpage
\subsection{Comparison of learnt features}
Given the comparable performance of the GP-RFM-Laplace and the GP-ARD-Laplace and their similar mathematical structure based on the Mahalanobis distance kernel, a question arises whether the learnt features in $\mM$ are similar. To investigate this, we compare the Pearson and Spearman correlation between the feature matrices $\mM$ learnt in RFM-based kernels and kernels with ARD: 
\begin{itemize}
    \item \textbf{Diagonal methods:} We compare diagonal of $\mM$ from the GP-RFM-Laplace-diag with GP-ARD-Laplace.
    \item \textbf{Non-diagonal methods:} Here we perform two comparisons. (1) we compare the full feature matrix $\mM$ of GP-RFM-Laplace with the one of GP-ARD-Laplace-full. (2) to capture how the features are re-weighted, we also compare the diagonal of the full feature matrix $\mM$ of both methods.
\end{itemize}
We compare diagonal methods separately from methods which learn the full $\mM$ to disentangle the effects of the parameterization and the learning paradigm. Since we only compare methods with the same parameterization, the main difference lies in the feature learning procedure: the RFM-based kernels learn the features through AGOP iterations while the ARD-based kernels learn the features through MLE optimization.

\Cref{fig:feature-correlation-diag-app} shows the Pearson and Spearman correlation between the diagonal methods and \Cref{fig:feature-correlation-full-app} between the non-diagonal methods on the UCI benchmark datasets.
There is the same trend for the diagonal and the non-diagonal methods: For some datasets, there is a high correlation between the RFM-based kernel and the kernels with ARD, but there are also datasets where the feature correlation is low.
This indicates that learning the features with AGOP iterations in the RFM or with MLE optimization in the ARD-based kernel may result in the same features in some cases but is not guaranteed to do so. 
The similarity between the learning paradigms opens up investigations of the widely applied MLE framework from a different perspective.
Further investigation is required to understand the differences between feature learning in the two paradigms.

\begin{figure}[htb!]
    \centering
    % \includegraphics{figuresTikz/feature_correlation_diag_app}
    % \tikzsetnextfilename{feature_correlation_diag_app}
    \input{tikz/appendix/feature_correlation_diag_app}
    \caption{Correlation of feature matrices $\mM$ between diagonal methods GP-RFM-Laplace-diag and GP-ARD-Laplace.}
    \label{fig:feature-correlation-diag-app}
\end{figure}

\begin{figure*}[htb]
    \centering
    \begin{subfigure}[b]{0.475\textwidth}
        \centering
        % \includegraphics{figuresTikz/feature_correlation_full_app}
        % \tikzsetnextfilename{feature_correlation_full_app}
        \input{tikz/appendix/feature_correlation_full_app}
    \end{subfigure}
    \hfill
    \begin{subfigure}[b]{0.475\textwidth}  
        \centering 
        % \includegraphics{figuresTikz/feature_correlation_full_d_app}
        % \tikzsetnextfilename{feature_correlation_full_d_app}
        \input{tikz/appendix/feature_correlation_full_d_app}
    \end{subfigure}
    \caption{Correlation of feature matrices $\mM$ between non-diagonal methods GP-RFM-Laplace and GP-ARD-Laplace-full. Left: full $\mM$ of both methods. Right: diagonal $\mM$ of both methods.}
    \label{fig:feature-correlation-full-app}
\end{figure*}










\subsection{Feature importance}

Here, we analyse if the features learnt by the RFM are meaningful in the sense of weighting the covariates with the highest predictive power.
We consider the feature matrix $\mM$ of a trained GP-RFM-Laplace and successively remove the covariates with the highest weight of $\text{diag}(\mM)$.
Then, we re-train and evaluate all models on the reduced dataset.
Specifically, we use the `pol' dataset from the OpenML benchmarks since it has a high number of covariates (26).
In \Cref{fig:feature-importance} we observe that the NLL and RMSE increase for all methods when removing the most important covariates but this plateaus.
Additionally, we observe that the two most important covariates according to the RFM feature matrix can be removed without a significant increase in NLL and RMSE.
This indicates that for this dataset the two highest weighted covariates are equally important for the prediction and contain most of the predictive power.
The sharp increase in NLL and RMSE after removing more covariates indicates that the RFM feature matrix can identify the most important covariates for the prediction.


\begin{figure*}[htb]
    \centering
    \begin{subfigure}[b]{0.475\textwidth}
        \centering
        % \includegraphics{figuresTikz/feature_importance_nll_app}
        % \tikzsetnextfilename{feature_importance_nll_app}
        \input{tikz/appendix/feature_importance_nll_app}
    \end{subfigure}
    \hfill
    \begin{subfigure}[b]{0.475\textwidth}  
        \centering
        % \includegraphics{figuresTikz/feature_importance_rmse_app}
        % \tikzsetnextfilename{feature_importance_rmse_app}
        \input{tikz/appendix/feature_importance_rmse_app}
    \end{subfigure}
    \caption{NLL (left) and RMSE (right) when removing the most important covariates according to the diagonal of the RFM feature matrix.} 
    \label{fig:feature-importance}
\end{figure*}


    







\subsection{Distribution shift}
\label{sec:app-distribution-shift}

In the main text, we show the results of label shift in terms of normalized NLL and CE for some selected methods. Here, we additionally present results for normalized RMSE and normalized interval length in \Cref{fig:ood-label-appendix}. Furthermore, we experimented with covariate shifts. Specifically, we consider the covariate for the latitude of the house location. Similarly to the label shift, we define the ID data such that $p(x_{\mathrm{lat}} < a) = 0.7$, where $a$ is the $70\%$ quantile of the latitude covariate, and the OOD data such that $p(x_{\mathrm{lat}} > a) = 0.3$. We split the OOD data into four consecutive non-overlapping datasets, where each contains $7.5\%$ of the data. The results for the covariate shift over the four house datasets are in \Cref{fig:ood-covariate-appendix}. The results are qualitatively similar to the results on label shift in \Cref{fig:ood-label-appendix}.


\begin{figure*}[htb]
    \centering
    \begin{subfigure}[b]{0.475\textwidth}
        \centering
        % \includegraphics{figuresTikz/ood_label_nll_app}
        % \tikzsetnextfilename{ood_label_nll_app}
        \input{tikz/appendix/ood_label_nll_app}
    \end{subfigure}
    \hfill
    \begin{subfigure}[b]{0.475\textwidth}  
        \centering
        % \includegraphics{figuresTikz/ood_label_rmse_app}
        % \tikzsetnextfilename{ood_label_rmse_app}
        \input{tikz/appendix/ood_label_rmse_app}
    \end{subfigure}
    % \vskip\baselineskip
    \begin{subfigure}[b]{0.475\textwidth}   
        \centering
        % \includegraphics{figuresTikz/ood_label_coverage_app}
        % \tikzsetnextfilename{ood_label_coverage_app}
        \input{tikz/appendix/ood_label_coverage_app}
    \end{subfigure}
    \hfill
    \begin{subfigure}[b]{0.475\textwidth}   
        \centering 
        % \includegraphics{figuresTikz/ood_label_intvallen_app}
        % \tikzsetnextfilename{ood_label_intvallen_app}
        \input{tikz/appendix/ood_label_intvallen_app}
    \end{subfigure}
    \caption{
        Label shift on four house datasets from the OpenML benchmark.
        } 
    \label{fig:ood-label-appendix}
\end{figure*}


\begin{figure*}[htb]
    \centering
    \begin{subfigure}[b]{0.475\textwidth}
        \centering
        % \includegraphics{figuresTikz/ood_covariate_nll_app}
        % \tikzsetnextfilename{ood_covariate_nll_app}
        \input{tikz/appendix/ood_covariate_nll_app}
    \end{subfigure}
    \hfill
    \begin{subfigure}[b]{0.475\textwidth}  
        \centering
        % \includegraphics{figuresTikz/ood_covariate_rmse_app}
        % \tikzsetnextfilename{ood_covariate_rmse_app}
        \input{tikz/appendix/ood_covariate_rmse_app}
    \end{subfigure}
    % \vskip\baselineskip
    \begin{subfigure}[b]{0.475\textwidth}   
        \centering
        % \includegraphics{figuresTikz/ood_covariate_coverage_app}
        % \tikzsetnextfilename{ood_covariate_coverage_app}
        \input{tikz/appendix/ood_covariate_coverage_app}
    \end{subfigure}     
    \hfill
    \begin{subfigure}[b]{0.475\textwidth}   
        \centering 
        % \includegraphics{figuresTikz/ood_covariate_intvallen_app}
        % \tikzsetnextfilename{ood_covariate_intvallen_app}
        \input{tikz/appendix/ood_covariate_intvallen_app}
    \end{subfigure}
    \caption{
        Covariate shift (we use the covariate for the latitude of the house position) on four house datasets from the OpenML benchmark.
        } 
    \label{fig:ood-covariate-appendix}
\end{figure*}











\clearpage
\section{Detailed results on tabular benchmarks}
\label{sec:details:tables}

In the main text and \Cref{fig:main-app} we show summary results over all datasets of the two benchmarks we consider. The following tables present the results individually for each dataset in both benchmarks. 

\subsection{Tabular Benchmark}
\label{sec:details:tabular}


\input{figures/main_table_time_app}



% get the number of rows in the table
% Load the OpenML results CSV into the table macro \tabletabularbenchmark and
% query its row count; the count is read from \pgfplotsretval below.
\pgfplotstableread[col sep=comma, precision=3]{data/results_tabularbenchmark.csv}{\tabletabularbenchmark}
\pgfplotstablegetrowsof{\tabletabularbenchmark}
% \NoOfRows = row count + 1. It is used as the exclusive upper bound ("<") of
% the per-dataset loop, whose index starts at 1.
\pgfmathtruncatemacro{\NoOfRows}{\pgfplotsretval+1}
% for each dataset
% Path of the same CSV file; it is handed to \csvreader inside the loop.
\newcommand{\datalinktabularbenchmark}{data/results_tabularbenchmark.csv}
% Loop index of the per-dataset loop.
\newcounter{datasetcounterOne}
% Per-dataset result tables for the OpenML benchmark.
% For every loop index i = 1, ..., \NoOfRows - 1 the CSV file is scanned once;
% the values of the i-th data row are copied into global \last... macros, and
% one table float is typeset from them.
% Column macros: A = GP-RBF, B = GP-Laplace, C = GP-deepKL-RBF, D = GP-ARD-RBF,
% E = GP-ARD-Laplace, F = GP-ARD-Laplace-full, G = GP-RFM-Laplace,
% H = GP-RFM-Laplace-diag, I = NG-Boost, J = Cat-Boost-Ensemble.
% Fix: the GP-ARD-RBF row now prints its own timing deviation (\lasttimeDstd_)
% instead of the deep-kernel-learning one (\lasttimeCstd_).
\forloop{datasetcounterOne}{1}{\value{datasetcounterOne} < \NoOfRows}{
	% rowcount is assumed to be declared elsewhere (it is not defined in this
	% part of the file); it tracks the CSV row currently being read.
	\setcounter{rowcount}{0}
	\csvreader[late after line=]{
		\datalinktabularbenchmark
	}{
	% mean
		dataset=\dataset, samples=\samples, features=\features,
		GP-RBF RMSE=\rmseA_, GP-RBF NLPD=\nllA_, GP-RBF Coverage=\covA_, GP-RBF Interval Len=\intA_, GP-RBF Time=\timeA_, 
		GP-Laplace RMSE=\rmseB_, GP-Laplace NLPD=\nllB_, GP-Laplace Coverage=\covB_, GP-Laplace Interval Len=\intB_, GP-Laplace Time=\timeB_, 
		GP-deepKL-RBF RMSE=\rmseC_, GP-deepKL-RBF NLPD=\nllC_, GP-deepKL-RBF Coverage=\covC_, GP-deepKL-RBF Interval Len=\intC_, GP-deepKL-RBF Time=\timeC_, 
		GP-ARD-RBF RMSE=\rmseD_, GP-ARD-RBF NLPD=\nllD_, GP-ARD-RBF Coverage=\covD_, GP-ARD-RBF Interval Len=\intD_, GP-ARD-RBF Time=\timeD_, 
		GP-ARD-Laplace RMSE=\rmseE_, GP-ARD-Laplace NLPD=\nllE_, GP-ARD-Laplace Coverage=\covE_, GP-ARD-Laplace Interval Len=\intE_,GP-ARD-Laplace Time=\timeE_, 
		GP-ARD-Laplace-full RMSE=\rmseF_, GP-ARD-Laplace-full NLPD=\nllF_, GP-ARD-Laplace-full Coverage=\covF_, GP-ARD-Laplace-full Interval Len=\intF_,GP-ARD-Laplace-full Time=\timeF_, 
		GP-RFM-Laplace RMSE=\rmseG_, GP-RFM-Laplace NLPD=\nllG_, GP-RFM-Laplace Coverage=\covG_, GP-RFM-Laplace Interval Len=\intG_, GP-RFM-Laplace Time=\timeG_, 
		GP-RFM-Laplace-diag RMSE=\rmseH_, GP-RFM-Laplace-diag NLPD=\nllH_, GP-RFM-Laplace-diag Coverage=\covH_, GP-RFM-Laplace-diag Interval Len=\intH_, GP-RFM-Laplace-diag Time=\timeH_, 
		NG-Boost RMSE=\rmseI_, NG-Boost NLPD=\nllI_, NG-Boost Coverage=\covI_, NG-Boost Interval Len=\intI_, NG-Boost Time=\timeI_, 
		Cat-Boost-Ensemble RMSE=\rmseJ_, Cat-Boost-Ensemble NLPD=\nllJ_, Cat-Boost-Ensemble Coverage=\covJ_, Cat-Boost-Ensemble Interval Len=\intJ_, Cat-Boost-Ensemble Time=\timeJ_, 
	% std
		GP-RBF RMSE std=\rmseAstd_, GP-RBF NLPD std=\nllAstd_, GP-RBF Coverage std=\covAstd_, GP-RBF Interval Len std=\intAstd_,GP-RBF Time std=\timeAstd_, 
		GP-Laplace RMSE std=\rmseBstd_, GP-Laplace NLPD std=\nllBstd_, GP-Laplace Coverage std=\covBstd_, GP-Laplace Interval Len std=\intBstd_, GP-Laplace Time std=\timeBstd_, 
		GP-deepKL-RBF RMSE std=\rmseCstd_, GP-deepKL-RBF NLPD std=\nllCstd_, GP-deepKL-RBF Coverage std=\covCstd_, GP-deepKL-RBF Interval Len std=\intCstd_, GP-deepKL-RBF Time std=\timeCstd_, 
		GP-ARD-RBF RMSE std=\rmseDstd_, GP-ARD-RBF NLPD std=\nllDstd_, GP-ARD-RBF Coverage std=\covDstd_, GP-ARD-RBF Interval Len std=\intDstd_, GP-ARD-RBF Time std=\timeDstd_, 
		GP-ARD-Laplace RMSE std=\rmseEstd_, GP-ARD-Laplace NLPD std=\nllEstd_, GP-ARD-Laplace Coverage std=\covEstd_, GP-ARD-Laplace Interval Len std=\intEstd_, GP-ARD-Laplace Time std=\timeEstd_, 
		GP-ARD-Laplace-full RMSE std=\rmseFstd_, GP-ARD-Laplace-full NLPD std=\nllFstd_, GP-ARD-Laplace-full Coverage std=\covFstd_, GP-ARD-Laplace-full Interval Len std=\intFstd_, GP-ARD-Laplace-full Time std=\timeFstd_, 
		GP-RFM-Laplace RMSE std=\rmseGstd_, GP-RFM-Laplace NLPD std=\nllGstd_, GP-RFM-Laplace Coverage std=\covGstd_, GP-RFM-Laplace Interval Len std=\intGstd_, GP-RFM-Laplace Time std=\timeGstd_, 
		GP-RFM-Laplace-diag RMSE std=\rmseHstd_, GP-RFM-Laplace-diag NLPD std=\nllHstd_, GP-RFM-Laplace-diag Coverage std=\covHstd_, GP-RFM-Laplace-diag Interval Len std=\intHstd_, GP-RFM-Laplace-diag Time std=\timeHstd_, 
		NG-Boost RMSE std=\rmseIstd_, NG-Boost NLPD std=\nllIstd_, NG-Boost Coverage std=\covIstd_, NG-Boost Interval Len std=\intIstd_, NG-Boost Time std=\timeIstd_, 
		Cat-Boost-Ensemble RMSE std=\rmseJstd_, Cat-Boost-Ensemble NLPD std=\nllJstd_, Cat-Boost-Ensemble Coverage std=\covJstd_, Cat-Boost-Ensemble Interval Len std=\intJstd_, Cat-Boost-Ensemble Time std=\timeJstd_, 
	}{
		\stepcounter{rowcount}
		\ifnum\value{rowcount}=\thedatasetcounterOne% only the row matching the current loop index is captured
		% \xdef makes the captured values global so that they are still
		% available when the table below is typeset after \csvreader returns.
		\xdef\lastdataset{\dataset} \xdef\lastsamples{\samples} \xdef\lastfeatures{\features} 
		% mean
		\xdef\lastrmseA_{\rmseA_} \xdef\lastnllA_{\nllA_} \xdef\lastcovA_{\covA_} \xdef\lastintA_{\intA_} \xdef\lasttimeA_{\timeA_}
		\xdef\lastrmseB_{\rmseB_} \xdef\lastnllB_{\nllB_} \xdef\lastcovB_{\covB_} \xdef\lastintB_{\intB_} \xdef\lasttimeB_{\timeB_}
		\xdef\lastrmseC_{\rmseC_} \xdef\lastnllC_{\nllC_} \xdef\lastcovC_{\covC_} \xdef\lastintC_{\intC_} \xdef\lasttimeC_{\timeC_}
		\xdef\lastrmseD_{\rmseD_} \xdef\lastnllD_{\nllD_} \xdef\lastcovD_{\covD_} \xdef\lastintD_{\intD_} \xdef\lasttimeD_{\timeD_}
		\xdef\lastrmseE_{\rmseE_} \xdef\lastnllE_{\nllE_} \xdef\lastcovE_{\covE_} \xdef\lastintE_{\intE_} \xdef\lasttimeE_{\timeE_}
		\xdef\lastrmseF_{\rmseF_} \xdef\lastnllF_{\nllF_} \xdef\lastcovF_{\covF_} \xdef\lastintF_{\intF_} \xdef\lasttimeF_{\timeF_}
		\xdef\lastrmseG_{\rmseG_} \xdef\lastnllG_{\nllG_} \xdef\lastcovG_{\covG_} \xdef\lastintG_{\intG_} \xdef\lasttimeG_{\timeG_}
		\xdef\lastrmseH_{\rmseH_} \xdef\lastnllH_{\nllH_} \xdef\lastcovH_{\covH_} \xdef\lastintH_{\intH_} \xdef\lasttimeH_{\timeH_}
		\xdef\lastrmseI_{\rmseI_} \xdef\lastnllI_{\nllI_} \xdef\lastcovI_{\covI_} \xdef\lastintI_{\intI_} \xdef\lasttimeI_{\timeI_}
		\xdef\lastrmseJ_{\rmseJ_} \xdef\lastnllJ_{\nllJ_} \xdef\lastcovJ_{\covJ_} \xdef\lastintJ_{\intJ_} \xdef\lasttimeJ_{\timeJ_}
		% std
		\xdef\lastrmseAstd_{\rmseAstd_} \xdef\lastnllAstd_{\nllAstd_} \xdef\lastcovAstd_{\covAstd_} \xdef\lastintAstd_{\intAstd_} \xdef\lasttimeAstd_{\timeAstd_}
		\xdef\lastrmseBstd_{\rmseBstd_} \xdef\lastnllBstd_{\nllBstd_} \xdef\lastcovBstd_{\covBstd_} \xdef\lastintBstd_{\intBstd_} \xdef\lasttimeBstd_{\timeBstd_}
		\xdef\lastrmseCstd_{\rmseCstd_} \xdef\lastnllCstd_{\nllCstd_} \xdef\lastcovCstd_{\covCstd_} \xdef\lastintCstd_{\intCstd_} \xdef\lasttimeCstd_{\timeCstd_}
		\xdef\lastrmseDstd_{\rmseDstd_} \xdef\lastnllDstd_{\nllDstd_} \xdef\lastcovDstd_{\covDstd_} \xdef\lastintDstd_{\intDstd_} \xdef\lasttimeDstd_{\timeDstd_}
		\xdef\lastrmseEstd_{\rmseEstd_} \xdef\lastnllEstd_{\nllEstd_} \xdef\lastcovEstd_{\covEstd_} \xdef\lastintEstd_{\intEstd_} \xdef\lasttimeEstd_{\timeEstd_}
		\xdef\lastrmseFstd_{\rmseFstd_} \xdef\lastnllFstd_{\nllFstd_} \xdef\lastcovFstd_{\covFstd_} \xdef\lastintFstd_{\intFstd_} \xdef\lasttimeFstd_{\timeFstd_}
		\xdef\lastrmseGstd_{\rmseGstd_} \xdef\lastnllGstd_{\nllGstd_} \xdef\lastcovGstd_{\covGstd_} \xdef\lastintGstd_{\intGstd_} \xdef\lasttimeGstd_{\timeGstd_}
		\xdef\lastrmseHstd_{\rmseHstd_} \xdef\lastnllHstd_{\nllHstd_} \xdef\lastcovHstd_{\covHstd_} \xdef\lastintHstd_{\intHstd_} \xdef\lasttimeHstd_{\timeHstd_}
		\xdef\lastrmseIstd_{\rmseIstd_} \xdef\lastnllIstd_{\nllIstd_} \xdef\lastcovIstd_{\covIstd_} \xdef\lastintIstd_{\intIstd_} \xdef\lasttimeIstd_{\timeIstd_}
		\xdef\lastrmseJstd_{\rmseJstd_} \xdef\lastnllJstd_{\nllJstd_} \xdef\lastcovJstd_{\covJstd_} \xdef\lastintJstd_{\intJstd_} \xdef\lasttimeJstd_{\timeJstd_}
		\fi
	}
	% One table per dataset: mean (normal size) followed by the standard
	% deviation (script size) for each metric and method.
	\begin{table}[htb]
		\centering
		\caption{OpenML dataset: \lastdataset~(\lastsamples~samples; \lastfeatures~covariates)}
		% \resizebox{\textwidth}{!}{%
		\begin{tabular}{l||ll|ll|l}
			\toprule 
			         & RMSE ($\downarrow$) & NLL ($\downarrow$) & CE (95\%) ($\downarrow$) & IL (95\%) ($\downarrow$) & Time ($\downarrow$) \\
			\midrule
			GP-RBF 			& \lastrmseA_ \scriptsize{$\pm$\lastrmseAstd_} & \lastnllA_ \scriptsize{$\pm$\lastnllAstd_} & \lastcovA_ \scriptsize{$\pm$\lastcovAstd_} & \lastintA_ \scriptsize{$\pm$\lastintAstd_} & \lasttimeA_ \scriptsize{$\pm$\lasttimeAstd_} \\
			GP-Laplace 		& \lastrmseB_ \scriptsize{$\pm$\lastrmseBstd_} & \lastnllB_ \scriptsize{$\pm$\lastnllBstd_} & \lastcovB_ \scriptsize{$\pm$\lastcovBstd_} & \lastintB_ \scriptsize{$\pm$\lastintBstd_} & \lasttimeB_ \scriptsize{$\pm$\lasttimeBstd_} \\
			\midrule
			deep Kernel Learning& \lastrmseC_ \scriptsize{$\pm$\lastrmseCstd_} & \lastnllC_ \scriptsize{$\pm$\lastnllCstd_} & \lastcovC_ \scriptsize{$\pm$\lastcovCstd_} & \lastintC_ \scriptsize{$\pm$\lastintCstd_} & \lasttimeC_ \scriptsize{$\pm$\lasttimeCstd_} \\
			\midrule
			GP-ARD-RBF 		& \lastrmseD_ \scriptsize{$\pm$\lastrmseDstd_} & \lastnllD_ \scriptsize{$\pm$\lastnllDstd_} & \lastcovD_ \scriptsize{$\pm$\lastcovDstd_} & \lastintD_ \scriptsize{$\pm$\lastintDstd_} & \lasttimeD_ \scriptsize{$\pm$\lasttimeDstd_} \\
			GP-ARD-Laplace 	& \lastrmseE_ \scriptsize{$\pm$\lastrmseEstd_} & \lastnllE_ \scriptsize{$\pm$\lastnllEstd_} & \lastcovE_ \scriptsize{$\pm$\lastcovEstd_} & \lastintE_ \scriptsize{$\pm$\lastintEstd_} & \lasttimeE_ \scriptsize{$\pm$\lasttimeEstd_} \\
			GP-ARD-Laplace-full& \lastrmseF_ \scriptsize{$\pm$\lastrmseFstd_} & \lastnllF_ \scriptsize{$\pm$\lastnllFstd_} & \lastcovF_ \scriptsize{$\pm$\lastcovFstd_} & \lastintF_ \scriptsize{$\pm$\lastintFstd_} & \lasttimeF_ \scriptsize{$\pm$\lasttimeFstd_} \\
			\midrule
			\textbf{GP-RFM-Laplace} 	& \lastrmseG_ \scriptsize{$\pm$\lastrmseGstd_} & \lastnllG_ \scriptsize{$\pm$\lastnllGstd_} & \lastcovG_ \scriptsize{$\pm$\lastcovGstd_} & \lastintG_ \scriptsize{$\pm$\lastintGstd_} & \lasttimeG_ \scriptsize{$\pm$\lasttimeGstd_} \\
			\textbf{GP-RFM-Laplace-diag} 	& \lastrmseH_ \scriptsize{$\pm$\lastrmseHstd_} & \lastnllH_ \scriptsize{$\pm$\lastnllHstd_} & \lastcovH_ \scriptsize{$\pm$\lastcovHstd_} & \lastintH_ \scriptsize{$\pm$\lastintHstd_} & \lasttimeH_ \scriptsize{$\pm$\lasttimeHstd_} \\
			\midrule
			NGBoost 		& \lastrmseI_ \scriptsize{$\pm$\lastrmseIstd_} & \lastnllI_ \scriptsize{$\pm$\lastnllIstd_} & \lastcovI_ \scriptsize{$\pm$\lastcovIstd_} & \lastintI_ \scriptsize{$\pm$\lastintIstd_} & \lasttimeI_ \scriptsize{$\pm$\lasttimeIstd_} \\
			CatBoost-Ensemble 	& \lastrmseJ_ \scriptsize{$\pm$\lastrmseJstd_} & \lastnllJ_ \scriptsize{$\pm$\lastnllJstd_} & \lastcovJ_ \scriptsize{$\pm$\lastcovJstd_} & \lastintJ_ \scriptsize{$\pm$\lastintJstd_} & \lasttimeJ_ \scriptsize{$\pm$\lasttimeJstd_} \\
			\bottomrule
		\end{tabular}
		% }
	\end{table}
}

\clearpage









\subsection{UCI benchmark}
\label{sec:details:uci}

% Path of the UCI results file. Defined first so that the row count below and
% the \csvreader loop that follows read the same file from a single definition.
\newcommand{\datalinkuci}{data/results_uci.csv}
% get the number of rows in the table
\pgfplotstableread[col sep=comma]{\datalinkuci}{\tableuci}
\pgfplotstablegetrowsof{\tableuci}
% \NoOfRows = (number of data rows) + 1; it serves as the exclusive upper
% bound of the per-dataset loop, whose counter starts at 1.
\pgfmathtruncatemacro{\NoOfRows}{\pgfplotsretval+1}
% for each dataset
% \newcounter{datasetcounterOne}
\forloop{datasetcounterOne}{1}{\value{datasetcounterOne} < \NoOfRows}{
	\setcounter{rowcount}{0}
	% Scan the UCI results CSV and, for the single row whose index equals the
	% enclosing loop counter (datasetcounterOne), copy every mean/std value into
	% a global \last... macro for use after the reader has finished.
	% `late after line=` sets csvsimple's per-line hook to empty
	% (presumably so the reader typesets nothing between rows -- confirm).
	% NOTE(review): the file is read in full on every iteration of the enclosing
	% loop; only the matching row is kept.
	\csvreader[late after line=]{
		\datalinkuci
	}{
	% CSV column name -> per-row macro, mean values.
	% The letter suffix identifies the method: A=GP-RBF, B=GP-Laplace, C=GP-deepKL-RBF,
	% D=GP-ARD-RBF, E=GP-ARD-Laplace, F=GP-ARD-Laplace-full, G=GP-RFM-Laplace,
	% H=GP-RFM-Laplace-diag, I=NG-Boost, J=Cat-Boost-Ensemble.
		dataset=\dataset, samples=\samples, features=\features,
		GP-RBF RMSE=\rmseA_, GP-RBF NLPD=\nllA_, GP-RBF Coverage=\covA_, GP-RBF Interval Len=\intA_, GP-RBF Time=\timeA_, 
		GP-Laplace RMSE=\rmseB_, GP-Laplace NLPD=\nllB_, GP-Laplace Coverage=\covB_, GP-Laplace Interval Len=\intB_, GP-Laplace Time=\timeB_, 
            GP-deepKL-RBF RMSE=\rmseC_, GP-deepKL-RBF NLPD=\nllC_, GP-deepKL-RBF Coverage=\covC_, GP-deepKL-RBF Interval Len=\intC_, GP-deepKL-RBF Time=\timeC_, 
		GP-ARD-RBF RMSE=\rmseD_, GP-ARD-RBF NLPD=\nllD_, GP-ARD-RBF Coverage=\covD_, GP-ARD-RBF Interval Len=\intD_, GP-ARD-RBF Time=\timeD_, 
		GP-ARD-Laplace RMSE=\rmseE_, GP-ARD-Laplace NLPD=\nllE_, GP-ARD-Laplace Coverage=\covE_, GP-ARD-Laplace Interval Len=\intE_,GP-ARD-Laplace Time=\timeE_, 
            GP-ARD-Laplace-full RMSE=\rmseF_, GP-ARD-Laplace-full NLPD=\nllF_, GP-ARD-Laplace-full Coverage=\covF_, GP-ARD-Laplace-full Interval Len=\intF_,GP-ARD-Laplace-full Time=\timeF_, 
		GP-RFM-Laplace RMSE=\rmseG_, GP-RFM-Laplace NLPD=\nllG_, GP-RFM-Laplace Coverage=\covG_, GP-RFM-Laplace Interval Len=\intG_, GP-RFM-Laplace Time=\timeG_, 
            GP-RFM-Laplace-diag RMSE=\rmseH_, GP-RFM-Laplace-diag NLPD=\nllH_, GP-RFM-Laplace-diag Coverage=\covH_, GP-RFM-Laplace-diag Interval Len=\intH_, GP-RFM-Laplace-diag Time=\timeH_, 
		NG-Boost RMSE=\rmseI_, NG-Boost NLPD=\nllI_, NG-Boost Coverage=\covI_, NG-Boost Interval Len=\intI_, NG-Boost Time=\timeI_, 
		Cat-Boost-Ensemble RMSE=\rmseJ_, Cat-Boost-Ensemble NLPD=\nllJ_, Cat-Boost-Ensemble Coverage=\covJ_, Cat-Boost-Ensemble Interval Len=\intJ_, Cat-Boost-Ensemble Time=\timeJ_, 
	% CSV column name -> per-row macro, standard deviations (same letter scheme, "std" suffix).
		GP-RBF RMSE std=\rmseAstd_, GP-RBF NLPD std=\nllAstd_, GP-RBF Coverage std=\covAstd_, GP-RBF Interval Len std=\intAstd_,GP-RBF Time std=\timeAstd_, 
		GP-Laplace RMSE std=\rmseBstd_, GP-Laplace NLPD std=\nllBstd_, GP-Laplace Coverage std=\covBstd_, GP-Laplace Interval Len std=\intBstd_, GP-Laplace Time std=\timeBstd_, 
            GP-deepKL-RBF RMSE std=\rmseCstd_, GP-deepKL-RBF NLPD std=\nllCstd_, GP-deepKL-RBF Coverage std=\covCstd_, GP-deepKL-RBF Interval Len std=\intCstd_, GP-deepKL-RBF Time std=\timeCstd_, 
		GP-ARD-RBF RMSE std=\rmseDstd_, GP-ARD-RBF NLPD std=\nllDstd_, GP-ARD-RBF Coverage std=\covDstd_, GP-ARD-RBF Interval Len std=\intDstd_, GP-ARD-RBF Time std=\timeDstd_, 
		GP-ARD-Laplace RMSE std=\rmseEstd_, GP-ARD-Laplace NLPD std=\nllEstd_, GP-ARD-Laplace Coverage std=\covEstd_, GP-ARD-Laplace Interval Len std=\intEstd_, GP-ARD-Laplace Time std=\timeEstd_, 
            GP-ARD-Laplace-full RMSE std=\rmseFstd_, GP-ARD-Laplace-full NLPD std=\nllFstd_, GP-ARD-Laplace-full Coverage std=\covFstd_, GP-ARD-Laplace-full Interval Len std=\intFstd_, GP-ARD-Laplace-full Time std=\timeFstd_, 
		GP-RFM-Laplace RMSE std=\rmseGstd_, GP-RFM-Laplace NLPD std=\nllGstd_, GP-RFM-Laplace Coverage std=\covGstd_, GP-RFM-Laplace Interval Len std=\intGstd_, GP-RFM-Laplace Time std=\timeGstd_, 
            GP-RFM-Laplace-diag RMSE std=\rmseHstd_, GP-RFM-Laplace-diag NLPD std=\nllHstd_, GP-RFM-Laplace-diag Coverage std=\covHstd_, GP-RFM-Laplace-diag Interval Len std=\intHstd_, GP-RFM-Laplace-diag Time std=\timeHstd_, 
		NG-Boost RMSE std=\rmseIstd_, NG-Boost NLPD std=\nllIstd_, NG-Boost Coverage std=\covIstd_, NG-Boost Interval Len std=\intIstd_, NG-Boost Time std=\timeIstd_, 
		Cat-Boost-Ensemble RMSE std=\rmseJstd_, Cat-Boost-Ensemble NLPD std=\nllJstd_, Cat-Boost-Ensemble Coverage std=\covJstd_, Cat-Boost-Ensemble Interval Len std=\intJstd_, Cat-Boost-Ensemble Time std=\timeJstd_, 
	}{
		% Body executed for each CSV row: advance the row index, then act only on the selected row.
		\stepcounter{rowcount}
		\ifnum\value{rowcount}=\thedatasetcounterOne% keep only the row whose index equals the enclosing loop counter
		% \xdef expands the per-row macro now and defines the \last... copy globally.
		\xdef\lastdataset{\dataset} \xdef\lastsamples{\samples} \xdef\lastfeatures{\features} 
		% mean values of the selected row
		\xdef\lastrmseA_{\rmseA_} \xdef\lastnllA_{\nllA_} \xdef\lastcovA_{\covA_} \xdef\lastintA_{\intA_} \xdef\lasttimeA_{\timeA_}
		\xdef\lastrmseB_{\rmseB_} \xdef\lastnllB_{\nllB_} \xdef\lastcovB_{\covB_} \xdef\lastintB_{\intB_} \xdef\lasttimeB_{\timeB_}
		\xdef\lastrmseC_{\rmseC_} \xdef\lastnllC_{\nllC_} \xdef\lastcovC_{\covC_} \xdef\lastintC_{\intC_} \xdef\lasttimeC_{\timeC_}
		\xdef\lastrmseD_{\rmseD_} \xdef\lastnllD_{\nllD_} \xdef\lastcovD_{\covD_} \xdef\lastintD_{\intD_} \xdef\lasttimeD_{\timeD_}
		\xdef\lastrmseE_{\rmseE_} \xdef\lastnllE_{\nllE_} \xdef\lastcovE_{\covE_} \xdef\lastintE_{\intE_} \xdef\lasttimeE_{\timeE_}
		\xdef\lastrmseF_{\rmseF_} \xdef\lastnllF_{\nllF_} \xdef\lastcovF_{\covF_} \xdef\lastintF_{\intF_} \xdef\lasttimeF_{\timeF_}
		\xdef\lastrmseG_{\rmseG_} \xdef\lastnllG_{\nllG_} \xdef\lastcovG_{\covG_} \xdef\lastintG_{\intG_} \xdef\lasttimeG_{\timeG_}
            \xdef\lastrmseH_{\rmseH_} \xdef\lastnllH_{\nllH_} \xdef\lastcovH_{\covH_} \xdef\lastintH_{\intH_} \xdef\lasttimeH_{\timeH_}
            \xdef\lastrmseI_{\rmseI_} \xdef\lastnllI_{\nllI_} \xdef\lastcovI_{\covI_} \xdef\lastintI_{\intI_} \xdef\lasttimeI_{\timeI_}
            \xdef\lastrmseJ_{\rmseJ_} \xdef\lastnllJ_{\nllJ_} \xdef\lastcovJ_{\covJ_} \xdef\lastintJ_{\intJ_} \xdef\lasttimeJ_{\timeJ_}
		% standard deviations of the selected row
		\xdef\lastrmseAstd_{\rmseAstd_} \xdef\lastnllAstd_{\nllAstd_} \xdef\lastcovAstd_{\covAstd_} \xdef\lastintAstd_{\intAstd_} \xdef\lasttimeAstd_{\timeAstd_}
		\xdef\lastrmseBstd_{\rmseBstd_} \xdef\lastnllBstd_{\nllBstd_} \xdef\lastcovBstd_{\covBstd_} \xdef\lastintBstd_{\intBstd_} \xdef\lasttimeBstd_{\timeBstd_}
		\xdef\lastrmseCstd_{\rmseCstd_} \xdef\lastnllCstd_{\nllCstd_} \xdef\lastcovCstd_{\covCstd_} \xdef\lastintCstd_{\intCstd_} \xdef\lasttimeCstd_{\timeCstd_}
		\xdef\lastrmseDstd_{\rmseDstd_} \xdef\lastnllDstd_{\nllDstd_} \xdef\lastcovDstd_{\covDstd_} \xdef\lastintDstd_{\intDstd_} \xdef\lasttimeDstd_{\timeDstd_}
		\xdef\lastrmseEstd_{\rmseEstd_} \xdef\lastnllEstd_{\nllEstd_} \xdef\lastcovEstd_{\covEstd_} \xdef\lastintEstd_{\intEstd_} \xdef\lasttimeEstd_{\timeEstd_}
		\xdef\lastrmseFstd_{\rmseFstd_} \xdef\lastnllFstd_{\nllFstd_} \xdef\lastcovFstd_{\covFstd_} \xdef\lastintFstd_{\intFstd_} \xdef\lasttimeFstd_{\timeFstd_}
            \xdef\lastrmseGstd_{\rmseGstd_} \xdef\lastnllGstd_{\nllGstd_} \xdef\lastcovGstd_{\covGstd_} \xdef\lastintGstd_{\intGstd_} \xdef\lasttimeGstd_{\timeGstd_}		
            \xdef\lastrmseHstd_{\rmseHstd_} \xdef\lastnllHstd_{\nllHstd_} \xdef\lastcovHstd_{\covHstd_} \xdef\lastintHstd_{\intHstd_} \xdef\lasttimeHstd_{\timeHstd_}
            \xdef\lastrmseIstd_{\rmseIstd_} \xdef\lastnllIstd_{\nllIstd_} \xdef\lastcovIstd_{\covIstd_} \xdef\lastintIstd_{\intIstd_} \xdef\lasttimeIstd_{\timeIstd_}
            \xdef\lastrmseJstd_{\rmseJstd_} \xdef\lastnllJstd_{\nllJstd_} \xdef\lastcovJstd_{\covJstd_} \xdef\lastintJstd_{\intJstd_} \xdef\lasttimeJstd_{\timeJstd_}
		\fi
	}
	% Results table for one UCI dataset, typeset from the global \last... macros
	% captured by the preceding \csvreader pass.
	% Each cell shows "mean ± std"; the std macro must carry the same method
	% letter (A..J) as the mean macro printed next to it.
	\begin{table}[htb]
		\centering
		\caption{UCI dataset: \lastdataset~(\lastsamples~samples; \lastfeatures~covariates)}
		% \resizebox{\textwidth}{!}{%
		\begin{tabular}{l||ll|ll|l}
			\toprule
			         & RMSE ($\downarrow$) & NLL ($\downarrow$) & CE (95\%) ($\downarrow$) & IL (95\%) ($\downarrow$) & Time ($\downarrow$) \\
			\midrule
			GP-RBF 			& \lastrmseA_ \scriptsize{$\pm$\lastrmseAstd_} & \lastnllA_ \scriptsize{$\pm$\lastnllAstd_} & \lastcovA_ \scriptsize{$\pm$\lastcovAstd_} & \lastintA_ \scriptsize{$\pm$\lastintAstd_} & \lasttimeA_ \scriptsize{$\pm$\lasttimeAstd_} \\
			GP-Laplace 		& \lastrmseB_ \scriptsize{$\pm$\lastrmseBstd_} & \lastnllB_ \scriptsize{$\pm$\lastnllBstd_} & \lastcovB_ \scriptsize{$\pm$\lastcovBstd_} & \lastintB_ \scriptsize{$\pm$\lastintBstd_} & \lasttimeB_ \scriptsize{$\pm$\lasttimeBstd_} \\
			\midrule
			deep Kernel Learning& \lastrmseC_ \scriptsize{$\pm$\lastrmseCstd_} & \lastnllC_ \scriptsize{$\pm$\lastnllCstd_} & \lastcovC_ \scriptsize{$\pm$\lastcovCstd_} & \lastintC_ \scriptsize{$\pm$\lastintCstd_} & \lasttimeC_ \scriptsize{$\pm$\lasttimeCstd_} \\
			\midrule
			% Time std uses the D (GP-ARD-RBF) macro; it previously showed the C (GP-deepKL-RBF) value.
			GP-ARD-RBF 		& \lastrmseD_ \scriptsize{$\pm$\lastrmseDstd_} & \lastnllD_ \scriptsize{$\pm$\lastnllDstd_} & \lastcovD_ \scriptsize{$\pm$\lastcovDstd_} & \lastintD_ \scriptsize{$\pm$\lastintDstd_} & \lasttimeD_ \scriptsize{$\pm$\lasttimeDstd_} \\
			GP-ARD-Laplace 	& \lastrmseE_ \scriptsize{$\pm$\lastrmseEstd_} & \lastnllE_ \scriptsize{$\pm$\lastnllEstd_} & \lastcovE_ \scriptsize{$\pm$\lastcovEstd_} & \lastintE_ \scriptsize{$\pm$\lastintEstd_} & \lasttimeE_ \scriptsize{$\pm$\lasttimeEstd_} \\
			GP-ARD-Laplace-full& \lastrmseF_ \scriptsize{$\pm$\lastrmseFstd_} & \lastnllF_ \scriptsize{$\pm$\lastnllFstd_} & \lastcovF_ \scriptsize{$\pm$\lastcovFstd_} & \lastintF_ \scriptsize{$\pm$\lastintFstd_} & \lasttimeF_ \scriptsize{$\pm$\lasttimeFstd_} \\
			\midrule
			\textbf{GP-RFM-Laplace} 	& \lastrmseG_ \scriptsize{$\pm$\lastrmseGstd_} & \lastnllG_ \scriptsize{$\pm$\lastnllGstd_} & \lastcovG_ \scriptsize{$\pm$\lastcovGstd_} & \lastintG_ \scriptsize{$\pm$\lastintGstd_} & \lasttimeG_ \scriptsize{$\pm$\lasttimeGstd_} \\
			\textbf{GP-RFM-Laplace-diag} 	& \lastrmseH_ \scriptsize{$\pm$\lastrmseHstd_} & \lastnllH_ \scriptsize{$\pm$\lastnllHstd_} & \lastcovH_ \scriptsize{$\pm$\lastcovHstd_} & \lastintH_ \scriptsize{$\pm$\lastintHstd_} & \lasttimeH_ \scriptsize{$\pm$\lasttimeHstd_} \\
			\midrule
			NGBoost 		& \lastrmseI_ \scriptsize{$\pm$\lastrmseIstd_} & \lastnllI_ \scriptsize{$\pm$\lastnllIstd_} & \lastcovI_ \scriptsize{$\pm$\lastcovIstd_} & \lastintI_ \scriptsize{$\pm$\lastintIstd_} & \lasttimeI_ \scriptsize{$\pm$\lasttimeIstd_} \\
			CatBoost-Ensemble 	& \lastrmseJ_ \scriptsize{$\pm$\lastrmseJstd_} & \lastnllJ_ \scriptsize{$\pm$\lastnllJstd_} & \lastcovJ_ \scriptsize{$\pm$\lastcovJstd_} & \lastintJ_ \scriptsize{$\pm$\lastintJstd_} & \lasttimeJ_ \scriptsize{$\pm$\lasttimeJstd_} \\
			\bottomrule
		\end{tabular}
		% }
	\end{table}
}
