\section{Results}

\begin{table*}[ht]
\caption{AISL values for each method and dataset. The average across 50 runs is reported, with twice the standard deviation in brackets. Bold values indicate the best-performing methods at a 95\% confidence level.}
\label{tab:aisl}
\begin{adjustbox}{width = 0.9\textwidth,center}
\begin{tabular}{lccccccc}
\hline
\textbf{Dataset} & \textbf{EPICSCORE-MDN} & \textbf{EPICSCORE-BART} & \textbf{EPICSCORE-GP} & \textbf{CQR} & \textbf{CQR-r} & \textbf{UACQR-P} & \textbf{UACQR-S} \\
\hline

airfoil & \textbf{18.819 (0.293)} & 24.538 (0.307) & 19.712 (0.271) & 20.521 (0.234) & 20.535 (0.236) & 23.021 (0.337) & 20.188 (0.300)\\
bike & \textbf{438.171 (3.091)} & 447.057 (2.942) & 478.461 (3.325) & 456.275 (2.561) & 456.377 (2.575) & 534.132 (3.764) & \textbf{438.146 (3.850)}\\
concrete & \textbf{44.434 (0.790)} & \textbf{43.316 (0.716)} & 45.282 (0.767) & 46.882 (0.681) & 46.896 (0.683) & 52.789 (1.097) & 47.324 (1.349)\\
cycle & \textbf{34.097 (0.131)} & 34.590 (0.142) & 35.049 (0.133) & 39.218 (0.134) & 39.408 (0.136) & 43.775 (0.181) & 35.346 (0.197)\\
electric & 0.083 (<0.001) & 0.137 (0.001) & 0.096 (<0.001) & 0.102 (<0.001) & 0.102 (<0.001) & 0.111 (0.001) & 0.097 (0.001)\\
homes $\times 10^{5}$ & \textbf{7.218 (0.052)} & 9.942 (0.092) & 8.112 (0.078) & 8.359 (0.076) & 8.433 (0.078) & 11.427 (0.131) & 8.544 (0.104)\\
meps19 & \textbf{64.282 (1.539)} & \textbf{65.069 (1.463)} & \textbf{64.995 (1.592)} & \textbf{64.239 (1.560)} & \textbf{64.239 (1.560)} & 71.015 (1.763) & \textbf{63.737 (1.461)}\\
superconductivity & \textbf{70.173 (0.207)} & 82.363 (0.474) & 80.239 (0.271) & 75.496 (0.219) & 75.508 (0.218) & 87.929 (0.513) & 73.971 (0.404)\\
WEC $\times 10^{5}$ & \textbf{2.371 (0.010)} & 3.006 (0.009) & 2.659 (0.012) & 3.138 (0.009) & 3.142 (0.009) & 3.517 (0.010) & 3.046 (0.010)\\
winered & \textbf{2.975 (0.050)} & 3.178 (0.057) & \textbf{3.008 (0.059)} & \textbf{2.979 (0.069)} & \textbf{2.978 (0.069)} & \textbf{3.059 (0.069)} & \textbf{2.999 (0.063)}\\
winewhite & \textbf{3.218 (0.031)} & 3.509 (0.035) & 3.327 (0.034) & 3.316 (0.036) & 3.315 (0.036) & 3.378 (0.038) & \textbf{3.200 (0.036)}\\
\hline
\end{tabular}
\end{adjustbox}
\end{table*}

\section{Summary of predictive models}\label{sec:pred_models}

\subsection{Monte Carlo Dropout: Dropout as a Bayesian Approximation}
\label{sec:DropoutApprox}

Bayesian inference provides mathematically grounded tools to study model uncertainty, but these are often computationally intensive. This limitation has driven significant research into finding efficient Bayesian approximations. One notable technique is the use of dropout as a Bayesian approximation, proposed by \cite{Gal2016}. This approach treats a neural network with arbitrary depth and non-linearity as a mathematically equivalent variational approximation to a probabilistic deep Gaussian process \cite{pmlr-v31-damianou13a}. This result allows us to interpret dropout training as approximate Bayesian inference.

Consider a neural network with $L$ layers and a loss function $\mathcal{L}$. Let $\mathbf{W}_l$ denote the weight matrices of layer $l$, and let $D = \{(\mathbf{X}_1, Y_1), (\mathbf{X}_2, Y_2), \ldots, (\mathbf{X}_n, Y_n)\}$ be the training dataset. The posterior distribution of the weights $\mathbf{W}|D$ is given by:
$$
\mathbb{P}(\mathbf{W}|D) = \frac{f(y|\mathbf{X}, \mathbf{W})\mathbb{P}(\mathbf{W})}{f(y|\mathbf{X})},
$$
which is intractable. Here, $\mathbb{P}(\mathbf{W}|D)$ is the posterior distribution of the weights, $f(y|\mathbf{X}, \mathbf{W})$ is the likelihood, $\mathbb{P}(\mathbf{W})$ is the prior distribution of the weights, and $f(y|\mathbf{X})$ is the evidence. To approximate the intractable posterior, we use $q(\mathbf{W})$, a distribution over matrices with randomly zeroed columns, defined as:
$$
q(\mathbf{W}) = \prod_{l=1}^{L} \prod_{i=1}^{K_{l-1}} \mathcal{N}(W_{l,i} \mid M_{l,i}, \sigma^2 I),
$$
where $M_{l,i}$ are variational parameters and $\sigma^2$ is the variance. The goal is to minimize the Kullback-Leibler divergence between the approximate and true posterior, which is equivalent to minimizing the loss function:
$$
\mathcal{L}_{\text{dropout}} = \sum_{i=1}^{n} \mathbb{E}_{q(\mathbf{W})} \left[ \mathcal{L}(y_i, g_{\mathbf{W}}(\mathbf{x}_i)) \right] + \sum_{l=1}^{L} \frac{1}{2} \left( \|M_l\|^2 + \sigma^2 \sum_{i=1}^{K_{l-1}} \|W_{l,i}\|^2 \right),
$$
where $g_{\mathbf{W}}(\mathbf{x}_i)$ is the model output for the $i$-th observation. This loss function can be optimized using stochastic gradient descent.
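In practice, this objective reduces to ordinary dropout training combined with $L_2$ weight decay. The snippet below is a minimal PyTorch sketch of that correspondence; the architecture, dropout rate, learning rate, and weight-decay coefficient are illustrative choices for a small regression network, not values prescribed by \cite{Gal2016}.

\begin{verbatim}
import torch
import torch.nn as nn

# Small regression network with dropout after each hidden layer.
class DropoutNet(nn.Module):
    def __init__(self, d_in, d_hidden=50, p_drop=0.1):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(d_in, d_hidden), nn.ReLU(), nn.Dropout(p_drop),
            nn.Linear(d_hidden, d_hidden), nn.ReLU(), nn.Dropout(p_drop),
            nn.Linear(d_hidden, 1),
        )

    def forward(self, x):
        return self.net(x)

model = DropoutNet(d_in=5)
# weight_decay adds the L2 penalty on the weights, playing the role of
# the regularization term in the dropout objective above.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3,
                            weight_decay=1e-4)
loss_fn = nn.MSELoss()

def training_step(x_batch, y_batch):
    optimizer.zero_grad()
    loss = loss_fn(model(x_batch), y_batch)  # data-fit term
    loss.backward()
    optimizer.step()
    return loss.item()
\end{verbatim}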
The derivation shows that model uncertainty can be obtained using dropout. The predictive distribution $F(y | \mathbf{x}, D)$ is approximated by:
$$
q(y| \mathbf{x}) = \int f(y| \mathbf{x}, \mathbf{W}) q(\mathbf{W}) d\mathbf{W}.
$$
This integral allows us to approximate the moments of the predictive distribution using Monte Carlo integration by sampling from $q(\mathbf{W})$:
$$
\mathbb{E}_{q(y|\mathbf{x})}[Y \mid \mathbf{x}, D] \approx \frac{1}{T} \sum_{t=1}^{T} g_{\mathbf{W}^{(t)}}(\mathbf{x}),
$$
where $\mathbf{W}^{(t)}$ is one of $T$ samples from $q(\mathbf{W})$. These Monte Carlo estimates, referred to as MC dropout, provide uncertainty estimates for our predictions.
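Operationally, sampling from $q(\mathbf{W})$ amounts to running $T$ stochastic forward passes with dropout kept active at prediction time. The following sketch continues the hypothetical PyTorch example above; the choice $T=100$ is illustrative. The sample mean approximates the predictive mean, and the sample spread serves as an uncertainty estimate.

\begin{verbatim}
@torch.no_grad()
def mc_dropout_predict(model, x, T=100):
    model.train()  # keep dropout active at prediction time
    samples = torch.stack([model(x) for _ in range(T)])  # T x n x 1
    model.eval()
    # Monte Carlo estimates of the predictive mean and its spread.
    return samples.mean(dim=0), samples.std(dim=0)

x_test = torch.randn(10, 5)  # 10 test points with 5 features
mean, std = mc_dropout_predict(model, x_test)
\end{verbatim}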

\subsection{Gaussian Process}

\subsection{Bayesian Additive Regression Trees}

%% dataset table
\begin{table*}[ht]
\caption{Summary of the real datasets used, showing the number of samples $n$, the number of features $p$, and the source from which each dataset can be obtained.}
\label{tab:realdata}
\begin{adjustbox}{width=0.9\textwidth,center}
\begin{tabular}{lccllccl}
\hline
\textbf{Dataset} & $n$ & $p$ & \textbf{Source} & \textbf{Dataset} & $n$ & $p$ & \textbf{Source} \\
\hline
Airfoil & 1503 & 5 & UCI & News & 39644 & 59 & UCI \cite{online_news_popularity_332} \\
Bike & 10885 & 12 & Kaggle \cite{kaggle_bike_sharing_demand} & Protein & 45730 & 8 & UCI \cite{physicochemical_properties_of_protein_tertiary_structure_265} \\
Blog & - & - & - & Star & 2161 & 48 & clover repository \\
Concrete & 1030 & 8 & UCI \cite{concrete_compressive_strength_165} & SuperConductivity & 21263 & 81 & UCI \cite{superconductivty_data_464} \\
Cycle & 9568 & 4 & UCI \cite{combined_cycle_power_plant_294} & WEC & - & - & - \\
Electric & 10000 & 12 & UCI & Winered & - & - & UCI \cite{wine_quality_186} \\
Homes & - & - & - & WineWhite & 1599 & 11 & UCI \cite{wine_quality_186} \\
Meps19 & 15781 & 141 & clover repository \cite{github_clover_meps} & & & & \\
\hline
\end{tabular}
\end{adjustbox}
\end{table*}