% \begin{figure}
% \scriptsize
% \begin{subfigure}[t]{0.5\textwidth}
% \centering
% % \vbox to \ht\mybox{%
% % \vfill
% \begin{tikzpicture}%[node distance=10cm]
% \node (goal) [fornode] at (-1,1.95) {(a) \textbf{Train:} Estimate $v_i^j
% =P_{i-1}(x^{1\colon j}_i)
% $ 
% for each $i$
% };
% \node (init) [wiiidenode] at (-1,1.5) {Initialise ${u_{0}^j(x_{i})} \gets \Phi(x_{i}^j)$};
% \node (forobs) [fornode] at (-1,1.1) {For each preceding observation $x_k$  with $k<i$:};
% \node (forfeat) [fornode] at (-0.75,0.8) {For each feature $j$:};
% \node (bandwidth) [newnode] at (-0.75, 0.3) {Compute \textit{data-dependent} bandwidth $\rho^j(x^{1\colon j}_i, x^{1\colon j}_k)$ 
% \eqref{eq:rho}};
% \node (cdfupdate) [oldnode] at (-0.75,-0.6) {Update conditional CDF $u_i^j(x_k)\colon=P_i^j(x_k)$ based on the similarity between $u_{i-1}^j(x_k)$ and $v_{i-1}^j$ \eqref{eq:mv_autoreg_DP_copdistr}};
% \node (prequpdate) [wiiidenode] at (-1,-1.38) {Set $v_{i}^j\leftarrow u_{i}^j(x_i)$ for all $j$};
% % \draw [dashed] (-4.31, 0.8) -- (-4.31, -1.145);  
% % \draw [dashed] (-4.07, 0.3) -- (-4.07, -1.145); 
% \draw (-4.31, 0.9) -- (-4.31, -1); 
% \draw (-4.07, 0.6) -- (-4.07, -1); 
% % \node (bandwidth) [oldnode] at (-0.5,-2) {Update predictive density $p_{i-1}(x_k)\rightarrow p_i(x_k)$};
% \end{tikzpicture}
%         % \vfill
%         % }
% % \caption{Update with observation $x_i$ at \textbf{train} time.}
% % \label{fig:train}
% \end{subfigure}%
% \hfill
% \begin{subfigure}[t]{0.5\textwidth}
% \centering
% % \vbox to \ht\mybox{%
% % \vfill
% \begin{tikzpicture}%[node distance=10cm]
% \node (goal) [fornode] at (-1,1.95) {(b) \textbf{Test:} Estimate predictive at test point $p_{\nr}(z)$};
% \node (init) [wiiidenode] at (-1,1.5) {Initialise ${u_{0}^j(z)} \gets \Phi(z^j)$};
% \node (forobs) [fornode] at (-1,1.1) {For each train observation $x_i$:};
% \node (forfeat) [fornode] at (-0.75,0.8) {For each feature $j$:};
% \node (bandwidth) [newnode] at (-0.75, 0.3) {Compute \textit{data-dependent} bandwidth\\$\rho^j(x^{1\colon j}_i, z^{1\colon j})$ \eqref{eq:rho}};
% \node (bandwidth) [oldnode] at (-0.75,-0.6) {Update conditional CDF $u_i^j(z)\colon=P_i^j(z)$ based on the similarity between $u_{i-1}^j(z)$ and $v_{i-1}^j$ \eqref{eq:mv_autoreg_DP_copdistr}};
% \node (bandwidth) [widenode] at (-0.85,-1.35) {Update predictive density $p_{i-1}(z)\rightarrow p_i(z)$ \eqref{eq:mv_DP_marginal}};
% % \draw [dashed] (-4.3, 1.25) -- (-4.3, -1.65); 
% % \draw [dashed] (-4.05, 0.3) -- (-4.05, -1.145); 
% \draw (-4.31, 0.9) -- (-4.31, -1.45); 
% \draw (-4.07, 0.6) -- (-4.07, -1); 
% \end{tikzpicture}
%         % \vfill
%         % }
% % \caption{Predictive density estimate at \textbf{test} point $z$.}
% % \label{fig:test}
% \end{subfigure}%

% \caption{\textcolor{red}{Simplified summary of AR-BP. We repeat the training update for each train datum $x_i$ to estimate $v_i^j=P_{i-1}(x^{1:j}_i)$ for all $i\in\{1,\ldots,\nr\}$. These are needed at test time to update from $p_{i-1}(z)\rightarrow p_i(z)$. All steps are averaged over different feature and sample permutations. The main step that induces autoregression in the observations is highlighted \textcolor{pink}{pink}. Please see Supplement \ref{app:imp} for detailed algorithms.}}
%     \label{fig:flow}
% \end{figure}


% Figure fig:flow -- two-panel flow chart summarising AR-BP
% (left panel: train-time update, right panel: test-time update).
%
% The node styles used below (fornode0, fornode1, wiiidenode, widenode,
% newnode, oldnode) and the macro \nr are not defined in this block; they are
% expected to be provided elsewhere (presumably the preamble -- TODO confirm).
%
% Every node and rule is placed at an absolute, hand-chosen coordinate, so a
% node and the vertical rules beside it have to be moved together.
\begin{figure}
\scriptsize
% ---------------------------------------------------------------- panel (a)
% The subfigures carry no \caption of their own; the panel tags "(a)"/"(b)"
% are typed by hand inside the first node of each picture.
\begin{subfigure}[t]{0.5\textwidth}
\centering
\begin{tikzpicture}
% Panel title.
\node (goal) [fornode0] at (-1,1.95) {%
(a) \textbf{Train:} Estimate $v_i^j=P_{i-1}(x^{1\colon j}_i)$ for each $i$
};
% Initialisation, followed by the two nested "for each" headers.
\node (init) [wiiidenode] at (-1,1.42) {Initialise ${u_{0}^j(x_{i})} \gets \Phi(x_{i}^j)$};
\node (forobs) [fornode0] at (-1,0.94) {For each preceding observation $x_k$ with $k<i$:};
\node (forfeat) [fornode1] at (-0.875,0.52) {For each feature $j$:};
% Steps listed under the "for each feature" header.
\node (bandwidth) [newnode] at (-0.75,-0.08) {Compute \textit{data-dependent} bandwidth\\ $\rho^j(x^{1\colon j}_i, x^{1\colon j}_k)$ \eqref{eq:rho}};
\node (cdfupdate) [oldnode] at (-0.75,-0.93) {Update conditional CDF $u_i^j(x_k)\colon=P_i^j(x_k)$ based on the similarity between $u_{i-1}^j(x_k)$ and $v_{i-1}^j$ \eqref{eq:mv_autoreg_DP_copdistr}};
% Final step, placed below both vertical rules.
\node (prequpdate) [wiiidenode] at (-1,-1.65) {Set $v_{i}^j\leftarrow u_{i}^j(x_i)$ for all $j$};
% Two vertical rules to the left of the nodes; they appear to mark the extent
% of the observation loop (x=-4.31) and the feature loop (x=-4.07).
% Both stop at y=-1.35, i.e. above the final node.
\draw (-4.31, 0.7) -- (-4.31, -1.35);
\draw (-4.07, 0.3) -- (-4.07, -1.35);
\end{tikzpicture}
\end{subfigure}%
\hfill
% ---------------------------------------------------------------- panel (b)
\begin{subfigure}[t]{0.5\textwidth}
\centering
\begin{tikzpicture}
% Panel title.
\node (goal) [fornode0] at (-1,1.95) {(b) \textbf{Test:} Estimate predictive at test point $p_{\nr}(z)$};
% Initialisation, followed by the two nested "for each" headers.
\node (init) [wiiidenode] at (-1,1.42) {Initialise ${u_{0}^j(z)} \gets \Phi(z^j)$};
\node (forobs) [fornode0] at (-1,0.94) {For each train observation $x_i$:};
\node (forfeat) [fornode1] at (-0.88,0.52) {For each feature $j$:};
% Steps listed under the "for each feature" header. Each node has its own
% name, mirroring panel (a), so that no name is defined twice in the picture.
\node (bandwidth) [newnode] at (-0.75,-0.08) {Compute \textit{data-dependent} bandwidth\\$\rho^j(x^{1\colon j}_i, z^{1\colon j})$ \eqref{eq:rho}};
\node (cdfupdate) [oldnode] at (-0.75,-0.93) {Update conditional CDF $u_i^j(z)\colon=P_i^j(z)$ based on the similarity between $u_{i-1}^j(z)$ and $v_{i-1}^j$ \eqref{eq:mv_autoreg_DP_copdistr}};
% Density update; the outer rule below is drawn down to y=-1.8 so that it
% runs past this node, while the inner rule stops at y=-1.35 above it.
\node (densityupdate) [widenode] at (-0.88,-1.62) {Update predictive density $p_{i-1}(z)\rightarrow p_i(z)$ \eqref{eq:mv_DP_AR_marginal}};
\draw (-4.31, 0.7) -- (-4.31, -1.8);
\draw (-4.07, 0.3) -- (-4.07, -1.35);
\end{tikzpicture}
\end{subfigure}%

% Caption: index ranges use \colon to match the notation inside the nodes;
% the tie before \ref keeps "Supplement" and its number on the same line.
\caption{Simplified summary of AR-BP.
We repeat the training update for each train datum $x_i$ to estimate $v_i^j=P_{i-1}(x^{1\colon j}_i)$.
These are needed at test time to update from $p_{i-1}(z)\rightarrow p_i(z)$.
All steps are averaged over different feature and sample permutations.
The main step that induces autoregression in the observations is highlighted \textcolor{pink}{pink}.
Please see Supplement~\ref{app:imp} for detailed algorithms.}
    \label{fig:flow}
\end{figure}
