\vspace{-1.5in}
The supplementary material is structured as follows:
\begin{itemize}
    \item \textbf{Appendix \ref{secAlgSuppl}} lists the vanilla CMA-ES algorithm for comparison to our method and the computation of all hyperparameters that we used.
    \item \textbf{Appendix \ref{secExpDets}} presents the full experimental details of our work.
    \item \textbf{Appendix \ref{secSupplRes}} presents additional experimental results. These include ablations of SV-CMA-ES, an empirical runtime analysis, and an empirical convergence analysis.
\end{itemize}

\begin{algorithm}[!ht]
    \caption{The Vanilla CMA-ES Update. Adapted from \citet{hansen2016cma}.}
    \label{alg:cmaes}
    \textbf{Input:} Generation index $t$, search distribution parameters $\vx, \sigma, \vC$; pop.\ size $n$, num.\ elites $m$; learning rates $\epsilon, \alpha_\sigma, \alpha_1, \alpha_{m}, \alpha_c$; damping hyperparam. $d_\sigma$\\
    \begin{algorithmic}[1]
        \State \textbf{Sample and evaluate new population of search points}, for $i = 1, \dots, n$
        \begin{flalign*}
            %&&\\
            %
            &\vy_{i} = (\vxi_{i} - \vx) / \sigma, \quad \text{where } \vxi_{i} \sim \NN (\vx, \sigma^2 \vC)&\\
            %
            &f_{i} = f(\vxi_i)&
        \end{flalign*}
        
        \State \textbf{Selection and recombination}
        \Statex Sort $\vy_{i}$ by $f_{i}$ in ascending order, for $i = 1, \dots, n$
        \begin{flalign*}
            &\cma = \sigma \sum_{i=1}^m w_i \vy_{i} &&& \text{where } \sum_{i=1}^m w_i = 1,\text{ and }\forall i \in \{1, \dots, m\}.~ w_i > 0 &\\
            %
            &\vx \leftarrow \vx + \epsilon~\cma&
        \end{flalign*}

        \State \textbf{Cumulative step-size adaptation}
        \begin{flalign*}
            &\vp_{\sigma} \leftarrow (1 - \alpha_{\sigma}) \vp_{\sigma} + \sqrt{\alpha_\sigma (2 - \alpha_\sigma)~ m_{\text{eff}}}~\vC^{-\frac{1}{2}} \cma / \sigma, &&& \text{where }m_{\text{eff}} = (\tsum_{i=1}^m w_i^2)^{-1}&\\
            %
            &\sigma \leftarrow \sigma \times \exp\left(\tfrac{\alpha_{\sigma}}{d_{\sigma}} \left(\tfrac{\lVert \vp_{\sigma}\rVert}{\EEE \lVert \NN (0, \Id) \rVert} - 1\right)\right)&
        \end{flalign*}

        \State \textbf{Covariance matrix adaptation}
        \begin{flalign*}
            &h_\sigma = \mathds{1}\left(\tfrac{\lVert \vp_{\sigma} \rVert}{\sqrt{1 - (1-\alpha_\sigma)^{2(t+1)}}} < (1.4 + \tfrac{2}{d+1})\EEE \lVert \NN (0, \Id) \rVert\right)&\\ %\label{eq:hsigma}
            %
            &\bar{w}_j = w_j \times \left( 1 \text{ if } w_j \geq 0 \text{ else } d / \lVert \vC^{-\frac{1}{2}} \vy_j \rVert^2 \right)&\\ 
            %
            &d(h_\sigma) = (1 - h_\sigma)\,\alpha_c (2-\alpha_c) \leq 1&\\ %\label{eq:dhsigma}
            %
            &\vp_c \leftarrow (1 - \alpha_c) \vp_c + h_{\sigma} \sqrt{\alpha_c (2-\alpha_c) m_{\text{eff}}}~ \cma / \sigma&\\
            %
            &\vC \leftarrow (1 + \alpha_1 d(h_\sigma) - \alpha_1 - \alpha_m \tsum_{i=1}^n w_i)\vC + \alpha_1 \vp_c {\vp_c}^T + \alpha_m \sum_{i=1}^n \bar{w}_i \vy_i \vy_i^T&
        \end{flalign*}
    \end{algorithmic}
\end{algorithm}


\section{Vanilla CMA-ES Algorithm}\label{secAlgSuppl}

The listing in Algorithm \ref{alg:cmaes} displays the update step of vanilla CMA-ES at iteration $t$.
Parallel CMA-ES, on which we base our method, uses the same update, but carries it out $\varrho$ times in parallel, once for each search distribution.
We would like to point out that our notation differs from the standard notation of the ES community.
While the number of sampled proposals in each iteration is classically denoted by $\lambda$ and the number of selected elites by $\mu$ \citep{hansen2016cma}, we deviate from this notation in the present work.
Following the main paper, we denote the number of sampled candidates by $n \in \NNN^+$, and the number of elites by $m$.
In our experiments, we tune the elite population size $m \in \NNN^+$ and the initial value for $\sigma$ using a grid search (cf.\ Appendix \ref{secExpDets}), and use the defaults from the evosax codebase \citep{lange2023evosax} for the remaining parameters.
These hyperparameters should coincide with those of \citet{hansen2016cma}.
%For completeness, we list those hyperparameters in Appendix \ref{secHyper}.


\subsection{Computation of the Recombination Weights and Hyperparameters}\label{secHyper}
For completeness, we list the computation of the recombination weights and all remaining hyperparameters in this section.
Each population uses the same recombination weights, so $w_{ik} = w_{jk}$ for populations $i$ and $j$.
For simplicity, we therefore drop the population indices in the weights and define them for the single population case.
For this, we use equations (49)--(58) from \citet{hansen2016cma} with an \textit{elite} population size of $m$.
Please note that the aforementioned work uses different notation and denotes the population size by $\lambda$, and the number of elites by $\mu$.

Given population size $n$ and problem dimensionality $d$, we compute the set of weights $w_1, \ldots, w_n$ as follows:
\begin{align}
    &\epsilon = 1\\
    &w'_i = \ln\left(\frac{n + 1}{2}\right) - \ln(i), \quad \text{for } i = 1, \ldots, n\\
    &m_{\text{eff}} = \frac{(\sum_{i=1}^m w'_i)^2}{\sum_{i=1}^m {w'}_i^2} \qquad \text{Note that this is equivalent to the previous definition in } \eqref{eq:mueff}\\
    &m_{\text{eff}}^- = \frac{(\sum_{i=m+1}^n w'_i)^2}{\sum_{i=m+1}^n {w'}_i^2}\\
    &\alpha_\sigma = \frac{m_{\text{eff}} + 2}{d + m_{\text{eff}} + 5}\\
    &d_\sigma = 1 + 2 \max \Bigl( 0, \sqrt{\frac{m_{\text{eff}} - 1}{d+1}} - 1\Bigr) + \alpha_\sigma\\
    &\alpha_c = \frac{4 + m_{\text{eff}} / d}{d + 4 + 2m_{\text{eff}} / d}\\
    &\alpha_1 = \frac{2}{(d + 1.3)^2 + m_{\text{eff}}}\\
    &\alpha_m = \min \Bigl(1 - \alpha_1, 2\frac{1 / 4 + m_{\text{eff}} + 1 / m_{\text{eff}} - 2}{(d + 2)^2 + m_{\text{eff}}} \Bigr)\\
    &\beta_m = 1 + \alpha_1 / \alpha_m\\
    &\beta_{m_{\text{eff}}} = 1 + \frac{2 m_\text{eff}^-}{m_\text{eff} + 2} \\
    &\beta_{\text{pos def}} = \frac{1 - \alpha_1 - \alpha_m}{d \alpha_m}\\
    &w_i = \begin{cases}
        \frac{1}{\sum_{j=1}^n \max(w'_j, 0)} w'_i & \text{if } w'_i \geq 0 \\[2mm]
        \frac{\min(\beta_m, \beta_{m_{\text{eff}}}, \beta_{\text{pos def}})}{\sum_{j=1}^n |\min(w'_j, 0)|} w'_i & \text{else} 
    \end{cases}
\end{align}

\section{Experimental Details}\label{secExpDets}

This section lists the full experimental details for this paper.
The code is partially based on \citet{langosco2021neural} and \citet{lange2023evosax}.
For the experiments including existing ES such as vanilla CMA-ES, we use the implementations in $\texttt{evosax}$ \citep{lange2023evosax}.
All the experiments are performed on an internal cluster with eight NVIDIA A40 GPUs.
The code to reproduce our experiments and plots will be made available upon conference publication.

Each experiment is repeated 10 times using randomly generated seeds.
In each plot, we report the mean performance across all 10 runs and 1.96 standard error bars. 
To compute the maximum mean discrepancy \citep[MMD;][]{gretton2012kernel}, we use the RBF kernel, for which we select the bandwidth based on the median distance between the ground truth samples \citep{han2018stein}.
For all experiments, we use the standard RBF kernel $k(\vx, \vy) = \exp(-\lVert \vx - \vy \rVert^2 / (2h))$, for which we find the bandwidth $h$ via grid search.
Following \citet{salimans2017evolution} we use a rank transformation for fitness shaping for all OpenAI-ES-based methods.
For GF-SVGD, we follow \citet{han2018stein} in using a Gaussian prior $\NN(0, \sigma^2 \Id)$, where $\sigma^2$ is determined via grid search.

\subsection{Hyperparameter Tuning}
We tune the hyperparameters for each method separately.
For all methods, this includes the kernel bandwidth.
For SVGD, GF-SVGD and SV-OpenAI-ES, we additionally tune the Adam learning rate, while the initial step-size $\sigma$ for SV-CMA-ES is selected analogously.
For GF-SVGD, we follow \citet{han2018stein} in using a Gaussian prior centered at the origin, with isotropic covariance.
The scale of the prior covariance is also determined via grid search.
The ranges over which we search are listed in the following subsections in the corresponding hyperparameter paragraph of each experimental subsection.
For SV-OpenAI-ES, we additionally tune the width $\sigma$ of the proposal distribution, and for SV-CMA-ES we select the elite population size $m$ via grid search.
All remaining hyperparameters for the CMA-ES- and OpenAI-ES-based algorithms are chosen to be the defaults from evosax.
The ranges over which we search the hyperparameters differ across the problems due to their different characteristics.
We list the specific ranges in the following subsections.
The full list of hyperparameters can be found in Table \ref{table:hyperparams}.

% Full hyperparameter values
\begin{table}[h]
\centering
\caption{Full Hyperparameter Overview}\label{table:hyperparams}
\begin{tabular}{l|l|ll|lll|lll|lll}
\toprule
\textbf{Task} & \textbf{Dim.} & \multicolumn{2}{c|}{\textbf{SVGD}} & \multicolumn{3}{c|}{\textbf{GF-SVGD}} & \multicolumn{3}{c|}{\textbf{SV-CMA-ES}} & \multicolumn{3}{c}{\textbf{SV-OpenAI-ES}} \\
\textit{Hyperparameter} & & \multicolumn{1}{c}{$\epsilon$} & \multicolumn{1}{c|}{$h$} & \multicolumn{1}{c}{$\epsilon$} & \multicolumn{1}{c}{$h$} & \multicolumn{1}{c|}{$\sigma^2$} & \multicolumn{1}{c}{$m$} & \multicolumn{1}{c}{$h$} & \multicolumn{1}{c|}{$\sigma^2$} & \multicolumn{1}{c}{$\epsilon$} & \multicolumn{1}{c}{$h$} & \multicolumn{1}{c}{$\sigma^2$} \\
\midrule
Gaussian Mixture & 2& 0.05 & 0.223 & 1.0 & 0.889 & 2.72 & 2 & 0.889 & 0.50 & 0.50 & 0.001 & 0.10 \\
Double Banana & 2 & 1.0 & $10^{-4}$ & 0.001 & 0.011 & 1.116 & 2 & 0.011 & 0.5 & 0.001 & $10^{-4}$ & 0.15 \\
Motion Planning & 10 & 0.01 & 0.01 & 0.001 & 0.67 & 2.67 & 2 & 0.01 & 0.10 & 0.05 & 0.01 & 0.10 \\
Credit & 22 & 0.1 & $10^{-3}$ & 0.01 & 0.67 & 3.34 & 9 & $10^{-3}$ & 0.35 & 0.005 & $10^{-3}$ & 0.05 \\
Covtype & 55 & 0.01 & 0.45 & 0.05 & 1.0 & 2.28 & 12 & 0.78 & 0.15 & 0.01 & 0.334 & 0.2 \\
Spam & 58 & 0.01 & 0.11 & 0.05 & 1.0 & 0.56 & 9 & 0.45 & 0.20 & 0.01 & 0.11 & 0.2 \\
Pendulum & 353 & -- & -- & 0.05 & 16.67 & 0.34 & 2 & 3.33 & 0.47 & 0.10 & 30.0 & 0.05 \\
CartPole & 386 & -- & -- & 0.10 & 13.33 & 0.45 & 3 & 30.0 & 0.89 & 1.0 & 30.0 & 1.0 \\
MountainCar & 337 & -- & -- & 0.05 & 23.33 & 0.56 & 2 & 30.0 & 0.68 & 1.0 & 30.0 & 0.68 \\
Halfcheetah & 678 & -- & -- & 0.05 & 6.67 & 0.01 & 5 & 16.67 & 0.68 & 0.05 & 30.0 & 0.05 \\
Hopper & 515 & -- & -- & 0.10 & 16.67 & 0.01 & 5 & 3.33 & 0.05 & 0.10 & 30.0 & 0.26 \\
Walker & 662 & -- & -- & 0.10 & 10.0 & 0.01 & 8 & 10.0 & 0.79 & 0.05 & 30.0 & 0.16 \\
\bottomrule
\end{tabular}
\footnotesize{
\begin{tabbing}
\hspace*{.5cm}\= \textbf{SVGD:} \= $\epsilon$ is the Adam learning rate, $h$ is the kernel bandwidth. \\
\hspace*{.5cm}\= \textbf{GF-SVGD:} \= $\epsilon$ is the Adam learning rate, $h$ is the bandwidth, $\sigma^2$ is the scale of the prior covariance. \\
\hspace*{.5cm}\= \textbf{SV-CMA-ES:} \= $m$ is the number of elites, $\sigma^2$ is the init.\ step-size, $h$ is the bandwidth. \\
\hspace*{.5cm}\= \textbf{SV-OpenAI-ES:} \= $\epsilon$ is the Adam learning rate, $\sigma^2$ is the step-size, $h$ is the bandwidth.
\end{tabbing}
}
\end{table}

\subsection{Sampling from Synthetic Densities}

\paragraph{Gaussian Mixture}
We construct a Gaussian Mixture by uniformly sampling $N$ modes $\vmu_i$ over the fixed interval of $[-6, 6]^2$.
Furthermore, we sample the associated weights $w_i \in \RRR$ uniformly over $[0, 10]$. The resulting density is given by:
$$p(\vx) = \frac{1}{K} \sum_{i=1}^N w_i \NN (\vx; \vmu_i, \Id), \qquad K = \tsum_{i=1}^N w_i.$$
In our setting, we set $N = 4$. 
We note that in our experiments one of the sampled weights is close to zero, which is why Fig.\ \ref{fig:samples} depicts only three modes.

\paragraph{Double Banana}
We use the double banana density that was introduced by \citet{detommaso2018stein}:
\begin{gather*}
    p(\vx) \propto \exp \Bigl( -\frac{\lVert \vx \rVert_2^2}{2 \sigma_1} - \frac{(y - F(\vx))^2}{2 \sigma_2} \Bigr),\\
    \text{where } \vx = [x_1, x_2] \in \RRR^2, \qquad F(\vx) = \log((1-x_1)^2 + 100(x_2-x_1^2)^2).
\end{gather*}
We choose the same hyperparameters as \citet{wang2019matrix}, which are $y = \log (30), \sigma_1 = 1.0, \sigma_2 = 0.09$.

\paragraph{Motion Planning}
The motion planning problem that we sample from was introduced by \citet{barcelos2024path}.
The goal is to sample from the density over $N$ waypoints that induce a trajectory from a predefined start point $t_0$ to a target $t_T$.
The resulting density defines a cost that interpolates obstacle avoidance and smoothness penalties:
\begin{align*}
    &p(\vx) \propto \exp \Bigl( -\sum_{t=1}^{N} \bigl[ p_{\text{collision}} (\vx_t) + \alpha \lVert \vx_t - \vx_{t-1} \rVert_2 \bigr] \Bigr),\\
    \text{where } &p_{\text{collision}}(\vx) = \frac{1}{K} \sum_{i=1}^K \NN (\vx; \vmu_i, \sigma^2 \Id).
\end{align*}
The distribution $p_{\text{collision}}$ induces a randomized 2D terrain for navigation, with the valleys between modes having lower cost. 
We sample a total number of $K = 15$ obstacles $\vmu_i \in \RRR^2$ from a Halton sequence following the procedure of \citet{barcelos2024path}.
We set the hyperparameter $\sigma = 0.25$ and use $N = 5$ waypoints.
This results in an optimization problem over $\RRR^{10}$, as each waypoint is two-dimensional.

\paragraph{Ground Truth Samples}
For each task, we sample 256 ground truth samples to compute the MMD.
For the Gaussian Mixtures, samples can be drawn by sampling from the closed form Gaussian mixture.
For the other two problems, ground truth samples are drawn with the help of standard Metropolis-Hastings MCMC.
For this, we sample from one independent chain per sample, using $100{,}000$ burn-in steps.

\paragraph{Hyperparameter Tuning} We carefully tune the hyperparameters of each method by carrying out a grid search over the following hyperparameter ranges.
For the Adam learning rate, we tune over the values proposed by \citet{wang2019matrix}: $[0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0]$.
For the kernel parameter $h$, we use 10 equidistant values from the intervals $[0.001, 1]$, $[0.0001, 0.1]$, $[0.001, 3.0]$ for the Gaussian mixture, double banana and motion planning tasks respectively.
For the elite ratio hyperparameter in SV-CMA-ES, we use 10 equidistant values from the interval $[0.15, 0.5]$, and for the initial value of $\sigma$ that is used in SV-CMA-ES and SV-OpenAI-ES, we use 10 equidistant values from the interval $[0.05, 0.5]$.
For GF-SVGD $\sigma$ refers to the prior covariance scaling which we search over 10 equidistant values from the intervals $[0.1, 6.0]$, $[0.01, 2.0]$, and $[0.01, 4.0]$ for the Gaussian mixture, double banana and motion planning tasks respectively.

\begin{table}[ht]
\centering
\caption{Sampling Task Hyperparameters}
\begin{tabular}{ll|ll}
\toprule
\textbf{Hyperparameter} & \textbf{Value} & \textbf{Hyperparameter} & \textbf{Value} \\
\midrule
Number of iterations & 1000 & Total Pop.\ Size & 400 \\
Number of seeds & 10 & Number of Subpop.\ & 100 \\
\bottomrule
\end{tabular}
\end{table}

\subsection{Logistic Regression}

\paragraph{Datasets}
We follow the implementation of \citet{langosco2021neural}, which uses a hierarchical prior $p(\theta)$ on the parameters $\theta = [\alpha, \beta]$, where $\beta \sim \NN (0, \alpha^{-1})$ and $\alpha \sim \Gamma (a_0, b_0)$.
Following the aforementioned work, we use $a_0 = 1$ and $b_0 = 0.01$.
Given data $D$, the task is to approximate samples from the posterior
\begin{align*}
    &p(\theta \mid D) \propto p(D \mid \theta)p(\theta)\\
    \intertext{with}
    &p(D \mid \theta) = \prod_{i=1}^N \bigl[ y_i \tfrac{\exp (x_i^T \beta)}{1 + \exp (x_i^T \beta)} + (1- y_i) \tfrac{\exp (-x_i^T \beta)}{1 + \exp (-x_i^T \beta)} \bigr].
\end{align*}
We use three datasets from the UCI Machine Learning repository \citep{asuncion2007uci} in our experiments: Covtype \citep{blackard1998comparison}, Spambase \citep{hopkins1999spambase}, and Credit score data \citep{data23}.
For the Covtype dataset, we create a binary version following \citet{liu2016stein}, which groups the samples into the two groups: \textit{Lodgepole Pine} versus \textit{rest}.
For each task, we partition the data into 70\% for training, 10\% for validation and 20\% for testing and use a batch size of 128.
The dimensionalities of the problems are defined by the number of features in the respective datasets, with an additional degree of freedom for the parameter $\alpha$ of the prior.

\paragraph{Hyperparameter Tuning}
We tune the hyperparameters of each method by carrying out a grid search over the following hyperparameter ranges on the validation set. 
For the Adam learning rate, elite ratio, and initial $\sigma$ values for ES, we tune over the same parameter ranges as for the toy densities.
For the kernel parameter $h$, we use 10 equidistant values from the interval $[0.001, 1.0]$ and for the prior $\sigma$ in GF-SVGD we use the interval $[0.01, 5.0]$.

\begin{table}[h]
\centering
\caption{Logistic Regression Task Hyperparameters}
\begin{tabular}{ll|ll}
\toprule
\textbf{Hyperparameter} & \textbf{Value} & \textbf{Hyperparameter} & \textbf{Value} \\
\midrule
Number of iterations & 1000 & Total Pop.\ Size & 256 \\
Number of seeds & 10 & Number of Subpop.\ & 8 \\
Batch size & 128 & -- & -- \\
\bottomrule
\end{tabular}
\end{table}

\subsection{Reinforcement Learning}

\paragraph{Setup}
The goal of each RL task is to maximize the expected return.
The corresponding objective is to sample from the following posterior:
\begin{equation*}
    p(\theta) \propto \exp(J(\theta)), \quad J(\theta) = \EEE_{(s_t, a_t) \sim \pi_{\theta}} \bigl[\sum_{t=1}^T r(s_t, a_t)\bigr].
\end{equation*}
Here, we estimate the expected return via MC approximation across 16 independent policy rollouts for each iteration at which we evaluate the current particle set performance.
In our experiments, we report the best expected return across all particles.
Every such experiment is repeated 10 times, as with all previous tasks.
We use the standard maximum episode length for all classic tasks as specified in the gym environment implementation, except for MountainCar, where we use a maximum episode length of 500 (instead of the default 1000 steps) to make the task more difficult.
For the brax control tasks, we use a maximum episode length of 1000 steps.
For the classic tasks Pendulum, CartPole and MountainCar, we use the $\texttt{gymnax}$ implementations \citep{lange2022gymnax}, and for the remaining environments, we use the $\texttt{brax-v1}$ implementations \citep{brax2021github}.
We use the continuous version of MountainCar.
Please note that we use the brax legacy dynamics in our experiments.

\paragraph{Hyperparameter Tuning}
We tune the hyperparameters of each method by carrying out a grid search over the following hyperparameter ranges. 
For the Adam learning rate, elite ratio, and initial $\sigma$ values, we tune over the same parameter ranges as for the toy densities.
For the kernel parameter $h$, we use 10 equidistant values from the interval $[0.001, 30.0]$ and for the prior $\sigma$ in GF-SVGD we use the interval $[0.01, 1.0]$.
For brax environments, we tune the hyperparameters for 500 generations and report results for running the selected configurations over 10 seeds for 1000 generations.
Please note that while we use 5 seeds for hyperparameter tuning, all evaluation runs and plots in this paper use 10 runs as indicated.

\begin{table}[ht]
\centering
\caption{RL Task Hyperparameters}
\begin{tabular}{ll|ll}
\toprule
\textbf{Hyperparameter} & \textbf{Value} & \textbf{Hyperparameter} & \textbf{Value} \\
\midrule
Number of iterations & 200 (classic) / 1000 (brax) & Total Pop.\ Size & 64 \\
Number of seeds & 10 & Number of Subpop.\ & 4 \\
MLP Layers & 2 & MC Evaluations & 16 \\
Hidden Units & 16 & Hidden Activation & ReLU \\
-- & -- & Output Activation & Tanh \\
\bottomrule
\end{tabular}
\end{table}

\section{Additional Results}\label{secSupplRes}

\begin{figure}[t]
    \centering
    % Left side: CMA-ES-based methods
    \begin{minipage}{0.48\linewidth}
        \centering
        \textbf{CMA-ES-based methods}\\
        \subfloat[Pendulum]{
            \includegraphics[width=.31\linewidth]{imgs/classic_ctrl/CMA_onlyPendulum-v110rep200iter4pop16val}
        }
        \subfloat[CartPole]{
            \includegraphics[width=.3\linewidth]{imgs/classic_ctrl/CMA_onlyCartPole-v110rep200iter4pop16val}
        }
        \subfloat[MountainCar]{
            \includegraphics[width=.3\linewidth]{imgs/classic_ctrl/CMA_onlyMountainCarContinuous-v010rep200iter4pop16val}
        }\\
        \subfloat[Halfcheetah]{
            \includegraphics[width=.3\linewidth]{imgs/brax_ctrl/CMA_onlyhalfcheetah10rep1000iter4pop16val}
        }
        \subfloat[Hopper]{
            \includegraphics[width=.3\linewidth]{imgs/brax_ctrl/CMA_onlyhopper10rep1000iter4pop16val}
        }
        \subfloat[Walker]{
            \includegraphics[width=.3\linewidth]{imgs/brax_ctrl/CMA_onlywalker2d10rep1000iter4pop16val}
        }
    \end{minipage}
    \hfill
    % Right side: OpenAI-ES-based methods
    \begin{minipage}{0.48\linewidth}
        \centering
        \textbf{OpenAI-ES-based methods}\\
        \subfloat[Pendulum]{
            \includegraphics[width=.3\linewidth]{imgs/classic_ctrl/OES_onlyPendulum-v110rep200iter4pop16val}
        }
        \subfloat[CartPole]{
            \includegraphics[width=.3\linewidth]{imgs/classic_ctrl/OES_onlyCartPole-v110rep200iter4pop16val}
        }
        \subfloat[MountainCar]{
            \includegraphics[width=.3\linewidth]{imgs/classic_ctrl/OES_onlyMountainCarContinuous-v010rep200iter4pop16val}
        }\\
        \subfloat[Halfcheetah]{
            \includegraphics[width=.3\linewidth]{imgs/brax_ctrl/OES_onlyhalfcheetah10rep1000iter4pop16val}
        }
        \subfloat[Hopper]{
            \includegraphics[width=.3\linewidth]{imgs/brax_ctrl/OES_onlyhopper10rep1000iter4pop16val}
        }
        \subfloat[Walker]{
            \includegraphics[width=.3\linewidth]{imgs/brax_ctrl/OES_onlywalker2d10rep1000iter4pop16val}
        }
    \end{minipage}
    
    \caption{Comparison of CMA-ES-based methods (left) and OpenAI-ES-based methods (right). In all experiments, a total population size of 64 is used, split across 4 subpopulations in parallel methods. We report the mean ($\pm 1.96$ standard error) across 10 independent runs.}
    \label{fig:rl-baselines}
\end{figure}

\subsection{Algorithm Ablations}\label{secAblations}

\paragraph{Setting} To evaluate the potential of SV-CMA-ES for pure blackbox optimization, we compare it to other ES-based methods that perform pure maximum-likelihood estimation.
The two natural baselines to compare against are first simple CMA-ES, but with a population size of $n = \text{Num.\ particles} \times \text{Subpopsize}$, which ablates the effect of sampling multiple subpopulations in parallel.
Second, we compare against multiple independent parallel runs of CMA-ES, which ablates the coordination of the runs via the SVGD update.

\paragraph{Results} In Fig.\ \ref{fig:rl-baselines}, we display the results of our evaluation.
When comparing SV-CMA-ES against single population CMA-ES we observe similar performances on most problems. 
The most notable difference that stands out is the better performance of SV-CMA-ES on the MountainCar problem.
What happens in this experiment?
The MountainCar problem is an MDP in which a car must be accelerated to reach a goal on a hill.
Since there are negative rewards for large accelerations, there is a large local optimum at which policies are idle and do not accelerate at all to avoid negative rewards.
However, once the goal is reached, the agent receives a reward of 100.
We speculate due to these results that SV-CMA-ES may be superior to other CMA-ES-based methods on such sparse reward environments.
We will further investigate this hypothesis in Appendix \ref{secEnvAbl}.

Furthermore, we compare the performance to multiple parallel CMA-ES runs.
This experiment essentially ablates the kernel term in our update.
We find that in comparison to multiple parallel CMA-ES runs, our method achieves superior performance on half of the problems, and equal on the rest.
These results are encouraging, as they suggest that our SV-ES is a viable alternative to independent parallelizations of a given ES algorithm.

Additionally, we include Fig.~\ref{fig:mc_seeds} to display the per-seed results on the MountainCar problem.
The results highlight that GF-SVGD converges to the local optimum of being idle on two out of the ten runs.

\begin{wrapfigure}{R}{0.45\textwidth}
    \centering
    \includegraphics[width=\linewidth]{imgs/rl/hist_MountainCar}
    \caption{Per-seed performances on the MountainCar task. SV-CMA-ES is the only method to achieve optimal performance across all seeds. GF-SVGD converges to idle agents (i.e., a reward of zero) on two out of the ten seeds.}\label{fig:mc_seeds}
\end{wrapfigure}

\subsection{Environment Ablations}\label{secEnvAbl}

\paragraph{Setting} To investigate the hypothesis that SV-CMA-ES is superior to simple parallel CMA-ES runs or single large-population CMA-ES in sparse reward environments, we construct sparse reward versions of the Walker and Hopper environments following the procedure of \citet{mazoure2019leveraging}.
The goal of the environment ablations is to make the optimization landscape more similar to that of MountainCar.
To achieve this goal, we increase the control cost in the reward and remove the positive rewards for the agent being healthy.
Additionally, the original forward reward is replaced by a positive reward for being upright that is only awarded once the agent has moved beyond a certain position in space.
Essentially, the resulting environments only reward successful locomotion policies once the behavior is learned, instead of rewarding intermediate learning outcomes as well.
In this setting, the reward is negative for policies that do not move the agent far enough and close to zero for policies that do not move the agent at all, similar to the MountainCar reward.
The resulting reward function is:
\begin{align*}
    r_{\text{ablated}}(s_t, a_t) &= \alpha \mathds{1} (\mathrm{pos}(s_t) \geq 1) - \beta \lVert a_t \rVert_2^2,
\end{align*}
where $\mathrm{pos}(s_t) \in \RRR$ is the agent's position at state $s_t$ in the task space.
In our experiments, we choose $\alpha = 2.0$ and $\beta = 0.1$.
Since the resulting ablated Walker problem is significantly more difficult, we run experiments on this benchmark for 1500 instead of 1000 iterations.

\paragraph{Results} 
Our results (Fig.\ \ref{fig:rl-ablated}) demonstrate that SV-CMA-ES outperforms other CMA-ES-based methods considerably in the constructed scenarios.
Both problems are more difficult to solve than the standard problems in Fig.\ \ref{fig:rl-baselines} because a positive reward is only received once the agent moves far enough.
As hypothesized, we observe a similar pattern as for MountainCar.
In other words, SV-CMA-ES outperforms the other methods on sparse reward environments.
These results suggest two things.
First, we find that the coordination of multiple CMA-ES runs via the SVGD step improves the algorithmic performance on problems for which exploration is crucial.
Second, we observe that this effect only comes to fruition if the population updates are coordinated via SVGD, as SV-CMA-ES outperforms parallel CMA-ES runs considerably.
The combination of these findings suggests that SV-CMA-ES constitutes a strong alternative to other CMA-ES-based optimizers if problems require exploration.
We thus believe that analysis of the scalability of the algorithm to a high number of particles beyond our computational capabilities (i.e., several thousands) would be an intriguing direction of future research.

\begin{figure}[hb]
    \centering
    \subfloat[Hopper-sparse]{
        \includegraphics[width=.3\linewidth]{imgs/brax_ctrl/CMA_onlyhopper_ablated10rep1000iter4pop16val}
    }
    \hspace*{5mm}
    \subfloat[Walker-sparse]{
        \includegraphics[width=.3\linewidth]{imgs/brax_ctrl/CMA_onlywalker2d_ablated10rep1500iter4pop16val}
    }
    \caption{Comparison of CMA-ES-based methods on sparse reward environments. Coordinating parallel ES runs via the SVGD update clearly improves the performance. In all experiments, a total population size of 64 is used, split across 4 subpopulations in parallel methods. We report the mean ($\pm 1.96$ standard error) across 10 independent runs.
    }
    \label{fig:rl-ablated}
\end{figure}

\subsection{Empirical Runtime Analysis}\label{secRuntime}

\paragraph{Setting} Given the higher theoretical runtime complexity of SV-CMA-ES compared to the baselines, we perform an empirical runtime analysis.
The goal of this investigation is to evaluate the significance of the theoretical algorithm properties in practice.
To this end, we select multiple problems from the main paper to illustrate the performances over time.
We select the synthetic Gaussian mixture as a low-dimensional and fast-to-evaluate objective, the logistic regression tasks as moderately dimensional tasks with fast-to-evaluate objectives, and the brax RL tasks as the highest-dimensional tasks of the paper, which require expensive simulator calls for evaluations of the ground truth density.
For each problem, we run 1000 iterations, just as in prior experiments. 
For the RL tasks, we use the GPU accelerated brax simulator.
To provide a fair analysis of wall-clock times, we disable deterministic operations on the GPU, which may lead to slight differences in performance \wrt the results reported in the main paper, but is more realistic in terms of runtimes.
We would like to point out, however, that we did not observe any visible differences in the results.
The experiment is carried out on a single NVIDIA A40 GPU and reported times are for all 10 independent runs being run in parallel.

\paragraph{Results} The results of the runtime analysis are depicted in Fig.\ \ref{fig:timing}.
Overall, we observe competitive convergence behavior of SV-CMA-ES in terms of wall-clock time. 
While the overall runtime required to run 1000 iterations is highest for SV-CMA-ES, we do observe that our method obtains good samples efficiently under this metric.

Generally, we observe that the influence of the update computation costs on the overall runtime varies greatly between problems.
While the differences between all methods are relatively small on the synthetic 2d problems, we observe that SV-CMA-ES is slower on high dimensional problems such as the Covtype logistic regression task.
This reflects that the computational cost of SV-CMA-ES increases with the number of problem dimensions, while the cost of the other baselines is dominated by the number of particles.
However, we also observe that the quality of the updates of SV-CMA-ES is still generally better in these cases, as it samples well performing solutions the fastest. 

More importantly, we observe that the main factor that drives overall runtime is the evaluation of the objective function.
Furthermore, we observe smaller runtime differences between the methods on the Halfcheetah task despite it being the most high dimensional task that we evaluate on.
This underlines that the significance of theoretical algorithm complexity depends heavily on the problem and the costs that are associated with target density evaluations. 
Since our method requires fewer iterations than the baselines to reach comparable performance (as shown previously), we find that this demonstrates the practical relevance of the presented approach.

\begin{figure*}[ht]
    \centering
    \subfloat[Gaussian Mixture]{
        \includegraphics[width=.25\linewidth]{imgs/timing/GMMtiming}
    }\hspace*{5mm}
    \subfloat[Credit Log.\ Reg.]{
        \includegraphics[width=.25\linewidth]{imgs/timing/credittiming}
    }\hspace*{5mm}
    \subfloat[Covtype Log.\ Reg.]{
        \includegraphics[width=.25\linewidth]{imgs/timing/covtypetiming}
    }\\
    \subfloat[Halfcheetah RL]{
        \includegraphics[width=.25\linewidth]{imgs/timing/halfcheetah_timing}
    }\hspace*{5mm}
    \subfloat[Hopper RL]{
        \includegraphics[width=.25\linewidth]{imgs/timing/hopper_timing}
    }\hspace*{5mm}
    \subfloat[Walker RL]{
        \includegraphics[width=.25\linewidth]{imgs/timing/walker2d_timing}
    }\hfill
    \caption{Performance vs.\ wall-clock time. We run all methods for 1000 iterations and display the elapsed wall-clock time. The plot shows that SV-CMA-ES also performs well \wrt this metric.
    All results are averaged across 10 independent runs ($\pm 1.96$ standard error).
    }
    \label{fig:timing}
\end{figure*}

\begin{figure}[ht]
    \centering
    \subfloat[Gaussian Mixture]{
        \includegraphics[width=.3\linewidth]{imgs/convergences/GMM_step_convergence}
    }
    \hfill
    \subfloat[Double Banana]{
        \includegraphics[width=.3\linewidth]{imgs/convergences/DoubleBanana_step_convergence}
    }
    \hfill
    \subfloat[Motion Planning]{
        \includegraphics[width=.3\linewidth]{imgs/convergences/F_RAMOS_step_convergence}
    }
    \caption{Mean step lengths per iteration. We record the step length every 20 steps across $1\,000$ iterations. 
    The SV-CMA-ES steps and estimated gradient steps by CMA-ES converge to a stable equilibrium. We report the mean ($\pm 1.96$ standard error) across all 100 particles and 10 independent runs.
    }
    \label{fig:convergence}
\end{figure}

\subsection{Empirical Convergence Analysis}\label{sec:app_convergence}
To evaluate the convergence of our method, we expand our analysis beyond the pure MMD computations in Fig.\ \ref{fig:mmd} and analyze the length of the particle steps.
To this end, we use the same setting as the one in Fig.\ \ref{fig:mmd} to plot the Frobenius norm of the particle preconditioning matrix $\sigma \vC^{1/2}$ across iterations in Fig.\ \ref{fig:convergence}.
In the same figure, we further plot the length of the steps that were actually taken by the particles, i.e., the size of the update in Eq.\ \eqref{eq:asv-cma-final}.
The results show that the algorithm exhibits stable convergence.
A key finding in this context is that for each problem, there exists a stationary point at which the CMA-ES steps that are sampled from $\NN(\vx, \sigma^2 \vC)$ are counteracted by the kernel gradients, as the total step length is shorter than the matrix norm.
This shows that our method is capable of finding the point of convergence of SVGD where the kernel gradient and objective gradient balance each other out.
We see the fact that the resulting total step length does not reach zero across all problems as an artifact of the stochastic gradient estimates that CMA-ES provides.
We do not see this as a problem, however, as the MMD analysis in Fig.\ \ref{fig:mmd} shows that the particles only move within areas of high density.
Since the approximation of the target density in SVGD is based on a sum of delta functions, which approximates the target as $q(\vx) = \sum_{\vx_i \in X} \delta (\vx - \vx_i) / \varrho$, it permits small particle movements at the equilibrium between the repulsive and driving forces in the update.

% \subsection{Comparison to gradient-based SVGD}
% We show additional comparisons of all gradient-free methods with respect to the solutions that were sampled using gradient-based SVGD.
% The goal of this analysis to assess how well the individual methods can reproduce the results of gradient-based SVGD.
% Fig. \ref{fig:mmd} (d) already displays the aggregated MMD with respect to gradient-based SVGD across all synthetic tasks.
% We expand on this in Fig.\ \ref{fig:mmd2}, where we plot the individual task performances separately. 
% Overall, the results demonstrate that SV-CMA-ES approximates the sample distribution quickly and accurately.


% \begin{figure*}[h]
%     \centering
%     \subfloat[Gaussian Mixture]{
%         \includesvg[width=.21\linewidth]{imgs/approximation/GMM_approximation}
%     }\hfill
%     \subfloat[Double Banana]{
%         \includesvg[width=.225\linewidth]{imgs/approximation/banana_approximation}
%     }\hfill
%     \subfloat[Motion Planning]{
%         \includesvg[width=.22\linewidth]{imgs/approximation/ramos_approximation}
%     }\hfill
%     \subfloat[Aggregated MMD]{
%         \includesvg[width=.225\linewidth]{imgs/approximation/meta_approximation}
%     }\hfill
%     \caption{\bblue{(a)-(c): MMD convergence results \wrt to the samples obtained by gradient-based SVGD. (d) Mean log10 MMD across all three sampling tasks. 
%     All results are averaged across 10 independent runs ($\pm 1.96$ standard error).}
%     }
%     \label{fig:mmd2}
% \end{figure*}

\subsection{Convergence Plots}\label{sec:app_full}
For completeness, we depict the convergence results on the synthetic sampling tasks.
For each task and method combination, we display the evolution of the sample set across the optimization.
To this end, we split the total number of iterations into 10 equally-sized bins and generate a plot for each.
In other words, we plot the sample set every 100 iterations.
The results for the Gaussian mixture are listed in Fig.~\ref{fig:gmm_convergence_iter}, those for the Double Banana density in Fig.~\ref{fig:banana_convergence_iter}, and Fig.~\ref{fig:ramos_convergence_iter} shows the convergence on the trajectory optimization task.
All plots confirm the quantitative results, i.e., SV-CMA-ES converges quickly compared to the baselines.
We note that we refer to convergence once all particles are in high density areas.
Due to the stochastic nature of the update, there is no guarantee that there is an actual stationary distribution of the finite optimization process.
For storage reasons, we include these images as raster images.
We encourage readers to reach out to us in case they are interested in the vector graphics version for these last three plots.

\begin{figure*}[!b]
    \centering
    \subfloat[$\N$-SVGD]{
        \includegraphics[width=.9\linewidth]{imgs/convergences/OG_SVGDsamples_sequence.png}
    }\\
    \subfloat[SV-CMA-ES]{
        \includegraphics[width=.9\linewidth]{imgs/convergences/BB_SVGD_ESsamples_sequence.png}
    }\\
    \subfloat[GF-SVGD]{
        \includegraphics[width=.9\linewidth]{imgs/convergences/GF_SVGDsamples_sequence.png}
    }\\
    \subfloat[SV-OpenAI-ES]{
        \includegraphics[width=.9\linewidth]{imgs/convergences/MC_SVGDsamples_sequence.png}
    }\\
    \begin{tikzpicture}[x=\linewidth/1000, >=stealth, thick, line cap=round, line join=round]
      \draw[->] (0,0) -- (850,0);
      \draw (0, 0.2) -- (0, -0.2) node[below=2pt]{Iteration 1};
      \draw (415, 0.2) -- (415, -0.2) node[below=2pt]{500};
      \draw (850, 0.2) -- (850, -0.2) node[below=2pt]{1000};
    \end{tikzpicture}
    \caption{Convergences on GMM sampling task. For each method, the sample convergence across the full 1000 sampling iterations is displayed. }
    \label{fig:gmm_convergence_iter}
\end{figure*}

\begin{figure*}
    \centering
    \subfloat[$\N$-SVGD]{
        \includegraphics[width=.95\linewidth]{imgs/convergences/OG_SVGDbananasamples_sequence}
    }\\
    \subfloat[SV-CMA-ES]{
        \includegraphics[width=.95\linewidth]{imgs/convergences/BB_SVGD_ESbananasamples_sequence}
    }\\
    \subfloat[GF-SVGD]{
        \includegraphics[width=.95\linewidth]{imgs/convergences/GF_SVGDbananasamples_sequence}
    }\\
    \subfloat[SV-OpenAI-ES]{
        \includegraphics[width=.95\linewidth]{imgs/convergences/MC_SVGDbananasamples_sequence}
    }\\
    \begin{tikzpicture}[x=\linewidth/1000, >=stealth, thick, line cap=round, line join=round]
      \draw[->] (0,0) -- (850,0);
      \draw (0, 0.2) -- (0, -0.2) node[below=2pt]{Iteration 1};
      \draw (415, 0.2) -- (415, -0.2) node[below=2pt]{500};
      \draw (850, 0.2) -- (850, -0.2) node[below=2pt]{1000};
    \end{tikzpicture}
    \caption{Convergences on Double Banana sampling task. For each method, the sample convergence across the full 1000 sampling iterations is displayed.}
    \label{fig:banana_convergence_iter}
\end{figure*}

\begin{figure*}[t]
    \centering
    \subfloat[$\N$-SVGD]{
        \includegraphics[width=.95\linewidth]{imgs/convergences/F_RAMOSOG_SVGDsamples_sequence}
    }\\
    \subfloat[SV-CMA-ES]{
        \includegraphics[width=.95\linewidth]{imgs/convergences/F_RAMOSBB_SVGD_ESsamples_sequence}
    }\\
    \subfloat[GF-SVGD]{
        \includegraphics[width=.95\linewidth]{imgs/convergences/F_RAMOSGF_SVGDsamples_sequence}
    }\\
    \subfloat[SV-OpenAI-ES]{
        \includegraphics[width=.95\linewidth]{imgs/convergences/F_RAMOSMC_SVGDsamples_sequence}
    }\\
    \begin{tikzpicture}[x=\linewidth/1000, >=stealth, thick, line cap=round, line join=round]
      \draw[->] (0,0) -- (850,0);
      \draw (0, 0.2) -- (0, -0.2) node[below=2pt]{Iteration 1};
      \draw (415, 0.2) -- (415, -0.2) node[below=2pt]{500};
      \draw (850, 0.2) -- (850, -0.2) node[below=2pt]{1000};
    \end{tikzpicture}
    \caption{Convergences on motion planning sampling task. For each method, the sample convergence across the full 1000 sampling iterations is displayed.}
    \label{fig:ramos_convergence_iter}
\end{figure*}

% \subsection{Rebuttal Additions}
% \begin{figure*}
%     \centering
%     \subfloat[Annealing]{
%         \includesvg[width=.15\linewidth]{imgs/banana/BB_SVGD_ESbananasamples_new}
%     }\hspace{.05\linewidth}
%     \subfloat[No Annealing]{
%         \includesvg[width=.15\linewidth]{imgs/banana/BB_SVGD_ESbananasamples_new_no_sched}
%     }
%     \caption{Qualitative results of annealing ablation on Double banana task.}
% \end{figure*}
