% ############## Bsuite report #######################
% \newpage

% % If the package does not allow geometry, do not fail
% \ifx\newgeometry\undefined\else
% \newgeometry{top=20mm, bottom=20mm, left=20mm, right=20mm}
% \fi


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TITLE + ABSTRACT [ADD PAPER TITLE]
%
% Macros are defined in bsuite_preamble.tex, use the \label{} to \ref{} from
% other sections in your paper.
% \bsuitetitle{Approximate TS via ENNs}
% \label{app:bsuite_report}

\bsuiteabstract


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% AGENT DEFINITION [EDIT]
%
% Use this section to provide a brief overview of the agents that you run on
% bsuite. Usually this will involve links to full descriptions elsewhere in
% your paper.

\subsection{Agent definition}
\label{app:bsuite-agents}
In these experiments we use the DQN variants defined in \texttt{enn\_acme/experiments/bsuite}.
These agents differ principally in terms of their ENN definitions, which are taken directly from \texttt{neural\_testbed/agents/factories} as tuned on the Neural Testbed.
We provide a brief summary of the ENNs used by agents:
\begin{itemize}[noitemsep, nolistsep]
    \item \texttt{mlp}: A `classic' DQN network with a 2-layer MLP.
    \item \texttt{ensemble}: An ensemble of DQN networks which only differ in initialization.
    \item \texttt{dropout}: An MLP with dropout used as an ENN \citep{Gal2016Dropout}.
    \item \texttt{hypermodel}: A linear hypermodel \citep{Dwaracherla2020Hypermodels}.
    \item \texttt{ensemble+}: An ensemble of DQN networks with additive prior \citep{osband2016deep,osband2018rpf}.
    \item \texttt{epinet}: The epinet architecture from \citet{osband2021epistemic}, reviewed in Section~\ref{sec:benchmark_enn}.
\end{itemize}



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% SUMMARY SCORES [DO NOT EDIT]
\subsection{Summary scores}
\label{app:bsuite-scores}

Each \bsuite\ experiment outputs a summary score in $[0,1]$.
We aggregate these scores by key experiment type, following the standard analysis notebook.
% A detailed analysis of each of these experiments may be found in a notebook hosted on Colaboratory: \bsuitecolab.

\ifx\newgeometry\undefined\vspace{-2mm}\else\fi % Squeezing for ICML

% \begin{figure}[!ht]
%     \centering
%     \includegraphics[width=0.5\textwidth]{figures/bsuite_radar_plot.png}
%     \captionof{figure}{Radar plot gives a snapshot of agent behaviour.}
%   \label{fig:radar}
% \end{figure}

\begin{figure}[!ht]
\centering
\begin{subfigure}{.3\textwidth}
  \centering
  \includegraphics[height=3.6cm]{figures/bsuite_mlp.png}
  \caption{\texttt{mlp}}
\end{subfigure}
\hfill
\begin{subfigure}{.3\textwidth}
  \centering
  \includegraphics[height=3.6cm]{figures/bsuite_dropout.png}
  \caption{\texttt{dropout}}
\end{subfigure}
\hfill
\begin{subfigure}{.3\textwidth}
  \centering
  \includegraphics[height=3.6cm]{figures/bsuite_ensemble.png}
  \caption{\texttt{ensemble}}
\end{subfigure}
\hfill
\vspace{5mm}
\begin{subfigure}{.3\textwidth}
  \centering
  \includegraphics[height=3.6cm]{figures/bsuite_hypermodel.png}
  \caption{\texttt{hypermodel}}
\end{subfigure}
\hfill
\begin{subfigure}{.3\textwidth}
  \centering
  \includegraphics[height=3.6cm]{figures/bsuite_epinet.png}
  \caption{\texttt{epinet}}
\end{subfigure}
\hfill
\begin{subfigure}{.3\textwidth}
  \centering
  \includegraphics[height=3.6cm]{figures/bsuite_ensemble+.png}
  \caption{\texttt{ensemble+}}
\end{subfigure}
\caption{Radar plots give a snapshot of agent capabilities.}
\label{fig:radar}
\end{figure}


\begin{figure}[!ht]
    \vspace{-2mm}
    \centering
    \includegraphics[width=0.7\textwidth]{figures/bsuite_bar_plot.png}
    \vspace{-5mm}
    \caption{Summary score for each \bsuite\ experiment.}
  \label{fig:bar}
  \vspace{-2mm}
\end{figure}



% \begin{figure}[h!]
%   \centering
%   \includegraphics[width=0.6\textwidth,height=60mm,keepaspectratio]{figures/bsuite_radar_plot.png}
%   \captionof{figure}{Radar plot gives a snapshot of agent behaviour.}
%   \label{fig:radar}
% \end{figure}
% \begin{figure}
%   \centering
%   \includegraphics[width=0.75\textwidth,height=60mm,keepaspectratio]{figures/bsuite_bar_plot.png}
%   \captionof{figure}{Summary score for each \bsuite\ experiment.}
%   \label{fig:bar}
% \end{figure}

\ifx\newgeometry\undefined\vspace{-2mm}\else\fi % Squeezing for ICML

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% RESULTS COMMENTARY [EDIT]
\newpage
\subsection{Results commentary}
\label{app:bsuite-commentary}

\begin{itemize}[noitemsep, nolistsep, leftmargin=*]
    \item \texttt{mlp}  performs well on basic tasks, and quite well on credit assignment, generalization, noise and scale.
    However, DQN performs extremely poorly across memory and exploration tasks.
    Our results match the high-level performance of the \texttt{bsuite/baselines}.
    \item \texttt{ensemble} performs similarly to the \texttt{mlp} agent. The additional diversity provided by random initialization in ensemble particles is insufficient to drive significantly different behaviour.
    \item \texttt{dropout} performs very similarly to the \texttt{mlp} agent. Different dropout masks are not sufficient to drive significantly different behaviour on \bsuite.
    \item \texttt{hypermodel} performs better than the \texttt{mlp}, \texttt{ensemble}, and \texttt{dropout} agents on exploration tasks, but the performance does not scale to the most challenging tasks in \bsuite.
    \item \texttt{ensemble+}, also known as Bootstrapped DQN \citep{osband2016deep, osband2018rpf}, mostly performs similarly to the \texttt{ensemble} agent, except for exploration, where it greatly outperforms the \texttt{mlp}, \texttt{ensemble}, and \texttt{dropout} agents. The addition of prior functions is crucial to this performance.
    \item \texttt{epinet} performs similarly to the \texttt{ensemble+} agent, but with much lower compute. We do see some evidence that, compared to other approaches, the \texttt{epinet} agent is less robust to problem \textit{scale}. This matches our observation in supervised learning that epinet performance is somewhat sensitive to the chosen scaling of the prior networks $\sigma^P$.
\end{itemize}

None of the agents we consider have a mechanism for memory as they use feed-forward networks.
We could incorporate memory by considering modifications to the agents, but we don't explore that here.

\newpage
