% \documentclass{uai2022} % for initial submission
\documentclass[accepted]{uai2022} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2022} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2022} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
\usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

\usepackage{algorithm}
\usepackage{algorithmic}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{subfigure}
\usepackage{bibentry}
% END REMOVE bibentry

\usepackage{xcolor}
\usepackage{colortbl}
\usepackage[switch]{lineno}

\usepackage{etoolbox}
\newrobustcmd\B{\DeclareFontSeriesDefault[rm]{bf}{b}\bfseries} 

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

\title{Do Bayesian Variational Autoencoders Know What They Don't Know? (Supplementary material)}

% The standard author block has changed for UAI 2022 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author{\href{mailto:<m.glazunov@tudelft.nl>?Subject=Do Bayesian VAEs Know What They Don't Know}{Misha~Glazunov}{}}
\author{Apostolis~Zarras}
% Add affiliations after the authors
\affil{%
Delft University of Technology, the Netherlands
}

  
\begin{document}
\maketitle
% \linenumbers

\begin{figure*}[ht]
\centering 
\subfigure{\includegraphics[width=0.5\columnwidth]{figures/BBB_MNIST_std_crop.pdf}}
\hfill
\subfigure{\includegraphics[width=0.5\columnwidth]{figures/BBB_FMNIST_std_crop.pdf}}
\hfill
\subfigure{\includegraphics[width=0.5\columnwidth]{figures/BBB_SVHN_std_crop.pdf}}
\hfill
\subfigure{\includegraphics[width=0.5\columnwidth]{figures/BBB_CIFAR10_std_crop.pdf}}
\hfill
\subfigure{\includegraphics[ width=0.47\columnwidth]{figures/SGHMC_MNIST_std_crop.pdf}}
\hfill
\subfigure{\includegraphics[width=0.5\columnwidth]{figures/SGHMC_FMNIST_std_crop.pdf}}
\hfill
\subfigure{\includegraphics[width=0.5\columnwidth]{figures/SGHMC_SVHN_std_crop.pdf}}
\hfill
\subfigure{\includegraphics[width=0.5\columnwidth]{figures/SGHMC_CIFAR10_std_crop.pdf}}
\caption{Histograms of the sample standard deviations of the marginal log-likelihoods; blue depicts in-distribution (ID) data and orange depicts out-of-distribution (OoD) data.
\textbf{From left to right}: MNIST as ID vs Fashion-MNIST as OoD, Fashion-MNIST as ID vs MNIST as OoD, SVHN as ID vs CIFAR-10 as OoD, CIFAR-10 as ID vs SVHN as OoD.
\textbf{Top:} Sampling is done from Bayes-by-backprop VAE. 
\textbf{Bottom:} Sampling is done from SGHMC VAE.}
\label{fig:StdsLL}
\end{figure*}

\begin{figure*}[t]
\centering 
\subfigure{\includegraphics[width=0.5\columnwidth]{figures/mnist_bbb_800_data_reconstructed.pdf}}
\hfill
\subfigure{\includegraphics[width=0.5\columnwidth]{figures/fmnist_bbb_800_data_reconstructed_3.pdf}}
\hfill
\subfigure{\includegraphics[width=0.5\columnwidth]{figures/svhn_bbb_995_data_reconstructed.pdf}}
\hfill
\subfigure{\includegraphics[width=0.5\columnwidth]{figures/cifar10_bbb_data_reconstructed.pdf}}
\hfill
\subfigure{\includegraphics[ width=0.5\columnwidth]{figures/mnist_sghmc_800_data_reconstructed.pdf}}
\hfill
\subfigure{\includegraphics[width=0.5\columnwidth]{figures/fmnist_sghmc_965_data_reconstructed.pdf}}
\hfill
\subfigure{\includegraphics[width=0.5\columnwidth]{figures/svhn_sghmc_800_data_reconstructed.pdf}}
\hfill
\subfigure{\includegraphics[width=0.5\columnwidth]{figures/cifar10_sghmc_data_reconstructed.pdf}}

\caption{\textbf{From left to right}: MNIST, Fashion-MNIST, SVHN, CIFAR-10.
\textbf{Top:} Random samples from BBB VAE. 
\textbf{Bottom:} Random samples from SGHMC VAE.}
\label{fig:BBBSGHMCSamples}
\end{figure*}



\appendix

\providecommand{\upGamma}{\Gamma}
\providecommand{\uppi}{\pi}

\section{Sample standard deviations of the marginal log-likelihoods}
The sample standard deviations of the marginal log-likelihoods for BBB and SGHMC methods can be observed in Figure~\ref{fig:StdsLL}.

\section{VAE distributions}
\begin{itemize}
  \item For the prior, we used a standard multivariate Gaussian without parameters: $p(\mathbf{z}) = \mathcal{N}(\mathbf{z}; \mathbf{0},\,\mathbf{I})$.
  \item For the variational distribution, we used a multivariate factorized Gaussian with learned mean and variance: $q_{\boldsymbol{\phi}}(\mathbf{z} \mid \mathbf{x}) = \mathcal{N}(\mathbf{z}; \boldsymbol{\mu}, \operatorname{diag}(\boldsymbol{\sigma}^{2}))$.
  \item For the likelihood, we used a multivariate factorized Bernoulli distribution:
\begin{equation}
\begin{aligned}
p(\mathbf{x} \mid \mathbf{z}) &=\prod_{j=1}^{D} p\left(x_{j} \mid \mathbf{z}\right)=\prod_{j=1}^{D} \operatorname{Bernoulli}\left(x_{j} ; p_{j}\right).
\end{aligned}
\end{equation}
\end{itemize}

\section{CNN architectures used}
For the single-channel MNIST and Fashion-MNIST datasets, we used the architectures depicted in Table~\ref{table:FMNISTEncoderCNN} and Table~\ref{table:FMNISTDecoderCNN}.

\begin{table}[H]
\centering
\caption{Encoder CNN for MNIST and Fashion-MNIST}
\label{table:FMNISTEncoderCNN}
\resizebox{\columnwidth}{!}{
\begin{tabular}{lccc}
\toprule
\textbf{Operation} &  \textbf{Kernel} & \textbf{Strides} & \textbf{Feature Maps}  \\
\midrule
 Convolution &  $3 \times 3$ & $1 \times 1$ &  32\\
 Convolution & $3 \times 3$ & $1 \times 1$ & 16 \\
 Max pooling 2D & $2 \times 2$ & $2 \times 2$ & \textemdash \\
 Linear for $\boldsymbol{\mu}$ & \textemdash & \textemdash & 10 \\
 Linear for $\log \boldsymbol{\sigma}$ & \textemdash & \textemdash & 10 \\
\bottomrule
\end{tabular}
}
\end{table}

\begin{table}[H]
\centering
\caption{Decoder CNN for MNIST and Fashion-MNIST}
\label{table:FMNISTDecoderCNN}
\resizebox{\columnwidth}{!}{
\begin{tabular}{lccc}
\toprule
\textbf{Operation} &  \textbf{Kernel} & \textbf{Strides} & \textbf{Feature Maps}  \\
\midrule
 Linear for sampled $\mathbf{z}$ & \textemdash & \textemdash & 2306 \\
 Upsampling nearest 2D & \textemdash & \textemdash & \textemdash \\
 Max pooling 2D & $2 \times 2$ & $2 \times 2$ & \textemdash \\
 Transposed Convolution &  $3 \times 3$ & $1 \times 1$ &  32\\
 Transposed Convolution & $3 \times 3$ & $1 \times 1$ & 1 \\
\bottomrule
\end{tabular}
}
\end{table}

For the three-channel SVHN and CIFAR-10 datasets, we used the following architectures, with a padding of 1 and no bias in every convolutional layer (see Table~\ref{table:SVHNEncoderCNN} and Table~\ref{table:SVHNDecoderCNN}). The latent dimensionality is 20 for SVHN and 70 for CIFAR-10.

\begin{table}[H]
\centering
\caption{Encoder CNN for SVHN and CIFAR-10}
\label{table:SVHNEncoderCNN}
\resizebox{\columnwidth}{!}{
\begin{tabular}{lccc}
\toprule
 \textbf{Operation} &  \textbf{Kernel} & \textbf{Strides} & \textbf{Feature Maps}  \\
\midrule
 Convolution &  $3 \times 3$ & $1 \times 1$ &  16\\
 Batch normalization & \textemdash & \textemdash & 16 \\
 Convolution & $3 \times 3$ & $2 \times 2$ & 32  \\
 Batch normalization & \textemdash & \textemdash & 32 \\
 Convolution & $3 \times 3$ & $1 \times 1$ & 32  \\
 Batch normalization & \textemdash & \textemdash & 32 \\
 Convolution & $3 \times 3$ & $2 \times 2$ & 16  \\
 Batch normalization & \textemdash & \textemdash & 16 \\
 Linear & \textemdash & \textemdash & 512 \\
 Batch normalization & \textemdash & \textemdash & 512 \\
 Linear for $\boldsymbol{\mu}$ & \textemdash & \textemdash & 20 / 70 \\
 Linear for $\log \boldsymbol{\sigma}$ & \textemdash & \textemdash & 20 / 70  \\
\bottomrule
\end{tabular}
}
\end{table}




\begin{table}[H]
\centering
\caption{Decoder CNN for SVHN and CIFAR-10}
\label{table:SVHNDecoderCNN}
\resizebox{\columnwidth}{!}{
\begin{tabular}{lccc}
\toprule
 \textbf{Operation} &  \textbf{Kernel} & \textbf{Strides} & \textbf{Feature Maps}  \\
\midrule
 Linear for sampled $\mathbf{z}$ &  \textemdash & \textemdash &  512\\
 Batch normalization & \textemdash & \textemdash & 512 \\
 Linear & \textemdash & \textemdash &  1024\\
 Batch normalization & \textemdash & \textemdash & 1024 \\
 Transposed Convolution & $3 \times 3$ & $2 \times 2$ & 32  \\
 Batch normalization & \textemdash & \textemdash & 32 \\
 Transposed Convolution & $3 \times 3$ & $1 \times 1$ & 32  \\
 Batch normalization & \textemdash & \textemdash & 32 \\
 Transposed Convolution & $3 \times 3$ & $2 \times 2$ & 16  \\
 Batch normalization & \textemdash & \textemdash & 16 \\
 Transposed Convolution & $3 \times 3$ & $1 \times 1$ & 3  \\
\bottomrule
\end{tabular}
}
\end{table}

For all architectures, we used ReLU as the non-linearity. In addition, all pixel values of the images were normalized to the $[0, 1]$ range for each channel in both the training and testing phases.

\section{Samples from trained models}
Random samples from all of the trained models for both BBB and SGHMC can be seen in Figure~\ref{fig:BBBSGHMCSamples}.


\section{Runtimes of different methods}

The runtimes until training convergence on CIFAR-10 (the most complex dataset used in the experiments) for the different \emph{Bayesian} methods are reported in Table~\ref{table:BVAERuntimes}.

\begin{table}[H]
\sisetup{detect-weight,mode=text}
\centering
\caption{BVAE runtimes for learning}
\label{table:BVAERuntimes}
\begin{tabular}{lc}
\toprule
\textbf{Method} & \textbf{Time (mins)} \\
\midrule
BBB & 1628  \\
SGHMC & 1473  \\
SWAG & 371  \\
Vanilla & 345  \\
\bottomrule
\end{tabular}
\end{table}




\end{document}
