% \documentclass{uai2022} % for initial submission
\documentclass[accepted]{uai2022} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}

\usepackage{xr}
% \usepackage{cleveref}
\makeatletter
% \addFileDependency{<file>}
%   Writes "(<file>)" to the terminal/log with \typeout, appends <file> to the
%   list maintained by \@addtofilelist, and prints "No file <file>." when
%   \IfFileExists cannot locate it.
%   NOTE(review): presumably this exists so that build tools scanning the log
%   or file list notice the dependency -- confirm against the build setup.
%   Every line end inside the body is guarded with % so that the replacement
%   text contains no stray space tokens.
\newcommand*{\addFileDependency}[1]{%
  \typeout{(#1)}%
  \@addtofilelist{#1}%
  \IfFileExists{#1}{}{\typeout{No file #1.}}%
}
\makeatother

% \myexternaldocument{<basename>}
%   Calls \externaldocument{<basename>} (package xr) and then registers both
%   <basename>.tex and <basename>.aux through \addFileDependency.
%   Line ends inside the body are guarded with % to avoid stray space tokens.
\newcommand*{\myexternaldocument}[1]{%
    \externaldocument{#1}%
    \addFileDependency{#1.tex}%
    \addFileDependency{#1.aux}%
}
%%% END HELPER CODE
% put all the external documents here!
\myexternaldocument{bhat_587}
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2022} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2022} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{Identifying near-optimal decisions in linear-in-parameter bandit models with continuous decision sets: Supplementary material}

% The standard author block has changed for UAI 2022 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is authomatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author{\href{mailto:<sanjay.bhat@tcs.com>?Subject=Your UAI 2022 paper}{Sanjay~P.~Bhat}{}}
\author{Chaitanya~Amballa}
% Add affiliations after the authors
\affil{%
    TCS Research, Hyderabad, India
}
% \affil[2]{%
%     Second Affiliation\\
%     Address\\
%     …
% }
% \affil[3]{%
%     Another Affiliation\\
%     Address\\
%     …
%   }


% ----------OUR CONTENT----------


\usepackage{mathtools}
\usepackage{amsthm}
\usepackage{amssymb}
\usepackage{psfrag,graphicx}
\usepackage{algorithmic}
\usepackage{algorithm}
\usepackage{breqn}
\usepackage{lineno}
% \usepackage{algpseudocode}
% \renewcommand{\algorithmiccomment}[1]{\hfill$\triangleright$\textit{#1}}
% \usepackage{multirow}


% Notation shorthands used throughout the supplementary material.
% Each macro is a plain abbreviation of the symbol in its replacement text.
\newcommand{\mcd}{\mathcal{D}}
\newcommand{\real}{\mathbb{R}}
\newcommand{\pint}{\mathbb{Z}_{+}}
\newcommand{\cof}{\bar{\mu}}
% Superscripts "T" and "-T" on the argument; \mathrm replaces the obsolete
% \rm font switch and produces the same output.
\newcommand{\tp}[1]{{#1}^{\mathrm{T}}}
\newcommand{\itp}[1]{{#1}^{\mathrm{-T}}}
\newcommand{\cumr}{\bar{R}}
\newcommand{\mcf}{\mathcal{F}}
\newcommand{\mcg}{\mathcal{G}}
\newcommand{\mcn}{\mathcal{N}}
\newcommand{\prob}{\mathbb{P}}
\newcommand{\ig}{\mathrm{IG}}
% Equality sign with "def" stacked on top.
\newcommand{\isdef}{\stackrel{\mathrm{def}}{=}}
\newcommand{\ebb}{\mathbb{E}}
\newcommand{\hav}{\hat{V}}
\newcommand{\has}{\hat{S}}
\newcommand{\hax}{\hat{x}}
\newcommand{\bam}{\bar{M}}
\newcommand{\lam}{\lambda_{\min}}
\newcommand{\laM}{\lambda_{\max}}
\newcommand{\tr}{\mathrm{trace}}
\newcommand{\haeps}{\hat{\epsilon}}
\newcommand{\hag}{\hat{g}}
\newcommand{\ve}{\varepsilon}
% Norm bars around the argument with subscript \infty.
\newcommand{\spn}[1]{\|{#1}\|_{\infty}}
\newcommand{\mh}{\hat{\mu}}
\newcommand{\by}{\bar{y}}
\newcommand{\bta}{\bar{\eta}}
\newcommand{\sn}{\mathrm{S}^{f-1}}
\newcommand{\algo}{\mathcal{A}}
\newcommand{\mcs}{\mathcal{S}}
\newcommand{\setv}{\mathbb{F}}
\newcommand{\mco}{\mathcal{O}_{\ve}}
\newcommand{\alt}{\mathrm{Alt}_{\ve}}
\newcommand{\mcw}{\mathcal{W}}
\newcommand{\mcl}{\mathcal{L}^{\mu,\zeta}}
\newcommand{\kl}{\mathrm{kl}}
\newcommand{\vecp}{\mathbf{p}}
% \vecp with superscript s and subscript i.
\newcommand{\vecpsi}{\vecp^{s}_{i}}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% THEOREMS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% "plain" style: `theorem` is numbered within each section, and
% proposition, lemma and corollary share the `theorem` counter.
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{lemma}[theorem]{Lemma}
% \newtheorem{lemma}[theorem]{Lemma}
\newtheorem{corollary}[theorem]{Corollary}
% "definition" style: these environments also share the `theorem` counter.
\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{assumption}[theorem]{Assumption}
\newtheorem{example}[theorem]{Example}
% "remark" style.
\theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}
% NOTE(review): `assum` has its own counter, is declared while the remark
% style is active, and prints the same "Assumption" heading as `assumption`
% above -- confirm that both environments are needed.
\newtheorem{assum}{Assumption}

\begin{document}
\maketitle


Please note that the numbering of equations, figures and tables  in the supplementary material is in continuation with that in the main paper.

\setcounter{figure}{2}
\appendix
\setcounter{equation}{11}
\section{Proof of Theorem \ref{lbthm}}\label{lbapp}
We begin by recalling properties of the measures $\prob^{\algo,\mu}$ and $\prob^{\algo,\mu}_{t}$ introduced in Section \ref{lbsec}.
By Proposition 7.28 in \cite{bertsekas1996stochastic}, the measures $\prob^{\algo,\mu}_{t}$, $t\in\pint$, and $\prob^{\algo,\mu}$ satisfy the following properties (see also Proposition V.1.1 of \cite{neveu}):
\begin{enumerate}
   % \item $\prob^{\algo,\mu}$ and $\prob^{\algo,\mu}_{t}$ are defined on the Borel $\sigma$-algebras of $\Omega$ and $\Omega_{t}$, respectively.
    \item For every real-valued function $q$ that is integrable on $(\Omega_{1},\mcf_{1},\prob^{\algo,\mu}_{1})$, we have
    \begin{dmath}
    \int\limits_{\Omega_{1}}q(h_{1})\prob^{\algo,\mu}_{1}(\mathrm{d}h_{1})=\int\limits_{[0,1]^{n}}\int\limits_{\mcd}\int\limits_{\real}q(u,s,y)
    \times Q^{\mu}(\mathrm{d}y|s)\pi_{1}(\mathrm{d}s|u)\lambda(\mathrm{d}u).
    \label{it1}
    \end{dmath}
    \item For every $t>1$ and every  integrable  real-valued function $q$ on $(\Omega_{t},\mcf_{t},\prob^{\algo,\mu}_{t})$, we have
    \begin{dmath}
    \int\limits_{\Omega_{t}}q(h_{t})\prob^{\algo,\mu}_{t}(\mathrm{d}h_{t})=\int\limits_{\Omega_{t-1}}\int\limits_{[0,1]^{n}}\int\limits_{\mcd}\int\limits_{\real}q(h_{t-1},u,s,y)
    \times Q^{\mu}(\mathrm{d}y|s)\pi_{1}(\mathrm{d}s|u)\lambda(\mathrm{d}u)\prob^{\algo,\mu}_{t-1}(\mathrm{d}h_{t-1}).
    \label{it2}
    \end{dmath}
    \item For every $t\in\pint$ and every Borel subset $A$ of $\Omega_{t}$, we have  $\prob^{\algo,\mu}_{t}(A)=\prob^{\algo,\mu}(A\times\mcs\times\mcs\times\cdots)$.
\end{enumerate}

Next, suppose Assumption \ref{assum2} holds, and let $\rho_{z,\nu}(\cdot)$ denote the Gaussian density on $\real$ with mean $z\in\real$ and standard deviation  $\nu>0$. Then, for every $\zeta\in\real^{f}$ and $s\in\mcd$, the measure $Q^{\zeta}(\cdot|s)$ is a Gaussian measure on $\real$ having density $\rho_{g_{\zeta}(s),\sigma}(\cdot)$ w.r.t. the Lebesgue measure on $\real$. Consequently, for every $\zeta\in\real^{f}$ and $s\in\mcd$, the measures $Q^{\zeta}(\cdot|s)$ and  $Q^{\mu}(\cdot|s)$ are mutually absolutely continuous, and
\begin{equation}
    \left.\frac{\mathrm{d}Q^{\mu}(\cdot|s)}{\mathrm{d}Q^{\zeta}(\cdot|s)}\right|_{y}=\frac{\rho_{g_{\mu}(s),\sigma}(y)}{\rho_{g_{\zeta}(s),\sigma}(y)}.\label{rnkernel}
\end{equation}
We are now ready to begin the proof of Theorem \ref{lbthm}.

{\bf Proof of Theorem \ref{lbthm}} Consider an alternative reward model given by  $\zeta\in\alt(\mu)$. For each $t\in\pint$, define the log-likelihood ratio $\mcl_{t}\isdef \ln \frac{\mathrm{d}\prob^{\algo,\mu}_{t}}{\mathrm{d}\prob^{\algo,\zeta}_{t}}$, and note that $\mcl_{t}$ is a random variable on $(\Omega_{t},\mcf_{t},\prob^{\algo,\mu}_{t})$. It is now easy to see from (\ref{it1}),  (\ref{it2}) and (\ref{rnkernel}) that, for each $t\in\pint$ and each $h_{t}=\{(s_{i},y_{i},u_{i})\}_{i=1}^{t}\in\Omega_{t}$, we have
\begin{equation}
    \mcl_{t}(h_{t})=\sum_{i=1}^{t}[\ln \rho_{g_{\mu}(s_{i}),\sigma}(y_{i})-\ln \rho_{g_{\zeta}(s_{i}),\sigma}(y_{i})].
    \label{lle}
\end{equation}

Next, define the event $\mathcal{E}^{\mu}\isdef \{\arg\max_{s\in \mcd}g_{\mu}(s)\subseteq \setv(h_{\tau})\subseteq \mco(\mu)\}$, and note that $\mathcal{E}^{\mu}$ is contained in the $\sigma$-algebra $\mcf_{\tau}$ generated by the stopping time $\tau$. It follows from Lemma 19 of \cite{kauffman} that
\begin{equation}
    \ebb^{\algo,\mu}(\mcl_{\tau})\geq \kl(\prob^{\algo,\mu}(\mathcal{E}^{\mu}),\prob^{\algo,\zeta}(\mathcal{E}^{\mu})),
    \label{kaufflem}
\end{equation}
where $\kl(\nu_{1},\nu_{2})$ is the KL-divergence between two Bernoulli distributions having parameters $\nu_{1},\nu_{2}\in [0,1]$.

Next, define the event $\mathcal{E}^{\zeta}$ by replacing $\mu$ in the definition of $\mathcal{E}^{\mu}$ with $\zeta$.  Since $\algo$ is a $(\ve,\delta)$-PAC algorithm, we have $\prob^{\algo,\mu}(\mathcal{E}^{\mu})\geq 1-\delta$ and $\prob^{\algo,\zeta}(\mathcal{E}^{\zeta})\geq 1-\delta$. By our choice of $\zeta$, we have $\mathcal{E}^{\mu}\cap\mathcal{E}^{\zeta}\subseteq\{\setv(h_{\tau})\subseteq \mco(\mu)\cap\mco(\zeta)\}=\varnothing$. As a result, we infer that  $\prob^{\algo,\zeta}(\mathcal{E}^{\mu})\leq 1-\prob^{\algo,\zeta}(\mathcal{E}^{\zeta})\leq \delta$. Monotonicity properties of the KL divergence now imply that $\kl(\prob^{\algo,\mu}(\mathcal{E}^{\mu}),\prob^{\algo,\zeta}(\mathcal{E}^{\mu}))\geq \kl(\delta,1-\delta)$.
%{\textcolor{red}{need to check this and find a reference, or easy way to convince}}.
By inequality (3) in \cite{kauffman}, we further have $\kl(\delta,1-\delta)\geq \ln(1/2.4\delta)$. Using this in (\ref{kaufflem}), we get
\begin{equation}
    \ebb^{\algo,\mu}(\mcl_{\tau})\geq \ln\left(\frac{1}{2.4\delta}\right).
    \label{kauff2}
\end{equation}
Combining (\ref{kauff2}) with (\ref{lleineq}) from Lemma \ref{llelem} below yields
\[\frac{1}{2\sigma^{2}}\ebb^{\algo,\mu}(\tau)\|g_{\mu}-g_{\zeta}\|_{\infty}^{2}
%\max_{s\in\mathcal{D}}[g_{\mu}(s)-g_{\zeta}(s)]^{2}
%\sup_{\xi\in\mcw}(\mu-\zeta)^{\rm T}V_{\xi}(\mu-\zeta)
\geq \ln\left(\frac{1}{2.4\delta}\right).\]
Inequality (\ref{lbeq}) now follows by taking an infimum over $\zeta\in\alt(\mu)$ on the left hand side in the inequality above and rearranging the resulting inequality.
\hfill $\Box$

\begin{lemma}
Let algorithm $\algo$ and $\mu,\zeta\in\real^{f}$ be as in the proof of Theorem \ref{lbthm}.  Suppose $\{\mcl_{t}\}_{t=1}^{\infty}$ is defined as in (\ref{lle}), and let $\tau$ be a stopping time with respect to the filtration $\{\mcf_{t}\}_{t=0}^{\infty}$. Then we have
\begin{equation}
    \ebb^{\algo,\mu}(\mcl_{\tau})\leq \frac{1}{2\sigma^{2}}\ebb^{\algo,\mu}(\tau)\|g_{\mu}-g_{\zeta}\|_{\infty}^{2}.
   % \max_{s\in\mathcal{D}}[g_{\mu}(s)-g_{\zeta}(s)]^{2}.
    \label{lleineq}
\end{equation}
\label{llelem}
\end{lemma}
\begin{proof}
For each $t\in\pint$, denote $\ell_{t}= \ln \rho_{g_{\mu}(s_{t}),\sigma}(y_{t})-\ln \rho_{g_{\zeta}(s_{t}),\sigma}(y_{t})$, and let $\mcg_{t}$ denote the $\sigma$-algebra on $\Omega$ generated by $(h_{t-1},u_{t},s_{t})$. Note that, for each $t\in\pint$,  $\ell_{t}$ is a $\mcf_{t}$-measurable random variable, while $\mcf_{t-1}\subseteq\mcg_{t}\subseteq \mcf_{t}$.

Next, define the process $\{M_{t}\}_{t=0}^{\infty}$ by $M_{0}=0$ and $M_{t}=\sum_{i=1}^{t}[\ell_{i}-\ebb^{\algo,\mu}(\ell_{i}|\mcg_{i})]$  for each $t\in\pint$. The inclusions $\mcf_{t-1}\subseteq\mcg_{t}\subseteq \mcf_{t}$ along with the tower property of conditional expectations show that the process $\{M_{t}\}_{t=0}^{\infty}$ is adapted to the filtration $\{\mcf_{t}\}_{t=0}^{\infty}$ and is a martingale under the measure $\prob^{\algo,\mu}$.
%{\textcolor{red}{Need to argue the integrability.}}
The optional stopping theorem now  implies that $\ebb^{\algo,\mu}(M_{\tau})=\ebb^{\algo,\mu}(M_{0})=0$. This immediately yields
\begin{equation}
    %\ebb^{\algo,\mu}(\mcl_{\tau})=
    \ebb^{\algo,\mu}\left[\sum_{i=1}^{\tau}\ell_{i}\right]=\ebb^{\algo,\mu}\left[\sum_{i=1}^{\tau}\ebb^{\algo,\mu}(\ell_{i}|\mcg_{i})\right].
    \label{llelem1}
\end{equation}

Substituting the expression for a Gaussian density in the expression for $\ell_{i}$  yields $2\sigma^{2}\ell_{i}=2y_{i}[g_{\mu}(s_{i})-g_{\zeta}(s_{i})]+[g_{\zeta}(s_{i})]^{2}-[g_{\mu}(s_{i})]^{2}$ for each $i\in\pint$. Using the facts that $s_{i}$ is $\mcg_{i}$-measurable and $\ebb^{\algo,\mu}(y_{i}|\mcg_{i})=g_{\mu}(s_{i})$ gives  $2\sigma^{2}\ebb^{\algo,\mu}(\ell_{i}|\mcg_{i})=[g_{\zeta}(s_{i})-g_{\mu}(s_{i})]^{2}\leq \max_{s\in\mathcal{D}}[g_{\mu}(s)-g_{\zeta}(s)]^{2}$ for each $i\in\pint$. Using the last inequality in (\ref{llelem1}) and recognizing the left hand side of (\ref{llelem1}) to be $\ebb^{\algo,\mu}(\mcl_{\tau})$ yields (\ref{lleineq}). %\textcolor{red}{Integrability conditions for getting (\ref{llelem1}). Reference for optional stopping theorem.}
\end{proof}

\section{Proofs for subsections \ref{ss1} and \ref{ss3}}\label{appe}
{\bf Proof of Proposition \ref{eoptprop}.}
Choose $s^{*} \in \arg\max_{s\in \mcd} q(s)$,  and  consider $s \in \mcd^{\prime}$. We have
$  q(s^{*}) \leq \hat{q}(s^{*}) + \frac{\ve}{4}
  \leq \hat{q}(\hat{s}) + \frac{\ve}{4}
  \leq \hat{q}(s) + \frac{3\ve}{4}
  \leq q(s) + \ve$,
% \begin{eqnarray}
%   h(s^{*}) &\leq& \hat{h}(s^{*}) + \frac{\ve}{4} \nonumber \\
%   &\leq& \hat{h}(\hat{s}) + \frac{\ve}{4}  \nonumber \\
%   &\leq& \hat{h}(s) + \frac{3\ve}{4}  \nonumber \\
%   &\leq& h(s) + \ve \nonumber,
% \end{eqnarray}
where the first and last inequalities follow from $\spn{\hat{q}-q}\leq \frac{\ve}{4}$, the second inequality follows from the definition of $\hat{s}$, and the third follows from the definition of $\mcd^{\prime}$ and our choice $s\in\mcd^{\prime}$.
We have thus shown that every $s \in \mcd^{\prime}$ is $\ve$-optimal for $q$.



Next, we have
$\hat{q}(s^{*})\geq  q(s^{*})-\frac{\ve}{4}
\geq q(\hat{s})-\frac{\ve}{4}
\geq  \hat{q}(\hat{s})-\frac{\ve}{2}$,
where the first and the last inequalities follow from $\spn{\hat{q}-q}\leq \frac{\ve}{4}$ while the second inequality follows from the fact that $s^{*}$ is a maximizer of $q$. We have thus shown that $s^{*}\in\mcd^{\prime}$. Since $s^{*}\in\arg\max_{s\in\mcd}q(s)$ was chosen arbitrarily, the last assertion of the result follows. \hfill $\Box$

{\bf Proof of Lemma \ref{lblem}.} First, we recall a definition from the theory of optimal designs. A {\em design} is a probability measure on the Borel $\sigma$-algebra of $\mcd$. Given a design $\xi$ on $\mcd$, we denote $V_{\xi}=\int_{\mcd}\phi(s)\tp{\phi}(s)\xi(\mathrm{d}s)$. Note that the integral is defined since $\mcd$ is compact and $\phi$ is continuous.

Next, suppose $C=\{\phi(p_{1}),\ldots,\phi(p_{m})\}$ is a $(L,m)$-volumetric spanner for some $L>0$ and $m\geq f$. Consider the design $\xi$ which places mass $1/m$ at each of the points of $C$, and let $X=[\phi(p_{1}),\ldots,\phi(p_{m})]\in\real^{f\times m}$. Note that $X\tp{X}=mV_{\xi}$.

By the definition of a $(L,m)$-volumetric spanner, we have $\max_{z\in\phi(\mcd)}\|\tp{X}(X\tp{X})^{-1}z\|_{2}^{2}\leq L^{2}$. A simple calculation shows that, for each $z\in \phi(\mcd)$, we have $\|\tp{X}(X\tp{X})^{-1}z\|_{2}^{2}=\tp{z}(X\tp{X})^{-1}z=m^{-1}\tp{z}V_{\xi}^{-1}z$. The Kiefer--Wolfowitz theorem \citep{kiefer}, \citep[Thm. 21.1]{lattimore} implies that $\max_{z\in\phi(\mcd)}\tp{z}V_{\xi}^{-1}z\geq f$. Putting everything together, we have $L^{2}\geq m^{-1}\max_{z\in\phi(\mcd)}\tp{z}V_{\xi}^{-1}z\geq f/m$. This completes the proof. \hfill$\Box$


\section{Proof of Proposition \ref{bdprop}}
\label{appa}
By way of preparation for the proof of Proposition \ref{bdprop}, we will find it convenient to rewrite (\ref{ols1}) and (\ref{ols2}) by grouping together observations made during each round. To this end, let $B_{L,m}=[\phi(p_{1}),\ldots,\phi(p_{m})]\in\real^{f\times m}$ and, for each $j\in\pint$, let $\by^{j}=[y_{(j-1)m+1},\ldots,y_{jm}]^{\rm T}\in\real^{m}$ and $\bta^{j}=[\eta_{(j-1)m+1},\ldots,\eta_{jm}]^{\rm T}\in\real^{m}$ denote the vectors of rewards and noise samples, respectively, encountered in the $j$th round. The decision epoch at the end of $k>0$ rounds is $t=km$. In the notation of (\ref{ols1}), we have
\[X_{t}=[\underbrace{B_{L,m}|\cdots | B_{L,m}}_{k\mbox{ times}}].\]
Equations (\ref{ols1})-(\ref{ols2}) now become
\begin{eqnarray}
  \mh_{km}&=&(B_{L,m}\tp{B}_{L,m})^{-1}B_{L,m}\left[\frac{1}{k}\sum_{j=1}^{k}\by^{j}\right], \label{ols3}\\
  \mh_{km}-\mu&=&(B_{L,m}\tp{B}_{L,m})^{-1}B_{L,m}\left[\frac{1}{k}\sum_{j=1}^{k}\bta^{j}\right]. \label{ols4}
\end{eqnarray}

The proof of the sub-Gaussian part of Proposition \ref{bdprop} essentially applies to the right hand side of (\ref{ols4}) the tail concentration inequality below for the norm of the average of $k$ random vectors having independent $\sigma$-sub-Gaussian components. The proof is given later in this appendix.

\begin{proposition}
\label{propsn:2}
  Suppose $\xi^{1}, \ldots, \xi^{k}$ are $f$-dimensional random vectors such that the random variables $\{ \xi_{i}^{j}: i=1,\ldots,f, j=1,\ldots, k \}$
  are independent and $\sigma$-sub Gaussian. Let $S_{k}=\left(\xi^{1}+\cdots+\xi^{k}\right)$. Then the following statements hold.
  \begin{enumerate}
    \item $\exp \left(\lambda \|S_{k}\|_{2}^{2}\right)$ is integrable for each $\lambda \in \left(0, 1/2\sigma^{2}k\right).$
    \item For every $\ve>0$, we have $\prob \left(\frac{1}{k}\| S_{k}\|_{2} > \ve \right) \leq
    \beta(k,\ve)$, where $\beta$ is given by (\ref{eqtn:boundSG}).
    % 2^{f/2}\exp \left(\frac{-k\epsilon^{2}}{4\sigma^{2}}\right)$.
  \end{enumerate}
\end{proposition}

% The proof of the Gaussian part of Proposition \ref{bdprop} applies to (\ref{ols4}) the tail concentration inequality for the average of independent Gaussian random vectors provided by the next lemma.

We are now ready to prove Proposition \ref{bdprop}.

{\bf Proof of Proposition \ref{bdprop}. }
First, suppose Assumption \ref{assum1} holds. We have
\begin{eqnarray}
\lefteqn{\spn{g_{\mh_{km}}-g_{\mu}}= \max_{s\in\mcd}|\tp{\phi}(s)(\mh_{km}-\mu)|}\nonumber \\
&=&\max_{s\in\mcd}\left|\tp{\phi}(s)(B_{L,m}\tp{B}_{L,m})^{-1}B_{L,m}\left[\frac{1}{k}S_{k}\right]\right|,
\label{bdpf1}
\end{eqnarray}
where $S_{k}=
\sum_{j=1}^{k}\bta^{j}$, and the last equality uses (\ref{ols4}).
Since the columns of $B_{L,m}$ form a $(L,m)$-volumetric spanner for $\phi(\mcd)$, it follows that $\|\tp{B}_{L,m}(B_{L,m}\tp{B}_{L,m})^{-1}\phi(s)\|_{2}\leq L$ for all $s\in\mcd$. Using this fact along with the Cauchy-Schwarz inequality in (\ref{bdpf1}) gives $\spn{g_{\mh_{km}}-g_{\mu}}\leq \frac{L}{k}\|S_{k}\|_{2}$. The assertion of the proposition now follows immediately from Proposition \ref{propsn:2}.
\hfill $\Box$

% Next, we prove the second part of the proposition under Assumption \ref{assum2}. For convenience, define $\varphi(s)=\tp{B}_{L,m}(B_{L,m}\tp{B}_{L,m})^{-1}\phi(s)$ for every $s\in\mcd$, and recall that $\|\varphi(s)\|_{2}\leq L$ for all $s\in\mcd$. Define
% $G:\real^{f}\rightarrow \real$ by $G(z)=\max_{s\in\mcd}|\tp{\varphi}(s)z|$, for every $z\in\real^{f}$.

% We claim that $G$ is Lipschitz with respect to the Euclidean norm with Lipschitz constant 1. To prove the claim, consider $z_{1},z_{2}\in\real^{f}$. We assume without loss of generality that $G(z_{1})\geq G(z_{2})$, and let $s^{\prime}\in\arg\max_{s\in\mcd}|\tp{\varphi}(s)z_{1}|$. Then we have $|G(z_{1})-G(z_{2})|=G(z_{1})-G(z_{2})= \max_{s\in\mcd}|\tp{\varphi}(s)z_{1}|-\max_{s\in\mcd}|\tp{\varphi}(s)z_{2}|\leq |\tp{\varphi}(s^{\prime})z_{1}|-|\tp{\varphi}(s^{\prime})z_{2}| \leq |(\tp{\varphi}(s^{\prime})(z_{1}-z_{2})|\leq \|\varphi(s^{\prime})\|_{2}\|z_{1}-z_{2}\|_{2}\leq \|z_{1}-z_{2}\|_{2}$. This proves the claim.

% Next, note that under Assumption \ref{assum2} invoked in the second part of the proposition, $Z\isdef\frac{1}{k}S_{k}$ is an average of $k$ independent zero-mean $f$-dimensional Gaussian random vectors, each having covariance matrix $\sigma^{2}I$, where $I$ is the $f\times f$ identity matrix. Hence $Z$ is a zero-mean Gaussian random vector with covariance matrix $\frac{\sigma^{2}}{k}I$. We now observe from (\ref{bdpf1}) that $\|g_{\mh_{km}}-g_{\mu}\|_{\infty}=G(Z)$. Thus $\|g_{\mh_{km}}-g_{\mu}\|_{\infty}$ is a Lipschitz function of the  Gaussian vector $Z$ having i.i.d. components. Applying a slightly extended version of  Theorem 2.26 of \citet{wainwright} (which is stated for the special case where the components of $Z$ are  standard normal) gives (\ref{eqtn:boundG}).
% \hfill $\Box$

For proving Proposition \ref{propsn:2}, we first recollect a few preliminary results. Though these results are known, we state them  to make the constants explicit, and provide proofs for easy reference.

\begin{lemma}
\label{lem:1}
Suppose $X$ is $\sigma$-sub Gaussian for some $\sigma > 0$. If $\lambda \in (0, 1/2\sigma^{2})$, then $\exp{\left(\lambda X^{2}\right)}$ is integrable, and $\ebb{\left[\exp{\left(\lambda X^{2}\right)}\right]} \leq 2^{2\sigma^{2}\lambda}(1-2\sigma^{2}\lambda)^{-1}$.
\end{lemma}
\begin{proof}
Let $\lambda \in (0, 1/2\sigma^{2})$. Since $X$ is $\sigma$-sub Gaussian, we have $\prob \left( | X | > t\right) \leq 2\exp\left(-\frac{t^{2}}{2\sigma^{2}}\right)$ for all $t > 0$.
%\textcolor{red}{citeref}.
Next, note that $\exp{\left(\lambda X^{2}\right)} \geq 1$. Let $s \geq 1$. Then
\begin{eqnarray}
\lefteqn{\prob\left(\exp{\left(\lambda X^{2} \right) > s}\right)
= \prob\left( |X| > \sqrt{\frac{\ln s}{\lambda}} \right)}  \nonumber \\
&\leq& 2 \exp \left( -\frac{1}{2\sigma^{2}} \frac{\ln s}{\lambda} \right)
%\nonumber  \\ &=&
=2s^{-\frac{1}{2\sigma^{2}\lambda}}. \nonumber \end{eqnarray}
Thus, we conclude that
\begin{equation}
\prob\left(\exp{\left(\lambda X^{2} \right) > s}\right) \leq
\begin{cases}
1, & \text{if } s \leq 2^{2\sigma^{2}\lambda}, \\
2s^{-\frac{1}{2\sigma^{2}\lambda}}, & \text{if } s>  2^{2\sigma^{2}\lambda}.
\end{cases}
\label{probeq}
\end{equation}
Since $2\sigma^{2}\lambda < 1$, the integral $\int_{0}^{\infty}\prob\left(\exp\left(\lambda X^{2}\right) > s\right) \mathrm{d}s $ exists. Indeed, (\ref{probeq}) implies that
\begin{eqnarray}
 \lefteqn{ \int_{0}^{\infty}\prob\left(\exp\left(\lambda X^{2}\right) > s\right) \mathrm{d}s }\nonumber \\
  &\leq& \int_{0}^{2^{2\sigma^{2}\lambda}} 1 \mathrm{d}s
  %\nonumber \\ && {}
  + \int_{2^{2\sigma^{2}\lambda}}^{\infty} 2s^{-\frac{1}{2\sigma^{2}\lambda}} \mathrm{d}s \nonumber  \\
%   &=& 2^{2\sigma^{2}\lambda} + \frac{2s^{\left(1-\frac{1}{2\sigma^{2}\lambda}\right)}}{\left(1-\frac{1}{2\sigma^{2}\lambda}\right)} \Biggr|_{2^{2\sigma^{2}\lambda}}^{\infty} \nonumber  \\
 % &=& \frac{2^{2\sigma^{2}\lambda}}{\left(1-2\sigma^{2}\lambda\right)} \nonumber
 &=& 2^{2\sigma^{2}\lambda}(1-2\sigma^{2}\lambda)^{-1}. \nonumber
\end{eqnarray}
Since $\exp \left(\lambda X^{2}\right)$ is a non-negative random variable, it follows that $\ebb \left[\exp \left(\lambda X^{2}\right) \right] = \int_{0}^{\infty}\prob\left(\exp\left(\lambda X^{2}\right) > s\right) \mathrm{d}s $, and the result follows.
\end{proof}

\begin{lemma}
\label{lem:2}
Suppose $\zeta$ is a random vector of dimension $f$ such that $\zeta_{1}, \ldots,\zeta_{f}$ are independent $\sigma$-sub Gaussian random variables. Then $\ebb \left[ \exp \left( \lambda\tp{x}\zeta \right) \right] \leq \exp \left( \frac{\lambda^{2}\|x\|_{2}^{2}\sigma^{2}}{2} \right)$ for all $x \in \real^{f}$ and $\lambda \in \real$. Furthermore, if $\lambda \in (0, 1/2\sigma^{2})$, then $\exp \left(\lambda \|\zeta\|_{2}^{2}\right)$ is integrable, and $\ebb \left[ \exp\left(\lambda \|\zeta\|_{2}^{2}\right) \right] \leq \left(\frac{2^{2\sigma^{2}\lambda}}{1-2\sigma^{2}\lambda}\right)^{f}$.
\end{lemma}

\begin{proof}
By independence and $\sigma$-sub Gaussianity, we have
\begin{eqnarray}
 \lefteqn{ \ebb\left[\exp \left(\lambda \tp{x}\zeta\right)\right]
  = \prod_{i=1}^{f}\ebb \left[\exp \left(\lambda x_{i}\zeta_{i}\right)\right]} \nonumber \\
  &\leq& \prod_{i=1}^{f} \exp \left(\frac{\lambda^{2}x_{i}^{2}\sigma^{2}}{2}\right)
  %\nonumber \\&=&
  =\exp \left(\frac{\lambda^{2}\|x\|_{2}^{2}\sigma^{2}}{2}\right). \nonumber
\end{eqnarray}

This proves the first assertion. To prove the second assertion, let $\lambda \in \left(0, 1/2\sigma^{2}\right)$. By Lemma \ref{lem:1}, $\exp\left(\lambda\zeta_{i}^{2}\right)$ is integrable for each $i$. Hence it follows by independence that $\exp \left(\lambda\|\zeta\|_{2}^{2}\right)$ is also integrable, and $\ebb \left[\exp \left(\lambda\|\zeta\|_{2}^{2}\right)\right] = \prod_{i=1}^{f} \ebb \left[\exp \left(\lambda\zeta_{i}^{2}\right)\right] \leq \left(\frac{2^{2\sigma^{2}\lambda}}{1-2\sigma^{2}\lambda}\right)^{f}$.
\end{proof}

The next lemma, which we state without proof, is a conditional version of the first part of Lemma \ref{lem:2}.

\begin{lemma}
\label{lem:3}
  Suppose $\zeta$ is a random vector of dimension $f$ such that $\zeta_{1}, \ldots, \zeta_{f}$ are independent, $\sigma$-sub Gaussian random variables.
  Let $Y$ be a $\mcg$-measurable $f$-dimensional random vector, where $\mcg$ is a $\sigma$-algebra such that $\zeta$ is independent of $\mcg$. Then \[ \ebb\left[\exp \left(\lambda \tp{Y}\zeta \right)| \mcg\right] \leq \exp \left(\frac{\lambda^{2}\|Y\|_{2}^{2}\sigma^{2}}{2}\right)\ \mathrm{a.s.}\]
\end{lemma}

The proof of Proposition \ref{propsn:2} follows next.

{\bf Proof of Proposition \ref{propsn:2}.}
The $i$th component of $S_{k}$ is a sum of $k$ independent $\sigma$-sub Gaussian random variables. Applying the first part of  Lemma \ref{lem:2} with $\zeta = \left[\xi_{i}^{1}, \ldots, \xi_{i}^{k}\right]$ and $x = [1, \ldots, 1]$ lets us conclude that the $i$th component of $S_{k}$ is $\sigma \sqrt{k}$-sub Gaussian.
Applying the second part of Lemma \ref{lem:2} with $\zeta=S_{k}$ shows that $\exp \left(\lambda \|S_{k}\|_{2}^{2}\right)$ is integrable for $\lambda \in \left(0, 1/2\sigma^{2}k\right)$. This proves the first assertion.

To prove the second statement, choose  $x \in \real^{f}$, and define the process $\{ M_{j}(x) \}^{k}_{j=0}$ by $M_{0}(x) = 1$ and
\[ M_{j}(x) = \exp \left(\tp{x}S_{j} - j\frac{\sigma^{2}}{2}\|x\|_{2}^{2}\right),  j =1, \ldots, k, \]
where $S_{j} = \xi^{1} + \cdots + \xi^{j}$ for each $j$. It follows from the first part of Lemma \ref{lem:2} that $M_{j}(x)$ is integrable for each $j$. Next, let $\mcg_{j}$ denote the $\sigma$-algebra generated by $\xi^{1}, \ldots, \xi^{j}$, with $\mcg_{0}$ denoting the trivial $\sigma$-algebra, and note that $M_{j}(x)$ is $\mcg_{j}$-measurable.
For each $j=1, \ldots, k$, we have
\begin{eqnarray}
\lefteqn{ \ebb\left[M_{j}(x) | \mcg_{j-1} \right] }\nonumber \\
  &=& \ebb \left[ M_{j-1}(x) \exp\left.\left(\tp{x}\xi^{j} - \frac{\sigma^{2}}{2}\|x\|_{2}^{2}\right) \right| \mcg_{j-1}\right] \nonumber \\
  &=& M_{j-1}(x) \ebb\left[\exp\left(\tp{x}\xi^{j} - \frac{\sigma^{2}}{2}\|x\|_{2}^{2}\right)\right]   \nonumber \\
  &\leq& M_{j-1}(x),  \nonumber
\end{eqnarray}
where the second equality follows from
the $\mcg_{j-1}$-measurability of $M_{j-1}(x)$ and the $\mcg_{j-1}$-independence of $\xi^{j}$ (see Lemma \ref{lem:3}), while the last inequality follows by applying the first part of Lemma \ref{lem:2} with $\zeta=\xi^{j}$.
Thus, $\{M_{j}(x)\}_{j=0}^{k}$ is a supermartingale with respect to the filtration $\{\mcg_{j}\}_{j=0}^{k}$.

Next, define $\{\bam_{j}\}_{j=0}^{k}$ by
\begin{equation}
  \label{eqtn:Mjbar_1}
  \bam_{j} = \left(\frac{k\sigma^{2}}{2\pi}\right)^{f/2} \int_{\real^{f}} M_{j}(x) \exp \left(\frac{-k\sigma^{2}}{2}\tp{x}x\right) \mathrm{d}x,
\end{equation}
and note that $\bam_{0}=1$.
Substituting for $M_{j}(x)$ in (\ref{eqtn:Mjbar_1}), completing the square in the exponent and rearranging terms yields
\begin{equation}
  \label{eqtn:Mjbar_2}
  \bam_{j} = \left[\left(\frac{k}{(j+k)}\right)^{\frac{f}{2}} \exp \left(\frac{\|S_{j}\|_{2}^{2}}{2(j+k)\sigma^{2}}\right) \right] \times J,
%   \nonumber \\ &&\left[\left(\frac{(j+k)\sigma^{2}}{2\pi}\right)^{f/2} \int_{\real^{n}}\exp\left[-\frac{1}{2}\left\{\tp{\left(x - \frac{S_{j}}{(j+k)\sigma^{2}}\right)} \left((j+k)\sigma^{2}I\right) \left(x - \frac{S_{j}}{(j+k)\sigma^{2}}\right) \right\}\right] \mathrm{d}x \right]
\end{equation}
where $J$ is the integral over $x$ of the $f$-dimensional Gaussian density with mean $[(j+k)\sigma^{2}]^{-1}S_{j}$ and covariance matrix $[(j+k)\sigma^{2}]^{-1}I$, with $I$ denoting the $f\times f$ identity matrix.
Thus, $J$ evaluates to $1$.
% the second term above to be the integral of a multivariate Gaussian density, and \textcolor{red}{letting $j=k$}, we get
% \begin{equation}
%   \label{eqtn:Mjbar_3}
%   \bam_{j} = \left(\frac{k}{j+k}\right)^{f/2} \exp \left(\frac{\|S_{j}\|_{2}^{2}}{2(j+k)\sigma^{2}}\right)
% \end{equation}
Next, $S_{j}$ is a random vector with independent $\sigma\sqrt{j}$-sub Gaussian components. Also, $\frac{1}{2(j+k)\sigma^{2}} < \frac{1}{2j\sigma^{2}}$.
Hence, by Lemma \ref{lem:2}, $\bam_{j}$ is integrable. In addition, it follows from Lemma 20.3 in \citet{lattimore} that $\{\bam_{j}\}_{j=0}^{k}$ is a non-negative supermartingale.

Letting $j=k$ in (\ref{eqtn:Mjbar_2}) gives $\bam_{k}=2^{-\frac{f}{2}}\exp(\frac{\|S_{k}\|_{2}^{2}}{4k\sigma^{2}})$.
By Ville's maximal inequality (see Theorem 3.9 in \citet{lattimore}), we have
\begin{eqnarray}
  \prob \left(\|S_{k}\|_{2} > \epsilon\right) &=& \prob \left(\bam_{k} > \frac{1}{2^{f/2}} \exp \left(\frac{\epsilon^{2}}{4k\sigma^{2}}\right)\right) \nonumber \\
  &\leq& \prob \left(\max_{j}\bam_{j} > \frac{1}{2^{f/2}} \exp \left(\frac{\epsilon^{2}}{4k\sigma^{2}}\right) \right) \nonumber \\
  &\leq& \frac{\ebb\left[\bam_{0}\right]}{\frac{1}{2^{f/2}} \exp \left(\frac{\epsilon^{2}}{4k\sigma^{2}}\right)} \nonumber \\
  &=& 2^{f/2} \exp \left(\frac{-\epsilon^{2}}{4k\sigma^{2}}\right). \nonumber
\end{eqnarray}
Replacing $\epsilon$ by $k\ve$ in the last inequality completes the proof of the second assertion.
\hfill $\Box$

\section{Proof of Proposition \ref{vsprop}} \label{appb}
The proof of Proposition \ref{vsprop} uses the following lemma.

\begin{lemma}
\label{laglem}
Let $s\in[p_{\min},p_{\max}]$ and suppose $p_{1},\ldots,p_{f}\in [p_{\min},p_{\max}]$ are such that $p_{i}\neq p_{j}$ for all $i\neq j$. Then $c_{1},\ldots,c_{f}\in\real$ satisfy
\begin{equation}
c_1\phi(p_1) + \cdots + c_{f}\phi(p_{f}) = \phi(s)
\label{spanningeqn}
\end{equation}
if and only if $c_i = l_i(s,\vecp)$ for each $i=1,\ldots,f$ where $\vecp=[p_{1},\ldots,p_{f}]^{\rm T}$, and $l_{i}(\cdot,\vecp)$ is the $i$th Lagrange basis polynomial for the points $\{p_1,p_2,\ldots, p_{f} \}$ given by
\begin{equation}
l_{i}(s,\vecp)\isdef\frac{\prod\limits_{j \neq i}(s - p_j)}{\prod\limits_{j \neq i}(p_i - p_j)}.
\label{lagrange}
\end{equation}
\end{lemma}
\begin{proof}
Equation (\ref{spanningeqn}) may be rewritten as
\begin{equation}
    V(\vecp)c(s)=\phi(s),
    \label{spanner_eqn}
\end{equation}
where $V(\vecp)\isdef [\phi(p_{1}),\ldots,\phi(p_{f})]\in\real^{f\times f}$ and $c(s)\isdef[c_{1},\ldots,c_{f}]^{\rm T}\in\real^{f}$. Note that $V(\vecp)$ is a Vandermonde matrix, and its  determinant is given by (see Fact 7.18.5 from \citet{dsb})
\begin{equation}
% \label{Van_det_eqtn}
    \det(V(\vecp)) = \prod_{1\leq i < j \leq f}(p_j - p_i).
    \label{detveq}
\end{equation}
The determinant of $V(\vecp)$ in (\ref{detveq})  is nonzero since $p_{i}\neq p_{j}$ for $j\neq i$.
Equation (\ref{spanner_eqn}) thus has a unique solution. Applying Cramer's rule (see Fact 3.16.12 from \citet{dsb}) gives this solution to be
\begin{equation}
c_{i}=\frac{\det(V(\vecpsi))}{\det(V(\vecp))},
    \label{cramer}
\end{equation}
where $\vecpsi$ is the vector obtained by replacing the $i$th element of $\vecp$ by $s$. Using (\ref{detveq}) to expand the determinants of the two Vandermonde matrices in (\ref{cramer}) and canceling common terms gives $c_{i}=l_{i}(s,\vecp)$.
\end{proof}

The proof of Proposition \ref{vsprop} follows.

{\bf Proof of Proposition \ref{vsprop}.}
To show 1) implies 2), suppose $p_{1},\ldots, p_{f}\in\mcd$ are $(1,f)$-volumetric points for the pair $(\phi,\mcd)$. Choose $s\in\mcd=[p_{\min},p_{\max}]$ arbitrarily.  Applying the definition of $(1,f)$-volumetric points, it follows that there exist $c_{1},\ldots,c_{f}\in\real$ such that $c_{1}\phi(p_{1})+\cdots +c_{f}\phi(p_{f})=\phi(s)$ and $c_{1}^{2}+\ldots+c_{f}^{2}\leq 1$.  Clearly, $|c_{i}|\leq 1$ for all $i=1,\ldots,f$. Since $s\in\mcd$ was chosen arbitrarily, it follows that $\{\phi(p_{1}),\ldots,\phi(p_{f})\}$ is a barycentric spanner for $\phi(\mcd)$ (see \cite{amballa} for a definition). Theorem 1 of \cite{amballa} now implies that 2) holds.

To prove that 2) implies 1), suppose $p_{\min}=p_{1}\leq p_{2}\leq \cdots \leq p_{f}=p_{\max}$ satisfy (\ref{nlineq}).
Define $\vecp$ as in Lemma \ref{laglem}. The Lagrange polynomials defined in Lemma \ref{laglem} satisfy
\begin{eqnarray}
l_{i}(p_{i},\vecp)&=&1,\ i=1,\ldots,f, \label{lagprop1} \\
l_{i}(p_{j},\vecp)&=&0,\ i,j=1,\ldots,f, \ i\neq j,\label{lagprop4} \\
\frac{\mathrm{d}l_{i}}{\mathrm{d}s}(p_{i},\vecp)&=&0,\ i=2,\ldots,f-1, \label{lagprop2}
\end{eqnarray}
\begin{equation}
\frac{\mathrm{d}l_{1}}{\mathrm{d}s}(p_{1},\vecp)< 0 < \frac{\mathrm{d}l_{f}}{\mathrm{d}s}(p_{f},\vecp). \label{lagprop3}
\end{equation}
Equations (\ref{lagprop1}) and (\ref{lagprop4}) follow by substituting appropriate values for $s$ in (\ref{lagrange}). Equation (\ref{lagprop2}) and the inequalities in (\ref{lagprop3}) follow by differentiating (\ref{lagrange}) with respect to $s$ and substituting appropriately for $s$; (\ref{lagprop2}) additionally uses (\ref{nlineq}), while the signs in (\ref{lagprop3}) follow from the ordering of the points $p_{1},\ldots,p_{f}$.

Next, define the function $G:\mcd\rightarrow \real$ by $G(s)\isdef l_{1}^{2}(s,\vecp)+\cdots +l_{f}^{2}(s,\vecp)-1$. We claim that $G(s)\leq 0$ for all $s\in\mcd$. In light of Lemma \ref{laglem} and the definition of $(1,f)$-volumetric points, our claim implies that 1) holds. Hence, to complete the proof, it is sufficient to prove our claim.

To prove our claim, note that $G$ is a polynomial of degree $2(f-1)$. Also, we observe from (\ref{lagprop1}), (\ref{lagprop4}), (\ref{lagprop2}) and (\ref{lagprop3}) that $p_{1}$ and $p_{f}$ are roots of $G$ of multiplicity 1, while each $p_{i}$ is a root of $G$ of multiplicity at least 2 for $i\neq 1,f$. Thus, the polynomial $H(s)\isdef (s-p_{1})(s-p_{2})^{2}\cdots (s-p_{f-1})^{2}(s-p_{f})$ divides $G$. Since $H$ also clearly has degree $2(f-1)$, it follows that there exists $K\in\real$ such that $G(s)=KH(s)$ for all $s\in\mcd$. The value of $K$ may be computed as $K=\frac{G^{\prime}(p_{1})}{H^{\prime}(p_{1})}$, where $^{\prime}$ indicates the derivative. It is easy to use (\ref{lagprop1})  and  (\ref{lagprop4}) to verify that $G^{\prime}(p_{1})=2\frac{\mathrm{d}l_{1}}{\mathrm{d}s}(p_{1},\vecp)$, which is negative by (\ref{lagprop3}). An easy calculation also yields $H^{\prime}(p_{1})=(p_{1}-p_{2})^{2}\cdots (p_{1}-p_{f-1})^{2}(p_{1}-p_{f})$ which is negative since $p_{1}<p_{f}$. These arguments show that $K>0$. Our claim now follows by noting that $H$ takes only non-positive values on $\mcd$. This completes the proof.
\hfill $\Box$

% \bibliographystyle{icml2022}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% APPENDIX
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\newpage
\twocolumn
\section{Algorithm 2}
\label{appc}





%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{algorithm}[htb!]
    \caption{VSBAI-Poly: Best Arm Identification for Polynomial Rewards}
    \label{alg:BAI}
 \begin{algorithmic}[1]
 \STATE {\bfseries Input:}
   $\ve > 0$, $\delta \in (0, 1)$, sub-Gaussianity parameter $\sigma$,
    $(1,f)$-volumetric points $p_{1}, \ldots, p_{f}$ for $(\phi, \mcd)$
    \STATE Set $B_{1,f}=\left[\phi(p_{1}), \ldots, \phi(p_{f})\right]$
    % \STATE Set STOP = False
    \STATE Initialize $k \leftarrow 1$, $r \leftarrow 0$
    \STATE Set STOP = False

    \WHILE{ STOP==False }
    \STATE $\by^{k}$ = []
    \FOR{$t = 1, \ldots, f $}
    \STATE $y_{(k-1)f+t} = g_{\mu}(p_t) + \eta_{t}$
    \STATE $\by^{k}\leftarrow [(\by^{k})^{\rm T};y_{(k-1)f+t}]^{\rm T}$
    \ENDFOR
    \STATE $r = r + \by^{k}$
    \IF {$\beta(k, \frac{\ve}{4L}) < \delta $}
    \STATE STOP = True
    \ELSE
    \STATE $k=k+1$
    \ENDIF
    \ENDWHILE
    \STATE $\tau^{*} = kf$
    \STATE $\hat{\mu}_{\tau^{*}} = \frac{1}{k}\itp{B_{1,f}}r$
    \STATE $\hat{s}$ = global\_optimizer($\hat{\mu}_{\tau^{*}}, p_{\min}, p_{\max}$)
    \STATE $\mcd_{\tau^{*}}$ = get\_dtau($\hat{\mu}_{\tau^{*}}, \hat{s},  p_{\min}, p_{\max}, \ve$)
    \STATE {\bfseries Output: $\mcd_{\tau^{*}}$}

    \vspace{0.5cm}

    \STATE \textbf{Function} global\_optimizer($\hat{\mu}_{\tau^{*}}, p_{\min}, p_{\max}$)
    \STATE $\hat{\mu}_{\tau^{*}}^{'}$ = differentiate($\hat{\mu}_{\tau^{*}}$)
    \STATE roots = find\_roots($\hat{\mu}_{\tau^{*}}^{'}$)
    \STATE roots.add($p_{\min}, p_{\max}$)
    \STATE values = $g_{\hat{\mu}_{\tau^{*}}}$(roots)
    \STATE opt\_value = argmax(values)
    \STATE \textbf{return} opt\_value

    \vspace{0.5cm}

    \STATE \textbf{Function} get\_dtau($\hat{\mu}_{\tau^{*}}, \hat{s},  p_{\min}, p_{\max}, \ve$)
    \STATE d\_tau = []
    \STATE roots = find\_roots($g_{\hat{\mu}_{\tau^{*}}}(s) - g_{\hat{\mu}_{\tau^{*}}}(\hat{s}) + \ve/2$)
    \STATE roots.add($p_{\min}$, $p_{\max}$)
    \STATE roots = sort(roots)
    \STATE root$_{\mathrm{left}}$ = get\_closest\_left\_root\_to\_$\hat{s}$(roots, $\hat{s}$)
    \STATE root$_{\mathrm{right}}$ = get\_closest\_right\_root\_to\_$\hat{s}$(roots, $\hat{s}$)
    \STATE d\_tau.add(root$_{\mathrm{left}}$, root$_{\mathrm{right}}$)
    \STATE d\_tau.add(every\_pair\_to\_the\_left\_of\_root$_{\mathrm{left}}$)
    \STATE d\_tau.add(every\_pair\_to\_the\_right\_of\_root$_{\mathrm{right}}$)

    % \FOR {i in range(len(roots))}
    % \IF {roots[i] > $\hat{s}$}
    % \STATE break
    % \ENDIF
    % \ENDFOR
    % \STATE left = deque(roots[:i])
    % \STATE right = deque(roots[i:])
    % \STATE d_tau.append((left.pop(), right.popleft()))
    % \WHILE {len(left) > 1}
    % \STATE d_tau.append(left.pop(), left.pop())
    % \ENDWHILE
    % \WHILE {len(right) > 1}
    % \STATE d_tau.append(right.popleft(), right.popleft())
    % \ENDWHILE
    \STATE \textbf{return} d\_tau








 \end{algorithmic}
 \end{algorithm}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\newpage
\twocolumn
\section{Multi-arm setting configurations}
\label{appd}


\begin{figure}[h!]
  \includegraphics[width=\linewidth]{results/multi-arm/normal_0.1epsilon.eps}
  \caption{10 arm setting when the angles $\phi$ of the arms (3 to 10) are sampled from $\mathcal{N} (0, .09)$. $(\ve,\delta)=(0.1, 0.05)$ for the VSBAI algorithm}
  \label{fig:multi-arm-gaussian}
\end{figure}

\begin{figure}[h!]
  \includegraphics[width=\linewidth]{results/multi-arm/uniform_0.1epsilon.eps}
  \caption{10 arm setting when the angles $\phi$ of the arms (3 to 10) are sampled uniformly from $[0,0.1]$. $(\ve,\delta)=(0.1, 0.05)$ for the VSBAI algorithm}
  \label{fig:multi-arm-uniform}
\end{figure}


\newpage
\twocolumn
\section{Other experiments}
\label{appg}


\setcounter{table}{2}
\begin{table*}[!htb]\centering
    \begin{tabular}{|c|c|c|c|c|c|c|c|c|}\hline
        {Algorithm} & \multicolumn{2}{c|}{LazyTS} & \multicolumn{2}{c|}{Rage} & \multicolumn{2}{c|}{Oracle} & \multicolumn{2}{c|}{VSBAI}                                     \\ \hline
        {Arms}      & Mean                        & Std                       & Mean                        & Std                          & Mean   & Std   & Mean    & Std    \\\hline

        10          & 335.1                       & 22.71                     & 524.1                       & 33.84                        & 347.1  & 32.22 & 47693.4 & 105.32

        \\\hline
        20          & 423.05                      & 28.92                     & 683.05                      & 92.07                        & 356.15 & 32.31 & 47424.1 & 41.32

        \\\hline
        100         & 421.75                      & 32.11                     & 1038.75                     & 148.01                       & 426.4  & 39.76 & 47271.4 & 13.49

        \\\hline
        1000        & 419.65                      & 28.43                     & 1152.7                      & 50.23                        & 476.45 & 40.74 & 47222.7 & 1.27


        \\\hline
        2500        & 446.15                      & 29.06                     & 1447.3                      & 150.46                       & 481.8  & 41.33 & 47219.8 & 0.37
        \\\hline

        5000        & 431.65                      & 32.23                     & 1546.9                      & 160.17                       & 510.05 & 48.87 & 47219.9 & 0.41
        \\\hline
    \end{tabular}
    \caption{Expected sample complexity for the setting described in Appendix \ref{appg}}
    \label{table3}
\end{table*}
\hfill
\begin{table*}[!htb]\centering
    \begin{tabular}{|c|c|c|c|c|c|c|c|c|}\hline
        {Algorithm} & \multicolumn{2}{c|}{LazyTS} & \multicolumn{2}{c|}{Rage} & \multicolumn{2}{c|}{Oracle} & \multicolumn{2}{c|}{VSBAI}                              \\ \hline
        {Arms}      & Mean                        & Std                       & Mean                        & Std                          & Mean  & Std  & Mean & Std  \\\hline

        10          & 0.27                        & 0.01                      & 0.05                        & 0.001                        & 0.001 & 0.0  & 1.30 & 0.02
        \\\hline

        20          & 0.33                        & 0.02                      & 0.06                        & 0.00                         & 0.001 & 0.00 & 1.39 & 0.04
        \\\hline

        100         & 0.39                        & 0.02                      & 1.09                        & 0.07                         & 0.02  & 0.00 & 1.41  & 0.04
        \\\hline

        1000        & 34.78                       & 1.06                      & 27.61                       & 0.33                         & 0.69  & 0.02 & 1.44 & 0.04
        \\\hline

        2500        & 211.24                      & 8.22                      & 335.82                      & 2.46                         & 0.65  & 0.02 & 1.56 & 0.04
        \\\hline

        5000        & 422.35                      & 22.45                     & 884.32                      & 3.85                         & 0.89  & 0.03 & 2.17 & 0.03
        \\\hline
    \end{tabular}
    \caption{Run-time in seconds for the setting described in Appendix \ref{appg}}
    \label{table4}
\end{table*}

We consider the setting outlined in subsection \ref{multiarm-setting} and present results for a different configuration of the problem instances. We first note that the implementation of the baseline algorithms presented in \cite{rage}, \cite{jedra}, and \cite{soare2014best} for the setting in subsection \ref{multiarm-setting} corresponds to the case where the angles $\phi_{i}$ for $i=3, \ldots, n$ are sampled from a uniform distribution on $[0, 0.1]$ rather than from a Gaussian distribution as in subsection \ref{multiarm-setting}. We therefore present experimental results for this uniform setting and provide a comparison of sample complexity and run time as in subsection \ref{multiarm-setting}. Note that we are able to reproduce the results reported in \cite{jedraArxiv} (see Table 2 and Table 3 of \cite{jedraArxiv}).

We observe from Table \ref{table3} that the sample complexity of VSBAI is greater than that of the other baselines. However, we argue that the instances generated in this setting are simple, and that in situations where it is difficult to separate the best arm from the next best (as when the angles $\phi_{i}$ of the arms are sampled from a Gaussian distribution, as in the setting of subsection \ref{multiarm-setting}), all these baselines suffer from huge sample complexities and run-times. In other words, VSBAI is independent of the way the instances are generated, whereas the other baselines are not robust and hence can potentially perform badly in adversarial environments. Table \ref{table4} gives a comparison of the run-times for this setting.
The results shown are obtained after averaging over 20 seeds.


\clearpage
\bibliography{bhat_587}

\end{document}
