%\documentclass{uai2023} % for initial submission
 \documentclass[accepted]{uai2023} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like

%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2023} % ptmx math instead of Computer
% Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2023} % newtx fonts (improves upon
 % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}
\usepackage{amsmath,amsfonts}
\usepackage{amsthm}
\usepackage{multirow}
%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

% for cross referencing the main text
% PLEASE ONLY USE xr IN THE SUPPLEMENTARY MATERIAL. 
% In the main paper, hard code any cross-reference to the supplementary material. 
\usepackage{xr} 
\externaldocument{uai2023-template}

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{A Bayesian Approach for Bandit Online Optimization with Switching Cost\\(Supplementary Material)}

% The standard author block has changed for UAI 2023 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:<shizai.sz@alibaba-inc.com>?Subject=Your UAI 2023 paper}{Zai Shi}{}}
\author[1]{Jian Tan}
%\author[1,2]{Further~Coauthor}
%\author[3]{Further~Coauthor}
\author[1]{FeiFei Li}
%\author[3]{Further~Coauthor}
%\author[3,1]{Further~Coauthor}
%% Add affiliations after the authors
\affil[1]{%
	Alibaba Group\\
	Hangzhou, Zhejiang, China
}
  
  \begin{document}
  
\onecolumn %% Turn this off if single column is desired for the supplement
\maketitle

\section{Proof of Theorem 1}
Defining $v_t^*=\arg\min_{x\in\mathcal{X}} f(x)+c(x,x_{t-1})$ and $x_0^*=x_0$, we have
\begin{align}
	&\sum_{t=1}^T(f(x_t)+c(x_t,x_{t-1})-f(v_t^*)-c(v_t^*,x_{t-1}))\nonumber\\
	&\geq \sum_{t=1}^T(f(x_t)+c(x_t,x_{t-1})-f(x_t^*)-c(x_t^*,x_{t-1}))\label{1step1}\\
	&\geq \sum_{t=1}^T(f(x_t)+c(x_t,x_{t-1})-f(x_t^*)-\eta c(x_t^*,x_{t-1}^*)-\eta c(x_{t-1}^*,x_{t-1}))\label{1step2}\\
	&=\sum_{t=1}^T(f(x_t)+c(x_t,x_{t-1})-f(x_t^*)-\eta c(x_t^*,x_{t-1}^*)-\eta c(x_{t}^*,x_{t}))+\eta c(x_T^*,x_{T})-\eta c(x_0^*,x_0)\nonumber\\
	&\geq \sum_{t=1}^T(f(x_t)+c(x_t,x_{t-1})-f(x_t^*)-\eta c(x_t^*,x_{t-1}^*)-\eta c(x_{t}^*,x_{t}))-\eta D\label{1step3}\\
	&\geq  \sum_{t=1}^T(f(x_t)+c(x_t,x_{t-1})-f(x_t^*)-\eta c(x_t^*,x_{t-1}^*)-\eta^2 c(x_{t}^*,x^*)-\eta^2c(x_t,x^*))-\eta D\label{1step4}\\
	&\geq \sum_{t=1}^T((1-\eta^2/\lambda)f(x_t)+c(x_t,x_{t-1})-(1+\eta^2/\lambda)f(x_t^*)+2\eta^2/\lambda f(x^*)-\eta c(x_t^*,x_{t-1}^*))-\eta D\label{1step5}\\
	&\geq \sum_{t=1}^T((1-\eta^2/\lambda)f(x_t)+(1-\eta^2/\lambda)c(x_t,x_{t-1})-(1+\eta^2/\lambda)f(x_t^*)-\eta c(x_t^*,x_{t-1}^*))-\eta D\label{1step6}
\end{align}
where \eqref{1step1} is from the definition of $v_t^*$, \eqref{1step2} is from Assumption 3, \eqref{1step3} is from Assumption 4, \eqref{1step4} is from Assumption 3, \eqref{1step5} is from Assumption 2 and \eqref{1step6} is from $f(x)\geq0$ and $c(x,y)\geq 0$ within $\mathcal{X}$.

Meanwhile, following a similar approach to the proof of Lemma 1 based on Lemma 2 of the main paper, we have
\begin{align}
	\sum_{t=1}^T(f(x_t)+c(x_t,x_{t-1})-f(v_t^*)-c(v_t^*,x_{t-1}))=O\left(B\sqrt{T\gamma'_T}+\sqrt{T\gamma'_T(\gamma'_T+\log(1/\delta))}\right)\label{lem}
\end{align}
with probability at least $1-\delta$, where $\gamma'_T$ is the maximal information gain related to kernel $k'$ and the domain $\mathcal{X}\times\mathcal{X}$ in $2d$-dimension.

Since $f(x)\geq0$ and $c(x,y)\geq 0$ within $\mathcal{X}$, by combining \eqref{1step6} and \eqref{lem} with $\eta^2/\lambda<1$ we have
\begin{align*}
	\sum_{l=1}^T\left(f(x_l)+c(x_l,x_{l-1})\right)-\sum_{l=1}^{T}\psi\left(f(x_l^*)+c(x_l^*,x_{l-1}^*)\right)=O\left(B\sqrt{T\gamma'_T}+\sqrt{T\gamma'_T(\gamma'_T+\log(1/\delta))}\right)=\tilde{O}(T^{g(2d)})
\end{align*}
with probability at least $1-\delta$, where $\psi=\max\{\frac{1+\eta^2/\lambda}{1-\eta^2/\lambda},\frac{\eta}{1-\eta^2/\lambda}\}$.
\section{Proof of Corollary 1}
	From the definition of competitive ratio, we have
\begin{align}
	&CR=\frac{\sum_{t=1}^T\left(f(x_t)+c(x_t,x_{t-1})\right)}{\sum_{t=1}^T\left(f(x_t^*)+c(x_t^*,x^*_{t-1})\right)}\nonumber\\
	&\leq\frac{\sum_{t=1}^{T}\psi\left(f(x_t^*)+c(x_t^*,x_{t-1}^*)\right)+\tilde{O}(T^{g(2d)})}{\sum_{t=1}^T\left(f(x_t^*)+c(x_t^*,x^*_{t-1})\right)}\label{step1}\\
	&\leq\psi+\frac{\tilde{O}(T^{g(2d)})}{TC}\label{step2}\\
	&=\psi+\tilde{O}(T^{g(2d)-1}),\nonumber
\end{align}
where \eqref{step1} is from Theorem 1 and $f(x)\geq0,c(x,y)\geq0$ within $\mathcal{X}$, \eqref{step2} is from Assumption 4 and the assumption in Corollary 1. 
\section{Proof of Theorem 2}
For epoch $m$, define $v_m^*=\arg\min_{x\in\mathcal{X}} f(x)+2\eta c(x,v_m)$ and $x_0^*=x_0$, where $v_m$ is the pivot point of epoch $m$ defined in Algorithm 3. For epoch $m$, the time horizon is from $l=\frac{m(m-1)}{2}+1$ to $l=\frac{m(m-1)}{2}+m$. Defining $t=\frac{m(m-1)}{2}+m$ and $x^*=\arg\min_{x\in\mathcal{X}}f(x)$, we have
\begin{align}
	&\sum_{l=\frac{m(m-1)}{2}+1}^{t-1}\left(f(x_l)+2\eta c(x_l,v_m)-f(v_m^*)-2\eta c(v_m^*,v_m)\right)+f(x_t)-f(x^*)\label{exp}\\
	&\geq  \sum_{l=\frac{m(m-1)}{2}+1}^{t}\left(f(x_l)+c(x_l,x_{l-1})\right)-\eta c(x_t,v_m)-\eta c(x_{\frac{m(m-1)}{2}},v_m)-\sum_{l=\frac{m(m-1)}{2}+1}^{t-1}\left(f(v_m^*)+2\eta c(v_m^*,v_m)\right)-f(x^*)\label{2step1}\\
	&\geq \sum_{l=\frac{m(m-1)}{2}+1}^{t}\left(f(x_l)+c(x_l,x_{l-1})\right)-\sum_{l=\frac{m(m-1)}{2}+1}^{t-1}\left(f(x_l^*)+2\eta c(x_l^*,v_m)\right)-2\eta D-f(x_t^*)\label{2step2}\\
	&\geq \sum_{l=\frac{m(m-1)}{2}+1}^{t}\left(f(x_l)+c(x_l,x_{l-1})\right)-\sum_{l=\frac{m(m-1)}{2}+1}^{t-1}\left(f(x_l^*)+2\eta^2( c(x_l^*,x_{l-1}^*)+\eta c(x_{l-1}^*,x^*)+\eta c(v_m,x^*))\right)\nonumber\\&-2\eta D-f(x_t^*)\label{2step3}\\
	&\geq \sum_{l=\frac{m(m-1)}{2}+1}^{t}\left(f(x_l)+c(x_l,x_{l-1})\right)-\sum_{l=\frac{m(m-1)}{2}+1}^{t}\left((1+2\eta^3/\lambda)f(x_l^*)+2\eta^2 c(x_l^*,x_{l-1}^*)\right)-2\eta^3 c(x_{\frac{m(m-1)}{2}},x^*)\nonumber\\&+2\eta^2c(x_t^*,x_{t-1}^*)+2\eta^3c(x_{t-1}^*,x^*)+2\eta^3c(x_t^*,x^*)-2\eta^3/\lambda (m-1)(f(v_m)-2f(x^*))-2\eta D\label{2step4}\\
	&\geq \sum_{l=\frac{m(m-1)}{2}+1}^{t}\left(f(x_l)+c(x_l,x_{l-1})\right)-\sum_{l=\frac{m(m-1)}{2}+1}^{t}\left((1+2\eta^3/\lambda)f(x_l^*)+2\eta^2 c(x_l^*,x_{l-1}^*)\right)\nonumber\\&-(2\eta+2\eta^3) D-2\eta^3/\lambda(m-1)(f(v_m)-f(x^*))\label{2step5}
\end{align}
where \eqref{2step1} is from $\eta c(x_l,v_m)+\eta c(x_{l-1},v_m)\geq c(x_l,x_{l-1})$ using Assumption 3, \eqref{2step2} is from Assumption 4 and the definition of $v_m^*$ and $x^*$, \eqref{2step3} is from using Assumption 3 for $c(x_l^*,v_m)$ and $c(x_{l-1}^*,v_m)$, \eqref{2step4} is from Assumption 2 and \eqref{2step5} is from Assumptions 2 and 4.

Now we want to bound $(m-1)(f(v_m)-f(x^*))$ in \eqref{2step5} for $m>1$ (it is $0$ when $m=1$). By the definition of $v_m$ in Algorithm 3, we have 
\begin{align*}
	f(v_m)+\varepsilon_{v_m}-f(x_{\frac{n(n+1)}{2}})-\varepsilon_{\frac{n(n+1)}{2}}\leq 0, \forall n\in\{1,...,m-1\}
\end{align*}
where $\varepsilon_{v_m}$ is the observation noise when observing $f(v_m)$ and $\varepsilon_{\frac{n(n+1)}{2}}$ is the observation noise when observing $f(x_{\frac{n(n+1)}{2}})$. Note that $x_{\frac{n(n+1)}{2}}$ is the point chosen by UE in epoch $n$. Therefore, for any $\epsilon>0$ and $m>1$, if
\begin{align*}
	\sum_{n=1}^{m-1}[\varepsilon_{\frac{n(n+1)}{2}}-\varepsilon_{v_m}]\leq \epsilon,
\end{align*}
we have
\begin{align*}
	(m-1)(f(v_m)-f(x^*))-\sum_{n=1}^{m-1}(f(x_{\frac{n(n+1)}{2}})-f(x^*))\leq \epsilon 
\end{align*}

Then for $m>1$ we have
\begin{align*}
	&\mathbb{P}[(m-1)(f(v_m)-f(x^*))-\sum_{n=1}^{m-1}(f(x_{\frac{n(n+1)}{2}})-f(x^*))\leq \epsilon ]\\&\geq \mathbb{P}[\sum_{n=1}^{m-1}[\varepsilon_{\frac{n(n+1)}{2}}-\varepsilon_{v_m}]\leq \epsilon]\geq 1-\exp(-\frac{\epsilon^2}{4(m-1)R})
\end{align*}
since the observation noise is $R$-subGaussian from Assumption 1. As a result, 
\begin{align}
	(m-1)(f(v_m)-f(x^*))-\sum_{n=1}^{m-1}(f(x_{\frac{n(n+1)}{2}})-f(x^*))\leq \sqrt{4(m-1)R\log\frac{M-1}{\delta}}\label{subGaussian}
\end{align}
with probability at least $1-\delta/(M-1)$.

For UE across $m$ epochs, we can regard them as running IGP-UCB on $f(x)$ for $m$ iterations. Therefore, we have
\begin{align}
	&\sum_{n=1}^{m-1}(f(x_{\frac{n(n+1)}{2}})-f(x^*))=O\left(B\sqrt{(m-1)\gamma_{m-1}}+\sqrt{(m-1)\gamma_{m-1}(\gamma_{m-1}+\log(1/\delta))}\right)\label{1lemma1}
\end{align}
holds together for all choices of $m\in\{2,...,M+1\}$ with probability at least $1-\delta$ from Lemma 1.
Combined with \eqref{subGaussian}, we have
\begin{align}
	(m-1)(f(v_m)-f(x^*))\leq \sqrt{4(m-1)R\log\frac{M-1}{\delta}}+O\left(B\sqrt{(m-1)\gamma_{m-1}}+\sqrt{(m-1)\gamma_{m-1}(\gamma_{m-1}+\log(1/\delta))}\right)\label{1lemma2}
\end{align}
holds together for all choices of $m\in\{2,...,M\}$ with probability at least $1-2\delta$, which finishes the bound of $(m-1)(f(v_m)-f(x^*))$.

Meanwhile, LE in epoch $m$ can be regarded as running IGP-UCB on $f(x)+2\eta c(x,v_m)$ for $m-1$ iterations. Then from Lemma 1, 
\begin{align}	
	&\sum_{l=\frac{m(m-1)}{2}+1}^{\frac{m(m+1)}{2}-1}\left(f(x_l)+2\eta c(x_l,v_m)-f(v_m^*)-2\eta c(v_m^*,v_m)\right)\nonumber\\&=O\left(B\sqrt{(m-1)\gamma_{m-1}}+\sqrt{(m-1)\gamma_{m-1}(\gamma_{m-1}+\log(M/\delta))}\right)\label{2lemma1}
\end{align}
with probability at least $1-\delta/M$. From \eqref{1lemma1} and \eqref{2lemma1}, we have
\begin{align}
	&\sum_{m=1}^M[\sum_{l=\frac{m(m-1)}{2}+1}^{\frac{m(m+1)}{2}-1}\left(f(x_l)+2\eta c(x_l,v_m)-f(v_m^*)-2\eta c(v_m^*,v_m)\right)+f(x_{\frac{m(m+1)}{2}})-f(x^*)]\nonumber\\&=\sum_{m=1}^MO\left(B\sqrt{m\gamma_{m}}+\sqrt{m\gamma_{m}(\gamma_{m}+\log(M/\delta))}\right)\label{bound}
\end{align}
with probability at least $1-2\delta$, which bounds the sum of \eqref{exp} over $m=1,\dots,M$. Combining it with \eqref{2step5} and \eqref{1lemma2}, we have
\begin{align*}
	&\sum_{m=1}^M[\sum_{l=\frac{m(m-1)}{2}+1}^{\frac{m(m-1)}{2}+m}\left(f(x_l)+c(x_l,x_{l-1})\right)-\sum_{l=\frac{m(m-1)}{2}+1}^{\frac{m(m-1)}{2}+m}\left((1+2\eta^3/\lambda)f(x_l^*)+2\eta^2 c(x_l^*,x_{l-1}^*)\right)]\\
	&=\sum_{l=1}^T\left(f(x_l)+c(x_l,x_{l-1})\right)-\sum_{l=1}^{T}\left((1+2\eta^3/\lambda)f(x_l^*)+2\eta^2 c(x_l^*,x_{l-1}^*)\right)\\
	&= \sum_{m=1}^MO\left(B\sqrt{m\gamma_{m}}+\sqrt{m\gamma_{m}(\gamma_{m}+\log(M/\delta))}\right)
\end{align*}
with probability at least $1-3\delta$ since \eqref{bound} and \eqref{1lemma2} both use the event \eqref{1lemma1}. From Assumptions 2 and 4, we know that $f$ and $c$ are positive within $\mathcal{X}$.
Set $\psi=\max\{1+2\eta^3/\lambda,2\eta^2\}$ and recall that $B\sqrt{m\gamma_{m}}+\sqrt{m}\gamma_{m}=m^{g(d)}$. Then since $f$ and $c$ are both positive within $\mathcal{X}$ and $T=1+2+...+M=\frac{M(M+1)}{2}$, we have
\begin{align*}
	\sum_{l=1}^T\left(f(x_l)+c(x_l,x_{l-1})\right)-\sum_{l=1}^{T}\psi\left(f(x_l^*)+c(x_l^*,x_{l-1}^*)\right)=\tilde{O}(T^{(g(d)+1)/2})
\end{align*}
with probability at least $1-3\delta$.

\section{Experiment Details of Section 6}
\subsubsection{Robot Pushing Problem}
The original 14-dimensional robot pushing problem was first tested in \cite{wang2018batched} without switching cost, where the authors implemented the simulation of pushing two objects with two robot hands in the Box2D physics engine. The original code is available at \url{https://github.com/zi-w/Ensemble-Bayesian-Optimization}. In this problem, we need to choose 14 control parameters that determine the location and rotation of the robot hands, pushing speed, moving direction and
pushing time. The lower limit of these parameters is $[-5,-5,-10,-10,2,0,-5,-5,-10,-10,2,0,-5,-5]$ and the upper limit is $[5,5,10,10,30,2\pi,5,5,10,10,30,2\pi,5,5]$. Denote the initial positions of the objects by $i_0,i_1$, the ending positions of the objects by $e_0,e_1$ and the goal locations of the objects by $g_0,g_1$, respectively. Then the reward is defined to be $r=\lVert g_0-i_0\rVert+\lVert g_1-i_1\rVert-\lVert g_0-e_0\rVert-\lVert g_1-e_1\rVert$, which is the progress made towards pushing the objects to the goal.


\subsubsection{Lunar Lander Problem}
The original 12-dimensional lunar lander problem in \cite{eriksson2019scalable} is to learn a controller for a lunar lander to minimize fuel consumption and the distance to a landing target, while also preventing crashes. The original code is available in the OpenAI gym: \url{https://github.com/openai/gym}. The $12$ controllable parameters of the lander include its angle and position, their respective time
derivatives, and so on. We use $[0,2]$ as the limit for each of these parameters.



\section{Running Time Results of Experiments in Section 6}
In this section, we show the running time of AS and GS for experiments in Section 6. For each algorithm, we run its $10$ tests concurrently on a server with a 2.7\,GHz Intel(R) Xeon(R) Platinum CPU including 16 processors, and 30\,GB RAM. We list the maximum, minimum and average running time of the $10$ tests of each algorithm in Table 1. For fairness, we just use the naive implementation of each algorithm without any empirical acceleration techniques proposed in previous BO works.
% Please add the following required packages to your document preamble:
% \usepackage{multirow}
\begin{table}[h]
	\centering
	\begin{tabular}{|cl|l|l|ll}
		\cline{1-4}
		\multicolumn{2}{|l|}{}                                          & Robot Pushing & Lunar Lander &  &  \\ \cline{1-4}
		\multicolumn{1}{|c|}{\multirow{3}{*}{Greedy Search}}      & Avg & 107993.2      & 49316.1      &  &  \\ \cline{2-4}
		\multicolumn{1}{|c|}{}                                    & Max & 127680        & 67624        &  &  \\ \cline{2-4}
		\multicolumn{1}{|c|}{}                                    & Min & 94107         & 31143        &  &  \\ \cline{1-4}
		\multicolumn{1}{|c|}{\multirow{3}{*}{Alternating Search}} & Avg & 55984.5       & 27973.2      &  &  \\ \cline{2-4}
		\multicolumn{1}{|c|}{}                                    & Max & 67893         & 30840        &  &  \\ \cline{2-4}
		\multicolumn{1}{|c|}{}                                    & Min & 44272         & 14680        &  &  \\ \cline{1-4}
	\end{tabular}
	\caption{Running time of GS and AS for the robot pushing and lunar lander problems. Times are in seconds.}
\end{table}
\vfill

\bibliography{shi_205}
\end{document}
