\documentclass[accepted,colorlinks,citecolor=blue,urlcolor=blue,linkcolor=blue]{uai2023} 

\usepackage[american]{babel}
\usepackage{natbib}
\bibliographystyle{plainnat}
\renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} 
\usepackage{booktabs}
\usepackage{tikz} 
% \swap[<sep>]{<a>}{<b>}: emits <b><sep><a>, i.e. the two mandatory arguments
% in reversed order, joined by the optional separator (default "-").
\newcommand{\swap}[3][-]{#3#1#2}
\usepackage{comment}
\usepackage{algorithmic,algorithm}

\usepackage{microtype}
\usepackage{graphicx}

\usepackage{multirow,makecell}
\usepackage{amsmath,amssymb,amsthm,bm}
\usepackage{mathtools}
\usepackage{bbm}
\usepackage{enumerate}
\usepackage{subfigure}
\usepackage{color}

\usepackage[utf8]{inputenc} % allow utf-8 input
\usepackage[T1]{fontenc}    % use 8-bit T1 fonts

\usepackage{url}            % simple URL typesetting
\usepackage{booktabs}       % professional-quality tables
\usepackage{amsfonts}       % blackboard math symbols
\usepackage{nicefrac}       % compact symbols for 1/2, etc.
\usepackage{microtype}      % microtypography
\usepackage{xcolor}         % colors

\usepackage{hyperref}       % hyperlinks
\usepackage{nameref}
\usepackage{zref-xr,zref-user}
\zxrsetup{toltxlabel}
\zexternaldocument*{liu_595}

% Theorem-like environments. `theorem` owns the counter; every other
% environment below is declared with [theorem] and therefore shares it, so
% numbering runs consecutively across all kinds (Theorem 1, Lemma 2, ...).
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{conjecture}[theorem]{Conjecture}
\newtheorem{assumption}[theorem]{Assumption}
\newtheorem{observation}[theorem]{Observation}
\newtheorem{fact}[theorem]{Fact}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{example}[theorem]{Example}

% Upright optimisation operators. The starred form places sub/superscripts as
% limits in display style, matching a bare \mathop; amsmath is loaded above.
\DeclareMathOperator*{\argmin}{argmin}
\DeclareMathOperator*{\argmax}{argmax}
\DeclareMathOperator*{\minimize}{minimize}
\DeclareMathOperator*{\st}{subject\,\,to}


% Markup helpers: typeset the argument in red / blue via \textcolor.
\newcommand{\red}[1]{\textcolor{red}{#1}}
\newcommand{\blue}[1]{\textcolor{blue}{#1}}


% \printfnsymbol{<n>}: passes <n> to \@fnsymbol and prints the result as a
% superscript. \makeatletter/\makeatother are required because the name
% \@fnsymbol contains "@".
\makeatletter
\newcommand{\printfnsymbol}[1]{%
  \textsuperscript{\@fnsymbol{#1}}%
}
\makeatother


% Notation shorthands (math mode). All are defined with \def, which replaces
% any existing meaning without warning.
% NOTE(review): \P, \j and \dim have standard LaTeX meanings, and \th is a text
% symbol under the T1 encoding loaded above; the definitions below shadow them.
% Confirm nothing (e.g. accented names in the bibliography) needs the originals.

% Blackboard-bold sets/operators and the indicator symbol.
\def\R{\mathbb{R}}
\def\E{\mathbb{E}}
\def\P{\mathbb{P}}
\def\1{\mathbbm{1}}
% \j expands to the braced superscript/subscript index "(j)".
\def\j{{(j)}}
% Upright multi-letter operator names.
\def\Dis{\mathrm{Dis}}
\def\kl{\mathrm{KL}}
\def\Cov{\mathrm{Cov}}
\def\Var{\mathrm{Var}}
\def\half{\frac{1}{2}}
\def\th{\mathrm{th}}
\def\tr{\mathrm{tr}}
\def\df{\mathrm{df}}
\def\dim{\mathrm{dim}}
\def\col{\mathrm{col}}
\def\row{\mathrm{row}}
\def\nul{\mathrm{null}}
\def\rank{\mathrm{rank}}
\def\nuli{\mathrm{nullity}}
\def\spa{\mathrm{span}}
\def\sign{\mathrm{sign}}
\def\supp{\mathrm{supp}}
\def\diag{\mathrm{diag}}
\def\aff{\mathrm{aff}}
\def\conv{\mathrm{conv}}
% Hatted / tilded symbols. Note that \htheta uses the capital \Theta, while
% \btheta is the bold lowercase \theta.
\def\hy{\hat{y}}
\def\ty{\tilde{y}}
\def\hbeta{\hat{\beta}}
\def\tbeta{\tilde{\beta}}
\def\htheta{\hat{\Theta}}
\def\halpha{\hat{\alpha}}
\def\btheta{\boldsymbol{\theta}}
\def\hf{\hat{f}}
\def\hmu{\hat{\mu}}
\def\hlambda{{\hat{\lambda}}}
\def\heta{{\hat{\eta}}}
\def\hR{{\widehat{R}}}

% Calligraphic shorthands: \cA, \cB, ..., \cZ expand to \mathcal{A}, ...,
% \mathcal{Z}. They are generated by iterating over the uppercase alphabet;
% each iteration defines \c<letter> unconditionally, as a plain \def would.
\makeatletter
\@tfor\mc@letter:=ABCDEFGHIJKLMNOPQRSTUVWXYZ\do{%
  \expandafter\edef\csname c\mc@letter\endcsname{\noexpand\mathcal{\mc@letter}}%
}
\makeatother



\title{No-Regret Linear Bandits beyond Realizability\\(Supplementary Material)}

\author[1]{Chong Liu}
\author[1]{Ming Yin}
\author[1]{Yu-Xiang Wang}
\affil[1]{
    Department of Computer Science\\
    University of California\\
    Santa Barbara, CA 93106, USA
}

\begin{document}

\onecolumn
\maketitle

\section{Proof of Proposition~\ref{prop:perservation}}\label{sec:pres}

Equivalently, $\rho$-gap-adjusted misspecification (Definition \ref{def:lm}) satisfies 
\begin{equation}\label{eqn:rho_miss}
 \left|f(x) - f_0(x) \right|\leq \rho    
\left|f^* - f_0(x)\right|,\;\;\forall x \in \cX.
\end{equation}

\begin{proof}[Proof of preservation of max value: $\max_{x\in\mathcal{X}}f(x)=f^*$]

Let $f^*_w := \max_{x\in\mathcal{X}}f(x)$. We first prove $f^*_w\leq f^*$ by contradiction. Suppose $f^*_w > f^*$, since $\mathcal{X}$ is compact, there exists $x_w\in\cX$ such that $f(x_w)=f^*_w>f^*$. Then by eq. \eqref{eqn:rho_miss} this implies
\[
f(x_w)-f_0(x_w)\leq \rho (f^*-f_0(x_w))\Rightarrow f^*<f^*_w=f(x_w)\leq \rho f^*+(1-\rho)f_0(x_w)\leq f^*
\]
Contradiction! Therefore, $f_w^*\leq f^*$. On the other hand, choose $x_0\in\argmax_{x\in\cX}f_0(x)$, then by eq. \eqref{eqn:rho_miss} $f(x_0)=f_0(x_0)=f^*$. This implies $f_w^*\geq f^*$. Combining both results, we obtain $f_w^*= f^*$.
\end{proof}


\begin{proof}[Proof of preservation of maximizers: $\argmax_{x}f(x) =\argmax_{x}f_{0}(x)$]

Using that $f(x)\leq \rho f^*+(1-\rho)f_0(x)$ and $\max_{x\in\mathcal{X}}f(x)=f^*$, it is easy to verify $\argmax_{x}f(x) \subset\argmax_{x}f_{0}(x)$. On the other hand, if $x'\in\argmax_{x}f_{0}(x)$, then by eq. \eqref{eqn:rho_miss} $f(x')=f_0(x')=f^*$ and this means $\argmax_{x}f_0(x) \subset\argmax_{x}f(x)$. 
\end{proof}

\begin{proof}[Proof of self-bounding property]
This directly comes from the definition.
\end{proof}


\section{Property of Weak $\rho$-Gap-Adjusted Misspecification}\label{sec:weak}

First we recall Definition \ref{def:lm_weak}.

\begin{definition}[Restatement of Weak $\rho$-gap-adjusted misspecification]
Denote $f_w^*=\max_{x\in\mathcal{X}} f(x)$. Then we say $f$ is a (weak) $\rho$-gap-adjusted misspecification approximation of $f_0$ for a parameter $0 \leq \rho < 1$ if:
\begin{align*}
\sup_{x \in \cX} \left| \frac{f(x) - f_w^*+f^*-f_0(x)}{f^* - f_0(x)}\right|\leq \rho.
\end{align*}
\end{definition}

Under the weak $\rho$-gap-adjusted misspecification condition, it no longer holds that $f_w^*=f^*$ in general. However, the condition still preserves the maximizers.

\begin{proposition}\label{prop_weka_rho}
 Under the weak $\rho$-gap-adjusted misspecification condition, it holds that \[\argmax_{x}f(x) =\argmax_{x}f_{0}(x).\]
\end{proposition}
\begin{proof}
Suppose $x'\in\argmax_{x}f(x)$, then by definition
\[
|f^*-f_0(x')|=|f(x')-f_w^*+f^*-f_0(x')|\leq \rho |f^*-f_0(x')|\Rightarrow (1-\rho) |f^*-f_0(x')|\leq 0\Rightarrow x'\in\argmax_{x}f_0(x).
\]
On the other hand, if $x'\in\argmax_{x}f_0(x)$, then
\[
|f_w^*-f(x')|=|f(x')-f_w^*+f^*-f_0(x')|\leq \rho |f^*-f_0(x')|=0\Rightarrow x'\in\argmax_{x}f(x). 
\]
\end{proof}


The next proposition shows that the weak $\rho$-gap-adjusted misspecification condition characterizes the suboptimality gap between $f$ and $f_0$.

\begin{proposition}
    Denote $g(x):= f^*_w-f(x)\geq 0$, $g_0(x):=f^*-f_0(x)\geq 0$, then the weak $\rho$-gap-adjusted misspecification condition implies:
    \[
    (1-\rho)g_0(x)\leq g(x)\leq (1+\rho) g_0(x),\quad x\in\cX.
    \]
\end{proposition}
This can be proved directly by the triangle inequality: the condition is exactly $|g_0(x)-g(x)|\leq \rho\, g_0(x)$. This reveals that the weak $\rho$-gap-adjusted misspecification condition requires $g(x)$ to live in the band $[(1-\rho)g_0(x),(1+\rho) g_0(x)]$, while the concrete maximum values $f_w^*$ and $f^*$ can be arbitrarily different. 

\section{Linear Bandits under the Weak $\rho$-Gap-Adjusted Misspecification}\label{sec:weak_regret}

We need to slightly modify LinUCB \citep{abbasi2011improved} and work with the following LinUCBw algorithm.

\begin{algorithm}[!htbp]
\caption{LinUCBw (adapted from \citet{abbasi2011improved})}
	\label{alg:linucb2}
	{\bf Input:}
	Predefined sequence $\beta_t$ for $t=1,2,3,...$ as in eq. \eqref{eq:beta_t_2};
 Set $\lambda=\sigma^2/C^2_w$ and $\mathrm{Ball}_0 = \cW$.
	\begin{algorithmic}[1]
	    \FOR{$t = 0,1,2,... $}
	    \STATE Select $x_t=\argmax_{x \in \cX} \max_{[w^\top,c] \in \mathrm{Ball}_t} [w^\top,c] \begin{bmatrix}x\\1\end{bmatrix}$.
	    \STATE Observe $y_t = f_0(x_t) + \eta_t$.
     \STATE Update 
     \begin{align*}
\Sigma_{t+1} = \lambda I_{d+1} + \sum_{i=0}^{t} \begin{bmatrix}x_i\\1\end{bmatrix} \cdot [x^\top_i,1] \ \mathrm{where}\  \Sigma_0 = \lambda I_{d+1}.
\end{align*}
	    \STATE Update 
	    \begin{align*}
\begin{bmatrix}\hat{w}_{t+1}\\\hat{c}_{t+1}\end{bmatrix} = \argmin_{w,c} \lambda \left \|\begin{bmatrix}w\\c\end{bmatrix} \right\|^2_2+ \sum_{i=0}^{t} (w^\top x_i +c- y_i)^2.
\end{align*}
    \STATE Update
	    \begin{align*}
     \mathrm{Ball}_{t+1} = \left \{
    \begin{bmatrix}w\\c\end{bmatrix} \bigg\rvert \left\|\begin{bmatrix}w\\c\end{bmatrix} - \begin{bmatrix}\hat{w}_{t+1}\\\hat{c}_{t+1}\end{bmatrix} \right\|^2_{\Sigma_{t+1}} \leq \beta_{t+1} \right\}.
    \end{align*}
		\ENDFOR
	\end{algorithmic}
\end{algorithm}


\begin{theorem}\label{thm:2}
Suppose Assumptions \ref{ass:boundedness}, \ref{ass:unique}, and \ref{ass:rho} hold. W.l.o.g., assume $c^*=f^*-f_w^*\leq F$. Set 
\begin{align}
\beta_t = 8\sigma^2 \left(1 + (d+1)\log\left(1+ \frac{t C^2_b (C^2_w+F^2) }{d \sigma^2} \right) + 2\log \left(\frac{\pi^2 t^2}{3\delta} \right)\right).\label{eq:beta_t_2}
\end{align} 
Then Algorithm~\ref{alg:linucb2} guarantees w.p. $> 1-\delta$ simultaneously for all $T=1,2,...$
\begin{align*}
R_T &\leq F +c^*+ \sqrt{\frac{8 (T-1) \beta_{T-1} (d+1)}{(1-\rho)^2} \log \left( 1 + \frac{T C^2_b (C^2_w+F^2) }{d \sigma^2 }\right)}.
\end{align*}
\end{theorem}

\begin{remark}
The result again shows that the LinUCBw algorithm achieves $\tilde{O}(\sqrt{T})$ cumulative regret and thus it is also a no-regret algorithm under the weaker condition (Definition \ref{def:lm_weak}). Note that Definition \ref{def:lm_weak} is quite weak: it does not even require the true function to lie within the approximation function class.
\end{remark}

\begin{proof}

The analysis is similar to the $\rho$-gap-adjusted case but includes $c^*=f^*-f^*_w$. For instance, let $\Delta^w_t$ denote the deviation term of our linear function from the true function at $x_t$, then
\begin{align*}
\Delta^w_t = f_0(x_t) - w^\top_* x_t-c^*,
\end{align*}
and our observation model (eq. \eqref{eq:obs}) becomes
\begin{align*}
y_t = f_0(x_t) + \eta_t = w_*^\top x_t + c^* + \Delta^w_t + \eta_t.
\end{align*}
Then similar to Lemma~\ref{lem:delta}, we have the following lemma, whose proof is nearly identical to Lemma~\ref{lem:delta}.
\begin{lemma}[Bound of deviation term]
$\forall t \in \{0,1,\ldots,T-1\}$,
\begin{align*}
|\Delta^w_t | \leq \frac{\rho}{1-\rho} w^\top_*(x_* - x_t).
\end{align*}
\end{lemma}

We also provide the following lemma, which is the counterpart of Lemma~\ref{lem:gap}.

\begin{lemma}
Define $u_t = \left \|\begin{bmatrix}x_t\\1\end{bmatrix} \right\|_{\Sigma_t^{-1}}$ and assume $\beta_t$ is chosen such that $\begin{bmatrix}w_*\\c^*\end{bmatrix}\in \mathrm{Ball}_t$.
Then
\begin{align*}
w_*^\top (x_* - x_t) \leq 2 \sqrt{\beta_t} u_t.
\end{align*}
\end{lemma}
\begin{proof}
Let $[\tilde{w}^\top,\tilde{c}]$ denote the parameter that achieves $\max_{[w^\top,c] \in \mathrm{Ball}_t} w^\top x_t+c$. By the optimality of $x_t$, 
\begin{align*}
w_*^\top x_* - w^\top_* x_t &=\begin{bmatrix}w_*^\top,c^*\end{bmatrix} \begin{bmatrix}x_*\\1\end{bmatrix}-\begin{bmatrix}w_*^\top,c^*\end{bmatrix} \begin{bmatrix}x_t\\1\end{bmatrix}\\
&\leq \begin{bmatrix}\tilde{w}^\top,\tilde{c}\end{bmatrix} \begin{bmatrix}x_t\\1\end{bmatrix} - \begin{bmatrix}w_*^\top,c^*\end{bmatrix} \begin{bmatrix}x_t\\1\end{bmatrix}\\
&= (\begin{bmatrix}\tilde{w}^\top,\tilde{c}\end{bmatrix} - \begin{bmatrix}\hat{w}_t^\top,\hat{c}_t\end{bmatrix}+\begin{bmatrix}\hat{w}_t^\top,\hat{c}_t\end{bmatrix}-\begin{bmatrix}w_*^\top,c^*\end{bmatrix}) \begin{bmatrix}x_t\\1\end{bmatrix}\\
&\leq \left \|\begin{bmatrix}\tilde{w}^\top,\tilde{c}\end{bmatrix} - \begin{bmatrix}\hat{w}_t^\top,\hat{c}_t\end{bmatrix}\right\|_{\Sigma_t} \left \|\begin{bmatrix}x_t\\1\end{bmatrix} \right \|_{\Sigma^{-1}_t} + \left \|\begin{bmatrix}\hat{w}_t^\top,\hat{c}_t\end{bmatrix}-\begin{bmatrix}w_*^\top,c^*\end{bmatrix}\right \|_{\Sigma_t} \left \|\begin{bmatrix}x_t\\1\end{bmatrix}\right \|_{\Sigma^{-1}_t}\\
&\leq 2\sqrt{\beta_t} u_t
\end{align*}
where the second inequality applies H\"older's inequality; the last line uses the definition of $\mathrm{Ball}_t$ (note that both $\begin{bmatrix}\tilde{w}^\top,\tilde{c}\end{bmatrix},\begin{bmatrix}w_*^\top,c^*\end{bmatrix}\in \mathrm{Ball}_t$).
\end{proof} 

The rest of the analysis follows the analysis of Theorem~\ref{thm:main}.
\end{proof}

\section{Simulation}\label{sec:simulation}

In this section, we run a simulation on a $1$-dimensional test function shown in Figure \ref{fig:exp1}. We run the first $10$ iterations with uniform sampling and the remaining $100$ iterations with the LinUCB algorithm. In Figure \ref{fig:exp2} we can see that the cumulative regret increases during uniform sampling but stops increasing once LinUCB is running. The reason is that, under gap-adjusted misspecification, LinUCB is able to quickly find the optimal point $x_*=0$.

\begin{figure*}[!htbp]
	\centering 
	\subfigure[$1$-dimensional test function.]{\label{fig:exp1}\includegraphics[width=0.45\linewidth]{cr_experiment.pdf}}
	\subfigure[Cumulative regret.]{\label{fig:exp2}\includegraphics[width=0.45\linewidth]{cr.pdf}}
 \caption{Simulation function and result.}
\end{figure*}

\bibliography{bib}

\end{document}