%\documentclass{uai2023} % for initial submission
\documentclass[accepted]{uai2023} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like

%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2023} % ptmx math instead of Computer
% Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2023} % newtx fonts (improves upon
 % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}


%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}

% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables

\usepackage{mathtools,amsthm, amssymb, mathrsfs, algpseudocode,algorithm, dsfont, amsfonts,listings,bm,tikz,xr}
% for cross referencing the main text
% PLEASE ONLY USE xr IN THE SUPPLEMENTARY MATERIAL. 
% In the main paper, hard code any cross-reference to the supplementary material. 


%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newtheorem*{theorem*}{Theorem}
\newtheorem{theorem}{Theorem}
\renewcommand*{\thetheorem}{\Alph{theorem}}
\newtheorem{lemma}{Lemma}
\renewcommand*{\thelemma}{\Alph{lemma}}
\newtheorem{assumption}{Assumption}
\newtheorem{definition}{Definition}
\newcommand{\expec}{\mathbb{E}}
\newcommand{\prob}{\mathbb{P}}
\newcommand{\indicator}{\mathds{1}}
\newcommand{\Var}{\mathrm{Var}}

\title{Heavy-tailed Linear Bandit with Huber Regression
\\(Supplementary Material)}

\externaldocument[main-]{kang_777}  




\author[1]{Minhyun Kang}
\author[1,2]{Gi-Soo Kim}

% Add affiliations after the authors
\affil[1]{
Artificial Intelligence Graduate School, UNIST,
Ulsan, Republic of Korea
}
\affil[2]{
Department of Industrial Engineering,  UNIST,
Ulsan, Republic of Korea
}



  
 \begin{document}


\onecolumn %% Turn this off if single column is desired for the supplement
\maketitle

\appendix
We present the detailed proofs of the results here. For some lemmas, we follow the lines of \cite{bastani2020online}, who prove analogous bounds for the Lasso estimator. Because some of the calculations differ, we present these proofs as well, and we indicate the correspondence in each such lemma.

\begin{proof}[Proof of Lemma \ref{main-lemma3}]
Since $\expec\left[XX^T \indicator_{( X\notin U)}\right] $ is positive semi-definite, 
    \begin{align*}
        \expec[XX^T|X \in U]
        &= \expec\left[XX^T \indicator_{( X\in U)}\right]\cdot \frac{1}{\prob(X\in U)}\\
         &\preccurlyeq \expec\left[XX^T \indicator_{( X\in U)}\right]\cdot \frac{1}{\mathit{p}}\\
          &\preccurlyeq \expec\left[XX^T \indicator_{( X\in U)}\right]\cdot \frac{1}{\mathit{p}} +  \expec\left[XX^T \indicator_{( X\notin U)}\right]\cdot \frac{1}{\mathit{p}}\\
        &= \expec[XX^T]\cdot \frac{1}{\mathit{p}}.
\end{align*}
\end{proof}

The following Lemma \ref{lemma2} states that the size of the set $T_{i,t}$ is $O(\log T)$.
\begin{lemma}[Lemma EC.8 of \cite{bastani2020online}]\label{lemma2}
When $t\geq (Kq)^2, \ Kq \geq 4,$
\begin{equation*}
    \frac{1}{2} q\log t < |T_{i,t}| < 2q\log t.
\end{equation*}
\end{lemma}

\begin{proof}[Proof of Lemma \ref{lemma2}] We follow the lines of Lemma EC.8 of \cite{bastani2020online}.
	Let $N_t$ be the largest integer with $t > 2^{N_t+1}Kq$. Then $t\leq 2^{N_t+2}Kq$ and
 \begin{equation*}
     (N_t+2)q \leq |T_{i,t}| \leq (N_t+3)q.
 \end{equation*}
	For the lower bound, since $t\leq 2^{N_t+2}Kq$, we have
 \begin{equation*}
     \frac{\log(t/Kq)}{\log 2}\leq N_t+2.
 \end{equation*}
	Hence,
	\begin{equation*}
		|T_{i,t}| \geq  q\frac{\log(t/Kq)}{\log 2} > q\log(t/Kq) \geq q\log(t/\sqrt{t}) =\frac{1}{2}q\log t.
	\end{equation*}
The strict inequality holds because $\log 2 < 1$ and $t > Kq$, and the last inequality follows from $t\geq(Kq)^2$.
For the upper bound, using $N_t+1 \leq \frac{\log(t/Kq)}{\log 2},$
\begin{align*}
	|T_{i,t}|  &\leq \left(\frac{\log(t/Kq)}{\log 2} +2\right)q\\
	&=\left(\frac{\log(t/Kq)+\log 4}{\log 2} \right)q\\
	&=\left(\frac{\log(4t/Kq)}{\log 2} \right)q\\
	&\leq 2q\log t.
\end{align*}
The last inequality follows from $Kq \geq 4$.
\end{proof}


\begin{proof}[Proof of Lemma \ref{main-forcedsampling}] We follow the lines of Proposition 2 of \cite{bastani2020online}.
    By Theorem~\ref{main-th:eigen}, we have
    \begin{equation*}
        \prob\left(\lambda_{\min}\left(\hat{\Sigma}(T_{i,t})\right)\leq \frac{\gamma\mathit{p}}{2}\right)\leq d\exp\left(\frac{-|T_{i,t}|\gamma\mathit{p}}{8}\right).
    \end{equation*}
The size of the set $T_{i,t}$ is bounded by
    \begin{equation*}
        |T_{i,t}| \geq  \frac{1}{2}q\log t  \geq \frac{8}{\gamma \mathit{p}}\log\left(\frac{t^2d}{\alpha}\right),
    \end{equation*}
provided that $q \geq \frac{48}{\gamma \mathit{p}}$ and $t \geq \frac{d}{\alpha}$.
    Hence, with probability at least $1-\frac{\alpha}{t^2}$, 
    \begin{equation}\label{eq3}
        \lambda_{\min}\left(\hat{\Sigma}(T_{i,t})\right) \geq \frac{\gamma \mathit{p}}{2}.
    \end{equation}
When $ q \geq \frac{192}{\gamma \mathit{p}}d^{1/2}$ and $t >\frac{2d+1}{\alpha}$, \ $|T_{i,t}| \geq 32\lambda_{\min}^{-1}\left(\hat{\Sigma}(T_{i,t})\right)d^{1/2}\log(t^2(2d+1)/\alpha).$ Then, Theorem~\ref{main-th:huber} is directly applicable with $\tau = \tau_0(|T_{i,t}|/\log(t^2(2d+1)/\alpha))^{1/(1+\delta)}, \ \tau_0 \geq\nu_{\delta}.$ Hence,
\begin{equation*}
    \prob\left(||\hat{\beta}(T_{i,t})-\beta_i ||_2 \leq \left(\frac{\log(t^2(2d+1)/\alpha)}{|T_{i,t}|}\right)^{\delta/(1+\delta)}\cdot 4\lambda_{\min}^{-1}\left(\hat{\Sigma}(T_{i,t})\right) \tau_0d^{1/2}\right) \geq 1-\frac{\alpha}{t^2}.
\end{equation*}
Together with (\ref{eq3}), when $ q \geq 6\left(\frac{32\tau_0d^{1/2}}{h\gamma \mathit{p}}\right)^{(1+\delta)/\delta}$ and $t \geq \frac{2d+1}{\alpha}$, with probability at least $1-\frac{2\alpha}{t^2},$
\begin{align*}
    ||\hat{\beta}(T_{i,t})-\beta_i||_2 &\leq \frac{h}{4}.
\end{align*}

\end{proof}

\begin{proof}[Proof of Lemma \ref{main-allsetlemma}] We follow the lines of Lemma EC.14 of \cite{bastani2020online}.
    We have
    \begin{equation*}
        \indicator_{(r\in \mathcal{A}_{i,t})} = \indicator_{(A_{r-1})}\cdot \indicator_{(x_r\in U_i)}\cdot \indicator_{(r \notin \cup_{j\in[K]} T_{j,t})}.
    \end{equation*}
    For $n = 0, 1, 2, \dots$,
    \begin{equation*}
        r \in \left[(2^n-1)Kq+1, 2^nKq \right]
    \end{equation*}
    are forced-sampling time steps and 
    \begin{equation*}
        r \in \left[2^nKq+1, (2^{n+1}-1)Kq\right]
    \end{equation*}
    are not.
    Let $N_t$ be the largest integer such that $t> 2^{N_t+1}Kq$ as before. Define the intervals
    \begin{equation*}
        V_{1,t} = \left[2^{N_t}Kq+1, (2^{N_t+1}-1)Kq \right], \ V_{2,t} = \left[2^{N_t+1}Kq+1, t \wedge (2^{N_t+2}-1)Kq\right],
    \end{equation*}
    and the sum of random variables 
    \begin{align*}
        M_{i,t}  :&=\sum_{r\in V_{1,t}} \indicator_{(r \in \mathcal{A}_{i,t})} + \sum_{r\in V_{2,t}} \indicator_{(r \in \mathcal{A}_{i,t})}\\
         & \leq \sum_{r=1}^t \indicator_{(r \in \mathcal{A}_{i,t})}\\
        &=  |\mathcal{A}_{i,t}|.
    \end{align*}
    Neither of the intervals $V_{1,t}$ and $V_{2,t}$ contains a forced-sampling time step, and hence the forced-sample estimator is not updated within these intervals. Therefore, we can write
    \begin{align*}
        M_{i,t} &= \sum_{r \in V_{1,t}} \indicator_{(A_{2^{N_t}Kq})}\cdot \indicator_{(x_r \in U_i)}+\sum_{r \in V_{2,t}} \indicator_{(A_{2^{N_t+1}Kq})}\cdot \indicator_{(x_r \in U_i)}\\
        & \geq \indicator_{(A_{2^{N_t}Kq})}\cdot  \indicator_{(A_{2^{N_t+1}Kq})}\cdot \sum_{r\in V_{1,t} \cup V_{2,t}} \indicator_{(x_r \in U_i)}.
    \end{align*}
The cardinality of the union of the two disjoint intervals is bounded from below by
    \begin{align*}
        |V_{1,t}\cup V_{2,t}| & = \left(t\wedge (2^{N_t+2}-1)Kq\right)-2^{N_t+1}Kq +\left(2^{N_t+1}Kq -Kq-2^{N_t}Kq\right)\\
        & = \left(t-2^{N_t}Kq-Kq\right)\wedge\left(3\cdot 2^{N_t}Kq -2Kq\right)\\
        &> \left(\frac{t}{2}-Kq\right) \wedge \left(\frac{3}{4}t-2Kq\right)\\
        &>\left(\frac{t}{2}-\frac{t}{80}\right) \wedge \left(\frac{3}{4}t-\frac{t}{40}\right) \\
        & = \frac{39}{80}t.
    \end{align*}
    The first inequality follows from $2^{N_t+1}Kq < t \leq 2^{N_t+2}Kq$. The last inequality follows from $t> (Kq)^2$ and $ q>80.$
    The cardinality of the union of the two disjoint intervals is bounded from above by
     \begin{align*}
        |V_{1,t}\cup V_{2,t}| &< t-2^{N_t}Kq-Kq\\
        & < t-\frac{t}{4} -Kq\\
        &<\frac{3}{4}t.
        \end{align*}
The probability of two events is bounded by
\begin{align*}
    \prob\left(A_{2^{N_t}Kq} \text{ and } A_{2^{N_t+1}Kq}\right)& \geq 1-\frac{2K\alpha}{(t/4)^2}-\frac{2K\alpha}{(t/2)^2}\\
    & = 1- \frac{40K\alpha}{t^2}\\
    & > 1-0.01.
\end{align*}
The last inequality is from $t^2>(Kq)^4$ and $\alpha \in(0,1)$.
Hence, we have
\begin{align*}
    \expec[M_{i,t}] & \geq \prob\left(A_{2^{N_t} Kq} \text{ and } A_{2^{N_t+1}Kq}\right) \mathit{p}|V_{1,t}\cup V_{2,t}|\\
    & \geq 0.48t\mathit{p} .
\end{align*}
Hoeffding's inequality implies
\begin{align*}
    \prob \left(\expec[M_{i,t}]-M_{i,t} \geq \eta \right) &\leq \exp\left(-\frac{2\eta^2}{|V_{1,t} \cup V_{2,t}|}\right)\\
    &\leq \exp \left(-\frac{8\eta^2}{3t}\right).
\end{align*}
Let $\eta = 0.23 t \mathit{p}.$ Then
\begin{align*}
    \prob(M_{i,t}<0.48t\mathit{p}-0.23t\mathit{p}) &\leq \exp \left(-\frac{8}{3}t(0.23\mathit{p})^2\right)\\
    & \leq \exp(-t\mathit{p}^2/9).
\end{align*}
Since $M_{i,t} \leq |\mathcal{A}_{i,t}|$,
\begin{equation*}
    \prob\left(|\mathcal{A}_{i,t}| < \frac{t\mathit{p}}{4}\right) \leq \exp(-t\mathit{p}^2/9) \leq \frac{\alpha}{t^2},
\end{equation*}
provided that $t \geq \frac{1}{\alpha}$ and $q \geq \frac{54}{\mathit{p}}.$
\end{proof}

We now provide the proof of the expected regret bound.

\begin{proof}[Proof of Theorem \ref{main-expectedregret}]
Lemma EC.19 of \cite{bastani2020online} states that the upper bound of expected regret can be decomposed into
\begin{align*}
    \sum_{t=1}^T\mathbb{E}[r_t] &= \sum_{t=1}^T\mathbb{E}[x^T\beta_{a^*(t)}-x^T\beta_{a(t)}]\\
    &\leq 2\sum_{i\in \mathcal{D}} \mathbb{P}(||\hat{\beta}(S_{a^*(t),t-1})-\beta_{a^*(t)}||_2 > \Delta)+ 2\sum_{i\in \mathcal{D}} \mathbb{P}(||\hat{\beta}(S_{a(t),t-1})-\beta_{a(t)}||_2 > \Delta)+4\Delta^2KC_0
\end{align*}
for $\Delta >0.$
From Lemma 7 with $\alpha = (2d+1)t,$ we have
\begin{equation*}
    \mathbb{P}\left(||\hat{\beta}(S_{i,t})-\beta_i||_2 \geq \left(\frac{4}{\mathit{p}t}\log t\right)^{\delta/(1+\delta)} \frac{32\tau_0d^{1/2}}{\gamma \mathit{p}}\right) \leq \frac{3(2d+1)}{t}
\end{equation*}
for $ i \in K_{opt}.$ Let $\Delta = \left(\frac{4}{\mathit{p}t}\log t\right)^{\delta/(1+\delta)} \frac{32\tau_0d^{1/2}}{\gamma \mathit{p}}$. Then,
\begin{equation*}
    \mathbb{E}[r_t]\leq \frac{12K(2d+1)}{t}+4\left(\frac{32\tau_0}{\gamma p}\right)^2 d \left(\frac{4}{pt} \log T \right)^{\frac{2\delta}{1+\delta}}KC_0.
\end{equation*}
The cumulative regret is bounded by
      \begin{align*}
         &\sum_{t=1}^T\mathbb{E}[r_t] \leq 12K(2d+1)(\log T+1)+4^7 d\left(\frac{\tau_0}{\gamma}\right)^2\frac{1}{p^3}KC_0((\log T)^2+\log T)\\
         &\text{when } \delta =1 \text{ and}\\
         &\sum_{t=1}^T\mathbb{E}[r_t] \leq 12K(2d+1)(\log T+1)+64^2 16^{\frac{\delta}{1+\delta}}d\left(\frac{\tau_0}{\gamma}\right)^2\frac{1}{p^{\frac{2+4\delta}{1+\delta}}}KC_0\left(\frac{1+\delta}{1-\delta}\right)T^{\frac{1-\delta}{1+\delta}}(\log T)^{\frac{2\delta}{1+\delta}}\\
         & \text{when } 0<\delta<1.
     \end{align*}

\end{proof}

\bibliography{reference}

\end{document}