\documentclass[accepted]{uai2023} % for initial submission
% \documentclass[accepted]{uai2023} % after acceptance, for a revised
% version; also before submission to
% see how the non-anonymous paper
% would look like

%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2023} % ptmx math instead of Computer
% Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2023} % newtx fonts (improves upon
 % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams
\usepackage{marvosym}

% for cross referencing the main text
% PLEASE ONLY USE xr IN THE SUPPLEMENTARY MATERIAL. 
% In the main paper, hard code any cross-reference to the supplementary material. 
\usepackage{xr} 
\makeatletter
\newcommand*{\addFileDependency}[1]{% argument=file name and extension
  \typeout{(#1)}
  \@addtofilelist{#1}
  \IfFileExists{#1}{}{\typeout{No file #1.}}
}
\makeatother

\newcommand*{\myexternaldocument}[1]{%
    \externaldocument{#1}%
    \addFileDependency{#1.tex}%
    \addFileDependency{#1.aux}%
}
\myexternaldocument{gou_341}

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

% Load macros and some necessary packages
\input{macros}

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{Stochastic Graphical Bandits with Heavy-Tailed Rewards\\(Supplementary Material)}

% The standard author block has changed for UAI 2023 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is authomatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:<gouyt@lamda.nju.edu.cn>?Subject=Your UAI 2023 paper}{Yutian Gou}}
\author[2]{\href{mailto:<yijinfeng@jd.com>?Subject=Your UAI 2023 paper}{Jinfeng Yi}}
\author[1\thanks{Corresponding author.}]{\href{mailto:<zhanglj@lamda.nju.edu.cn>?Subject=Your UAI 2023 paper}{Lijun Zhang}}

% Add affiliations after the authors
\affil[1]{%
    National Key Laboratory for Novel Software Technology\\
    Nanjing University\\
    Nanjing 210023, China
}
\affil[2]{%
    JD AI Research\\ 
    Beijing 100176\\
    China
}
  
\begin{document}
  
\onecolumn %% Turn this off if single column is desired for the supplement
\maketitle

\section{OMITTED PROOFS FOR SECTION \ref{main:1}}\label{proof:main:1}
In this section, we present the proofs for RUN-TEM, RUNE-TEM and RUNE-MoM omitted in the first section of our main results.
\subsection{Technical Lemmas}
\begin{lem}[Bernstein's inequality for bounded distributions \citep{book/Vershynin2018}]\label{bernstein}
    Let $X_1,\cdots, X_n$ be $n$ independent zero-mean random variables. Suppose $|X_i|\le K$ and $\E[X_i^2]\le u$ for all $i\in[n]$. Then for any $t > 0$, we have
    \begin{equation}\label{bernstein:1}
        \Pr\left[ \frac{1}{n}\sum_{i=1}^n X_i \ge t\right] \le \exp\left(-\frac{nt^2}{2(u+Kt/3)}\right) ~,
    \end{equation}
    also, 
    \begin{equation}\label{bernstein:2}
        \Pr\left[ \frac{1}{n}\sum_{i=1}^n X_i \le -t\right] \le \exp\left(-\frac{nt^2}{2(u+Kt/3)}\right) ~,
    \end{equation}
    and therefore, 
    \begin{equation}\label{bernstein:3}
        \Pr\left[ \frac{1}{n}\sum_{i=1}^n X_i \ge \sqrt{\frac{2ut}{n}}+\frac{2Kt}{3n}\right] \le e^{-t} ~.
    \end{equation}
\end{lem}

\subsection{Analysis for RUN-TEM}
\begin{proof}[of Proposition \ref{prop:1}]
    Here, we follow the same computing process as the proof of Lemma 1 in \citep{journals/tit/Bubeck2013}. First of all, we will bound $\E[X_t^2\Ibb_{|X_t|\le B_t}]$ by the following computation
    \begin{equation}\label{proof-prop:1-1}
    \begin{aligned}
        \E[X_t^2\Ibb_{|X_t|\le B_t}]
        &= \int_{x\sim X_t} x^2\Ibb_{|x|\le B_t} \mathrm{d}x \\
        &= \int_{x\sim X_t} |x|^{1+\epsilon}\Ibb_{|x|\le B_t}|x|^{1-\epsilon} \d x \\
        &\le B_t^{1-\epsilon}\int_{|x|\le B_t} |x|^{1+\epsilon} \d x \\
        &\le B_t^{1-\epsilon} \E[|X_t|^{1+\epsilon}] \le vB_t^{1-\epsilon} ~.
    \end{aligned}
    \end{equation}
    Also, we can bound $\E[X_t\Ibb_{|X_t|> B_t}]$ by a similar calculation
    \begin{equation}\label{proof-prop:1-2}
    \begin{aligned}
        \E[X_t\Ibb_{|X_t| > B_t}]
        &= \int_{x\sim X_t} x\Ibb_{|x|> B_t} \mathrm{d}x \\
        &= \int_{x\sim X_t} \frac{|x|^{1+\epsilon}}{|x|^{\epsilon}}\Ibb_{|x|> B_t} \mathrm{d}x \\
        &< \frac{1}{B_t^{\epsilon}}\int_{|x|> B_t} |x|^{1+\epsilon} \mathrm{d}x \\
        &\le \frac{1}{B_t^{\epsilon}} \E[|X_t|^{1+\epsilon}] \le \frac{v}{B_t^{\epsilon}} ~.
    \end{aligned}
    \end{equation}
    Then, by combining \eqref{proof-prop:1-1} and \eqref{proof-prop:1-2} with Bernstein's inequality~\eqref{bernstein:3}, we have that, with probability at least $1-\delta$,
% \clearpage
    \begin{equation}
    \begin{aligned}
        \E[X] - \frac{1}{n}\sum_{t=1}^n X_t\Ibb_{|X_t|\le B_t} 
        &= \frac{1}{n}\sum_{t=1}^n (\E[X]-\E[X\Ibb_{|X|\le B_t}]) + \frac{1}{n}\sum_{t=1}^n (\E[X\Ibb_{|X|\le B_t}]-X_t\Ibb_{|X_t|\le B_t}) \\
        &= \frac{1}{n}\sum_{t=1}^n \E[X\Ibb_{|X|> B_t}] + \frac{1}{n}\sum_{t=1}^n (\E[X\Ibb_{|X|\le B_t}]-X_t\Ibb_{|X_t|\le B_t}) \\
        &\le \frac{1}{n}\sum_{t=1}^n \frac{v}{B_t^{\epsilon}} + \sqrt{\frac{2B_n^{1-\epsilon}v\log(1/\delta)}{n}} + \frac{2B_n\log(1/\delta)}{3n} ~.
    \end{aligned}
    \end{equation}
    Finally, substitute $B_t=(\frac{vt}{\log(1/\delta)})^{\frac{1}{1+\epsilon}}(\epsilon\in(0,1])$ into the above inequality, we get that, with probability at least $1-\delta$,
    \begin{equation}
    \begin{aligned}
        \E[X] - \frac{1}{n}\sum_{t=1}^n X_t\Ibb_{|X_t|\le B_t} 
        &\le \left(\frac{1}{n}\sum_{t=1}^n \left(\frac{n}{t}\right)^{\frac{\epsilon}{1+\epsilon}} + \sqrt{2} +\frac{2}{3}\right)v^{\frac{1}{1+\epsilon}}\left( \frac{\log(1/\delta)}{n}\right)^{\frac{\epsilon}{1+\epsilon}} \\
        &\le  \left(\frac{1}{\sqrt{n}}\sum_{t=1}^n \frac{1}{\sqrt{t}} + \sqrt{2} +\frac{2}{3}\right)v^{\frac{1}{1+\epsilon}}\left( \frac{\log(1/\delta)}{n}\right)^{\frac{\epsilon}{1+\epsilon}} \\
         &\le  \left(\frac{1}{\sqrt{n}}(2\sqrt{n}-1) + \sqrt{2} +\frac{2}{3}\right)v^{\frac{1}{1+\epsilon}}\left( \frac{\log(1/\delta)}{n}\right)^{\frac{\epsilon}{1+\epsilon}} \\
         &<  5v^{\frac{1}{1+\epsilon}}\left( \frac{\log(1/\delta)}{n}\right)^{\frac{\epsilon}{1+\epsilon}} ~,
    \end{aligned}
    \end{equation}
    where the third inequality uses the following summation formula
    \begin{equation}
    \begin{aligned}
        \sum_{t=1}^n \frac{1}{\sqrt{t}} \le 1+\int_{1}^n \frac{1}{\sqrt{x}} \d x = 2\sqrt{n}-1~.
    \end{aligned}
    \end{equation}
    Now, we have finished proving the first part of Proposition~\ref{prop:1}; the second part follows the same scheme, so we omit it.
\end{proof}
\begin{proof}[of Theorem \ref{thm:RUN-TEM}]
	Here, we use a proof sketch similar to  \citep{conf/uai/Caron2012}. First of all, we consider a clique covering $\C$ of $G = (V,E)$ and define the intra-clique regret $R_C(T)$ for any $C\in\C$ by
	\begin{equation}
		R_C(T) = \sum_{i\in C}\sum_{t\le T} \Delta_i \Ibb_{\{ I_t=i\}} ~.
	\end{equation}

	By Definition \ref{clique-covering}, we have that $V=\cup_{C\in\C} C$ and furthermore $R_{T}\le\sum_{C\in\C} R_C(T)$. From now on, we will focus on upper bounding the intra-clique regret for a given clique $C\in\C$ and abbreviate $R_C(T)$ to $R_C$.
	
	Let $T_C(t)=\sum_{i\in C} T_i(t)$ denote the number of pulls for any arm in clique $C$ up to time $t$. Then, for any positive integer $\ell_{C}$,
	\begin{small}
	\begin{equation}\label{pthm:1-1}
		\begin{aligned}
		\hspace{-2mm}
		\E[R_C]
		&= \E\left[\sum_{i\in C}\sum_{t\le T} \Delta_i \Ibb_{\{I_t=i\}} \right] \\
		&= \sum_{i\in C}\sum_{t\le T} \Delta_i \Pr[ I_t=i] \\
		&= \sum_{i\in C}\sum_{t\le T} \Delta_i \Pr[ I_t=i, T_{C}(t-1) < \ell_{C}] + \sum_{i\in C}\sum_{t\le T} \Delta_i \Pr[ I_t=i, T_{C}(t-1) \ge \ell_{C}] \\
		&\le \ell_{C}\Delta_{C}^{\max} + \underbrace{\sum_{\substack{i\in C\\ t\le T}} \Delta_i \Pr\left[I_t=i,
		    T_{C}(t-1) \ge \ell_{C}
			\right]}_{(a)} .
		\end{aligned}
	\end{equation}
	\end{small}
	where $\Delta_{C}^{\max} := \max_{i\in C} \Delta_i$ is the maximum reward gap in clique $C$. 

	Now, we analyze term $(a)$ using the similar tricks applied by  \citep{journals/ml/Auer2002a}. Let $c_{i}(t)=5v^{\frac{1}{1+\epsilon}}( \frac{4\log(t)}{O_i(t)})^{\frac{\epsilon}{1+\epsilon}}$ and $i^{\star}$ be the unique optimal arm, then (a) can be computed by
	\begin{small}
	\begin{equation}
		\begin{aligned}
			\sum_{\substack{i\in C \\t < T}} \Delta_i \Pr\left[ \muh_i(t)+c_i(t) \ge \muh_{i^{\star}}(t)+c_{i^{\star}}(t),
			T_{C}(t) \ge \ell_{C}
			\right] 
			&\le \sum_{\substack{i\in C \\t < T}} \Delta_i \Pr\left[ \max_{\ell_{C}\le s_i\le t}\muh_i(s_i)+c_i(s_i) \ge \min_{0\le s\le t}\muh_{i^{\star}}(s)+c_{i^{\star}}(s)\right] \\
			&\le \sum_{\substack{i\in C \\t < T}}\sum_{s_i=\ell_{C}}^{t}\sum_{s=0}^{t} \Delta_i \Pr\left[\muh_i(s_i)+c_{i}(s_i) \ge \muh_{i^{\star}}(s)+c_{i^{\star}}(s)\right] .
		\end{aligned}
	\end{equation}
	\end{small}
	
	Next, observe that $\muh_i(s_i)+c_{i}(s_i) \ge \muh_{i^{\star}}(s)+c_{i^{\star}}(s)$ implies that at least one of the following three independent events must hold:  
	\begin{equation}\label{cond:1}
		\muh_i(s_i) \ge \mu_i + c_{i}(s_i) ~,
	\end{equation}
	\begin{equation}\label{cond:2}
		 \muh_{i^{\star}}(s) \le \mu^{\star} - c_{i^{\star}}(s) ~,
	\end{equation}
	\begin{equation}\label{cond:3}
		\mu^{\star} < \mu_i + 2c_{i}(s_i) ~.
	\end{equation}
	Otherwise, assume that all three inequalities are false, we have that
	\begin{equation}
	\begin{aligned}
		\muh_i(s_i)+c_{i}(s_i) 
		< \mu_i + 2c_{i}(s_i) 
		\le \mu^{\star} <  \muh_{i^{\star}}(s)+c_{i^{\star}}(s) ~,
	\end{aligned}
	\end{equation}
	which implies that $I_t\neq i$.
	
	If we choose $\ell_{C}=\lceil \frac{40(10v)^{1/\epsilon}\log(T)}{(\Delta_{C}^{\min})^{(1+\epsilon)/\epsilon}}\rceil$, where $\Delta_{C}^{\min}=\min_{i\in C} \Delta_i$ is the minimum nonzero reward gap in clique $C$, then for $\forall i\neq i^{\star}\in C$ and $s_i\ge \ell_{C}$, 
	\begin{equation}
	\begin{aligned}
% 	\hspace{-10mm}
	    \mu^{\star} - \mu_i - 2c_{i}(s_i) 
	    = \Delta_{i} - 10v^{\frac{1}{1+\epsilon}}\left( \frac{4\log(T)}{s_i}\right)^{\frac{\epsilon}{1+\epsilon}} 
		\ge 0 ~,
	\end{aligned}
	\end{equation}
	which implies that \eqref{cond:3} is always false. Thus, $\muh_i(s_i)+c_{i}(s_i) \ge \muh_{i^{\star}}(s)+c_{i^{\star}}(s)$ implies that \eqref{cond:1} or \eqref{cond:2} must hold.
	
	Next, we can bound the probability of event \eqref{cond:1} and \eqref{cond:2} by using Proposition \ref{prop:1}, 
	\begin{equation}
		\begin{aligned}
			\Pr[\eqref{cond:1}] &= \Pr\left[ \muh_{i}(s_i) \ge \mu_i + c_{i}(s_i)\right] &\le \frac{1}{t^4},\\
			\Pr[\eqref{cond:2}] &= \Pr\left[ \muh_{i^{\star}}(s) \le \mu^{\star} - c_{i^{\star}}(s)\right] &\le \frac{1}{t^4} ~.
		\end{aligned}
	\end{equation}

	Therefore, by using the union bound, we obtain that
	\begin{equation}\label{ineq:(a)}
		\begin{aligned}
			(a) 
			&\le \sum_{\substack{i\in C \\t < T}}\sum_{s_i=\ell_{C}}^{t}\sum_{s=0}^{t} \Delta_i \left(\Pr[\eqref{cond:1}] + \Pr[\eqref{cond:2}]\right) \\
			&\le \sum_{\substack{i\in C \\t < T}} \Delta_i \left(\sum_{s_i=\ell_{C}}^{t}\sum_{s=0}^{t} \frac{2}{t^4}\right) \\
			&\le \sum_{\substack{i\in C \\t < T}} \frac{2\Delta_i}{t^2} 
			\le \frac{\pi^2}{3} \sum_{i\in C} \Delta_i ~, 
		\end{aligned}
	\end{equation}
	substituting into \eqref{pthm:1-1} demonstrates that 
	\begin{equation}
		\begin{aligned}
			\E[R_C(T)] 
			&\le \ell_{C}\Delta_{C}^{\max} + \frac{\pi^2}{3} \sum_{i\in C} \Delta_i \\
			&\le \frac{40(10v)^{1/\epsilon}\Delta_{C}^{\max}}{(\Delta_{C}^{\min})^{(1+\epsilon)/\epsilon}}\log T + \left( 1 + \frac{\pi^2}{3}\right) \sum_{i\in C} \Delta_i~, 
		\end{aligned}
	\end{equation}
	Finally, summing over all cliques in $\C$ and taking the infimum over all possible coverings $\C$ yields the aforementioned upper bound of RUN-TEM. 
\end{proof}

\subsection{Analysis for RUNE-TEM}
\begin{proof}[of Theorem \ref{thm:RUNE-TEM}]\label{pthm:2}
	Firstly, we try to bound the expectation of $R_C(T)$ by using the techniques similar to \cite{conf/uai/Hu2019}, where we use the same symbols as the proof of Theorem \ref{thm:RUN-TEM}.
	
	For the sake of simplicity, we assume without loss of generality that $i^{\star}$ is the unique optimal arm. Let $\Delta_{C}^{\max} = \max_{i\in C} \Delta_i, \Delta_{C}^{\min} = \min_{i\in C} \Delta_i, N_{C}:=\max_{i\in C} |N(i)|^{\frac{1}{4}}$. Then, for any positive integer $\ell_{C}$, we have 
	\begin{equation}\label{2:R_C(T)}
		\begin{aligned}
			\E[R_C(T)] 
			&= \sum_{\substack{i\in C\backslash\{ i^{\star}\} \\ t\le T}} \Delta_i \E[\Ibb\{I_t=i\}] \\
			&\le \sum_{t\le T} \Pr\left[ \exists i\in C\backslash\{ i^{\star}\} \ \text{s.t.}\ I_t=i, T_{C}(t-1) < \ell_{C}\right] \cdot \Delta_{C}^{\max} \\
			&\ \ +\sum_{t\le T} \Pr\left[ \exists i\in C\backslash\{ i^{\star}\} \ \text{s.t.}\ I_t=i, T_{C}(t-1) \ge \ell_{C}\right] \cdot \Delta_{C}^{\max} \\
			&\le \ell_{C}\Delta_{C}^{\max} + \underbrace{\sum_{t\le T} \Pr\left[ \exists i\in C\backslash\{ i^{\star}\} \ \text{s.t.}\ I_t=i, T_{C}(t-1) \ge \ell_{C}\right]}_{(\alpha)} \cdot \Delta_{C}^{\max}~.
		\end{aligned}
	\end{equation}
	
	Now, we analyze term $(\alpha)$ by using the similar tricks of \cite{journals/ml/Auer2002a}. Let $c_{i}(t)=5v^{\frac{1}{1+\epsilon}}( \frac{4\log(|N(i)|^{1/4}t)}{O_i(t)})^{\frac{\epsilon}{1+\epsilon}}$ and $\tilde{c}_{i}(t)=5v^{\frac{1}{1+\epsilon}}( \frac{4\log(N_{C}t)}{O_i(t)})^{\frac{\epsilon}{1+\epsilon}}$, then
	\begin{equation}\label{2:alpha}
		\begin{aligned}
			(\alpha)
			&=\sum_{t\le T} \Pr[ \exists i\in C\backslash\{ i^{\star}\} \ \text{s.t.}\ I_t=i, T_{C}(t-1) \ge \ell_{C}]\\
			&= \sum_{\substack{ \\t < T}} \Pr\left[{\begin{array}{c} \max_{i\in C\backslash\{ i^{\star}\}} \left\{\muh_i(t)+c_{i}(t)\right\} \ge \muh_{i^{\star}}(t)+c_{i^{\star}}(t),
					\\	T_{C}(t) \ge \ell_{C}
			\end{array}}\right] \\
			&\le \sum_{t < T} \Pr\left[ \max_{i\in C\backslash\{ i^{\star}\}} \left\{\max_{\ell_{C}\le s_i\le t}\muh_{i}(s_i)+c_{i}(s_i)\right\} \ge \min_{0\le s\le t}\muh_{i^{\star}}(s)+c_{i^{\star}}(s)\right] \\
			&\le \sum_{t < T}\sum_{s_i=\ell_{C}}^{t}\sum_{s=0}^{t} \Pr\left[ \max_{i\in C\backslash\{ i^{\star}\}} \left\{\muh_{i}(s_i)+\tilde{c}_{i}(s_i)\right\} \ge \muh_{i^{\star}}(s)+c_{i^{\star}}(s)\right] ~.
		\end{aligned}
	\end{equation}
	
	Now, let $\mu_{C}^{\max}=\max_{i\in C} \mu_i$, we observe that $\max_{i\in C\backslash\{ i^{\star}\}}\{\muh_{i}(s_i)+\tilde{c}_{i}(s_i)\} \ge \muh_{i^{\star}}(s)+c_{i^{\star}}(s)$ implies that at least one of the following must hold: 
	\begin{equation}\label{cond:2-1}
		\max_{i\in C\backslash\{ i^{\star}\}}\{\muh_{i}(s_i)\} \ge \mu_{C}^{\max} + \tilde{c}_{i}(s_i) ~,
	\end{equation}
	\begin{equation}\label{cond:2-2}
		\muh_{i^{\star}}(s) \le \mu^{\star} - c_{i^{\star}}(s) ~,
	\end{equation}
	\begin{equation}\label{cond:2-3}
		\mu^{\star} < \mu_{C}^{\max} + 2\tilde{c}_{i}(s_i) ~.
	\end{equation}
	Otherwise, assume that all three inequalities are false, we have
	\begin{equation}\label{cond:2-4}
		\max_{i\in C\backslash\{ i^{\star}\}}\{\muh_{i}(s_i)+\tilde{c}_{i}(s_i)\} < \mu_{C}^{\max} + 2\tilde{c}_{i}(s_i) \le \mu^{\star} <  \muh_{i^{\star}}(s)+c_{i^{\star}}(s) ~,
	\end{equation}
	which implies that $I_t\neq i$.
	
	If we choose $\ell_{C}=\lceil \frac{40(10v)^{1/\epsilon}\log(N_{C}T)}{(\Delta_{C}^{\min})^{(1+\epsilon)/\epsilon}}\rceil$, when $\ell_{C} \le s_i \le t$, then
	\begin{equation}
		\mu^{\star} - \mu_{C}^{\max} - 2\tilde{c}_{i}(s_i) = \mu^{\star} - \mu_{C}^{\max} - 10v^{\frac{1}{1+\epsilon}}\left( \frac{4\log(N_{C}t)}{s_i}\right)^{\frac{\epsilon}{1+\epsilon}} \ge 0 ~,
	\end{equation}
	which implies that \eqref{cond:2-3} is always false. Thus, event \eqref{cond:2-1} or event \eqref{cond:2-2} must hold.
	
	Next, we bound the probability of event \eqref{cond:2-1} , 
	\begin{equation}\label{cond:2-1:bound}
		\begin{aligned}
			\Pr[\eqref{cond:2-1}]
			&= \Pr\left[ \max_{i\in C\backslash\{ i^{\star}\}}\{\muh_{i}(s_i)\} \ge \mu_{C}^{\max} + \tilde{c}_{i}(s_i)\right] \\
			&\le \sum_{i\in C\backslash\{ i^{\star}\}} \Pr\left[ \muh_{i}(s_i) \ge \mu_{C}^{\max} + \tilde{c}_{i}(s_i)\right] \\
			&\le \sum_{i\in C\backslash\{ i^{\star}\}} \Pr\left[ \muh_{i}(s_i) \ge \mu_i + \tilde{c}_{i}(s_i)\right] \\
			&\le |C|\cdot e^{-4\log(N_{C}t)} = \frac{|C|}{N_{C}^4t^4} \le \frac{1}{t^4}~, 
		\end{aligned}
	\end{equation}
	where the penultimate inequality is induced by Proposition \ref{prop:1} and subsequently a union bound on $C$. Similarly, we can bound the probability of \eqref{cond:2-2} directly,
	\begin{equation}\label{cond:2-2:bound}
		\begin{aligned}
			\Pr[\eqref{cond:2-2}]
			&= \Pr\left[ \muh_{i^{\star}}(s) \le \mu^{\star} - c_{i^{\star}}(s)\right] \\
			&\le e^{-4\log(|N(i^{\star})|^{1/4}t)} = \frac{1}{|N(i^{\star})|t^4} \le \frac{1}{t^4}~, 
		\end{aligned}
	\end{equation}
	By further computation, we get that
	\begin{equation}\label{ineq:(b)}
		\begin{aligned}
			(\alpha) 
			&\le \sum_{t < T}\sum_{s_i=\ell_{C}}^{t}\sum_{s=0}^{t} \Pr\left[ \max_{i\in C\backslash\{ i^{\star}\}} \left\{\muh_{i}(s_i)+\tilde{c}_{i}(s_i)\right\} \ge \muh_{i^{\star}}(s)+c_{i^{\star}}(s)\right] \\
			&\le \sum_{t < T} \sum_{s_i=\ell_{C}}^{t}\sum_{s=0}^{t} \left(\Pr[\eqref{cond:2-1}] + \Pr[\eqref{cond:2-2}]\right) \\
			&\le \sum_{t < T} \left(\sum_{s_i=\ell_{C}}^{t}\sum_{s=0}^{t} \frac{2}{t^4}\right) 
			\le \sum_{t < T} \frac{2}{t^2} 
			\le \frac{\pi^2}{3} ~, 
		\end{aligned}
	\end{equation}
	and furthermore, by \eqref{2:R_C(T)}, we have
	\begin{equation}
		\begin{aligned}
			\E[R_C(T)] 
			&\le \ell_{C}\Delta_{C}^{\max} + (\alpha) \cdot  \Delta_{C}^{\max} \\
			&\le \ell_{C}\Delta_{C}^{\max} + \frac{\pi^2}{3} \Delta_{C}^{\max} \\
			&\le 40\frac{(10v)^{1/\epsilon}\Delta_{C}^{\max}}{(\Delta_{C}^{\min})^{(1+\epsilon)/\epsilon}}\log(N_{C}T) + \left( 1 + \frac{\pi^2}{3}\right) \Delta_{C}^{\max}~, 
		\end{aligned}
	\end{equation}
	Finally, summing over all cliques in $\C$ and taking the infimum over all possible coverings $\C$ yields the aforementioned upper bound of RUNE-TEM. 
\end{proof}

\subsection{Analysis for RUNE-MoM}
In this section, we present the proofs for RUNE-MoM and RUNE-TEM omitted in the first section of our main results.
Before starting the proof, we need to state a lemma which has been proved by \cite{journals/tit/Bubeck2013}.
\begin{lem}
    Let $X_1,\cdots,X_n$ be a real i.i.d.~sequence with finite mean $\mu$. We assume that for some $\epsilon\in(0, 1]$ and $v\ge 0$, $\E[|X-\mu |^{1+\epsilon}]\le v$. Let $\muh$ be the empirical mean:
    
    \begin{equation*}
        \muh = \frac{1}{n}\sum_{t=1}^n X_t ~,
    \end{equation*}
    Then, for any $\eta > 0$, we have
    \begin{equation}\label{empirical:1}
        \Pr[\mu-\muh > \eta] \le \frac{3v}{n^{\epsilon}\eta^{1+\epsilon}} ~,
    \end{equation}
    and also,
    \begin{equation}\label{empirical:2}
        \Pr[\muh-\mu > \eta] \le \frac{3v}{n^{\epsilon}\eta^{1+\epsilon}} ~.
    \end{equation}
\end{lem}

\begin{proof}[of Proposition \ref{prop:2}]
    We follow the same proof scheme as \cite{journals/tit/Bubeck2013}. Let $\eta=(12v)^{\frac{1}{1+\epsilon}}(\frac{1}{N})^{\frac{\epsilon}{1+\epsilon}}>0$ and $Y_l = \Ibb_{\muh_l<\mu-\eta}$ for $l\in[k]$. According to equation \eqref{empirical:1}, $Y_l$ has a Bernoulli distribution with parameter
    \begin{equation}
        p \le \frac{3v}{N^{\epsilon}\eta^{1+\epsilon}} \le \frac{1}{4} ~,
    \end{equation}
    which implies that $\E[Y_l]=p\le \frac{1}{4}$. Thus using Hoeffding’s inequality \citep{journals/Hoeffding1963} for the tail of a binomial distribution, we get that
    \begin{equation}
    \begin{aligned}
        \Pr[\muh_{M} < \mu - \eta] 
        &\le \Pr\left[\sum_{l=1}^k Y_l\ge \frac{k}{2}\right] \\
        &\le \Pr\left[\sum_{l=1}^k Y_l-\E\left[\sum_{l=1}^kY_l\right]\ge \frac{k}{4}\right] \\
        &\le \exp\left( -\frac{2(k/4)^2}{k}\right) = \exp\left( -k/8\right)
    \end{aligned}
    \end{equation}
    where the first inequality holds because $\muh_{M} < \mu - \eta$ implies that at least half of the $\muh_l$, $l\in[k]$, satisfy $\mu>\muh_l+\eta$. 
    
    Substitute $k=\lceil 8\log(1/\delta)\rceil$ and $N=\lceil n/k \rceil$ into the formula  of $\eta$, we have that, 
    \begin{equation}\label{proof-prop:2-1}
		\eta = (12v)^{\frac{1}{1+\epsilon}}\left(\frac{1}{N}\right)^{\frac{\epsilon}{1+\epsilon}} 
		\le (12v)^{\frac{1}{1+\epsilon}}\left(\frac{k}{n}\right)^{\frac{\epsilon}{1+\epsilon}} 
		\le (12v)^{\frac{1}{1+\epsilon}}\left( \frac{8\log(e^{1/8}/\delta)}{n}\right)^{\frac{\epsilon}{1+\epsilon}}~,
	\end{equation}
    
    Therefore, with probability at least $1-\delta$,
	\begin{equation}\label{proof-prop:2-2}
		\muh_{M} \ge \mu - (12v)^{\frac{1}{1+\epsilon}}\left( \frac{8\log(e^{1/8}/\delta)}{n}\right)^{\frac{\epsilon}{1+\epsilon}} ~,
	\end{equation}
	
	Now, we have finished proving the first part of Proposition \ref{prop:2}, and the second part follows the same scheme, so we omit it.
\end{proof}

\begin{proof}[of Theorem \ref{thm:RUNE-MoM}]
    This proof is similar to the analysis of Theorem \ref{thm:RUNE-TEM}, so we only present some critical steps here. 
    
    First of all, we can get the same inequality for $\E[R_C(T)]$ as \eqref{2:R_C(T)}.
    \begin{equation}\label{RUNE-MoM:R_C(T)}
		\begin{aligned}
			\E[R_C(T)]
			&\le \ell_{C}\Delta_{C}^{\max} + \underbrace{\sum_{t\le T} \Pr\left[ \exists i\in C\backslash\{ i^{\star}\} \ \text{s.t.}\ I_t=i, T_{C}(t-1) \ge \ell_{C}\right]}_{(\alpha)} \cdot \Delta_{C}^{\max}~.
		\end{aligned}
	\end{equation}
	
	Then, we analyze term $(\alpha)$ by the same computation as \eqref{2:alpha}
	\begin{equation}\label{RUNE-MoM:alpha}
		\begin{aligned}
			(\alpha)
			&\le \sum_{t < T}\sum_{s_i=\ell_{C}}^{t}\sum_{s=0}^{t} \Pr\left[ \max_{i\in C\backslash\{ i^{\star}\}} \left\{\muh_{i}(s_i)+\tilde{c}_{i}(s_i)\right\} \ge \muh_{i^{\star}}(s)+c_{i^{\star}}(s)\right] ~.
		\end{aligned}
	\end{equation}
	where we redefine $c_{i}(t)=(12v)^{\frac{1}{1+\epsilon}}( \frac{32\log(|N(i)|^{1/4}t)}{O_i(t)})^{\frac{\epsilon}{1+\epsilon}}$ and $\tilde{c}_{i}(t)=(12v)^{\frac{1}{1+\epsilon}}( \frac{32\log(N_{C}t)}{O_i(t)})^{\frac{\epsilon}{1+\epsilon}}$.
	
	Now, let $\mu_{C}^{\max}=\max_{i\in C} \mu_i$ and $\ell_{C}=\lceil \frac{64(24v)^{1/\epsilon}\log(N_{C}T)}{(\Delta_{C}^{\min})^{(1+\epsilon)/\epsilon}}\rceil$, then by using a similar trick to \eqref{cond:2-1}-\eqref{cond:2-3}, when $\ell_{C} \le s_i \le t$, $\max_{i\in C\backslash\{ i^{\star}\}}\{\muh_{i}(s_i)+\tilde{c}_{i}(s_i)\} \ge \muh_{i^{\star}}(s)+c_{i^{\star}}(s)$ implies that at least one of the following must hold: 
	\begin{equation}\label{RUNE-MoM:cond:1}
		\max_{i\in C\backslash\{ i^{\star}\}}\{\muh_{i}(s_i)\} \ge \mu_{C}^{\max} + \tilde{c}_{i}(s_i) ~,
	\end{equation}
	\begin{equation}\label{RUNE-MoM:cond:2}
		\muh_{i^{\star}}(s) \le \mu^{\star} - c_{i^{\star}}(s) ~,
	\end{equation}
	where the probabilities can be both bounded by $\frac{e^{1/8}}{t^4}$ by using Proposition \ref{prop:2} and similar analysis to \eqref{cond:2-1:bound} and \eqref{cond:2-2:bound}.
	
	By further computation, we get that
	\begin{equation}\label{ineq:(alpha)}
		\begin{aligned}
			(\alpha) 
			&\le \sum_{t < T}\sum_{s_i=\ell_{C}}^{t}\sum_{s=0}^{t} \Pr\left[ \max_{i\in C\backslash\{ i^{\star}\}} \left\{\muh_{i}(s_i)+\tilde{c}_{i}(s_i)\right\} \ge \muh_{i^{\star}}(s)+c_{i^{\star}}(s)\right] \\
			&\le \sum_{t < T} \left(\sum_{s_i=\ell_{C}}^{t}\sum_{s=0}^{t} \frac{2e^{1/8}}{t^4}\right) 
			\le \sum_{t < T} \frac{2e^{1/8}}{t^2} 
			\le \frac{e^{1/8}\pi^2}{3} ~, 
		\end{aligned}
	\end{equation}
	and furthermore, by \eqref{RUNE-MoM:R_C(T)}, we have
	\begin{equation}
		\begin{aligned}
			\E[R_C(T)] 
			&\le \ell_{C}\Delta_{C}^{\max} + (\alpha) \cdot  \Delta_{C}^{\max} \\
			&\le \ell_{C}\Delta_{C}^{\max} + \frac{e^{1/8}\pi^2}{3} \Delta_{C}^{\max} \\
			&\le 64\frac{(24v)^{1/\epsilon}\Delta_{C}^{\max}}{(\Delta_{C}^{\min})^{(1+\epsilon)/\epsilon}}\log(N_{C} T) + \left( 1 + \frac{e^{1/8}\pi^2}{3}\right) \Delta_{C}^{\max}~, 
		\end{aligned}
	\end{equation}
	Finally, summing over all cliques in $\C$ and taking the infimum over all possible coverings $\C$ yields the aforementioned upper bound of RUNE-MoM. 
\end{proof}

\section{OMITTED PROOFS FOR SECTION \ref{main:2}}
In this section, we present the proofs for RAAE-TEM and RAAE-MoM omitted in the second section of our main results.

\subsection{Analysis for RAAE-TEM}
In order to prove Theorem \ref{thm:RAAE-TEM}, we need one additional lemma, stated below. It shows that, after the active arm elimination phase of epoch $r$, the remaining arms are at most $4\varepsilon_{r}$-suboptimal with high probability.

\begin{lem}\label{lem:1}
	For all epochs $r$, with probability at least $1-1/T$ we have $\mu_{i} \ge \mu^{\star}-4\varepsilon_{r}$ for all $i\in V_{r+1}$.
\end{lem}

\begin{proof}[of Lemma \ref{lem:1}]
	In each epoch $r$, the player observed the reward of each arm in $V_r$ for at least $n_r$ times. Note that by Proposition \ref{prop:1} the rewards of the arms are distributed independently from the feedback graphs. Given an epoch $r$ we denote by $\E_{r}$ the event that $|\muh_i(t-1) - \mu_{i}| \le \varepsilon_{r}$ holds for all $i\in V_r$ and denote $\E = \cap_{r\ge 1}\E_{r}$. Therefore by Proposition \ref{prop:1}
	\begin{equation}
		\Pr[\E_{r}] \ge 1-\frac{1}{KT} ~,
	\end{equation}
	by taking the union bound on each $V_{r}$, we have that
	\begin{equation}
		\Pr[\E] \ge 1-\frac{1}{T} ~.
	\end{equation}
	
	In the remainder of the proof, we assume that $\E$ holds. So for any epoch $r$ and any arm $i\in V_{r}$, we have $|\muh_i(t-1) - \mu_{i}| \le \varepsilon_{r}$. As a result, for any epoch $r$ and any two arms $i, i^{\prime}\in V_{r}$, there must be
	\begin{equation}
		|(\muh_i(t-1)-\muh_{i^{\prime}}(t-1)) - (\mu_{i}-\mu_{i^{\prime}})| \le 2\varepsilon_{r} ~.
	\end{equation}
	
	Next, we argue that under $\E$ the optimal arm $i^{\star}$ is never eliminated. Indeed, for any epoch $r$, we denote the arm $i_{r} = \arg\max_{i\in V_{r}} \muh_i(t-1)$ and $\muh_r^{\star}=\muh_{i_{r}}(t-1)$, it is obvious to see that 	
	\begin{equation}
		\muh_r^{\star}-\muh_{i^{\star}}(t-1) \le |(\muh_{i_{r}}(t-1)-\muh_{i^{\star}}(t-1)) - (\mu_{i_{r}}-\mu_{i^{\star}})| \le 2\varepsilon_{r} ~,
	\end{equation}	
	so the algorithm doesn’t eliminate $i^{\star}$.
	
	Next, we show that under $\E$, in any epoch $r$ the algorithm eliminates all viable arms $i$ with sub-optimal gap $\Delta_{i} > 4\varepsilon_{r}$. Indeed, since arm $i^{\star}$ remains viable, we have that
	\begin{equation}
		\muh_r^{\star}-\muh_i(t-1) \ge \muh_{i^{\star}}(t-1)-\muh_i(t-1) \ge \Delta_{i}-2\varepsilon_{r} > 2\varepsilon_{r} ~,
	\end{equation}
	which means arm $i$ is eliminated by the algorithm. Thus, for all the rest arms $i$ in $V_{r+1}$, we have that
	\begin{equation}
		\mu^{\star} - \mu_{i} \le 4\varepsilon_{r} ~.
	\end{equation}
	Until now, we have finished the proof of Lemma \ref{lem:1}. 
\end{proof}

Besides, we list the following two lemmas that will be used in the proof of Theorem \ref{thm:RAAE-TEM}. The first lemma shows that, the expected regret of the algorithm until the first arm in $V^{(\tau)}$ has been eliminated , is bounded by $O(\log(KT))$. Furthermore, the expected regret of the algorithm after that epoch is also bounded by $O(\log(KT))$. The second lemma shows that the expected regret of Algorithm \ref{alg:2} after we have eliminated all arms not in $V^{(\tau)}$, is at most $O(\log(KT))$.

\begin{lem}\label{lem:2}
	Let $\bar{\Delta}=\max_{i\in V^{(\tau)}} \Delta_{i}$ be the largest gap of arms in $V^{(\tau)}$ and $\tilde{r}=\lfloor \log_2(4/\bar{\Delta}) /\epsilon - 1\rfloor$. Then, the expected regret of Algorithm \ref{alg:2} up to epoch $\tilde{r}$ is at most 
	\begin{equation}\label{lem:2-1}
		\frac{160\alpha (20v)^{1/\epsilon}}{\bar{\Delta}^{1/\epsilon}}\log(2KT).
	\end{equation}
	Also, the expected regret of Algorithm \ref{alg:2} after epoch $\tilde{r}$ is at most 
	\begin{equation}\label{lem:2-2}
		\sum_{i\in V^{(\tau)}}\frac{160(20v)^{1/\epsilon}}{\Delta_{i}^{1/\epsilon}}\log(2KT).
	\end{equation}
\end{lem}

\begin{proof}[of Lemma \ref{lem:2}]
	By Lemma \ref{lem:1}, the instantaneous regret for each round in epoch $r$ is at most $4\varepsilon_{r-1}\le 8\varepsilon_{r}$. Note that each epoch spends at most $\alpha$ rounds. Then, the expected regret up to epoch $\tilde{r}$ is at most
	\begin{equation}\label{plem:2-1}
		\begin{aligned}
			\sum_{r=1}^{\tilde{r}} \alpha \cdot n_{r}\cdot 8\varepsilon_{r} 
			&\le 80\alpha (5v)^{1/\epsilon}\log(2KT)\sum_{r=1}^{\tilde{r}} \frac{1}{\varepsilon_{r}^{1/\epsilon}} \\
			&= 80\alpha (5v)^{1/\epsilon}\log(2KT)\sum_{r=1}^{\tilde{r}} 2^{r+1} \\
			&\le 80\alpha (5v)^{1/\epsilon}\log(2KT) 2^{\tilde{r}+2} \le \frac{160\alpha (20v)^{1/\epsilon}\log(2KT)}{\bar{\Delta}^{1/\epsilon}} ~,
		\end{aligned}
	\end{equation}
	where the last inequality comes from the fact $\tilde{r} = \lfloor \log_2(4/\bar{\Delta}) /\epsilon - 1\rfloor$. 
	
	Let $\tilde{r}_i = \lfloor \log_2(4/\bar{\Delta}_i) /\epsilon - 1\rfloor$, the number of epochs until arm $i$ is removed from $V_{r}$, which means that it takes $\tilde{r}_{i}+1$ epochs in order for $4\varepsilon_{\tilde{r}_{i}+1}=2^{2-\epsilon(\tilde{r}_{i}+2)} < \bar{\Delta}_i$. We shall assume that the algorithm finished after $\tilde{r}_{u}$ epochs, where $u$ is the arm in $V$ with the minimum nonzero gap. Note that each epoch spends at most $|V_{r}|$ rounds after we have eliminated all arms not in $V^{(\tau)}$. Thus, the expected regret for the remaining epochs is at most
	\begin{equation}
		\begin{aligned}
			\sum_{r=\tilde{r}+1}^{\tilde{r}_{u}} |V_{r}| \cdot n_{r}\cdot 8\varepsilon_{r} 
			&\le 80(5v)^{1/\epsilon}\log(2KT)\sum_{r=\tilde{r}+1}^{\tilde{r}_{u}} \frac{|V_{r}|}{\varepsilon_{r}^{1/\epsilon}} \\
			&= 80(5v)^{1/\epsilon}\log(2KT)\sum_{i\in V^{(\tau)}} \sum_{r=\tilde{r}+1}^{\tilde{r}_{i}} \frac{1}{\varepsilon_{r}^{1/\epsilon}} \\
			&\le 80(5v)^{1/\epsilon}\log(2KT)\sum_{i\in V^{(\tau)}} \sum_{r=1}^{\tilde{r}_{i}} 2^{r+1} \\
			&\le \sum_{i\in V^{(\tau)}}\frac{160(20v)^{1/\epsilon}\log(2KT)}{\Delta_{i}^{1/\epsilon}} ~,
		\end{aligned}
	\end{equation}
	where the last inequality is induced by the same computation as \eqref{plem:2-1}. 
\end{proof}

\begin{lem}\label{lem:3}
	The expected regret of Algorithm \ref{alg:2} after we have eliminated all arms not in $V^{(\tau)}$, is at most 
	\begin{equation}\label{lem:3-1}
		\sum_{i\in V^{(\tau)}}\frac{160(20v)^{1/\epsilon}}{\Delta_{i}^{1/\epsilon}}\log(2KT).
	\end{equation}
\end{lem}
% Note that the size of each chosen independent set $I_{r}$ is bounded by $\alpha$. 

\begin{proof}[of Lemma \ref{lem:3}]
	Let $\tilde{r}_i = \lfloor \log_2(4/\bar{\Delta}_i) /\epsilon - 1\rfloor$, the number of epochs until arm $i$ is removed from $V_{r}$, which means that it takes $\tilde{r}_{i}+1$ epochs in order for $4\varepsilon_{\tilde{r}_{i}+1}=2^{2-\epsilon(\tilde{r}_{i}+2)} < \bar{\Delta}_i$. We're going to talk about two categories. 
	
	When $\tau \le K-1$, the expected regret of Algorithm \ref{alg:2} after we have eliminated all arms not in $V^{(\tau)}$ equals that of Algorithm \ref{alg:2} after epoch $\tilde{r}$, then the conclusion is directly induced by the second part of Lemma \ref{lem:2}. 
	
	When $\tau > K-1$, then $V^{(\tau)}=V\backslash\{ i^{\star}\}$, the expected regret equals that of the algorithm terminating with $V_{r}=\{ i^{\star}\}$. We shall assume that the algorithm finished after $\tilde{r}_{u}$ epochs, where $u$ is the arm in $V^{(\tau)}$ with the minimum gap. Note that each epoch spends at most $|V_{r}|$ rounds after we have eliminated all arms not in $V^{(\tau)}$. Thus, the expected regret for the remaining epochs is at most
	\begin{equation}
		\begin{aligned}
			\sum_{r=1}^{\tilde{r}_{u}} |V_{r}| \cdot n_{r}\cdot 8\varepsilon_{r} 
			&\le 80(5v)^{1/\epsilon}\log(2KT)\sum_{r=1}^{\tilde{r}_{u}} \frac{|V_{r}|}{\varepsilon_{r}^{1/\epsilon}} \\
			&= 80(5v)^{1/\epsilon}\log(2KT)\sum_{i\in V^{(\tau)}} \sum_{r=1}^{\tilde{r}_{i}} \frac{1}{\varepsilon_{r}^{1/\epsilon}} \\
			&= 80(5v)^{1/\epsilon}\log(2KT)\sum_{i\in V^{(\tau)}} \sum_{r=1}^{\tilde{r}_{i}} 2^{r+1} \\
			&\le \sum_{i\in V^{(\tau)}}\frac{160(20v)^{1/\epsilon}\log(2KT)}{\Delta_{i}^{1/\epsilon}} ~,
		\end{aligned}
	\end{equation}
	where the last inequality is induced by the same computation as \eqref{plem:2-1}. 
\end{proof}

\begin{proof}[of Theorem \ref{thm:RAAE-TEM}]
	Here, we use a proof sketch similar to that of \citet{conf/icml/Cohen2016}. Firstly, we need to bound $R$, which is defined as the total number of epochs that the algorithm runs for. Since the number $T_r$ of rounds elapsed during epoch $r$ is lower bounded by $n_r=\lceil (5(5v)^{1/\epsilon}\log(2KT))\cdot (2^{(1+\epsilon)})^{r+1} \rceil$, we must have
	\begin{equation}
		T \ge \sum_{r=1}^R T_r \ge (5(5v)^{1/\epsilon}\log(2KT))\sum_{r=1}^R (2^{(1+\epsilon)})^{r+1} ~.
	\end{equation}
	Then, by computing the RHS of the above inequality, we can obtain an upper bound
	\begin{equation}\label{rb}
		R \le \bar{r}=\frac{1}{1+\epsilon}\log_2 \left( \frac{(2^{1+\epsilon}-1)T}{20(5v)^{1/\epsilon}4^{\epsilon}\log(2KT)} + 1 \right)~.
	\end{equation}

	Next, we attempt to bound the expected regret of the algorithm. By Lemma \ref{lem:1} and the union bound, the total failure probability of the robust mean estimators is at most $\bar{r}/T$. Then the expected regret of the algorithm is at most the expected regret conditioned on the success of the estimation of the means plus $(\bar{r}/T)\cdot(T\Delta_{\max}) = O(\Delta_{\max}\log T)$ by Eq.~\eqref{rb}. Thus it remains to bound the conditioned expected regret.
	
	In the remainder of the proof, we assume that the robust mean estimators always succeed. Therefore, the regret of the algorithm is at most the regret of an algorithm terminating with $V_{r}=\{ i^{\star}\}$. In addition, we assume that $T$ is large enough for this to happen. 
	
	Let $\bar{\Delta}=\max_{i\in V^{(\tau)}} \Delta_{i}$ be the largest gap of arms in $V^{(\tau)}$ and $\tilde{r} = \lfloor \log_2(4/\bar{\Delta}) /\epsilon - 1\rfloor$. Thus, it takes $\tilde{r}+1$ epochs in order for $4\varepsilon_{\tilde{r}+1}=2^{2-\epsilon(\tilde{r}+2)} < \bar{\Delta}$, which means that the algorithm eliminates all arms in $V\backslash V^{(\tau)}$ until epoch $\tilde{r}$. Now we split into the following two cases:
	
	
         If $\tau> K-1$, then $V^{(\tau)}=V\backslash \{ i^{\star}\}$. Thus the regret of the algorithm is upper bounded by the regret until the algorithm finishes running with $V_{r}=\{ i^{\star}\}$, which can be directly obtained from \eqref{lem:3-1} of Lemma \ref{lem:3}.
	
	    If $\tau\le K-1$, then splitting by epoch $\tilde{r}$, we can bound the expected regret before and after that, using \eqref{lem:2-1} and \eqref{lem:2-2} in Lemma \ref{lem:2} respectively. Since $\bar{\Delta}\ge \Delta_{i}$ for all $i\in V^{(\tau)}$, we have that
		\begin{equation}
			\frac{\alpha}{\bar{\Delta}^{1/\epsilon}} \le \frac{\alpha}{|V^{(\tau)}|} \sum_{i\in V^{(\tau)}} \frac{1}{\Delta_{i}^{1/\epsilon}} = \sum_{i\in V^{(\tau)}} \frac{1}{\Delta_{i}^{1/\epsilon}}~, 
		\end{equation}
		by definition of $V^{(\tau)}$. Thus we notice that the expected regret of the algorithm up to epoch $\tilde{r}$ is at most the regret from epoch $\tilde{r}+1$ onward. Then we can also bound the expected regret of the algorithm by using \eqref{lem:2-2} of Lemma \ref{lem:2}.
    

	Finally, we obtain a regret $O(\sum_{i\in V^{(\tau)}} (\frac{v}{\Delta_{i}})^{1/\epsilon} \log T + \Delta_{\max}\log T)$ by the assumption $T\ge K$.
\end{proof}

\subsection{Analysis for RAAE-MoM}
Following the same proof sketch as for Theorem \ref{thm:RAAE-TEM}, we can obtain the property described in Lemma \ref{lem:1}. It shows that, after the active arm elimination phase of epoch $r$, the remaining arms are at most $4\varepsilon_{r}$-suboptimal with high probability.

Besides, we list the following two lemmas that will be used in the proof of Theorem \ref{thm:RAAE-MoM}. The first lemma shows that the expected regret of the algorithm until the first arm in $V^{(\tau)}$ has been eliminated is bounded by $O(\log(KT))$. Furthermore, the expected regret of the algorithm after that epoch is also bounded by $O(\log(KT))$. The second lemma shows that the expected regret of RAAE-MoM after we have eliminated all arms not in $V^{(\tau)}$ is at most $O(\log(KT))$.

\begin{lem}\label{RAAE-MoM:lem:2}
	Let $\bar{\Delta}=\max_{i\in V^{(\tau)}} \Delta_{i}$ be the largest gap of arms in $V^{(\tau)}$ and $\tilde{r}=\lfloor \log_2(4/\bar{\Delta}) /\epsilon - 1\rfloor$. Then, the expected regret of RAAE-MoM up to epoch $\tilde{r}$ is at most 
	\begin{equation}\label{RAAE-MoM:lem:2-1}
		\frac{256\alpha (48v)^{1/\epsilon}}{\bar{\Delta}^{1/\epsilon}}\log(2e^{1/8}KT).
	\end{equation}
	Also, the expected regret of RAAE-MoM after epoch $\tilde{r}$ is at most 
	\begin{equation}\label{RAAE-MoM:lem:2-2}
		\sum_{i\in V^{(\tau)}}\frac{256(48v)^{1/\epsilon}}{\Delta_{i}^{1/\epsilon}}\log(2e^{1/8}KT).
	\end{equation}
\end{lem}

\begin{proof}[of Lemma \ref{RAAE-MoM:lem:2}]
	By Lemma \ref{lem:1}, the instantaneous regret for each round of epoch $r$ is at most $4\varepsilon_{r-1}\le 8\varepsilon_{r}$. Note that each epoch spends at most $\alpha$ rounds. Then, the expected regret up to epoch $\tilde{r}$ is at most
	\begin{equation}\label{RAAE-MoM:plem:2-1}
		\begin{aligned}
			\sum_{r=1}^{\tilde{r}} \alpha \cdot n_{r}\cdot 8\varepsilon_{r} 
			&\le 128\alpha (12v)^{1/\epsilon}\log(2e^{1/8}KT)\sum_{r=1}^{\tilde{r}} \frac{1}{\varepsilon_{r}^{1/\epsilon}} \\
			&= 128\alpha (12v)^{1/\epsilon}\log(2e^{1/8}KT)\sum_{r=1}^{\tilde{r}} 2^{r+1} \\
			&\le 128\alpha (12v)^{1/\epsilon}\log(2e^{1/8}KT) 2^{\tilde{r}+2} \le \frac{256\alpha (48v)^{1/\epsilon}\log(2e^{1/8}KT)}{\bar{\Delta}^{1/\epsilon}} ~,
		\end{aligned}
	\end{equation}
	where the last inequality comes from the fact $\tilde{r} = \lfloor \log_2(4/\bar{\Delta}) /\epsilon - 1\rfloor$. 
	
	Let $\tilde{r}_i = \lfloor \log_2(4/\bar{\Delta}_i) /\epsilon - 1\rfloor$, the number of epochs until arm $i$ is removed from $V_{r}$, which means that it takes $\tilde{r}_{i}+1$ epochs in order for $4\varepsilon_{\tilde{r}_{i}+1}=2^{2-\epsilon(\tilde{r}_{i}+2)} < \bar{\Delta}_i$. We shall assume that the algorithm finished after $\tilde{r}_{u}$ epochs, where $u$ is the arm in $V$ with the minimum nonzero gap. Note that each epoch spends at most $|V_{r}|$ rounds after we have eliminated all arms not in $V^{(\tau)}$. Thus, the expected regret for the remaining epochs is at most
	\begin{equation}
		\begin{aligned}
			\sum_{r=\tilde{r}+1}^{\tilde{r}_{u}} |V_{r}| \cdot n_{r}\cdot 8\varepsilon_{r} 
			&\le 128(12v)^{1/\epsilon}\log(2e^{1/8}KT)\sum_{r=\tilde{r}+1}^{\tilde{r}_{u}} \frac{|V_{r}|}{\varepsilon_{r}^{1/\epsilon}} \\
			&= 128(12v)^{1/\epsilon}\log(2e^{1/8}KT)\sum_{i\in V^{(\tau)}} \sum_{r=\tilde{r}+1}^{\tilde{r}_{i}} \frac{1}{\varepsilon_{r}^{1/\epsilon}} \\
			&\le 128(12v)^{1/\epsilon}\log(2e^{1/8}KT)\sum_{i\in V^{(\tau)}} \sum_{r=1}^{\tilde{r}_{i}} 2^{r+1} \\
			&\le \sum_{i\in V^{(\tau)}}\frac{256(48v)^{1/\epsilon}\log(2e^{1/8}KT)}{\Delta_{i}^{1/\epsilon}} ~,
		\end{aligned}
	\end{equation}
	where the last inequality is induced by the same computation as \eqref{RAAE-MoM:plem:2-1}. 
\end{proof}

\begin{lem}\label{RAAE-MoM:lem:3}
	The expected regret of RAAE-MoM after we have eliminated all arms not in $V^{(\tau)}$, is at most 
	\begin{equation}\label{RAAE-MoM:lem:3-1}
		\sum_{i\in V^{(\tau)}}\frac{256(48v)^{1/\epsilon}}{\Delta_{i}^{1/\epsilon}}\log(2e^{1/8}KT).
	\end{equation}
\end{lem} 

\begin{proof}[of Lemma \ref{RAAE-MoM:lem:3}]
	Let $\tilde{r}_i = \lfloor \log_2(4/\bar{\Delta}_i) /\epsilon - 1\rfloor$, the number of epochs until arm $i$ is removed from $V_{r}$, which means that it takes $\tilde{r}_{i}+1$ epochs in order for $4\varepsilon_{\tilde{r}_{i}+1}=2^{2-\epsilon(\tilde{r}_{i}+2)} < \bar{\Delta}_i$. We distinguish two cases.
	
	When $\tau \le K-1$, the expected regret of RAAE-MoM after we have eliminated all arms not in $V^{(\tau)}$ equals that of RAAE-MoM after epoch $\tilde{r}$, so the conclusion follows directly from the second part of Lemma \ref{RAAE-MoM:lem:2}.
	
	When $\tau > K-1$, then $V^{(\tau)}=V\backslash\{ i^{\star}\}$, the expected regret equals that of the algorithm terminating with $V_{r}=\{ i^{\star}\}$. We shall assume that the algorithm finished after $\tilde{r}_{u}$ epochs, where $u$ is the arm in $V^{(\tau)}$ with the minimum gap. Note that each epoch spends at most $|V_{r}|$ rounds after we have eliminated all arms not in $V^{(\tau)}$. Thus, the expected regret for the remaining epochs is at most
	\begin{equation}
		\begin{aligned}
			\sum_{r=1}^{\tilde{r}_{u}} |V_{r}| \cdot n_{r}\cdot 8\varepsilon_{r} 
			&\le 128(12v)^{1/\epsilon}\log(2e^{1/8}KT)\sum_{r=1}^{\tilde{r}_{u}} \frac{|V_{r}|}{\varepsilon_{r}^{1/\epsilon}} \\
			&= 128(12v)^{1/\epsilon}\log(2e^{1/8}KT)\sum_{i\in V^{(\tau)}} \sum_{r=1}^{\tilde{r}_{i}} \frac{1}{\varepsilon_{r}^{1/\epsilon}} \\
			&= 128(12v)^{1/\epsilon}\log(2e^{1/8}KT)\sum_{i\in V^{(\tau)}} \sum_{r=1}^{\tilde{r}_{i}} 2^{r+1} \\
			&\le \sum_{i\in V^{(\tau)}}\frac{256(48v)^{1/\epsilon}\log(2e^{1/8}KT)}{\Delta_{i}^{1/\epsilon}} ~,
		\end{aligned}
	\end{equation}
	where the last inequality is induced by the same computation as \eqref{RAAE-MoM:plem:2-1}. 
\end{proof}

\begin{proof}[of Theorem \ref{thm:RAAE-MoM}]
	Here, we use a proof sketch similar to that of \citet{conf/icml/Cohen2016}. Firstly, we need to bound $R$, which is defined as the total number of epochs that the algorithm runs for. Since the number $T_r$ of rounds elapsed during epoch $r$ is lower bounded by $n_r=\lceil \frac{(8(12v)^{1/\epsilon}\log(2e^{1/8}KT))}{\varepsilon_{r}^{(1+\epsilon)/\epsilon}} \rceil$, we must have
	\begin{equation}
		T \ge \sum_{r=1}^R T_r \ge (8(12v)^{1/\epsilon}\log(2e^{1/8}KT))\sum_{r=1}^R (2^{(1+\epsilon)})^{r+1} ~.
	\end{equation}
	Then, by computing the RHS of the above inequality, we can obtain an upper bound
	\begin{equation}\label{RAAE-MoM:rb}
		R \le \bar{r}=\frac{1}{1+\epsilon}\log_2 \left( \frac{(2^{1+\epsilon}-1)T}{32(12v)^{1/\epsilon}4^{\epsilon}\log(2e^{1/8}KT)} + 1 \right)~.
	\end{equation}

	Next, we attempt to bound the expected regret of the algorithm. By Lemma \ref{lem:1} and the union bound, the total failure probability of the robust mean estimators is at most $\bar{r}/T$. Then the expected regret of the algorithm is at most the expected regret conditioned on the success of the estimation of the means plus $(\bar{r}/T)\cdot(T\Delta_{\max}) = O(\Delta_{\max}\log T)$ by Eq.~\eqref{RAAE-MoM:rb}. Thus it remains to bound the conditioned expected regret.
	
	In the remainder of the proof, we assume that the robust mean estimators always succeed. Therefore, the regret of the algorithm is at most the regret of an algorithm terminating with $V_{r}=\{ i^{\star}\}$. In addition, we assume that $T$ is large enough for this to happen. 
	
	Let $\bar{\Delta}=\max_{i\in V^{(\tau)}} \Delta_{i}$ be the largest gap of arms in $V^{(\tau)}$ and $\tilde{r} = \lfloor \log_2(4/\bar{\Delta}) /\epsilon - 1\rfloor$. Thus, it takes $\tilde{r}+1$ epochs in order for $4\varepsilon_{\tilde{r}+1}=2^{2-\epsilon(\tilde{r}+2)} < \bar{\Delta}$, which means that the algorithm eliminates all arms in $V\backslash V^{(\tau)}$ until epoch $\tilde{r}$. Now we split into the following two cases:
	
		If $\tau> K-1$, then $V^{(\tau)}=V\backslash \{ i^{\star}\}$. Thus the regret of the algorithm is upper bounded by the regret until the algorithm finishes running with $V_{r}=\{ i^{\star}\}$, which can be directly obtained from \eqref{RAAE-MoM:lem:3-1} of Lemma \ref{RAAE-MoM:lem:3}.
	
		If $\tau\le K-1$, then splitting by epoch $\tilde{r}$, we can bound the expected regret before and after that, using \eqref{RAAE-MoM:lem:2-1} and \eqref{RAAE-MoM:lem:2-2} in Lemma \ref{RAAE-MoM:lem:2} respectively. Since $\bar{\Delta}\ge \Delta_{i}$ for all $i\in V^{(\tau)}$, we have that
		\begin{equation}
			\frac{\alpha}{\bar{\Delta}^{1/\epsilon}} \le \frac{\alpha}{|V^{(\tau)}|} \sum_{i\in V^{(\tau)}} \frac{1}{\Delta_{i}^{1/\epsilon}} = \sum_{i\in V^{(\tau)}} \frac{1}{\Delta_{i}^{1/\epsilon}}~, 
		\end{equation}
		by definition of $V^{(\tau)}$. Thus we notice that the expected regret of the algorithm up to epoch $\tilde{r}$ is at most the regret from epoch $\tilde{r}+1$ onward. Then we can also bound the expected regret of the algorithm by using \eqref{RAAE-MoM:lem:2-2} of Lemma \ref{RAAE-MoM:lem:2}.


	Finally, we obtain a regret $O(\sum_{i\in V^{(\tau)}} (\frac{v}{\Delta_{i}})^{1/\epsilon} \log T + \Delta_{\max}\log T)$ by the assumption $T\ge K$.
\end{proof}

% \bibliography{ref}

\end{document}
