% \documentclass{uai2023} % for initial submission
\documentclass[accepted]{uai2023} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see what the non-anonymous paper
                                    % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2023} % ptmx math instead of Computer
                                         % Modern (has noticeable issues)
% \documentclass[mathfont=newtx]{uai2023} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

\usepackage{xr} 
\externaldocument{wang_8-supp}

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
% \swap[<sep>]{<a>}{<b>} expands to <b><sep><a>; the optional <sep> defaults to `-'.
\newcommand{\swap}[3][-]{#3#1#2} % just an example



%%%%%%%%%%%%%%%%%% Author's Own Pre


\usepackage{amsfonts,amssymb,amsthm}
\usepackage{textcomp}
\usepackage{xcolor}
\usepackage{dashrule}

\usepackage{xspace}
\usepackage{bm, bbm}
\usepackage{subfig}
\usepackage{multirow}
\usepackage{hyperref}
% Hyperlink appearance: every link type is rendered as blue text.
\hypersetup{%
    colorlinks=true,% colour the link text itself instead of drawing boxes
    urlcolor=blue,%   external hyperlinks
    linkcolor=blue,%  internal links
    citecolor=blue%   citations
}

\usepackage{algorithm}
\usepackage{algorithmic}

% \usepackage{eqparbox}
% \renewcommand{\algorithmiccomment}[1]{\hfill\eqparbox{COMMENT}{\# #1}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% THEOREMS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Statement environments in amsthm's `plain' style.
% `theorem' is numbered within each section; every environment declared with
% [theorem] below shares its counter.
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{corollary}[theorem]{Corollary}
% Environments in amsthm's `definition' style, still on the shared counter.
\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{assumption}[theorem]{Assumption}
% The switch to the `remark' style is commented out, so `remark' below is
% declared while the `definition' style is still in effect.
% \theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}

\makeatletter
% \addstarred<cmd>[<n>]{<body>}: give the already-defined command <cmd> a starred variant.
%   1. Save the current meaning of <cmd> under the internal name `\string<cmd>@nostar'.
%   2. Redefine <cmd> as an \@ifstar dispatcher: `<cmd>*' runs the `@star' variant,
%      plain `<cmd>' runs the saved `@nostar' meaning.
%   3. Finish with \newcommand applied to the `@star' name, so that the `[<n>]{<body>}'
%      following `\addstarred<cmd>' at the call site becomes the starred definition.
% Defined between \makeatletter and \makeatother because it refers to \@ifstar.
\newcommand\addstarred[1]{%
    \expandafter\let\csname\string#1@nostar\endcsname#1%
    \edef#1{\noexpand\@ifstar\expandafter\noexpand\csname\string#1@star\endcsname\expandafter\noexpand\csname\string#1@nostar\endcsname}%
    \expandafter\newcommand\csname\string#1@star\endcsname%
}
\makeatother

% Ceiling and floor with \left/\right auto-sized brackets.
\newcommand{\ceil}[1]{%
    \left\lceil#1\right\rceil%
}
\newcommand{\floor}[1]{%
    \left\lfloor#1\right\rfloor%
}
% Fixed-size mixed brackets: left ceiling bracket, right floor bracket.
\newcommand{\round}[1]{%
    \lceil#1\rfloor%
}
% Indicator symbol followed by a braced argument: \1{...} uses fixed-size
% braces, \1*{...} uses \left/\right auto-sized braces. The starred form is
% installed through \addstarred, so \1 must be defined first.
\newcommand{\1}[1]{%
    \mathbbm{1}{\{#1\}}%
}
\addstarred\1[1]{%
    \mathbbm{1}{\left\{#1\right\}}%
}
% Overrides of existing commands: \O becomes a plain letter O, and \ge / \le
% become the slanted inequality signs \geqslant / \leqslant.
\renewcommand*{\O}{{O}}
\renewcommand*{\ge}{\geqslant}
\renewcommand*{\le}{\leqslant}

% Paired delimiters (mathtools): \abs{x} and \norm{x}; each also accepts a
% starred auto-sizing form and an optional size argument.
\DeclarePairedDelimiter{\abs}{\lvert}{\rvert}
\DeclarePairedDelimiter{\norm}{\lVert}{\rVert}

% Blackboard-bold shorthands. \P already has a definition, hence \renewcommand.
\newcommand*{\N}{\mathbb{N}}
\newcommand*{\R}{\mathbb{R}}
\renewcommand*{\P}{\mathbb{P}}
\newcommand*{\E}{\mathbb{E}}
% Parenthesised index labels (i) and (j), each wrapped in its own group.
\newcommand*{\brai}{{(i)}}
\newcommand*{\braj}{{(j)}}

% Math operators. All of them use the starred \DeclareMathOperator* form, which
% makes sub/superscripts attach as limits (above/below) in display style.
% NOTE(review): that placement is the usual choice for \argmax / \argmin; confirm
% it is also wanted for \KL, \kl, \ERT, \RT and \RTI, or whether the unstarred
% \DeclareMathOperator was intended for those.

% `arg max' / `arg min' with a thin space between the two words.
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}
% Operator names `KL' and `kl'.
\DeclareMathOperator*{\KL}{{\normalfont KL}}
\DeclareMathOperator*{\kl}{{\normalfont kl}}
% The fixed expression E[R_T(A)] packaged as a single operator.
\DeclareMathOperator*{\ERT}{\mathbb{E}[\normalfont \text{R}_T (\mathcal{A})]}
% \DeclareMathOperator{\fr}{\texttt{fr}}
% \DeclareMathOperator*{\Kfr}{\mathcal{K}^\fr}
% R_T, and R_T with the superscript (i) built into the operator.
\DeclareMathOperator*{\RT}{\text{\normalfont R}_T}
\DeclareMathOperator*{\RTI}{\text{\normalfont R}_T^{(i)}}

% Colour-coded inline notes, prefixed `MH says:' (blue) and `DT:' (brown).
\newcommand{\mo}[1]{%
    {\textcolor{blue}{MH says: #1}}%
}
\newcommand{\dt}[1]{%
    {\textcolor{brown}{DT: #1}}%
}

% Names set in typewriter type; the trailing \xspace handles the following space.
\newcommand*{\ACMAB}{\texttt{AC-MA2B}\xspace}
\newcommand*{\FreeExp}{\texttt{FreeExp}\xspace}
\newcommand*{\NoFreeExp}{\texttt{NoFreeExp}\xspace}
\newcommand*{\MATOBHR}{\texttt{MA2B-HR}\xspace}
\newcommand*{\MATOB}{\texttt{MA2B}\xspace}
% Calligraphic K and M.
\newcommand*{\mK}{\mathcal{K}}
\newcommand*{\mM}{\mathcal{M}}

% Revision markup, selected by hard-coded conditionals.
%
% \rev / \com -- markup OFF: \iffalse selects the \else branch, so \rev{...}
% passes its text through unchanged and \com{...} discards its argument.
% Change \iffalse to \iftrue to show revisions in blue and comments in bold blue.
\iffalse
\newcommand{\rev}[1]{{\color{blue}#1}}% revised text
\newcommand{\com}[1]{\textbf{\color{blue} (COMMENT: #1)}}% comment on the text
\else
\newcommand{\rev}[1]{#1}
\newcommand{\com}[1]{}
\fi

% \icmlrev / \icmlcom -- markup ON: \iftrue selects the first branch, so
% \icmlrev{...} is typeset in blue and \icmlcom{...} in bold blue.
% Change \iftrue to \iffalse to get the plain-text / discarded variants.
% NOTE(review): this markup is active while the class is loaded with the
% [accepted] option -- confirm that is intended for the final version.
\iftrue
\newcommand{\icmlrev}[1]{{\color{blue}#1}}% revised text
\newcommand{\icmlcom}[1]{\textbf{\color{blue} (COMMENT: #1)}}% comment on the text
\else
\newcommand{\icmlrev}[1]{#1}
\newcommand{\icmlcom}[1]{}
\fi

% Number the lines of the document (lineno package loaded with its `switch' option).
% NOTE(review): numbering is switched on unconditionally while the class is loaded
% with the [accepted] option -- confirm this is intended for the final version.
\usepackage[switch]{lineno}
\linenumbers

% Question/answer formatting: a bold label followed by the question in
% emphasis, or by the answer in blue.
\newcommand{\reviewquote}[1]{%
    \textbf{Question:} \emph{#1}%
}
\newcommand{\answer}[1]{%
    \textbf{Answer:} \textcolor{blue}{#1}%
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


% Paper title; the \\ marks a manual line break inside the title.
\title{Exploration for Free: How Does Reward Heterogeneity \\ Improve Regret in Cooperative Multi-agent Bandits?}

% The standard author block has changed for UAI 2023 to provide
% more space for long author lists and allow for complex affiliations.
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Authors: the optional argument is the index of the author's affiliation
% and matches one of the \affil[<index>] entries further down.
\author[1]{Xuchuang Wang}
\author[2]{Lin Yang}
\author[3]{Yu-Zhen Janice Chen}
\author[1]{Xutong Liu}
\author[3]{Mohammad Hajiesmaili}
\author[3]{Don Towsley}
\author[1]{John C.S. Lui}
% Affiliations, declared after the authors and keyed by the same indices.
\affil[1]{%
Department of Computer Science and Engineering\\
The Chinese University of Hong Kong\\
Hong Kong
}
\affil[2]{%
School of Intelligence Science and Technology\\
Nanjing University\\
Jiangsu, China
}
\affil[3]{%
College of Information and Computer Sciences\\
University of Massachusetts Amherst\\
Massachusetts, USA
  }
  
\begin{document}
\maketitle

\begin{abstract}
    This paper studies a cooperative multi-agent bandit scenario in which the rewards observed by agents are heterogeneous---one agent's meat can be another agent's poison. Specifically, the total reward observed by each agent is the sum of two values: an arm-specific reward, capturing the intrinsic value of the arm, and a privately known agent-specific reward, which captures the personal preferences/limitations of the agent. This heterogeneity in total reward leads to different local optimal arms for agents but creates an opportunity for \textit{free exploration} in a cooperative setting---an agent can freely explore its local optimal arm with no regret and share this free observation with some other agents who would suffer regret if they pulled this arm, since the arm is not optimal for them.
    We first characterize a regret lower bound that captures free exploration, i.e., arms that can be freely explored have no contribution to the regret lower bound. Then, we present a cooperative bandit algorithm that takes advantage of free exploration and achieves a near-optimal regret upper bound which tightly matches the regret lower bound up to a constant factor.
    Lastly, we run numerical simulations to compare our algorithm with various baselines without free exploration.
\end{abstract}

% Main-body sections, read in this order from the sections/ directory.
\input{sections/_intro.tex}
\input{sections/_model.tex}
\input{sections/_algo.tex}
\input{sections/_analysis.tex}
\input{sections/_simu.tex}
\input{sections/_conclu.tex}


\begin{acknowledgements} % will be removed in pdf for initial submission,
    % (without ‘accepted’ option in \documentclass)
    % so you can already fill it to test with the
    % ‘accepted’ class option
    The work of Mohammad Hajiesmaili is supported by NSF CAREER-2045641, CPS-2136199, CNS-2106299, and CNS-2102963.
    The work of Don Towsley is supported by the U.S. Army Research Laboratory under Cooperative Agreement W911NF-17-2-0196.
    % The work of John C.S. Lui is supported in part by the RGC's GRF 14215722.
    The work of John C.S. Lui is supported in part by the RGC GRF 14215722.
    Lin Yang is the corresponding author (\texttt{linyang@nju.edu.cn}).
\end{acknowledgements}

% References
\bibliography{bibliography}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% APPENDIX
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \newpage
% \leftlinenumbers
% \appendix
% \onecolumn

% \input{sections/_related_works}
% \input{sections/_other_possible_models}
% \input{sections/_application_scenarios.tex}
% % \input{sections/_simulation_details}
% \input{sections/_proofs.tex}

% \input{sections/_reduce_communication.tex}
% \input{sections/_rebuttal_uai.tex}

\end{document}
