\documentclass[accepted]{uai2022} % for initial submission
% \documentclass[accepted]{uai2022} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2022} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2022} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

% Recommended, but optional, packages for figures and better typesetting:
\usepackage{microtype}
\usepackage{graphicx}
%\usepackage{subfigure}
\usepackage{algorithm}
\usepackage{algorithmic}
\usepackage{booktabs} % for professional tables
\usepackage{etex}
\usepackage{amsmath}
\usepackage{amsthm}
\usepackage{amssymb} % triangleq
\usepackage{color, colortbl}
\usepackage{xcolor}
\usepackage{hyperref}
\usepackage{cleveref}



\usepackage{bm}
\usepackage{bbm}
\usepackage{graphicx}
\usepackage{thmtools}
\usepackage{thm-restate}
\usepackage{mathtools}
%\usepackage{caption}
\usepackage[skip=0pt]{caption}
\usepackage[skip=0pt]{subcaption}
%\usepackage{accents}
\usepackage{booktabs} % toprule
\usepackage{fontawesome}
% \usepackage[algo2e,ruled,vlined, linesnumbered]{algorithm2e}

\renewcommand{\textfraction}{0.05}
\renewcommand{\topfraction}{0.99}

%colors
\definecolor{red}{HTML}{E51400}  %red
\definecolor{blue}{HTML}{0050EF} %cobalt
\definecolor{green}{HTML}{008A00} %emerald
\definecolor{purple}{HTML}{AA00FF} %violet
\definecolor{dark-red}{rgb}{0.4, 0.15, 0.15}
\definecolor{dark-blue}{rgb}{0.15, 0.15, 0.4}
\definecolor{medium-red}{rgb}{0.5, 0, 0}
\definecolor{medium-blue}{rgb}{0, 0, 0.5}
\definecolor{light-red}{rgb}{0.7, 0, 0}
\definecolor{light-blue}{rgb}{0, 0, 0.7}


% \hypersetup{
%     colorlinks=true,
%     linkcolor=blue,
%     filecolor=magenta,      
%     urlcolor=blue,
% }

% theorems
\newtheorem{theorem}{\bf Theorem}
\newtheorem{lemma}{\bf Lemma}
\newtheorem{fact}{\bf Fact}
\newtheorem{condition}{\bf Condition}
\newtheorem{property}{\bf Property}
\newtheorem{corollary}{\bf Corollary}
\newtheorem{proposition}{\bf Proposition}
\newtheorem{definition}{\bf Definition}
\newtheorem{observation}{\bf Observation}
\newtheorem{assumption}{\bf Assumption}

\renewcommand{\qedsymbol}{$\blacksquare$}
\renewcommand*{\proofname}{\textnormal{\bf Proof}}
% \renewcommand{\thefootnote}{\fnsymbol{footnote}}

% color
\definecolor{red}{HTML}{E51400} %red
\definecolor{blue}{HTML}{0050EF} %cobalt
\definecolor{green}{HTML}{008A00} %emerald
\definecolor{purple}{HTML}{AA00FF} %violet
\definecolor{orange}{HTML}{FF7F00}
\definecolor{gray}{HTML}{848482}

% names
\newcommand{\mulane}{{MuLaNE}}

% math
\DeclareMathOperator*{\argmin}{arg\,min}
\DeclareMathOperator*{\argmax}{arg\,max}
% \newcommand{\dom}{\text{dom}}
% \newcommand{\ubar}[1]{\underaccent{\bar}{#1}}
% \newcommand{\bpsi}{\pmb{\psi}}
% \newcommand{\norm}[1]{\left\lVert#1\right\rVert}
% \newcommand{\prts}[1]{\left(#1\right)}
% \newcommand{\brkt}[1]{\left[#1\right]}
% \newcommand{\curl}[1]{\left\{#1\right\}}
% \newcommand{\cS}{\mathcal{S}}
% \newcommand{\abs}[1]{\left| #1 \right|}
% \newcommand{\bOne}{\mathbbm{1}}
% \DeclarePairedDelimiter{\ceil}{\lceil}{\rceil}
% \DeclarePairedDelimiter\floor{\lfloor}{\rfloor}
% \DeclareRobustCommand{\stirling}{\genfrac\{\}{0pt}{}}


% \newcommand{\pl}{\left(}
% \newcommand{\pr}{\right)}

% \newcommand{\R}{\mathbb{R}}
% \newcommand{\Z}{\mathbb{Z}}
% \newcommand{\E}{\mathbb{E}}
% \newcommand{\Var}{{\rm Var}}
% \newcommand{\bbeta}{\boldsymbol{\beta}}
% \newcommand{\I}{\mathbb{I}}
% \newcommand{\bT}{\mathbb{T}}
% \newcommand{\bA}{\boldsymbol{A}}
% \newcommand{\ba}{\boldsymbol{a}}
% \newcommand{\bb}{\boldsymbol{b}}
% \newcommand{\bB}{\boldsymbol{B}}
% \newcommand{\bch}{\boldsymbol{\chi}}
% \newcommand{\bc}{\boldsymbol{c}}
% \newcommand{\bI}{\boldsymbol{I}}
% \newcommand{\bk}{\boldsymbol{k}}
% \newcommand{\bL}{\boldsymbol{L}}
% \newcommand{\bO}{\boldsymbol{O}}
% \newcommand{\bP}{\boldsymbol{P}}
% \newcommand{\br}{\boldsymbol{r}}
% \newcommand{\bR}{\boldsymbol{R}}
% \newcommand{\bS}{\boldsymbol{S}}
% \renewcommand{\bT}{\boldsymbol{T}}
% \newcommand{\bU}{\boldsymbol{U}}
% \newcommand{\bu}{\boldsymbol{u}}
% \newcommand{\bV}{\boldsymbol{V}}
% \newcommand{\bv}{\boldsymbol{v}}
% \newcommand{\bw}{\boldsymbol{w}}
% \newcommand{\bW}{\boldsymbol{W}}
% \newcommand{\bX}{\boldsymbol{X}}
% \newcommand{\bY}{\boldsymbol{Y}}
% \newcommand{\bx}{\boldsymbol{x}}
% \newcommand{\by}{\boldsymbol{y}}
% \newcommand{\bZ}{\boldsymbol{Z}}
% \newcommand{\bpi}{\boldsymbol{\pi}}
% \newcommand{\cA}{\mathcal{A}}
% \newcommand{\cB}{\mathcal{B}}
% \newcommand{\cC}{\mathcal{C}}
% \newcommand{\cD}{\mathcal{D}}
% \newcommand{\cE}{\mathcal{E}}
% \newcommand{\cF}{\mathcal{F}}
% \newcommand{\cG}{\mathcal{G}}
% \newcommand{\cH}{\mathcal{H}}
% \newcommand{\cI}{\mathcal{I}}
% \newcommand{\cL}{\mathcal{L}}
% \newcommand{\cR}{\mathcal{R}}
% \newcommand{\cM}{\mathcal{M}}
% \newcommand{\cN}{\mathcal{N}}
% \newcommand{\cU}{\mathcal{U}}
% \newcommand{\cV}{\mathcal{V}}
% \newcommand{\cP}{{\mathcal{P}}}
% \newcommand{\PR}{{\mathbb{P}}}
% \newcommand{\cQ}{{\mathcal{Q}}}
% \newcommand{\cW}{{\mathcal{W}}}
% \newcommand{\bbI}{\mathbb{I}}
% \newcommand{\bbZ}{\mathbb{Z}}
% \newcommand{\bbR}{\mathbb{R}}
% \newcommand{\brho}{\boldsymbol{\rho}}
% \newcommand{\btau}{\boldsymbol{\tau}}
% \newcommand{\btheta}{\boldsymbol{\theta}}
% \newcommand{\boldeta}{\boldsymbol{\eta}}
% \newcommand{\ephi}{\hat{\boldsymbol{\phi}}}
% \newcommand{\epsi}{\hat{\boldsymbol{\psi}}}
% \newcommand{\trace}{\mathrm{trace}}
% \newcommand{\defeq}{\vcentcolon=}
% \newcommand{\eqdef}{=\vcentcolon}
% \newcommand{\tar}{\mathcal{T}}
% \newcommand{\bsig}{\bm{\sigma}}

\newcommand{\bbeta}{\boldsymbol{\beta}}
\newcommand{\E}{\mathbb{E}}
\newcommand{\I}{\mathbb{I}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\bT}{\mathbb{T}}
\newcommand{\bOne}{\mathbbm{1}}
\newcommand{\bA}{\boldsymbol{A}}
\newcommand{\ba}{\boldsymbol{a}}
\newcommand{\bb}{\boldsymbol{b}}
\newcommand{\bB}{\boldsymbol{B}}
\newcommand{\bC}{\boldsymbol{C}}
\newcommand{\bD}{\boldsymbol{D}}
\newcommand{\be}{\boldsymbol{e}}
\newcommand{\bH}{\boldsymbol{H}}
\newcommand{\bh}{\boldsymbol{h}}
\newcommand{\bI}{\boldsymbol{I}}
\newcommand{\bi}{\boldsymbol{i}}
\newcommand{\bj}{\boldsymbol{j}}
\newcommand{\bk}{\boldsymbol{k}}
\newcommand{\bl}{\boldsymbol{\ell}}
\newcommand{\bL}{\boldsymbol{L}}
\newcommand{\bM}{\boldsymbol{M}}
\newcommand{\bN}{\boldsymbol{N}}
\newcommand{\bO}{\boldsymbol{O}}
\newcommand{\br}{\boldsymbol{r}}
\newcommand{\bR}{\boldsymbol{R}}
\newcommand{\bS}{\boldsymbol{S}}
\renewcommand{\bT}{\boldsymbol{T}}
\newcommand{\bU}{\boldsymbol{U}}
\newcommand{\bu}{\boldsymbol{u}}
\newcommand{\bV}{\boldsymbol{V}}
\newcommand{\bv}{\boldsymbol{v}}
\newcommand{\bw}{\boldsymbol{w}}
\newcommand{\bW}{\boldsymbol{W}}
\newcommand{\bX}{\boldsymbol{X}}
\newcommand{\bY}{\boldsymbol{Y}}
\newcommand{\bx}{\boldsymbol{x}}
\newcommand{\by}{\boldsymbol{y}}
\newcommand{\bz}{\boldsymbol{z}}
\newcommand{\bZ}{\boldsymbol{Z}}
\newcommand{\bpi}{\boldsymbol{\pi}}
\newcommand{\cA}{\mathcal{A}}
\newcommand{\cB}{\mathcal{B}}
\newcommand{\cC}{\mathcal{C}}
\newcommand{\cD}{\mathcal{D}}
\newcommand{\cE}{\mathcal{E}}
\newcommand{\cF}{\mathcal{F}}
\newcommand{\cG}{\mathcal{G}}
\newcommand{\cH}{\mathcal{H}}
\newcommand{\cL}{\mathcal{L}}
\newcommand{\cJ}{\mathcal{J}}
\newcommand{\cM}{\mathcal{M}}
\newcommand{\cN}{\mathcal{N}}
\newcommand{\cQ}{\mathcal{Q}}
\newcommand{\cR}{\mathcal{R}}
\newcommand{\cS}{\mathcal{S}}
\newcommand{\cT}{\mathcal{T}}
\newcommand{\cU}{\mathcal{U}}
\newcommand{\cV}{\mathcal{V}}
\newcommand{\cP}{{\mathcal{P}}}
\newcommand{\defeq}{\vcentcolon=}


\newcommand{\ttheta}{\tilde{\theta}}
\newcommand{\tR}{\tilde{R}}
\newcommand{\tP}{\tilde{P}}
\newcommand{\tQ}{\tilde{Q}}
\newcommand{\tY}{\tilde{Y}}
\newcommand{\tZ}{\tilde{Z}}

\newcommand{\hP}{\hat{P}}
\newcommand{\hZ}{\hat{Z}}

\newcommand{\vg}{\boldsymbol{g}}
\newcommand{\vx}{\boldsymbol{x}}
\newcommand{\vmu}{\boldsymbol{\mu}}
\newcommand{\vzero}{\boldsymbol{0}}
\newcommand{\vtheta}{\boldsymbol{\theta}}
\newcommand{\btheta}{\boldsymbol{\theta}}

\newcommand{\norm}[1]{\left\lVert #1\right\rVert}
\newcommand{\prdct}[1]{\left\langle #1\right\rangle}

\newcommand{\LB}{{\it LB}}





% Macro for switching between full and short version
\newcommand{\compilefullversion}{false}%SHOW full version
\ifthenelse{\equal{\compilefullversion}{false}}{%
	\newcommand{\OnlyInFull}[1]{}
	\newcommand{\OnlyInShort}[1]{#1}
}{%
	\newcommand{\OnlyInFull}[1]{#1}%
	\newcommand{\OnlyInShort}[1]{}%
}%

% Macro for comments:
\newcommand{\compilehidecomments}{true}%HIDE comments
\ifthenelse{ \equal{\compilehidecomments}{true} }{%
	\newcommand{\wei}[1]{}
	\newcommand{\xutong}[1]{}
	\newcommand{\jinhang}[1]{}
	\newcommand{\tong}[1]{}
}{
\newcommand{\wei}[1]{{\color{blue}{\small{\bf [Wei: #1]}}}}
% \newcommand{\xutong}[1]{{\color{green} [\text{Xutong:} #1]}}
\newcommand{\xutong}[1]{{\color{green} [#1]}}
\newcommand{\yutong}[1]{{\color{orange} [\text{yutong:} #1]}}
\newcommand{\shuai}[1]{{\color{blue} [Shuai: #1]}}
\newcommand{\tong}[1]{{\color{blue}{\small{\bf [Tong: #1]}}}}
}

% % % squeeze
% \setlength{\textfloatsep}{0.3\textfloatsep}
% \setlength{\dbltextfloatsep}{0.3\dbltextfloatsep}
% \setlength{\floatsep}{0.3\floatsep}
% \setlength{\dblfloatsep}{0.3\dblfloatsep}
% \setlength{\belowdisplayskip}{0.3\baselineskip}

% \usepackage{titlesec}
% \titlespacing{\section}{0pt}{8pt plus -1pt minus 1pt}{0pt plus 1pt minus 1pt}
% \titlespacing{\subsection}{0pt}{3pt plus -1pt minus 1pt}{-2pt plus 1pt minus 2pt}
% \titlespacing{\subsubsection}{0pt}{\parskip}{-\parskip}
% \makeatletter
% \def\thm@space@setup{
% \thm@preskip=0.8\topsep
% \thm@postskip=\thm@preskip % or whatever, if you don't want them to be equal
% }
% \makeatother

% algorithms
\makeatletter
\newcommand{\algrule}{\par\vskip.2\baselineskip{\color{black!30}\hrule}\par\vskip.2\baselineskip}
\makeatother
\renewcommand{\algorithmiccomment}[1]{\hfill$\triangleright$\textit{\textcolor{medium-blue}{#1}}}
%\renewcommand{\algorithmicinput}{\textbf{Input: }}

\title{Federated Online Clustering of Bandits}

% The standard author block has changed for UAI 2022 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is authomatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:<liuxt@cse.cuhk.edu.hk>}{Xutong Liu}}
\author[2]{\href{mailto:<zhaohaoru@sjtu.edu.cn>}{Haoru Zhao}}
\author[3]{\href{mailto:<tyu@adobe.com>}{Tong Yu}}
\author[2]{\href{mailto:<shuaili8@sjtu.edu.cn>}{Shuai Li\thanks{Correspondence to: Shuai Li <shuaili8@sjtu.edu.cn>}}}
\author[1]{\href{mailto:<cslui@cse.cuhk.edu.hk>}{John C.S. Lui}}
% Add affiliations after the authors
\affil[1]{%
    The Chinese University of Hong Kong\\
    Hong Kong SAR, China
}
\affil[2]{%
    Shanghai Jiao Tong University\\
    Shanghai, China
}
\affil[3]{%
    Adobe Research\\
    San Jose, CA, USA
  }

\begin{document}
\maketitle

\begin{abstract}
% Contextual multi-armed bandit (MAB) is an important sequential decision making problem in recommendation systems. A line of works, called the clustering of bandits (CLUB), utilizes the collaborative effect over users and dramatically improves the recommendation quality. Owing to the increasing scale of application and public concerns of privacy, there is a growing demand to keep user data decentralized and push bandit learning to the local server side. Existing CLUB algorithms, however, are designed under the centralized setting where data are available at a central server. We focus on the study of the federated clustering of bandit (FCLUB) problem, whose goal is to minimize the total regret meanwhile satisfying privacy and communication considerations. \tong{To achieve this, what is the UNIQUE challenge. May mention Cluster Structure Detection.} For this problem, we design a new phase-based scheme for cluster detection and a novel asynchronous communication protocol for cooperative bandit learning. To protect users' privacy, previous differential privacy (DP) definitions are not very suitable \tong{elaborate why} and we propose a new DP notion which acts on the user cluster level. We provide rigorous proofs to show that our algorithm simultaneously achieves (clustered) DP, sublinear communication complexity and sublinear regret. Finally, experimental evaluations show our superior performance compared with benchmark algorithms.

Contextual multi-armed bandit (MAB) is an important sequential decision-making problem in recommendation systems. A line of works, called the clustering of bandits (CLUB), utilize the collaborative effect over users and dramatically improve the recommendation quality. Owing to the increasing application scale and public concerns about privacy, there is a growing demand to keep user data decentralized and push bandit learning to the local server side. Existing CLUB algorithms, however, are designed under the centralized setting where data are available at a central server. We focus on studying the federated online clustering of bandit (FCLUB) problem, which aims to minimize the total regret while satisfying privacy and communication considerations. We design a new phase-based scheme for cluster detection and a novel asynchronous communication protocol for cooperative bandit learning for this problem. To protect users' privacy, previous differential privacy (DP) definitions are not very suitable, and we propose a new DP notion that acts on the user cluster level. We provide rigorous proofs to show that our algorithm simultaneously achieves (clustered) DP, sublinear communication complexity and sublinear regret. Finally, experimental evaluations show our superior performance compared with benchmark algorithms.

\end{abstract}

%%%%%%%%%%%%%%%%%%%%% \input{content/1_introduction.tex} %%%%%%%%%%%%%%%%%%%%
\section{Introduction}
Stochastic multi-armed bandit (MAB) \citep{auer2002finite} is a well-known sequential decision-making problem, where a learner sequentially selects actions so as to maximize the cumulative rewards (or minimize the cumulative regret). 
One fruitful application area of MAB is the online recommendation systems (RecSys)~\citep{chu2011contextual, abbasi2011improved, gentile2014online,li2019improved, zhang2020conversational, li2021unifying},
where MAB algorithms provide
a principled way to handle the challenge of exploration-exploitation trade-off~\citep{lattimore2020bandit}.


To advance the bandit algorithm for large-scale applications, contextual linear bandits add the simple yet effective linear structure assumptions on actions and reward functions~\citep{chu2011contextual,li2010contextual, abbasi2011improved}.
One limitation, however, is that such a model mainly works in a content-dependent manner, ignoring the often used tool of collaborative filtering.
To address this issue, the clustering of bandits (CLUB) are proposed~\citep{gentile2014online, li2016collaborative, li2018online,li2019improved}.
The CLUB algorithms adaptively cluster similar users and utilize the collaborative information given by the cluster structure, which dramatically improves the recommendation quality.

While most existing bandit algorithms are designed under a centralized setting,
in response to the increasing application scale and public concerns about privacy, there is a growing demand to keep user data decentralized and push the learning of bandit models to the client or the local server side \tong{This claim is very strong. Please cite. kairouz2021advances is not a bandit paper. Can we cite papers to support this claim in bandit papers?}. 
This paradigm is now known as federated learning~\citep{kairouz2021advances}.
Owing to its overall applicability, there has been a surge of interest in studying federated MAB~\citep{dubey2020differentially, zhu2021federated,shi2021federated}, which promises cooperative bandit learning with larger amounts of data (across multiple local servers) while keeping the data decentralized.
This motivates us to study the CLUB problem to its federated counterpart, i.e., the federated clustering of bandits (FCLUB).

In FCLUB, each local server can conduct its own local clustering of bandit algorithms.
To enable the collaborative effects of users across different servers, the local server could also collaborate with other local servers under the coordination of a global server, whose communication needs to satisfy specific privacy and communication requirements.
The goal of this work is to design an federated online clustering of bandit framework, so as to minimize the $T$-round regret under the privacy protection requirements and communication cost considerations.


The key challenge of FCLUB is designing collaborative bandit learning procedures and cluster detection strategies to identify the overall cluster structures across different local servers, where each local server only holds part of the users with unknown interests.
Such a problem is more challenging due to the following privacy and communication cost requirements, which are two first-order requirements for any federated applications~\citep{kairouz2021advances}.

\textbf{Privacy protection:} To reduce the privacy leakage of each user, we expect local servers to only share user clusters' data instead of individuals' raw data.
In addition, we still need a mechanism to protect the uploaded (cluster) information against possible adversaries outside the local server, for which we adopt the solution concept of differential privacy (DP).
However, the off-the-shelf DP notion is defined on individual users, hence unsuitable for FCLUB.
It is challenging and unclear what is a suitable notion of privacy over the clustering of users and how to devise algorithms to guarantee the corresponding privacy requirements.


\textbf{Communication:} Communication is critical for collaborative learning, but may also be expensive or time-consuming. For FCLUB, it is desired to minimize the total regret while keeping the communication costs (in terms of communication rounds between the global server and local servers) as low as possible.
Another requirement is to design an asynchronous communication protocol incorporating the randomly arriving users and possibly lagging servers, preventing commonly used synchronous protocols~\citep{dubey2020differentially}.

\tong{Although this paragraph seems to describe the limitations of CLUB, the sentences below are still general and not uniquely related to CLUB. May improve by adding words `cluster' in suitable places of these sentences. E.g., cluster detection procedure is mentioned above once, but never mentioned again in this paragraph, which make all pieces a bit disconnected.}

\subsection{Our Contributions}
To address the aforementioned challenges, this paper makes four contributions.

\textbf{1. Problem Formulation:} We propose the setting of online clustering of bandits to its federated counterpart, which considers the privacy protection and communication requirements.
We also propose a novel cluster differential privacy (CDP) notion tailored for the FCLUB setting.

\textbf{2. Algorithm Design:}
\tong{Cluster Structure Detection seems to be the key challenge, but is missing here. Which parts of our methodologies are motivated by this challenge?}
We propose a private and communication-efficient FCLUB-CDP algorithm. For privacy protection, a tree-based privatizer is designed to guarantee our proposed CDP.
For communication efficiency, we follow the phase-based principle for cluster detection and propose the asynchronous communication protocol for delayed information sharing.
In particular, each local server maintains upload/download buffers and occasionally uploads/downloads the buffered information to/from the global server only if it finds the latest information deviates too far from the last update.

\textbf{3. Theoretical Analysis:} We prove that FCLUB-CDP achieves the $O(dL\sqrt{mT\frac{\log(1/\delta)}{\varepsilon}}\log^{1.5} T)$ regret bounds, $O(dmL\log T)$ communication costs and $(\varepsilon, \delta, L,m)$-CDP privacy guarantee, respectively. 

\textbf{4. Experiments:} We conduct extensive experiments over synthetic and real-world datasets to validate our theoretical analysis. Empirical results show the superior performance of our algorithm over existing algorithms.\footnote{Codes and datasets are available at  \href{https://github.com/ZhaoHaoRu/Federated-Clustering-of-Bandits}{GitHub}.}

%%%%%%%%%%%%%%%%%%%%%%\input{content/related.tex}%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Related Work}\label{sec: related work}
\textbf{Online Clustering of Bandits.} The online clustering bandits is first proposed by \citet{gentile2014online} and shows its effeteness by accelerating the learning process of contextual bandits. The key idea is to use a graph representing the user similarity and adaptively refine the user clusters for information sharing.
This work has been extended by a series of works considering the collaborative effects on both users and items~\citep{li2016collaborative}, the context-aware settings~\citep{gentile2017context}, the cascading bandit setting~\citep{li2018online} and the users with different user frequency~\citep{li2019improved}.
However, none of these works consider the privacy constraints and communication cost requirements imposed by the FL paradigm like the current work, and therefore cannot give guarantees on these two critical criteria.
\citet{korda2016distributed} considers the peer-to-peer but non-private clustering of bandits, our work studies the private bandit setting under the orchestration of a global server, which requires different algorithms and analysis.

% These 
% Li2016a extends first proposed the online clusting of linear bandits algorithm which maintains a graph among users and uses connected components to represent user clusters.
% And there are a series of works proposing several variants of this algorithm, such as \cite{li2018online},\cite{li2019improved} and \cite{gentile2017context}. In this paper, we use this algorithm to process the users on local servers, which means we maintain a graph on every local server.

\textbf{Federated and Distributed Bandits.} There has been growing interest in bandit learning with multiple players.
One line of research investigates the competitive agents with collisions~\citep{anandkumar2011distributed,rosenski2016multi,bistritz2018distributed,boursier2019sic}, in which the reward for an arm is zero if it is chosen by more than one agent. The goal of these works is to minimize regret without communication, which is different from ours.
The cooperative distributed bandits are most related to our work, in which multiple agents collaborate to solve a bandit problem over certain communication networks, e.g., peer-to-peer networks~\citep{korda2016distributed} or client-server networks~\citep{dubey2020differentially,li2021asynchronous}.
Our work belongs to the client-server setting, but we differ from both \citet{dubey2020differentially} and  \citet{li2021asynchronous} since neither of them considers the clustering effects of users.
% Our work belongs to the client-server setting but we differ from \cite{dubey2020differentially} which assumes users are identical and comes in a round-robin manner or \cite{li2021asynchronous} which does not consider the clustering of users and differential privacy.

\textbf{Differential Privacy.} Our work leverages on \textit{differential privacy}, a rigorous mathematical framework of privacy first proposed by \citet{dwork2006calibrating}.
% Encoding the intuition that any observable output changes very little (in probability) when any input datum changes, differential privacy has been accepted as the \textit{de facto} gold standard of privacy-preserving data analysis in both academia and industry.
We utilize several useful techniques from the standard differential privacy to maintain our cluster differential privacy condition.
Most notably, we use a tree-based algorithm
% \shuai{this should be put in the discussion part}
which is introduced in \citet{chan2011private} to realize differential privacy for the continual release of statistics.
% This algorithm can add $\log n$ noisy terms to protect the partial sums of $n$ entries by maintaining a binary tree whose nodes correspond to these $n$ entries.
In the single-agent bandit setting, \citet{shariff2018differentially} also utilizes this tree-based algorithm to achieve Joint DP, which is then extended by \citet{dubey2020differentially} to the federated setting.
The closest work to ours is \citet{dubey2020differentially} and they study the simpler case where each local server only holds one user and all users are identical (with the same unknown preference vector), hence gives the different user-level DP definition with different privatizer and analysis.


\textit{To the best of our knowledge, this paper is the first to generalize the CLUB to its federated setting, which simultaneously achieves privacy protection and communication requirements.}






%%%%%%%%%%%%%%%%%%%%%%%%%\input{content/2_preliminary.tex}%%%%%%%%%%%%%%%%%
\section{Problem Settings}\label{sec: problem setting}

In this section, we formulate the setting of ``Federated Clustering of Bandits'' (FCLUB). We use $[n]$ to represent set $\{1,...,n\}$. We use boldface lowercase letters and boldface capitalized letters for column vectors and matrices, respectively. For the norms, $\norm{\bx}$ denotes the $\ell_2$ norm of vector $\bx$. For any symmetric positive semi-definite (PSD) matrix $\bM$ (i.e., $\bx^{\top} \bM \bx \ge 0, \forall \bx$), $\norm{\bx}_{\bM}=\sqrt{\bx^{\top} \bM \bx}$ denotes the matrix norm of $\bx$ regarding matrix $\bM$.

At the global level, there are $n$ users, denoted by the set $\cU=\{u_1, ..., u_n\}$.
Each user $i \in \cU$ has an \textit{unknown} preference vector $\btheta_i \in \R^d$ and for simplicity we assume $\norm{\btheta_i}\le 1$.
Since users may have the same/similar preference vector, we assume there exists $m$ (unknown) different preference vectors, i.e., $|\{\btheta_1, ..., \btheta_n\}|=m$.
Users with the same preference vector form an underlying cluster and we denote these $m$ (unrevealed) clusters by $\cC=\{C_1, ..., C_m\}$.
Different from CLUB, users in FCLUB are distributed in $L$ local servers denoted by $\{1,...,L\}$.
At the local level, the local server $\ell$ contains $n^\ell$ users $\cU^\ell=\{u^{\ell}_1,..., u^{\ell}_{n^\ell}\}$ (with $\bigcup_{\ell \in [L]}\cU^\ell=\cU$) and similarly, these $n^\ell$ users form local cluster $\cC^\ell=\{C^\ell_1, ..., C^\ell_{m_{\ell}}\}$ where $m_{\ell} \le m$.

The learning agent interacts with the bandit game as the follows.
At each time $t$, a user $i_t \in [n]$ randomly arrives with probability $1/n$.
Then $K$ items are generated to form a item set $\bD_t$, where the feature of each item $\bx \in \bD_t$ is drawn independently from a fixed but unknown distribution $\rho$ over $\{\bx \in \R^d: \norm{\bx}\le 1\}$.
The learning agent identifies the local server $\ell_t$ that $i_t$ belongs to and the current user cluster $j_t$ (detected by our algorithm) which $i_t$ lies in.
The local server then recommends an item $\bx_{t} \in \bD_t$ to the user based on the aggregated information from cluster $j_t$.
After $i_t$ receives the recommendation, the learning agent receives a random reward $y_t \in [0,1]$.
Let $\cH_t=\{i_1, \bx_1, y_1,..., i_{t-1}, \bx_{t-1}, y_{t-1}, i_t\}$ be the historical information before time $t$.
We assume the expectation of reward $y_t$ is linear in the feature vector $\bx \in \bD_t$ and the unknown preference vector $\btheta_{i_t}$, i.e., $\E_t[y_t|\bx]=\btheta_{i_t}^{\top}\bx$, and $\{y_t-\btheta_{i_t}^{\top}\bx\}_{t=1,2,...}$ have sub-Gaussian tails $\sigma_0^2$. 

Now we give some assumptions on preference vectors and item feature vectors. Note that all the assumptions follow the previous works~\cite{gentile2014online,gentile2017context,li2018online, li2019improved}.
\begin{assumption}[Gap between preference vectors]
For any two different preference vectors $\btheta_{i_1} \neq \btheta_{i_2}$, there is a fixed but unknown gap $\gamma>0$ so that $\norm{\btheta_{i_1}-\btheta_{i_2}} \ge \gamma$.\footnote{As previous works, this assumption can be relaxed by assuming the existence of two thresholds, one for the between-cluster distance $\gamma$, the other for the within-cluster distance $\norm{\btheta_{i_1}-\btheta_{i_2}}\le\eta$. }
\end{assumption}

\begin{assumption}[Item regularity]
For item distribution $\rho$, there exists a known $\lambda_x > 0$ so that $\E_{\bx \sim \rho}[\bx \bx^{\top}]$ is full rank with minimal eigenvalue $\lambda_x$. Meanwhile, for all time $t$, for any fixed unit vector $\btheta \in \R^d$, $(\btheta^{\top} \bx)^2$ has sub-Gaussian tail with variance $\sigma^2 \le \frac{\lambda_x}{8 \log (4K)}$.
\end{assumption}

\textbf{Learning Efficiency.} The goal of the learning agent is to accumulate as much reward as possible.
Let the optimal item for user $i_t$ at time $t$ be $\bx^*_{i_t}=\argmax_{\bx \in \bD_t} \btheta_{i_t}^{\top}\bx$.
The learning performance is measured by the regret, defined as 
\begin{equation}
    R(T)=\E[\sum_{t=1}^T r_t]= \E[\sum_{t=1}^T(\btheta_{i_t}^{\top}\bx^*_{i_t}-\btheta_{i_t}^{\top}\bx_t)],
\end{equation}
where $r_t$ is the regret at time $t$ and the expectation is taken over the randomness of the algorithm and the environment regarding the users $i_1, ..., i_T$ and the item sets $D_1, ..., D_T$.

In addition to the regret, privacy protection and the communication cost are two important criterion in federated learning.
In this work, we aim to ensure that the user data are protected under privacy constraints and the communication complexity is low.

\textbf{Privacy Requirements.}
To protect the user data, we introduce two privacy requirements.
First, we desire the local server only uploads the user clusters' sufficient statistics (or clustered data) for the learning procedure, instead of individual users' raw data.
Second, to protect the clustered data against third-party adversaries outside the local server, we adopt the notion of DP, which encodes the intuition that any observable output changes very little (in probability) when any input datum changes.
Since existing DP notions are defined on \textit{user-level} data~\cite{shariff2018differentially, dubey2020differentially}, we introduce a new differential privacy requirement to protect the \textit{cluster-level} data.

% \begin{definition}[Locally Differential Private (LDP)] A data releasing mechanism $\cQ \rightarrow \mathcal{Z}$ is defined as $(\varepsilon, \delta)$-LDP, if for any two outcomes $y, y' \in \cD$, and any (measurable) subset $O \subseteq \mathcal{Z}$, there is 
% \begin{equation}
%     \Pr[\cQ(y)\in U] \le \exp(\varepsilon)\Pr[\cQ(y)\in U] + \delta.
% \end{equation}
% \end{definition}
The contextual MAB problem involves two sets of variables against the adversaries outside the local server: the decision sets $\bD_t$ and the observed rewards $y_t$.
Since the users only receive and store observations regarding the chosen action $\bx_t$ and the observed reward $y_t$, it suffices to protect $(\bx_t,y_t)_{t\in [T]}$ to achieve DP requirements.
Let $\tau_{\ell,j}$ be time slots when user in cluster $j$ appears at local server ${\ell}$, we denote two sequences $S_{\ell,j}=(\bx_{t},y_{t})_{t \in \tau_{\ell,j}}$ and $S_{\ell,j}'=(\bx'_{t},y'_t)_{t \in \tau_{\ell,j}}$ as $t$-neighboring if $(\bx_{\tau},y_{\tau})=(\bx'_{\tau},y'_{\tau})$ for $\tau \neq t \in \tau_{\ell,j}$. 

\begin{definition}[Cluster Differential Privacy]\label{def:cdp} In the FCLUB setting with $L$ servers and (at most) $m$ clusters, a federated contextual bandit algorithm $A=(A_{\ell,j})_{\ell\in [L], j \in [m]}$ is $(\varepsilon, \delta, L, m)$-CDP, if for any $(\ell,j), (\ell',j')$ s.t. $(\ell,j)\neq (\ell',j')$, any $t$ and the set of sequences $\bar{S}_{\ell',j'} = \cup_{i \in [L], k \in [m]}S_{i,k}$ and $\bar{S}'_{\ell',j'}=\cup_{i \in [L]\backslash \ell', k \in [m]\backslash j'}S_{i,k} \cup S_{\ell',j'}'$ s.t. $S_{\ell',j'}$ and $S'_{\ell',j'}$ are $t$ neighboring, and for any subset of actions $a_{\ell,j} \subset \prod_{\tau \in \tau_{\ell,j}}\cD_{\tau}$ of actions, it holds that
\begin{equation}
\Pr[A_{\ell,j}(\bar{S}_{\ell',j'})\in a_{\ell,j} ] \le e^{\varepsilon}\Pr[A_{\ell,j}(\bar{S}'_{\ell',j'})\in a_{\ell,j}] + \delta.
\end{equation}
\end{definition}
Note that our CDP notion formalizes the intuition that the action chosen by any local server $\ell$ (at the cluster level $j$) must be sufficiently indistinguishable (in probability) to any single $(x,y)$ pair from any other local cluster $(\ell',j')$. Such a notion does not require each cluster is private to its own observations, i.e., each cluster of users can be trusted with its own data, which is different from the local DP~\citep{zheng2020locally} or Joint DP \citep{shariff2018differentially} that assume even itself cannot be trusted. 

\textbf{Communication Complexity.}
To evaluate the communication complexity, we count one upload operation (or one download operation) between any local server and the global server as one communication round. 
Our communication complexity is the total number of communication rounds $C(T)$ over the time horizon $T$. 


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\input{content/3_algorithm}%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Algorithm}\label{sec:algorithm}
\begin{figure}
    \centering
    \includegraphics[width=0.5\textwidth]{./phase_split.png}
    \caption{Illustration of how our algorithm detects clusters from phase $s$ to $s'$. Local server $1$ delete edge $(1,2)$ and split $C_{s,1}^{1}$ into $C_{s',1}^1, C_{s',2}^1$. The global server merges local clusters as $\{(C^1_{s',1}, C^2_{s',1}), (C^1_{s',2}, C^2_{s',2})\}$.}
    \label{fig:split_n_merge}
\end{figure}
In this section, we introduce our phase-based federated clustering of bandit algorithms with CDP (FCLUB-CDP).

\textbf{Identify the Underlying Cluster Structure.}
To correctly identify the cluster structure, we design a phase-based clustering detection algorithm in \Cref{alg:cluster_detect}.
The high level idea is to first conduct local-level clustering of bandits on each local server and merge the local clusters on the global server.

At the local level, each server $\ell$ maintains a profile of $(\bV_{t,i}, \bb_{t,i}, T_{t,i})$ for its own local users $i \in \cU^{\ell}$, where $V_{t,i}$ is Gramian matrix, $\bb_{t,i}$ is the moment vector of regressand by the regressors, and $T_{t,i}$ is the number of times that $i$ has appeared up to time $t$.
At the beginning of $s$ phase (or $t=2^s+1$), based on profiles $(\bV_{t,i}, \bb_{t,i}, T_{t,i})_{i \in \cU^{\ell}}$, each server $\ell$ maintains an undirected graph structure $\cG^{\ell}_s$, where nodes represent all local users $\cU^{\ell}$ and a pair of users are connected by an edge if they are similar.
We initialize the graph by a complete graph $\cG^\ell_{0}$ and gradually delete edges at every beginning of each phase $s$.
Specifically, in line~\ref{line:detect_delete}, we delete edge between any user $i_1$ and $i_2$ if the distance between their estimated preference vector are larger than the following threshold.
\begin{equation}\label{eq:edge_deletion}
    \norm{\hat{\btheta}_{t, i_1}-\hat{\btheta}_{t, i_2}} > \alpha_1 (F(T_{t, i_1})+F(T_{t,i_2})), 
\end{equation}
where $\hat{\btheta}_{t, i}=(\lambda \bI + \bV_{t, i})^{-1}\bb_{t,i}$ and $F(x)=\sqrt{\frac{1+\ln (1+x)}{(1+x)}}$.
After the deletion, users in connected components $j \in C(\cG^{\ell}_s)$ are grouped into local cluster $j$.
In line~\ref{line:detect_cluster}, server $\ell$ uploads the clustered information $I_{s,{\ell}}=(C^{\ell}_{s,j}, \tilde{\bV}^{\ell}_{s, j}, \tilde{\bb}^{\ell}_{s, j}, \tilde{T}^{\ell}_{s,j})_{j \in C(\cG^{\ell}_{s})}$ to the global server, which contains clustered information  for each cluster $j \in C(\cG^{\ell}_s)$.
Note that $I_{s,\ell}$ are added with random perturbation to protect the users' data, which will be introduced shortly after.

In line~\ref{line:detect_merge}, when the global server receives the privatized clustered information from all servers, it performs a merge operation to merge clusters from different servers whose estimated clustered preference vectors are close into a global clusters according to the following inequality.
\begin{equation}\label{eq:merge}
    \norm{\hat{\btheta}^{\ell _1}_{s, j_1}-\hat{\btheta}^{\ell_2}_{s, j_2}} < \alpha_2 (F(\tilde{T}^{\ell_1}_{s, j_1})+F(\tilde{T}^{\ell_2}_{s, j_2})),
\end{equation}
where $\hat{\btheta}^{\ell}_{s, j}= (\tilde{\bV}^\ell_{s, j})^{-1}\tilde{\bb}^\ell_{s,j}$ and $F(x)=\sqrt{\frac{1+\ln (1+x)}{(1+x)}}$.
$P_s$ denotes the set of $m_s$ global clusters.
Note that the local clusters in the same global cluster indexed by $k \in [m_s]$ will communicate and share protected clustered information with each other in an asynchronous manner.
At the beginning of phase $s$, if the new global cluster structure $P_s$ is different from $P_{s-1}$ at phase $s-1$, we will renew the shared global information $(\bS^{g}_{t,k}$, $\bu^{g}_{t,k}$, $T^{g}_{t,k})$ for $k \in [m_s]$.
For local servers $(\ell, j)$ in the same global cluster $P_{s,k}$, the local synchronized information $(\bS^{\ell}_{t,j}, \bu^{\ell}_{t,j}, T^{\ell}_{t,j})$, the upload buffers $(\Delta \bS^{\ell}_{t,j}, \Delta \bu^{\ell}_{t,j}, \Delta T^{\ell}_{t,j})$ and download buffers $(\Delta \bS^{-\ell}_{t,j}, \Delta \bu^{-\ell}_{t,j}, \Delta T^{-\ell}_{t,j})$ are renewed.
We also generate a new tree-based privatizer PVT$(\ell, j)$ for each cluster $j$ at server $\ell$, which will be introduced later on.

\begin{algorithm}[t]
	\resizebox{0.95\columnwidth}{!}{
\begin{minipage}{\columnwidth}
\begin{algorithmic}[1]
\caption{Phase-based FCLUB with CDP}\label{alg:main}
\STATE\textbf{Input:} Failure probability $\alpha$, deletion parameter $\alpha_1 > 0$, merge parameter $\alpha_2 > 0$, privacy parameters $\varepsilon, \delta$.
% \STATE \textbf{Notations}:Let $t_{u}$ represents the communication times, $m=\log(t_{u}) +1$, $\sigma = \frac{4\times(L_{1}^{2}+1)\sqrt{m\log(16/\delta_{1}^2)}}{\epsilon_{1}}$, $\rho_{min} = \sqrt{32}m(L_{1}^{2}+1)\log(4/\delta_{1})(4\sqrt{d}+2\ln{\frac{2t_{u}}{\alpha}})/\epsilon_{1}$, $\rho_{max}=3\rho_{min}$, $\Upsilon=\sqrt{m(L_{1}^{2}+1)(\sqrt{d}+2\log(2t_{u}\alpha))/\sqrt{2\epsilon_{1}}} $, $\beta(t) =\sigma\sqrt{2\log(2/\alpha)+d\log(3+\frac{t_{u}L_{1}^{2}}{d\rho_{min}})}+LS\sqrt{\rho_{max}}+L\Upsilon $
% \STATE \textbf{Setting}: $L$ Local servers $\{1, ..., L\}$, $m$ true clusters $\cC=\{C_1, ..., C_m\}$, $n$ users $\cU=\{u_1, ..., u_n\}$ and the global server $\cG$. 
% Each local server $l$ contains $m_l$ clusters $\cC^\ell=\{C^\ell_1, ..., C^\ell_{m_l}\}$, $n_l$ users $\cU^\ell=\{\bu^\ell_1, ..., \bu^\ell_{n_l}\} \subseteq \cU$.
\STATE User initialization: For $i \in [n]$, $ \bV_{0,i}=\boldsymbol{0}_{d \times d}, \bb_{0,i}=\boldsymbol{0}_{d \times 1}, \bT_{0,i}=0$.
\STATE Local server initialization: For $\ell \in [L]$, set graph $\cG^{\ell}_0=(\cU^{\ell}, \cE^{\ell}_0)$, local information, upload buffers, download buffers: $(\bS^{\ell}_{0,1}, \bu^{\ell}_{0,1}, T^{\ell}_{0,1})=(\Delta \bS^{\ell}_{0,1},\Delta \bu^{\ell}_{0,1},\Delta T^{\ell}_{0,1})=(\Delta \bS^{-\ell}_{0,1},\Delta \bu^{-\ell}_{0,1},\Delta T^{-\ell}_{0, 1})=(\boldsymbol{0}_{d \times d},\boldsymbol{0}_{d \times 1},0)$, perturbations $(\bar{\bH}^{\ell}_{t,1},\bar{\bh}^{\ell}_{t,1})=(\bH^{\ell}_{t,1},\bh^{\ell}_{t,1})=(\boldsymbol{0}_{d \times d},\boldsymbol{0}_{d \times 1})$.
\STATE Global server initialization: create one global cluster $P_0=\{\{(1,1),..., (L, 1)\}\}$ and set the global information for $V^g_{0,1}=\boldsymbol{0}_{d \times d}$, the $\bu^g_{0,1}=\boldsymbol{0}_{d \times 1}, T^g_{0,1}=0$.
\FOR{$s=1, 2, ..., $}
\STATE Detect and adjust clusters (\Cref{alg:cluster_detect}).
\FOR{$\tau=1, ..., 2^s$}
\STATE Compute the total time step $t=2^s-2 + \tau$.
\STATE Advance all parameter, e.g., $\bS^{\ell}_{t,j}=\bS^{\ell}_{t-1,j}$.
\STATE User $i_t$ at local server $l_t$ arrives and $l_t$ gets the local cluster $j_t$ that $i_t$ belongs to based on $\cG^\ell_{s}$.
% \STATE Compute $V_{t,j_t}=\bS^{l_t}_{t, j_t}+\Delta \bS^{l_t}_{t, j_t} , b_{t, j_t}=\bu^{l_t}_{t, j_t} + \Delta \bu^{l_t}_{t, j_t}$ and $N_{t, j_t}=T^{l_t}_{t, j_t} + \Delta T^{l_t}_{t, j_t}$.
\STATE Compute local $\beta^{l_t}_{t,j_t}$ according to \Cref{lemma:cdp_beta}.

\STATE Local server $l_t$ receives feasible context set $\bD_t$ and recommends item $\bx_t=\argmax_{\bx \in \bD_t} \bx^{\top}(\bS^{l_t}_{t, j_t})^{-1}\bu^{l_t}_{t, j_t} + \beta^{l_t}_{t,j_t} \norm{\bx}_{(\bS^{l_t}_{t, j_t})^{-1}}$.\label{line:recommend}
\STATE User $i_t$ receives feedback $y_t$, update the user $i_t$'s information: $(\bT_{t,i_t}, \bV_{t,i_t}, \bb_{t, i_t}) \mathrel{{+}{=}} (1,\bx_{t} \bx_{t} ^\top, y_t \bx_{t})$
% \STATE Update upload buffer $\bT_{t,i_t} = \bT^{l_t}_{t,i_t}+1, \bV_{t,i_t}= \bV^{l_t}_{t,i_t}+\bx_t \bx_t ^\top +B_t, \bb^{l_t}_{t, i_t}=\bb_{t, i_t}+y_t \bx_t + \xi_t.$
and others unchanged.
% : $\bT_{t,i} = \bT_{t,i}, \bV_{t,i}= \bV_{t,i}, \bb_{t, i}=\bb_{t, i}$ for all $(l,i) \neq (l_t,i_t)$.
\STATE Check upload event (\Cref{alg:check_upload}).
\STATE Check download event (\Cref{alg:check_download}).
\ENDFOR
\ENDFOR
\end{algorithmic}
\end{minipage}}
\end{algorithm}

\textbf{Asynchronous Communication Protocol.}
In this work, we design a novel asynchronous communication protocol to incorporate the randomly arriving users.
To reduce the communication cost, our high-level idea is to use the delayed communication, where the feedback are temporarily stored in buffers and only if the stored information exceeds a threshold, the upload/download events are triggered.
Such a threshold will ensure that the local information will not diverge too far from the global information, which in turn will not diverge too far from the scenario when information are fully synchronized.   
Also note that our communication is conducted in the asynchronous manner at the local cluster level.
In other words, all local clusters indexed by $(\ell, j) \in P_{s,k}$ will establish connection with each other within the global cluster $k$.
For each local cluster $(\ell, j)$, it stores a local copy of the sufficient statistics $(\bS^{\ell}_{t,j}, \bu^{\ell}_{t,j}, T^{\ell}_{t,j})$ and a upload buffer $(\Delta \bS^{\ell}_{t,j}, \Delta \bu^{\ell}_{t,j}, \Delta T^{\ell}_{t,j})$.
For the global server, it prepares for each local cluster $(\ell, j)$ a download buffer $(\Delta \bS^{-\ell}_{t,j}, \Delta \bu^{-\ell}_{t,j}, \Delta T^{-\ell}_{t,j})$, which are used to send other local servers' information to the local cluster.
It also maintains the global statistics $(\bS^{g}_{t,k}, \bu^{g}_{t,k}, T^{g}_{t,k})$ to save the data uploaded from local clusters in global cluster $k$.

Our proposed communication framework consists of two components: the upload protocol (\Cref{alg:check_upload}) and the download protocol (\Cref{alg:check_download}).
For the upload protocol, at each time step $t$, user $i_t$ visits the server $l_t$ and receives recommended item $\bx_t$.
After the user $i_t$ interacts with the environment and observes feedback $(\bx_{t}, y_t)$, the local server updates the upload buffers in line~\ref{line:upload_update_obs} and checks the following condition to decide whether to upload the upload buffer:
\begin{equation}\label{eq:upload_condition}
\text{det}(\bS^{l_t}_{t, j_t}+\Delta \bS^{l_t}_{t, j_t})/{\text{det}(\bS^{l_t}_{t, j_t})}\ge U,
\end{equation}
where $\bH^{l_t}_{t,j_t}$ and $\bar{\bH}^{l_t}_{t,j_t}$ are tentative and current perturbation for privacy protection, respectively. 
If the condition is satisfied, the local server sends $(\Delta \bS^{l_t}_{t, j_t}, \bu^{l_t}_{t, j_t}, T^{l_t}_{t, j_t})$ to the global cluster $k_t$.
The global server then merges the uploaded information into the global information in line~\ref{line:upload_merge} and also sends it to download buffers for other local clusters $(\ell,j) \neq (l_t, j_t) \in P_{s,k_t}$ in line~\ref{line:upload_renew_download}.
For local cluster $(l_t,j_t)$ itself, the local server updates the local statistics and initializes the upload buffer using the newly generated perturbation in lines~\ref{line:upload_update_local} to \ref{line:upload_clear}.
For the download protocol, at each time step $t$, the global server will check the deviation between global statistics and the local statistics via following condition:
\begin{equation}\label{eq:download_condition}
    \text{det}(\bS^g_{t,k_t})/{\text{det}(\bS^{\ell}_{t, j})}\ge D
\end{equation}
independently for local clusters $(\ell, j)$ in global cluster $P_{s,k_t}$.
If any cluster $(\ell,j)$ satisfies such condition, the global server sends the information from other clusters to $(\ell,j)$, which is used to update $(\ell,j)$'s local statistics.
Finally, the global server cleans the download buffer.

\textbf{Tree-Based Privacy Protocol.}
To ensure the uploaded information are privatized, we adopt the tree-based privatizer to generate random perturbations $\bH^{\ell}_{t,j}$ and $\bh^{\ell}_{t,j}$ whenever an upload event happens.
Note that the privatier subroutine is at the local cluster level and a new privatizer is created if the cluster structure changes at the start of any phase $s$.

Let $\bx_1, ..., \bx_T$ be a (matrix-valued) sequence of length $T$, and $s_i = \sum_{t=1}^i \bx_i$ be the partial sum of the first $i$ elements that will be realised privately.
Generally speaking, the tree-based mechanism~\cite{dwork2006calibrating} maintains a binary tree $\cT$ of depth $1+ \lceil \log T \rceil$, where the leaf nodes contain the elements $\bx_i$ and the parent node maintains the sum of its children. For each node with value $n_i$, the tree-base mechanism protects privacy by adding noise $h_i$ to each node and release $n_i + h_i$ if queried.
The key advantage is that such a tree only accesses $\nu=O(\log T)$ nodes to compute and release the partial sum $s_i$, which means the perturbation is at most $O(\nu)$ instead of $O(T)$.

Following this general idea, we implement the tree-based privatizer $(\ell, j)$ that satisfies the requirements of CDP.
Recall that we only need to protect the information uploaded to the global server, it suffices to maintain a tree $\cT^{\ell}_{j}$ of depth $\nu=O(1+\lceil \log t_c \rceil)$ for the upload event, where $t_c$ is the total number of uploads.
To make the partial sums private, we insert a random noise matrix to each node in $\cT^{\ell}_j$, similar to that of \citet{shariff2018differentially} and \cite{dubey2020differentially},.
Specifically, we sample a random matrix $\bar{\bN} \in \R^{(d+1)\times (d+1)}$ where each entry $\bar{N}_{p,q}$ is drawn from i.i.d. Gaussian distribution $\cN(0, \sigma_{\text{noise}})$ and symmetrize it to get $N=(\bar{\bN}^{\top}+\bar{\bN})/\sqrt{2}$.
It follows that in order to ensure the whole tree is $(\varepsilon, \delta)$-DP, each node should preserve $(\varepsilon/\sqrt{8\nu\log (2/\delta}), \delta/2)$-DP.
In other words, it suffices to set the variance $\sigma_{\text{noise}}=64\nu \log (2/\delta)^2/\epsilon^2$ for each tree node.
Note that at each upload round $t$, the total noise added to the partial sum is the summation of at most $\nu$ random matrices with size $(d+1)\times (d+1)$, where the top-left $(d \times d)$-submatrix forms $\bH^{\ell}_{t,j}$ and the first $d$ elements from the right-most $(d+1)\times 1$ vector forms $\bh^{\ell}_{t,j}$.
By concentration of random matrices~\cite{tao2011topics}, we have with probability at least $(1-\frac{\alpha}{mL})$, the operator norm of $\bH^{\ell}_{t,j}$ is
\begin{equation}
    \norm{\bH^{\ell}_{t,j}}_{op} \le \rho \triangleq 8\sqrt{2}\nu \log (4/\delta) (4\sqrt{d} + 2 \log (2mL/\alpha))/\varepsilon.
\end{equation}
for any $\ell \in [L], j \in [m], t \in [T]$.

% With a little abuse of notation, suppose the local cluster $(\ell, j)$ receives observation $(\bx_t, y_t)_{t \in [T]}$ and the upload happens in $(t_1, ..., t_{t_c})$ rounds.
% Consider two consecutive uploads occurs at time $t_{i-1}$ and $t_{i}$, the local cluster will insert $\sum_{\tau=t_{i-1}}^{t_{i}}$ 
% , since each partial sum $s_i$ can be uniquely represented by the summation of at most $\nu$ nodes in $\cT$.

\textbf{Recommendation Procedure.}
At each time step $t$, the recommended item $\bx_t$ for user $i_t$ is selected as follows.
When the current cluster is correct (which is guaranteed after $O(\log T)$ rounds and to be proved later), the estimated $\hat{\btheta}_{t}=(\bS^{l_t}_{t, j_t})^{-1}\bu^{l_t}_{t, j_t}$ is computed using the local information $\bS^{l_t}_{t, j_t}$ and $\bu^{l_t}_{t, j_t}$.
Since by \Cref{lemma:cdp_beta}, $\norm{\btheta_{i_t}-\hat{\btheta}_{t}}_2 \le  \beta^{l_t}_{t,j_t}$, the confidence radius is $ \beta^{l_t}_{t,j_t}\norm{\bx}_{(\bS^{l_t}_{t, j_t})^{-1}}$, which characterizes the exploration bonus for item $\bx \in \bD_t$.
Then the local server will recommend the item $\bx_t \in \bD_t$ that maximizes the $\bx^{\top}\hat{theta}_t$ plus the above exploration bonus.
Finally, the user will receive feedback $y_t$ and the system updates corresponding statistics for better decision in future rounds.
\begin{restatable}{lemma}{BetaCi}\label{lemma:cdp_beta}
Under the setting of FCLUB and fix a local cluster $j$ located at the server $\ell$ which shares the information with $L'\le L$ clusters (including itself), let the true preference vector be $\btheta^*$ and the true cluster be $j^*$, let $\hat{\btheta}^{\ell}_{t,j}=(\bS^{\ell}_{t,j})^{-1} \bu^{\ell}_{t,j}$.
When all (global) clusters are correctly identified and partitioned, it holds with probability at least $1-2\alpha$,
\begin{equation}
    \norm{\btheta^*-\hat{\btheta}^{\ell}_{t,j}}_{\bS^{\ell}_{t,j}} \le \beta^{\ell}_{t,j},
\end{equation}
where $\beta^{\ell}_{t,j} \triangleq \beta^{\ell}_j(T^{\ell}_{t,j}, L, \alpha/(mL))= \sigma_{0}\sqrt{2\log(\frac{mL}{\alpha})+d\log(\frac{\rho_{\max}}{\rho_{\min}}+\frac{T^{\ell}_{t,j}}{dL'\rho_{\min}})} + \sqrt{L'\rho_{\max}} +\sqrt{L'}\kappa$.
\end{restatable}



\begin{algorithm}[t]
	\resizebox{0.95\columnwidth}{!}{
\begin{minipage}{\columnwidth}
\begin{algorithmic}[1]
\caption{Phase-based Cluster Detection and Adjustment}\label{alg:cluster_detect}
\STATE $t=2^s-1$.
\FOR{$\ell \in [L]$}
\STATE Set $\cG^{\ell}_{s}$ by deleting any edge $(i_1,i_2) \in \cG^{\ell}_{s-1}$ if \Cref{eq:edge_deletion} holds. \label{line:detect_delete}
\STATE For $\ell \in [L], j \in C(\cG^{\ell}_{s}) $, generate new perturbation $\bH^{\ell}_{t,j}, \bh^{\ell}_{t,j}$ using $\text{PVT}({\ell, j})$ in \Cref{alg:private} and set historical $\bar{\bH}^{\ell}_{t,j}=\bH^{\ell}_{t,j}, \bh^{\ell}_{t,j}=\bar{\bh}^{\ell}_{t,j}$. 
\STATE For $\ell \in [L]$, upload the local clustered information $I_{s,{\ell}}=(C^{\ell}_{s,j}, \tilde{\bV}^{\ell}_{s, j}, \tilde{\bb}^{\ell}_{s, j}, \tilde{T}^{\ell}_{s,j})_{j \in C(\cG^{\ell}_{s})}$ to the global server, where $(\tilde{\bV}^{\ell}_{s,j}, \tilde{\bb}^{\ell}_{s,j}, \tilde{T}^{\ell}_{s,j})= (2\rho \bI + \bH^{\ell}_{t,j},\bh^{\ell}_{t,j}, 0) +\sum_{i \in C^{\ell}_{s,j}}(\bV_{t,i},\bb_{t,i},T_{t,i})$.\label{line:detect_cluster}
\ENDFOR
\STATE The global server does global merge based on $I_s$ and get $m_s$ global clusters $P_s=\{P_{s, 1}, ..., P_{s, m_s}\}$, where the two local clusters $C^{\ell_1}_{t,j_1}$, $C^{\ell_2}_{t,j_2}$ (with $\ell_1 \neq \ell_2$) are merged together in $P_{s,k}$ if \Cref{eq:merge} holds.\label{line:detect_merge}
\IF{$s=0$ or $P_s\neq P_{s-1}$}
\STATE //Renew the cluster information.
\FOR{$k \in [m_s]$}
\STATE Set global gram matrix $(\bS^g_{t, k}, \bu^g_{t, k},T^g_{t, k})=\sum_{(\ell,j) \in P_{s,k}}(\tilde{\bV}^\ell_{s,j},\tilde{\bb}^\ell_{s,j},{\tilde{T}^\ell_{s,j}})$.
    \FOR{$(\ell,j) \in P_{s, k}$}
\STATE Set $(\bS^\ell_{t, j},b^\ell_{t, j},T^{\ell}_{t, j})=(\bS^g_{t,k}, \bb^g_{t,k}, T^g_{t,k})$.
\STATE Create new perturbation $\bH^{\ell}_{t,j}, \bh^{\ell}_{t,j}$ using $\text{PVT}({\ell, j})$ in \Cref{alg:private}.
\STATE Set new $(\Delta \bS^\ell_{t,j}, \Delta \bu^{\ell}_{t,j}, \Delta T^{\ell}_{t,j})=(3\rho \bI + \bH^\ell_{t,j}-\bar{\bH}^\ell_{t,j}, \bh^\ell_{t,j}-\bar{\bh}^\ell_{t,j},0)$
\STATE Set new $(\Delta \bS^{-\ell}_{t,j}, \Delta \bu^{-\ell}_{t,j}, \Delta T^{-\ell}_{t,j})=(\boldsymbol{0},\boldsymbol{0},0)$.
    \ENDFOR
\ENDFOR
\ENDIF
\end{algorithmic}
\end{minipage}}
\end{algorithm}

\begin{algorithm}[t]
	\resizebox{0.95\columnwidth}{!}{
\begin{minipage}{\columnwidth}
\begin{algorithmic}[1]
\caption{Check Upload Event}\label{alg:check_upload}
\STATE Update upload buffer $(\Delta \bS^{l_t}_{t,j_t}, \Delta \bu^{l_t}_{t, j_t}, \Delta \bT^{l_t}_{t,j_t}) \mathrel{{+}{=}} (\bx_{t} \bx_{t} ^\top,y_t \bx_{t},1)$.\label{line:upload_update_obs}
\IF{$\text{det}(\bS^{l_t}_{t, j_t}+\Delta \bS^{l_t}_{t, j_t})/\text{det}(\bS^{l_t}_{t, j_t})\ge U$}
\STATE The global cluster finds $k_t$ so that $(l_t, j_t) \in P_{s, k_t}$.
% \STATE Local server sets $\Delta \bS^{l_t}_{t, j_t}=\Delta \bS^{l_t}_{t, j_t}+\bH^{l_t}_{t,j_t}-\bar{\bH}^{l_t}_{t,j_t}$ and $\Delta \bu^{l_t}_{t, j_t}= \Delta \bu^{l_t}_{t, j_t}+ \bh^{l_t}_{t,j_t}-\bar{\bh}^{l_t}_{t,j_t}$.
\STATE Update global information $(\bS^g_{t, k_t}, \bu^g_{t, k_t}, T^g_{t, k_t})\mathrel{{+}{=}}(\Delta \bS^{l_t}_{t, j_t}, \Delta \bu^{l_t}_{t, j_t},\Delta T^{l_t}_{t, j_t})$.\label{line:upload_merge}
\FOR{$(\ell,j) \neq (l_t, j_t) \in P_{s, k_t}$}
\STATE Global server updates other servers' download buffer $(\Delta \bS^{-\ell}_{t,j},\Delta \bu^{-\ell}_{t,j}, \Delta T^{-\ell}_{t,j}) \mathrel{{+}{=}}( \Delta \bS^{l_t}_{t, j_t}, \Delta \bu^{l_t}_{t, j_t}, \Delta T^{l_t}_{t, j_t})$.\label{line:upload_renew_download}
\ENDFOR
\STATE Local server $l_t$ updates the local statistics: $(\bS^{l_t}_{t, j_t}, \bu^{l_t}_{t, j_t}, T^{l_t}_{t, j_t}) \mathrel{{+}{=}} (\Delta \bS^{l_t}_{t, j_t}, \Delta \bu^{l_t}_{t, j_t}, \Delta T^{l_t}_{t, j_t})$.\label{line:upload_update_local}
\STATE Local server $l_t$ sets $(\bar{\bH}^{l_t}_{t,j_t}, \bar{\bh}^{l_t}_{t,j_t})= (\bH^{l_t}_{t,j_t}, \bh^{l_t}_{t,j})$ and creates new perturbation $\bH^{\ell}_{t,j_t}, \bh^{\ell}_{t,j_t}$ using the tree-based privatizer $\text{PVT}({l_t, j_t})$.\label{line:upload_perterb}
\STATE Local server $l_t$ initializes the upload buffer using the new perturbation: $(\Delta \bS^{l_t}_{t, j_t}, \Delta \bu^{l_t}_{t, j_t}, \Delta T^{l_t}_{t, j_t})=(3\rho \bI+\bH^{l_t}_{t,j}-\bH^{l_t}_{t,j_t}, \bh^{l_t}_{t,j_t}-\bar{\bh}^{l_t}_{t,j_t},0)$.\label{line:upload_clear}
\ENDIF
\end{algorithmic}
\end{minipage}}
\end{algorithm}

\begin{algorithm}[t]
	\resizebox{0.95\columnwidth}{!}{
\begin{minipage}{\columnwidth}
\begin{algorithmic}[1]
\caption{Check Download Event}\label{alg:check_download}
\FOR{$(l,j) \in P_{s,k_t}$}
\IF{$\text{det}(\bS^g_{t,k_t})/{\text{det}(\bS^{\ell}_{t, j})}\ge D$}
\STATE Local server receives $(\bS^{\ell}_{t,j}, \bu^{\ell}_{t,j}, T^{\ell}_{t,j}) \mathrel{{+}{=}} (\Delta \bS^{-\ell}_{t,j}, \Delta \bu^{-\ell}_{t,j}, \Delta T^{-\ell}_{t,j})$.
\STATE Global server cleans the download buffer: $(\Delta \bS^{-\ell}_{t,j}, \Delta \bu^{-\ell}_{t,j}, \Delta T^{-\ell}_{t,j})=(\boldsymbol{0},\boldsymbol{0},0)$.
\ENDIF
\ENDFOR
\end{algorithmic}
\end{minipage}}
\end{algorithm}

\begin{algorithm}[t]
	\resizebox{0.95\columnwidth}{!}{
\begin{minipage}{\columnwidth}
\begin{algorithmic}[1]
\caption{Privatizer PVT($\ell,j$) for cluster $j$ at server $\ell$} \label{alg:private}
\STATE \textbf{Input:} Privacy budget $\varepsilon, \delta$, number of uploads $t_c$.
% , number of clusters $L'$ to share the information with cluster $j$ at server $\ell$.
\STATE Create a binary tree $\cT$ of depth $\nu=\lceil \log (t_c+1) \rceil+1$.
\STATE For each node, we generate a perturbation matrix matrix $N \in \R^{(d+1)\times (d+1)}$, where $\bN=({\bar{\bN}+\bar{\bN}^{\top}})/\sqrt{2}$ and $\bar{\bN} \in \R^{(d+1)\times (d+1)}$ with $\bar{N}_{p,q}\sim \cN(0, 64\nu\frac{\log (2/\delta)^2}{\varepsilon^2})$.
\STATE Calculate a queue of $\cQ=(\bH_i, \bh_i)_{i=1, ..., t_c+1}$ for partial sums $s_0, ..., s_{t_c}$.
\STATE Sequentially pop one pair of $\cQ$ if PVT($\ell,j$) is called.
\end{algorithmic}
\end{minipage}}
\end{algorithm}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%\input{content/4_analysis.tex}%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Results}\label{sec:results}
Recall that perturbations $\bar{\bH}^{\ell}_{t,j}, \bar{\bH}^{\ell}_{t,j}$ are designed to satisfy the $(\varepsilon, \delta, L, m)$-CDP requirement.
In particular, the privacy budget $(\varepsilon,\delta)$ affects the regret and communication bounds via the following quantities $(\rho_{\max}, \rho_{\min}, \kappa)$, which can be treated as spectral bounds for $\bar{\bH}^{\ell}_{t,j}, \bar{\bH}^{\ell}_{t,j}$.
Let $\tilde{\bH}^{\ell}_{t,j}=2\rho \bI + 3\rho  c^{\ell}_{j,t} \bI + \bar{\bH}^{\ell}_{t,j}$, where $c^{\ell}_{j,t}$ is the number of uploads for local server $\ell$ and cluster $j$.
\begin{definition}[Approximately-accurate $\rho_{\min}, \rho_{\max}$ and $\kappa$]\label{def:accurate}
The bounds $0<\rho_{t, \min} \le \rho_{t,\max}$ and $\kappa >0$ are $(\alpha/(mL))$-accurate for $(\bar{\bH}^{\ell}_{t,j})$ for any $\ell \in [L], j \in [m]$ and $t \in [T]$:
\begin{equation}\label{eq:alpha_accurate}
    \norm{\tilde{\bH}^{\ell}_{t,j}}_{\text{op}} \le \rho_{\max}, \norm{(\tilde{\bH}^{\ell}_{t,j})^{-1}}_{\text{op}} \le \frac{1}{\rho_{\min}}, \norm{\bar{\bh}^{\ell}_{t,j}}_{(\tilde{\bH}^{\ell}_{t,j})^{-1}} \le \kappa
\end{equation}
\end{definition}
with probability at least $(1-\frac{\alpha}{mL})$. 

As will be shown later, our communication protocol ensures $ c^{\ell}_{j,t} \in [0, d\log T/\log (\min\{U,D\})]$, so $\rho_{\min} = \rho$,  $\rho_{\max} = 3\rho + 3\rho d\log T/\log (\min\{U,D\})$ , and $\kappa=\norm{\bar{\bh}^{\ell}_{t,j}}/\sqrt{\rho}$, where $\rho \triangleq 8\sqrt{2}\nu \log (4/\delta) (4\sqrt{d} + 2 \log (2mL/\alpha))/\varepsilon$ is given by our privatizer.

In the following, we will give general regret and communication bounds using $(\rho_{\max}, \rho_{\min}, \kappa)$ and replace them with their exact values.

\subsection{Regret Bound}
We give the following theorem as our main result for the regret bound.
\begin{restatable}{theorem}{mainRegret}\label{thm:mainRegret}
Suppose the cluster structure over the users and items satisfy the assumptions in \Cref{sec: problem setting} with gap parameter $\gamma>0$ and item regularity parameter $1\ge \lambda_x>0$. If the privatizer produces random perturbation that are $(1/(8mLT))$-accurate as in \Cref{def:accurate}, with probability at least $1-1/T$, the regret is upper bounded by
\begin{align}
    % &R(T) \le 2T_0 + \sum_{j=1}^m R_j(T_j)\\
%     &\le  2T_0 + \sum_{j=1}^m\Big(\sigma_{0}\sqrt{2\log(8mLT)+d\log(\frac{\rho_{\max}}{\rho_{\min}}+\frac{T_{j}}{d\rho_{\min}})}+\\
%     &\sqrt{L\rho_{\max}}+\kappa\sqrt{L}\Big)\Big(\sqrt{d\log(\frac{\rho_{\max}}{\rho_{\min}}+\frac{T_{j}}{d\rho_{\min}})}\Big)\Gamma\sqrt{T_j} \\
% &\le  O\Big(2T_0 + \sum_{j=1}^m\sqrt{L\frac{d\log T}{\log \min\{U,D\}} \frac{\log\log T \log (1/\delta) (\sqrt{d}+\log T)}{\varepsilon}}\\
% &\Gamma\sqrt{d\log T }\sqrt{T_j}\Big)\\
R(T)&\le \tilde{O}\Big(n(\frac{\log T}{\lambda_x^2}+\frac{\sigma_0^2d\log T}{\lambda_x \gamma^2} + \frac{\log (1/\delta)\log T }{\lambda_x \varepsilon \gamma^2})\notag\\
&+ dL\sqrt{mT\frac{\log(1/\delta)}{\varepsilon}}\log^{1.5} T \Big)
\end{align}
\end{restatable}
We will give the proof sketch for the above \cref{thm:mainRegret}.
\begin{proof}
Our proof mainly consists of two parts.
The first part bounds the number of exploration rounds $2T_0$ after which the overall user clusters are correctly detected at the global server.
The second part is to bound the regret for the asynchronous contextual linear bandits after the clusters are partitioned correctly.

Different from standard online clustering bandits, the key technical challenge is to take care of the additional random Gaussian noise produced by the privater, which perturbs the true observation that is needed for global cluster detection and the regret analysis for contextual linear bandits.
Moreover, such perturbed observation are also lagged behind the instant observation, since FCLUB-CDP adopts the "delayed" asynchronous communication where upload and download are triggered occasionally. 
This makes standard contextual bandit analysis no longer works and requires new proof techniques to handle the gap between instant observation and the lagged (and perturbed) observation.

For the first cluster detection part, by the assumption of item regularity, we prove that after $t \ge O(n(\frac{\log T}{\lambda_x^2}+\frac{d\sigma_0^2\log T}{\lambda_x \gamma^2}))$ rounds, the local estimates are accurate enough so that the local clusters are correctly identified, similar to that of~\cite{li2018online}.
Specifically, the 2-norm distance between local estimate $\hat{\theta}_{t,i}$ and the truth $\theta_i$ for any user $i$ is less than $\gamma/4$.
Thus the local clusters are split correctly for all local servers. 
Now for the global cluster detection, the global server receives the aggregated observation from correctly partitioned local clusters, in which random Gaussian noises are added.
Based on spectra property of Gaussian noise matrices (\cref{def:accurate}), the global server will spend additional $O(\frac{n\log (1/\delta)\log T }{\lambda_x \varepsilon \gamma^2})$ rounds so that the perturbed estimate $\hat{\theta}^{l}_{s,j}$ are accurate enough at the beginning of phase $s=\lceil \log_2 T_0\rceil$, where $T_0=O(n(\frac{\log T}{\lambda_x^2}+\frac{d\sigma_0^2\log T}{\lambda_x \gamma^2} + \frac{\log (1/\delta)\log T }{\lambda_x \varepsilon \gamma^2}))$.
Therefore, after $t > 2T_0$, the overall user clusters are partitioned correctly.

For the regret after $2T_0$, we use the delayed update technique from~\citep[Section 5.1]{abbasi2011improved}, which only recomputes the confidence radius only $O(\log T)$ times and hence saves computation.
The same strategy can also be applied for the delayed communication.
The key analysis relies on using the upload and download condition in~\cref{eq:upload_condition} and \cref{eq:download_condition}, so that the actually-used cluster confidence radius is at most $\Gamma$ times larger than that if all local servers upload their perturbed observations in a fully synchronized manner, where $\Gamma=\sqrt{D(1+(L-1)(U-1))+ U-1}$.
This will give a $\Gamma R_j(T_j)$ regret for the second part, where $R_j(T_j)$ is the private-version regret for the cluster $j$ if all observation are synchronized at each round. The full proof is put in \OnlyInFull{\Cref{sec:regret_proof}}\OnlyInShort{Appendix B}.
\end{proof}


\subsection{Communication Cost}
We give the following theorem to bound the total communication cost.
\begin{restatable}{theorem}{comCost}\label{thm:comCost}
Under the CDP setting, the total communication cost satisfies:
\begin{align}
    % C_{T} &\le mL\Big(\log T + \frac{d\log(\frac{\rho_{\max}}{\rho_{\min}}+\frac{T}{d\rho_{\min}})}{ \log(\min\{U,D\})} \\
    % &+\frac{d\log(\frac{\rho_{\max}}{\rho_{\min}}+\frac{2T_0}{d\rho_{\min}})\log(2T_0)}{ \log(\min\{U,D\})}\Big)\\
    C(T)&\le O(\frac{dmL\log T}{\log(\min\{U,D\}})
\end{align}
\end{restatable}
\begin{proof}
The total communication cost also has two parts: the upload at the beginning of each phase for global cluster detection and the asynchronous communication within each phase for information sharing.
For the first part, the algorithm has at most $\log T$ phases and at each phase, there are total $mL$ local clusters uploading the clustered information, hence the total communication cost is $O(mL\log T)$.
For the second part, recall that we adopt the delayed asynchronous communication protocol and the total number of uploads and downloads can be bounded by $O(dmL\log T)$.
% This implements the intuition that the more observation we have the more difficult it is to trigger the upload and download condition in ~\Cref{eq:upload_condition} and ~\Cref{eq:download_condition}.
See \OnlyInFull{\Cref{sec:communication_cost}}\OnlyInShort{Appendix C} for the detailed proofs.
\end{proof}

\subsection{Privacy Guarantee}
\begin{restatable}{theorem}{privacyCDP}\label{thm:privacyCDP}
\Cref{alg:main} preserves $(\varepsilon,\delta, L, m)$-CDP as defined in \Cref{def:cdp}.
\end{restatable}
\begin{proof}
The CDP condition is satisfied by assigning the right amount of Gaussian noise in each tree node of our tree-based privacy protocol in~\Cref{sec:algorithm}. See \OnlyInFull{\Cref{sec:privacy_proof}}\OnlyInShort{Appendix D} for details.
\end{proof}

\subsection{Discussion and Comparison}
\textbf{Discussion on the Regret Bounds.}
For the regret bound, our result has two terms: the regret before the clusters are correctly partitioned $n(\frac{\log T}{\lambda_x^2}+\frac{\sigma_0^2d\log T}{\lambda_x \gamma^2} + \frac{\log (1/\delta) \log T}{\lambda_x \varepsilon \gamma^2})$ and the regret after the clusters are correctly partition $O(dL\sqrt{mT\frac{\log(1/\delta)}{\varepsilon}}\log^{1.5} T)$. 
We will compare our results with several degenerate cases, given that we are the first work to study the federated clustering of bandits setting. 
For these cases, the additional CDP causes at most $O(\sqrt{\frac{\log(1/\delta)}{\varepsilon}})$ factor and asynchronous communication protocol causes at most $O(\sqrt{dL\log T})$ factor in general.

First, when $m=1, L=1$, our setting degenerates to the linear bandits with DP where all users share the same underlying parameter.
Compared to \citet{shariff2018differentially} which gives a $O(\sqrt{d\frac{\log(1/\delta)}{\varepsilon}}\sqrt{T}\log^{1.5}T)$ regret with $0$ communication, our bound has a $O(\sqrt{d})$ additional factor (or more precisely $O(\sqrt{d\log \log T})$ factor) for the second term, which stems from the larger perturbation in order to protect total $O(d\log T)$ communication rounds.

Second, when $L=1$, our setting reduces to the online clustering bandits with DP, \citet{li2018online} gives a $O(n(\frac{\log T}{\lambda_x^2}+\frac{d\sigma_0^2\log T}{\lambda_x \gamma^2})+d\sqrt{mT}\log T)$ for the non-DP version.
Since CDP mechanism requires random perturbation, the clustering process suffers an additional $\frac{n\log T\log (1/\delta) }{\lambda_x \varepsilon \gamma^2}$ for the first term and the second regret term now has a new $\sqrt{\frac{\log(1/\delta)\log T}{\varepsilon}}$ leading factor due to the CDP requirements. 

Third, when $m=1$ and if we consider the special case when each local server only has one user and all users come in a round-robin manner, our setting reduces to the distributed linear bandits with DP.
\citet{dubey2020differentially} provides a synchronized algorithm that achieves $O(\sqrt{dLT\frac{\log(1/\delta)}{\varepsilon}}\log^{1.5} T)$, our second term has an additional $\sqrt{dL}$ factor because of different communication protocol, which enables asynchronous communication at the cost of the larger $O(dL\log T)$ (compared with $O(L\log T)$) communication rounds and an additional $\Gamma=O(\sqrt{L})$ factor in the confidence radius.

Finally, there is a lower bound $\Omega(\sqrt{dmT})$,
if we consider the case where the clustering structure is known,
the communication and privacy budgets are unlimited and each cluster contains equal number of users. 
In this case, it is equivalent to learn $m$ independent linear bandits, each with expected rounds $T/m$ and according to \cite{dani2008stochastic}, the lower bound is $\Omega(\sum_{i \in [m]}\sqrt{dT/m})=\Omega(\sqrt{dmT})$.
In other cases, the regret lower bound will be greater and the lower bound $\Omega(\sqrt{dmT})$ still holds.
Our regret bound matches the lower bound up to a factor of $O(L\sqrt{d\frac{\log(1/\delta)}{\varepsilon}}\log^{1.5} T)$.
% in total, and up to a factor of $O(\sqrt{d}\log^{1.5} T)$ regarding $d, m, T$ terms. Now for the $L$ and $\delta,\varepsilon$ terms, which are related to communication and privacy, to the best of our knowledge, the lower bound results are still unclear and remain challenging open questions in the literature. 
% This is because the regret, the communication cost and the privacy are intertwined and for most cases, delicate trade-offs need to be made between different criteria.



\textbf{Discussion on the Communication Cost.}
Our communication cost also has two terms: the first $O(mL\log T)$ term for identifying clusters at the beginning of each phase and the leading $O(\frac{dmL\log T}{\log(\min\{U,D\}})$ term for our asynchronous communication protocol.
Compared with \citet{dubey2020differentially} when $m=1$ and users come at the round-robin manner, our communication has an additional $O(d)$ factor. Due to the specialty of the user arrival, the same paper can achieve communication cost independent of $T$ at the cost of $O(\log (LT))$ additional factor in the regret. Though our total communication cost can not be reduced below $O(mL\log T)$ due to the first term, it will be interesting to consider whether the similar trade-off works for our asynchronous protocol in the future work.




%%%%%%%%%%%%%%%%%%%%%%%%%%%\input{content/5_experiments.tex}%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{figure*}[t]
 \centering
 \begin{subfigure}[b]{0.495\textwidth}
  \centering
  \includegraphics[width=0.7\textwidth]{./synthetic.pdf}
  \caption{Comparison with Baselines on the Synthetic Dataset}
  \label{fig:synthetic}
 \end{subfigure}
 \hfill
 \begin{subfigure}[b]{0.495\textwidth}
  \centering
  \includegraphics[width=0.67\textwidth]{./movielens_2.pdf}
  \caption{Comparison with Baselines on the MovieLens Dataset}
  \label{fig:movielens}
 \end{subfigure}
 \caption{Comparative Experiments on the Cumulative Regret with Baseline Algorithms}\label{fig:compare2}
\end{figure*}
\section{Experiments}\label{sec: experiments}
To validate our theoretical findings, we conduct experiments on a synthetic dataset and a real-world MovieLens dataset.
\Cref{alg:main} is denoted as CDP-FCLUB-DC and we also present its non-private version FCLUB-DC and its non-private, synchronized version FCLUB (by instantly uploading the observations).
% These two variants can better show the collaborative effects over different servers compared with non-private or synchronized algorithms.
The baselines include CLUB which uses a separate CLUB algorithm~\cite{gentile2014online} for each local server; SCLUB which uses a separate SCLUB algorithm~\cite{li2019improved} for each server; and LinUCB which uses a separate LinUCB algorithm~\cite{abbasi2011improved} for each user.
We also consider the synchronized and asynchronous version of \Cref{alg:main} (denoted as Homo and Homo-DC, respectively) by treating users are identical with the same preference vector. 
Note that all results are averaged over ten random seeds, and we provide mean results with one unit of standard derivation for each curve.
Due to the space limit, we provide the detailed experiment settings (including data generation and processing) in \OnlyInFull{\Cref{sec:exp_setting}}\OnlyInShort{Appendix E.1}, the parameter study \OnlyInFull{\Cref{sec:exp_pstudy}}\OnlyInShort{Appendix E.2}, the communication cost \OnlyInFull{\Cref{sec:exp_communication_cost}}\OnlyInShort{Appendix E.3} and running time results in \OnlyInFull{\Cref{sec:exp_running_time}}\OnlyInShort{Appendix E.4}, respectively.
% We experiment on a synthetic dataset. Our goal in this section is to numerically investigate the influence of parameters' change on the performance of CDP-FCLUB-DC algorithm. We consider a setting of $n=40$ users with $m = 4$ global clusters and $L = 5$ local servers. The weight vectors and item vectors are generated randomly with dimension $d = 10$. For each global cluster $j\in[m]$, we fix a weight vector $\theta_j$ and we guarantee that all weight vectors are orthogonal and the difference gap $\gamma$ between them is $\sqrt{2}$. We stipulate that the default privacy bugdet is $(\epsilon,\delta)=(1,0.1)$ and communication upload/download rate $U=D=1.01$. In each round, a random user comes and the algorithm selects one of $K=10$ items to recommend to the user. After we receive the feedback and compare it with the best action since we know the true weight vectors, we evaluate the performance of the algorithm by calculating the cumulative regret. The vertical axis indicates the cumulative regret and the horizontal axis indicates the round $t$.

% \begin{figure}[H]
%  \centering
%  \begin{subfigure}[b]{0.235\textwidth}
%   \centering
%   \includegraphics[width=\textwidth]{./figures/d.pdf}
%   \caption{Vary dimension $d$ and fix other variables }
%   \label{fig: d}
%  \end{subfigure}
%  \hfill
%  \begin{subfigure}[b]{0.235\textwidth}
%   \centering
%   \includegraphics[width=\textwidth]{./figures/epsilon.pdf}
%   \caption{Vary privacy budget $\epsilon$ and fix other variables}
%   \label{fig: epsilon}
%  \end{subfigure}
%  \begin{subfigure}[b]{0.235\textwidth}
%   \centering
%   \includegraphics[width=\textwidth]{./figures/L.pdf}
%   \caption{Vary the number of local servers $L$ and fix other variables}
%   \label{fig:L}
%  \end{subfigure}
%  \hfill
%  \begin{subfigure}[b]{0.235\textwidth}
%   \centering
%   \includegraphics[width=\textwidth]{./figures/m.pdf}
%   \caption{Vary the number of global cluster $m$ and fix other variables}
%   \label{fig: m}
%  \end{subfigure}
%  \caption{Comparative experiment on synthetic dataset}\label{fig:compare1}
% \end{figure}


% In Figure \ref{fig: d}, we vary the dimension $d$ from 5 to 20 and fix other parameters. It can be seen that with the increase of dimension $d$, the convergence speed of regret will slow down. In Figure \ref{fig: epsilon}, we change the privacy budget $\epsilon$ from 0.5 to 8. It is obvious that when $\epsilon$ is small, the cumulative regret is better because at this time the privacy will have less impact on the recommendation. In Figure \ref{fig:L} and Figure \ref{fig: m}, we vary the number of local server $L$ from 2 to 8 and the number of global cluster $m$ from 2 to 8 respectively. The results show that the value of $L$ and $m$ are positively correlated with the cumulative regret, which is consistent with our conclusion in Theorem \ref{thm:mainRegret}.
% \begin{figure}[H]  
% \centering
% \includegraphics[scale=0.3]{./figures/cumulative communication cost.pdf}
% \caption{Comparison of communication costs}
% \label{fig:communication costs}
% \end{figure}
% Communication cost is one of the most important factors we consider when designing the algorithm. In Figure \ref{fig:communication costs}, we compare the communication cost of FCLUB using full communication with that of FCLUB-DC and CDP-FCLUB-DC using the delayed communication. (The basic setting is the same as we list in the first paragraph of this section.) The results clearly indicate that our asynchronous communication protocol can reduce the communication cost effectively.


\textbf{Synthetic Dataset.} We first conduct experiments on a synthetic dataset.
In Figure \ref{fig:synthetic}, we compare our algorithm CDP-FCLUB-DC with the baselines listed above.
The vertical axis indicates the cumulative regret and the horizontal axis indicates the round $t$.
In general, our algorithm CDP-FCLUB-DC's performance has a clear advantage over baseline SCLUB, CLUB, LinUCB, Homo and Homo-DC.
Since Homo-DC and Homo assumes users are in the same cluster, they mistakenly merge different clusters and suffer linear regrets, indicating the correctness of cluster detection is essential to have small regrets.
Compared with SCLUB and CLUB that only perform local clustering operations, we can verify the correctness of our algorithm's clustering operations at the global level, which successfully leverages the collaborative effects across different local servers.
As expected, CDP-FCLUB-DC performs a little worse than FCLUB and FCLUB-DC due to the delayed communication and cluster differential privacy requirements.

% In Homo-DC, we assume all users are in the same cluster, we can verify that our algorithm CDP-FCLUB-DC can complete cluster detection well by comparing it with Homo-DC. Besides, since CLUB and SCLUB only perform merge or split operations at the local cluster level, through comparison with SCLUB and CLUB we can verify the correctness of our algorithm's merge operation at the global level. In general, our algorithm CDP-FCLUB-DC's performance has a clear advantage over baseline SCLUB, CLUB, LinUCB, Homo and Homo-DC. CDP-FCLUB-DC performs worse than FCLUB and FCLUB-DC due to the delayed communication and cluster differential privacy(CDP).
\textbf{MovieLens Dataset.}
% In this section, we compare our algorithm CDP-FCLUB-DC with the baselines listed above on movie recommendations with Movielens dataset. We randomly draw $10^3$ movies and $10^3$ users as preparation for the experiment. Similar to \citet{li2018online}, we generate the weight vectors on the basis of the $10^3$ users and the movies they rated. We randomly select $40$ users from these $10^3$ users for the experiment. In this experiment, we assume that there are $n=40$ users with $m = 4$ global clusters and $L = 5$ local servers and the weight vectors and item vectors are generated randomly with dimension $d = 10$. The users are selected randomly from these $10^3$ users for the experiment. Besides, we stipulate that the default privacy bugdet is $(\epsilon,\delta)=(1,0.1)$ and communication upload/download rate $U=D=1.01$.
In this section, we also compare our algorithm CDP-FCLUB-DC with the baselines listed above on movie recommendations with the MovieLens dataset.
The performances are shown in Figure \ref{fig:movielens}. Our algorithm CDP-FCLUB-DC's performance has an advantage over baseline SCLUB, CLUB, LinUCB, Homo and Homo-DC in general. Figure \ref{fig:movielens} shows CDP-FCLUB-DC performs worse than FCLUB and FCLUB-DC due to the delay communication and cluster differential privacy (CDP) as we have explained in synthetic dataset part. Different from the synthetic dataset, in the early stage, our algorithm needs more time to identify the underlying cluster structure. But after all user clusters are correctly detected at the global server, our algorithm performs better than Homo/Homo-DC that assume users are homogeneous, CLUB/SCLUB on each local server and LinUCB on each user.


% \begin{table}[!h]
% \centering
% \caption{Comparison of run time}
% \begin{tabular}{l|cccc}
% \hline
%           & CDP-FCLUB-DC \;& FCLUB-DC \\
% run time (ms)      & 1.707
% \;  & 1.814    \\ 
% \hline
%           & FCLUB \;& SCLUB   \\    
% run time (ms)      & 58.870
% \;  & 3.805\\
% \hline
%   & CLUB         & LinUCB   \\
% run time (ms) \;  & 0.779
% \; & 0.647\\
% \hline
%   & Homo-DC         & Homo\\
% run time (ms) 
% \; & 1.277
% \; & 29.265
% \\
% \hline
% \end{tabular}
% \label{tab:runtime}
% \end{table}
% We also compare the average running time (ms) of each round between our algorithm CDP-FCLUB-DC and baselines. It can be seen from Table \ref{tab:runtime} that due to the use of delay communication, our algorithm has a great improvement in communication cost compared with FCLUB and our run time cost is even lower than SCLUB. However, because of the existence of communication, our run time cost is still higher than CLUB and LinUCB.
% \subsection{Datasets}
% \textbf{General settings}.
% \begin{itemize}
%     \item main: CDP.
%     \item Appdx: Non-private \& LDP.
% \end{itemize}
% \textbf{Metrics}
% \begin{itemize}
%     \item Regret or per-round regret.
%     \item Communication cost.
%     \item Running time.
% \end{itemize}
% \textbf{Synthetic dataset.}
% $L=5$ servers, $m=2,5,10$ clusters, $n=20, 40, 60$ users, $d=5, 10, 20$ dimensions, $T\sim 100k$ rounds, privacy budget $(\varepsilon, \delta)=(0.1,0.1), (1, 0.1), (10, 0.1)$. $\theta$ are generated the same way as item features from random variables.

% \textbf{Movielens dataset.}
% $L=5$ local servers, $n=200$ users who rate most, $d=20$ dimensions, privacy budget $(\varepsilon, \delta)=(1,0.1)$, $T\sim 100k$ rounds, $K=10$ items per round, communication upload/download rate $U=D=1.01$, repeat for $10$ runs. Features are extracted from movielens dataset.
% \subsection{Baselines}
% \begin{itemize}
%     \item FCLUB-FC.
%     \item FCLUB-DC.
%     \item LinUCB-one that assume users are homogeneous.
%     \item CLUB/SCLUB on each local server.
%     \item LinUCB-ind on each user.
% \end{itemize}


%%%%%%%%%%%%%%%%%%%%%%%%%\input{content/6_conclusion.tex}%%%%%%%%%%%%%%%%%%%%%
\section{Conclusion and Future Work}\label{sec: conclusion}
In this paper, we formulate the federated online clustering of bandits problem, which generalizes the clustering of bandits problem to its federated counterpart. To tackle this new problem, we propose a FCLUB-CDP algorithm, which simultaneously achieves sublinear regret, sublinear communication complexity and satisfies our newly-defined clustered differential privacy requirements. Compared with benchmark algorithms, we show that FCLUB-CDP achieves superior performance regarding regret and communication cost.
There are many compelling directions for future study.
For example, it would be interesting to study our problem where local differential privacy is considered.
One could also study a more efficient protocol to further reduce the communication cost.

\subsubsection*{Acknowledgement}
The corresponding author Shuai Li is supported by National Natural Science Foundation of China (62006151). This work is sponsored by Shanghai Sailing Program.
The work of John C.S. Lui was supported in part by the RGC SRFS2122-4S02. 

% \input{content/acknowledgement}

\clearpage
\bibliography{main}
% \appendix
% \OnlyInFull{
% \input{content/appendix.tex}
% }


\end{document}
