\newpage
\appendix
\onecolumn

\section{NOTATIONS}\label{sectionB}


\begin{center}
\begin{tabular}{l|l}
    $\A$ & Arm set\\
    $\M$ & Agent set\\
    $d$ & Dimension of the model parameter and context\\
    $\tau$ & Sample complexity (stopping time)\\
    $\C(\tau)$ & Communication cost\\
    $k^*$ & Best arm\\
    $\hat k^*$ & Estimated best arm\\
    $m_t$ & Active agent in round $t$\\
    $\eta_{m_t,t}$ & $\sigma$-sub-Gaussian noise\\
    $r_{m_t,t}$ & Received reward of agent $m_t$ in round $t$\\
    $k_{m_t,t}$ & Arm pulled by agent $m_t$ in round $t$ \\
    $\mu(k)$ & Expected reward of arm $k$ in MAB\\
    $\x_k$ & Context of arm $k$\\
    $\y(i,j)$ & Context difference between $\x_i$ and $\x_j$\\
    $\t^*$ & Model parameter\\
    $\delta$ & Probability parameter of the fixed-confidence pure exploration problem\\
    $\epsilon$ & Reward gap parameter of the fixed-confidence pure exploration problem\\
    $\gamma/\gamma_1/\gamma_2$ & Triggered parameters\\
    $\lambda$ & Regularization parameter of the covariance matrix\\
    $B(t)$ & Breaking index\\
    $\alpha^M_{m_t,t}(k)$ & Exploration bonus of arm $k$ of the agent $m_t$ in \texttt{FAMABPE}\\
    $\alpha^M_{ser,t}(k)$ & Exploration bonus of arm $k$ of the server in \texttt{FAMABPE}\\
    $\alpha^L_{m_t,t}(i,j)$ & Exploration bonus of pair $(i,j)$ of the agent $m_t$ in \texttt{FALinPE}\\
    $\alpha^L_{ser,t}(i,j)$ & Exploration bonus of pair $(i,j)$ of the server in \texttt{FALinPE}\\
    $\hat\t_{m_t,t}$ & Estimated model parameter of the agent $m_t$ in \texttt{FALinPE}\\
$\hat\t_{ser,t}$ & Estimated model parameter of the server in \texttt{FALinPE}\\
    $i_{m_t,t}$ & Empirical best arm of agent $m_t$ in round $t$\\
    $j_{m_t,t}$ & Most ambiguous arm of agent $m_t$ in round $t$\\
    $\Delta(i,j)$ & Expected reward gap between arms $i$ and $j$\\
    $\hat\Delta_{m_t,t}(i,j)$ & Estimated reward gap between arms $i$ and $j$ of the agent $m_t$\\
    $\hat\Delta_{ser,t}(i,j)$ & Estimated reward gap between arms $i$ and $j$ of the server\\
    $\hat\mu_{m_t,t}(k)$ & Estimated reward of arm $k$ of the agent $m_t$ in \texttt{FAMABPE}\\
    $\hat\mu_{ser,t}(k)$ & Estimated reward of arm $k$ of the server in \texttt{FAMABPE}\\
    $T_{m_t,t}(k)$ & Number of observations on arm $k$ that are available to the agent $m_t$ at $t$\\
    $T_{ser,t}(k)$ & Number of observations on arm $k$ that are available to the server at $t$\\
    $T^{loc}_{m_t,t}(k)$ & Number of observations on arm $k$ that have not been uploaded to the server by the agent $m_t$ at $t$\\
    $T^{all}_{t}(k)$ & Total number of observations on arm $k$\\
    $\V_{m_t,t}$ & Covariance matrix of the agent $m_t$\\
    $\V_{ser,t}$ & Covariance matrix of the server\\
    $\V^{loc}_{m_t,t}$ & Covariance matrix that has not been uploaded to the server by the agent $m_t$\\
    $\V^{all}_{t}$ & Global covariance matrix\\
    $H^M_\epsilon$ & Problem complexity of the MAB\\
    $H^L_{\epsilon}$ & Problem complexity of the linear bandits
\end{tabular}
\end{center}

\newpage

\section{PROOF OF THEOREM \ref{theorem1}}\label{sectionC}

For clarity, we here reintroduce the notations used in the proof. Recall that $T^{all}_t(k)$ denotes the total number of times arm $k$ has been pulled up to round $t$, $T_{ser,t}(k)$ denotes the number of observations on arm $k$ that are available to the server at $t$, $T_{m,t}(k)$ denotes the number of observations on arm $k$ that are available to the agent $m$ at $t$, and $T_{m,t}^{loc}(k)$ denotes the number of observations on arm $k$ that agent $m$ has not uploaded to the server at $t$. Besides, $\hat{\Delta}_{m,t}(i,j)$ and $\hat{\Delta}_{ser,t}(i,j)$ denote the estimated reward gap between arms $i$ and $j$ of the agent $m$ and the server in round $t$, respectively. Furthermore, we let $\alpha^M_{m,t}(k)$ and $\alpha^M_{ser,t}(k)$ denote the exploration bonuses of the agent $m$ and the server, respectively. Moreover, we define the reward estimators of arm $k$ of the agent $m$ and the server as $\hat{\mu}_{m,t}(k)$ and $\hat{\mu}_{ser,t}(k)$, respectively.

\begin{remark} [Global and local data in the federated MAB] \label{remark3} By the design of Algorithm \ref{alg3}, the total number of times arm $k$ has been pulled till round $t$ satisfies $T^{all}_t(k) = T_{ser,t}(k) + \sum_{m=1}^M T^{loc}_{m,t}(k)$, where $T_{ser,t}(k)$ denotes the number of observations on arm $k$ that has been uploaded to the server and $\sum_{m=1}^M T^{loc}_{m,t}(k)$ denotes the total number of observations on arm $k$ that agents $m=1,2,\dots,M$ have not uploaded to the server. Besides, as $T_{m,t}(k)$, $\forall m \in \M, k \in \A$ is downloaded from the server in some round earlier than $t$, we have $T_{m,t}(k) \le T_{ser,t}(k),\ \forall k\in\A$ and $\sum_{k=1}^KT_{m,t}(k) \le \sum_{k=1}^KT_{ser,t}(k)$. 
\end{remark}

Detailed proofs of the first two components are given in the following two subsections.

\subsection{Upper Bound Communication Cost $\C(\tau)$}
\begin{lemma} [Communication cost] \label{lemmacommunication1}
Following the setting of Theorem \ref{theorem1}, the total communication cost of \texttt{FAMABPE}  can be bounded by
\begin{align}
\nonumber
    \C(\tau) \le 2\Big((M + 1/\gamma) \log_2 \tau\Big).
\end{align}
\end{lemma}

\begin{proof} [Proof of Lemma \ref{lemmacommunication1}] 
The proof of this lemma is divided into two parts. In the first part, we divide the rounds up to the sample complexity $\tau$ into at most $\log_2\tau$ episodes; in the second part, we upper bound the communication number of all agents in each episode. We define
\begin{align}
\nonumber
    \bT_i = \min\Big\{ t\in[K+1,\tau],\ \sum_{k=1}^K T_{ser,t}(k) \ge 2^i \Big\}
\end{align}
and partition the set of all rounds into episodes, where episode $i$ is $\{ \bT_i,\bT_{i}+1,\dots,\bT_{i+1}-1 \}$. According to the definition, we have $\sum_{k=1}^K T_{ser,\tau}(k) \le \tau$, and thus 
\begin{align}
\nonumber
    \max\{ i\ge0 \} = \log_2\bigg(\sum_{k=1}^K T_{ser,\tau}(k)\bigg) \le \log_2\tau.
\end{align}

We then prove $\forall i \ge 0 $, from round $\bT_i$ to $\bT_{i+1} - 1$, the communication number of all agents can be bounded by $M + 1/\gamma$. We first define the communication number of agent $m$ from $\bT_i$ to $\bT_{i+1}-1$ as $\bN_m$, the sequence of communication round of agent $m$ from round $\bT_i$ to $\bT_{i+1}-1$ as $t^m_{1},...,t^m_{\bN_m}$, the communication number of all agents as $L$ and the sequence of all communication rounds from round $\bT_i$ to $\bT_{i+1}-1$ as $t_{i,1},...,t_{i,L}$. According to the communication condition (line 11 of Algorithm \ref{alg3}), we have $\forall m\in\M,\ j\in \vert \bN_m \vert$
\begin{align}
\begin{split}\label{correlation}
\sum_{k=1}^K\Big(T_{m,t^m_j}(k) + T_{m,t^m_j}^{loc}(k)\Big) &> (1+\gamma)\sum_{k=1}^K T_{m,t^m_j}(k) \\
\sum_{k=1}^KT_{m,t^m_j}^{loc}(k) &> \gamma \sum_{k=1}^K T_{m,t^m_j}(k).
\end{split}
\end{align}
Then, $\forall m\in\M,\ j\in \vert \bN_m \vert/ \{1\}$, we have 
\begin{align}\label{loc-T_i}
\sum_{k=1}^KT_{m,t^m_j}^{loc}(k) &> \gamma \sum_{k=1}^K T_{m,t^m_j}(k) \ge  \gamma \sum_{k=1}^K T_{ser,\bT_i}(k).
\end{align}
The last inequality holds due to $T_{m,t^m_j}(k) = T_{ser,t^m_{j-1}}(k)$ and $t^m_{j-1} \ge \bT_i$ when $j\in \vert \bN_m \vert/ \{1\}$, together with the fact that $T_{ser,t}(k)$ is nondecreasing in $t$. The above inequality implies $\forall t_{i,l} \ge t_2^{m_{t_{i,l}}}$
\begin{align}
\begin{split}
\nonumber
\sum_{k=1}^K \Big(T_{ser,t_{i,l}}(k) - T_{ser,t_{i,l-1}}(k)\Big) & \ge  \sum_{k=1}^K\Big( T_{ser,t_{i,l-1}}(k) + T_{m_{t_{i,l}},t_{i,l}}^{loc}(k) \Big) - \sum_{k=1}^K T_{ser,t_{i,l-1}}(k)\\
& = \sum_{k=1}^K T_{m_{t_{i,l}},t_{i,l}}^{loc}(k)\\
& > \gamma\sum_{k=1}^K T_{m_{t_{i,l}},t_{i,l}}(k)\\
& \ge \gamma\sum_{k=1}^K T_{ser,\bT_i}(k).
\end{split}
\end{align}
Finally we can bound $L = \sum_{m=1}^M \bN_m$
\begin{align}\label{9}
    \begin{split}
       \sum_{k=1}^K \Big(T_{ser,\bT_{i+1} - 1 }(k) -  T_{ser,\bT_{i}}(k)\Big) &= \sum_{l=1}^{L-1} \sum_{k=1}^K \Big(T_{ser,t_{i,l+1}}(k) - T_{ser,t_{i,l}}(k)\Big)\\
       &\ge \gamma \sum_{m=1}^M (\bN_m - 1) \sum_{k=1}^K T_{ser,\bT_i}(k).
    \end{split}
\end{align}
The last inequality holds owing to (\ref{correlation}) and (\ref{loc-T_i}). With the definition of the episode, we have $\sum_{k=1}^K T_{ser,\bT_{i+1} - 1}(k) \le 2 \sum_{k=1}^K T_{ser,\bT_{i}}(k)$. We can then rewrite equation (\ref{9}) as
\begin{align} 
\nonumber
      M + 1/\gamma \ge \sum_{m=1}^M \bN_m = L.
\end{align}
Since each communication includes one upload and one download, the communication cost in one episode is at most $2(M+1/\gamma)$ (following the definition in (\ref{2})). We can then bound the total communication cost
\begin{align}\label{communication1}
   \C(\tau) \le 2\Big((M+1/\gamma) \log_2(\tau)\Big).
\end{align}

In the light of (\ref{communication1}), setting of the Theorem \ref{theorem1} and (\ref{39}) (the upper bound of the sample complexity $\tau$), we can bound the communication cost
\begin{align}
   \C(\tau) = \tilde{O}(KM).
\end{align}
\end{proof}

\subsection{Upper Bound Sample Complexity $\tau$}
Combining the breaking condition in Algorithm \ref{alg3} (line 14$\sim$16), and the definition of $B(\tau)$, we have 
\begin{align}
\nonumber
\epsilon \ge B(\tau) = \hat\Delta_{ser,\tau} (j_{ser,\tau},i_{ser,\tau}) + \alpha^M_{ser,\tau}(i_{ser,\tau},j_{ser,\tau}).
\end{align}
Let's first consider the case when the empirically best arm on the server side is not the optimal arm, i.e., $i_{ser,\tau} \not = k^*$. By the definition of $j_{ser,\tau}$, we have
\begin{align}
\nonumber
    \hat\Delta_{ser,\tau} (j_{ser,\tau},i_{ser,\tau}) + \alpha^M_{ser,\tau}(i_{ser,\tau},j_{ser,\tau}) \ge \hat\Delta_{ser,\tau} (k^*,i_{ser,\tau}) + \alpha^M_{ser,\tau}(i_{ser,\tau},k^*).
\end{align}
Recall that $\hat{k}^* = i_{ser,\tau}$ is the empirically best arm. 
Therefore, we have
\begin{align}
\nonumber
    \epsilon \ge \hat\Delta_{ser,\tau} (k^*,\hat k^*) + \alpha^M_{ser,\tau}(\hat k^*,k^*) \ge \Delta(k^*,\hat k^*),
\end{align}
where the second inequality is due to Lemma \ref{lemmaprobabilitybound} below (proof of Lemma \ref{lemmaprobabilitybound} is at the end of this section).

\begin{lemma}\label{lemmaprobabilitybound}
   Following the setting of Theorem \ref{theorem1}, we can establish the exploration bonuses
    \begin{align}
    \begin{split}
    \nonumber
    &\alpha^M_{m_t,t}(k) = \sigma\sqrt{\frac{2}{T_{m_t,t}(k)}\log\bigg(\frac{4K}{\delta}\Big((1+\gamma M)\sum_{k=1}^K T_{m_t,t}(k)\Big)^2\bigg)}\\ &\alpha^M_{ser,t}(k) = \sigma\sqrt{\frac{2}{T_{ser,t}(k)}\log\bigg(\frac{4K}{\delta}\Big((1+\gamma M)\sum_{k=1}^K T_{ser,t}(k)\Big)^2\bigg)},
    \end{split}
    \end{align}
    for all $t\in[K + 1,\tau]$, $k\in\A$, and the event
    \begin{align}
    \nonumber
       \I = \bigg\{\forall k\in\A,\forall t \in [K+1,\tau],\ \vert \hat{\mu}_{m_t,t}(k) - \mu(k) \vert \le \alpha^M_{m_t,t}(k),\ \vert \hat{\mu}_{ser,t}(k) - \mu(k) \vert \le \alpha^M_{ser,t}(k)\bigg\}.
    \end{align}
    We have $\bP(\I) \ge 1-\delta$.
\end{lemma}

Furthermore, if $i_{ser,\tau} = k^*$, we have $\epsilon \ge \Delta(k^*,\hat k^*) = 0$.  The discussion above implies that $\hat k^*$ output by \texttt{FAMABPE} satisfies the $(\epsilon,\delta)$-condition (\ref{1}).

We now continue to bound the sample complexity $\tau$. First, we need to establish Lemma \ref{serverlemma1} below, which upper bounds $T_{ser,\tau}(k)$, the number of observations on arm $k$ that is available to the server at $\tau$.
\begin{lemma} \label{serverlemma1} Under the setting of Theorem \ref{theorem1}, if event $\I$ happens, we can bound
\begin{align}
\nonumber
    T_{ser,\tau}(k) \le \frac{2\sigma^2\log\Big(4K\Big((1+M\gamma)\sum_{s=1}^K T_{ser,\tau}(s)\Big)^2/\delta\Big)}{\max\Big(\frac{\Delta(k^*,k) + \epsilon}{3},\epsilon\Big)^2} + \gamma M \sum_{s=1}^K T_{ser,\tau}(s),\ \forall k\in\A.
\end{align}
\end{lemma}

With Lemma \ref{serverlemma1}, we have
\begin{align}
    \begin{split}
    \nonumber
\sum_{k=1}^K T_{ser,\tau}(k) &\le  \sum_{k=1}^K\frac{2\sigma^2\log\Big(4K\Big((1+\gamma M)\sum_{s=1}^K T_{ser,\tau}(s)\Big)^2/\delta\Big)}{\max\Big(\frac{\Delta(k^*,k) + \epsilon}{3},\epsilon\Big)^2} + \gamma KM \sum_{s=1}^K T_{ser,\tau}(s)\\
&\le \frac{1}{1-\gamma KM} \sum_{k=1}^K\frac{2\sigma^2\log\Big(4K\Big((1+\gamma M)\sum_{s=1}^K T_{ser,\tau}(s)\Big)^2/\delta\Big)}{\max\Big(\frac{\Delta(k^*,k) + \epsilon}{3},\epsilon\Big)^2}\\
&\le \frac{1}{1-\gamma KM} \sum_{k=1}^K\frac{2\sigma^2\log\Big(4K\Big((1+\gamma M)\tau\Big)^2/\delta\Big)}{\max\Big(\frac{\Delta(k^*,k) + \epsilon}{3},\epsilon\Big)^2}.
    \end{split}
\end{align}
The last inequality is due to $\sum_{k=1}^KT_{ser,\tau}(k) \le \tau$ (Remark \ref{remark3}). Based on the relation between $\tau$, $ \sum_{m=1}^M\sum_{k=1}^K T^{loc}_{m,\tau}(k)$, and $\sum_{k=1}^K T_{ser,\tau}(k)$ (Remark \ref{remark3}), we have
\begin{align}\label{37}
\begin{split}
    \tau & = \sum_{k=1}^{K} T^{all}_{\tau}(k)= \sum_{k=1}^K T_{ser,\tau}(k) + \sum_{m=1}^M \sum_{k=1}^K T_{m,\tau}^{loc}(k)
    \\& \le \big(1+\gamma M\big) \sum_{k=1}^K T_{ser,\tau}(k)\\
    &\le \frac{1+\gamma M}{1-\gamma KM} \sum_{k=1}^K\frac{2\sigma^2\log\Big(4K\Big((1+\gamma M)\tau\Big)^2/\delta\Big)}{\max\Big(\frac{\Delta(k^*,k) + \epsilon}{3},\epsilon\Big)^2} \\
    &=\frac{1+ \gamma M}{1-\gamma KM}H^M_{\epsilon}2\log\Bigg(\frac{4K\Big((1+\gamma M)\tau\Big)^2}{\delta}\Bigg),
\end{split}
\end{align}
where the first inequality is due to Lemma \ref{lemmarela1}, the second inequality is due to the inequality we established above, and the last equality is by the definition of $H_{\epsilon}^{M}$.

Recall that in Theorem \ref{theorem1} we suppose $\gamma = 1/(2MK)$. Let $\tau^\prime$ be a parameter that satisfies
\begin{align}
\nonumber
    \tau^\prime \le \tau = \frac{M + 1/(2K)}{M - 1/2}H^M_{\epsilon}2\log\Bigg(\frac{4K\Big((1+1/(2K))\tau^{\prime}\Big)^2}{\delta}\Bigg),
\end{align}
where the equality is owing to the definition of the $\gamma$ and (\ref{37}).
Due to the fact that $\sqrt{x} \ge \log(x)$ holds when $x>0$, we have
\begin{align}
\begin{split}\label{38}
    \tau^\prime &\le \frac{M + 1/(2K)}{M - 1/2} H^M_{\epsilon}2\log\bigg(\Big(\frac{4K(1+1/(2K))\tau^{\prime}}{\delta^{1/2}}\Big)^2\bigg)\\
    &\le \frac{M + 1/(2K)}{M - 1/2} H^M_{\epsilon}4\sqrt{\frac{4K(1+1/(2K))\tau^{\prime}}{\delta^{1/2}}}\\
    &\le \bigg(\frac{M + 1/(2K)}{M - 1/2} H^M_{\epsilon}4\bigg)^2\frac{4K(1+1/(2K))}{\delta^{1/2}}.\\
\end{split}
\end{align}
We define the last term of (\ref{38}) as $\Lambda$. Then based on (\ref{37}) and (\ref{38}), we can finally get
\begin{align}
\begin{split}\label{39}
    \tau \le& \frac{M + 1/(2K)}{M - 1/2}H^M_{\epsilon}2\log\Bigg(\frac{4K\Big((1+1/(2K))\Lambda\Big)^{2}}{\delta}\Bigg),
\end{split}
\end{align}
where
\begin{align}
    \nonumber
         \Lambda = \bigg(\frac{M + 1/(2K)}{M - 1/2} H^M_{\epsilon}4\bigg)^2\frac{4K(1+1/(2K))}{\delta^{1/2}}.
\end{align}

\begin{lemma} \label{lemmarela1} Following the setting of Theorem \ref{theorem1}, we have $\forall t\in [K+1,\tau]$
\begin{align}
\nonumber
    \sum_{k=1}^K T_{ser,t}(k) \ge \frac{1}{\gamma} \sum_{k=1}^K T^{loc}_{m_t,t}(k).
\end{align}
\end{lemma}

\paragraph{Proof for Lemma \ref{lemmaprobabilitybound}, Lemma \ref{serverlemma1}, and Lemma \ref{lemmarela1}}
In the following paragraphs, we provide the detailed proof for the lemmas used above.
\begin{proof} [Proof of Lemma \ref{lemmarela1}] 
For every round $t \in [K+1,\tau]$, if the agent $m_t$ communicates with the server at round $t$, we have
\begin{align}
\nonumber
     \sum_{k=1}^K T_{ser,t}(k) \ge 1/\gamma \sum_{k=1}^K T^{loc}_{m_t,t}(k) = 0.
\end{align}
The inequality holds by line 19 of Algorithm \ref{alg3}.

Else, according to the triggered condition of Algorithm \ref{alg3}, if agent $m_t$ does not communicate with the server in round $t$, we have 
\begin{align}
    \begin{split}
    \nonumber
    \sum_{k=1}^K(T_{m_t,t}(k) + T_{m_t,t}^{loc}(k)) &\le (1+\gamma)\sum_{k=1}^KT_{m_t,t}(k)\\
    \sum_{k=1}^K T_{m_t,t}^{loc}(k)&\le \gamma\sum_{k=1}^KT_{m_t,t}(k).
    \end{split}
\end{align}
With the fact that $\sum_{k=1}^KT_{m_t,t}(k) \le \sum_{k=1}^KT_{ser,t}(k)$ (Remark \ref{remark3}), we can finally get
\begin{align}
\nonumber
    \sum_{k=1}^K T_{m_t,t}^{loc}(k) \le \gamma \sum_{k=1}^KT_{ser,t}(k).
\end{align}
Here we finish the proof of Lemma \ref{lemmarela1}.
\end{proof}

Based on Lemma \ref{lemmarela1}, we can prove Lemma \ref{lemmaprobabilitybound} as shown below.
\begin{proof} [Proof of Lemma \ref{lemmaprobabilitybound}]
Note that $\hat{\mu}_{m_t,t}(k)$ and $T_{m_t,t}(k)$, $\forall k\in\A,\ t\in[K+1,\tau]$, are all downloaded from the server and remain unchanged until the next round in which agent $m_{t}$ communicates with the server. This implies that, $\forall t_1\in[K+1,\tau]$, there exists a $t_2\in[K+1,\tau]$ which satisfies 
\begin{align}
\nonumber
    \alpha^M_{m_{t_1},t_1}(k) = \alpha^M_{ser,t_2}(k)\ \text{and}\ \hat{\mu}_{m_{t_1},t_1}(k) = \hat{\mu}_{ser,t_2}(k),\ \forall k\in\A.
\end{align}
 Hence, we can derive 
\begin{align}\label{10}
    \bP(\I) = \bP\bigg(\forall k\in\A,\forall t \in [K+1,\tau],\ \vert \hat{\mu}_{ser,t}(k) - \mu(k) \vert \le \alpha^M_{ser,t}(k)\bigg).
\end{align}
We define $\I^c$ as the complement of the event $\I$. Utilizing the union bound, its probability can be bounded as
\begin{align}\label{union1}
    \bP(\I^c) \le \sum_{k=1}^K \sum_{t=K+1}^\tau \bP\bigg(\vert \hat{\mu}_{ser,t}(k) - \mu(k) \vert > \alpha^M_{ser,t}(k)\bigg).
\end{align}
With the help of the Hoeffding inequality (Lemma \ref{auxlemma1}), we have
\begin{align}
\begin{split}\label{union2}
    \bP\bigg(\vert \hat{\mu}_{ser,t}(k) - \mu(k) \vert > \alpha^M_{ser,t}(k)\bigg) &\le  e^{-\alpha^M_{ser,t}(k)^2T_{ser,t}(k)/2\sigma^2}\\
    &=  e^{-\log\big(\frac{4K}{\delta}\big((1+\gamma M)\sum_{k=1}^K T_{ser,t}(k) \big)^2\big)}\\
    &= \frac{\delta}{4K\big((1+\gamma M)\sum_{k=1}^K T_{ser,t}(k) \big)^2}\\
    &\le \frac{\delta}{4K t^2}.
\end{split}
\end{align}
The first equality is owing to the definition of $\alpha^M_{ser,t}(k)$ and the last inequality is owing to $t = \sum_{k=1}^K T_{ser,t}(k) + \sum_{m=1}^M \sum_{k=1}^K T^{loc}_{m,t}(k) \le (1+\gamma M)\sum_{k=1}^K T_{ser,t}(k)$ (Lemma \ref{lemmarela1}). Substituting the last term of (\ref{union2}) into (\ref{union1}), we can finally bound
\begin{align}
\begin{split}
\nonumber
    \sum_{k=1}^K \sum_{t=K+1}^\tau \bP\bigg(\vert \hat{\mu}_{ser,t}(k) - \mu(k) \vert > \alpha^M_{ser,t}(k)\bigg) \le \delta
\end{split}
\end{align}
and $\bP(\I) = 1 -\bP(\I^c) \ge 1-\delta$. Here we finish the proof of Lemma \ref{lemmaprobabilitybound}.
\end{proof}

Before proving Lemma \ref{serverlemma1}, we first need to establish Lemma \ref{lemmabound1} below.
\begin{lemma}\label{lemmabound1} We define $k_{ser,t}$ in round $t\in[K+1,\tau]$ as $k_{ser,t} = \arg\max_{k\in\{i_{ser,t},j_{ser,t}\} }\alpha^M_{ser,t}(k)$. Following the setting of Theorem \ref{theorem1}, if event $\I$ happens, we can bound the index $B(t)$ as
\begin{align}\label{12}
    B(t) \le \min\Big(0,-\Delta(k^*,k_{ser,t}) + 4\alpha^M_{ser,t}(k_{ser,t}) \Big) + 2\alpha^M_{ser,t}(k_{ser,t}).
\end{align}
\end{lemma}

\begin{proof} [Proof of Lemma \ref{lemmabound1}]
  This proof follows the idea of \cite{Gabillon2012BestAI}.   Consider the case when $i_{ser,t} = k^*$, we have
\begin{align}
\begin{split}\label{13}
    B(t) &= \hat{\Delta}_{ser,t}(j_{ser,t},i_{ser,t}) + \alpha^M_{ser,t}(i_{ser,t},j_{ser,t}) \\&\le \hat{\Delta}_{ser,t}(j_{ser,t},i_{ser,t}) + 2\alpha^M_{ser,t}(k_{ser,t}) \\& \le  \Delta(j_{ser,t},i_{ser,t}) + 4\alpha^M_{ser,t}(k_{ser,t})
    \\&
    = - \Delta(k^*,j_{ser,t}) + 4\alpha^M_{ser,t}(k_{ser,t})\\
    &\le - \Delta(k^*,k_{ser,t}) + 4\alpha^M_{ser,t}(k_{ser,t}),
\end{split}
\end{align}
where the first inequality is owing to the definition of $k_{ser,t}$ and the second inequality is owing to the definition of the event $\I$. Then, consider the case when $j_{ser,t} = k^*$, we have
\begin{align}
\begin{split}\label{14}
   B(t) & = \hat{\Delta}_{ser,t}(j_{ser,t},i_{ser,t}) + \alpha^M_{ser,t}(i_{ser,t},j_{ser,t})\\
   & \le \hat{\Delta}_{ser,t}(j_{ser,t},i_{ser,t}) + 2\alpha^M_{ser,t}(k_{ser,t})
     \\& \le  -\hat{\Delta}_{ser,t}(j_{ser,t},i_{ser,t}) + 2\alpha^M_{ser,t}(k_{ser,t})
   \\& \le  -\Delta(j_{ser,t},i_{ser,t}) + 4\alpha^M_{ser,t}(k_{ser,t})
    \\&
    = - \Delta(k^*,i_{ser,t}) + 4\alpha^M_{ser,t}(k_{ser,t}) \\
    &\le - \Delta(k^*,k_{ser,t}) + 4\alpha^M_{ser,t}(k_{ser,t}),
\end{split}
\end{align}
where the second inequality is owing to the definition of the $i_{ser,t}$ (line 13 of Algorithm \ref{alg3}). 

Combine (\ref{13}) and (\ref{14}), it yields
\begin{align}\label{17}
B(t) \le -\Delta(k^*,k_{ser,t}) + 4\alpha^M_{ser,t}(k_{ser,t}),
\end{align}
when $i_{ser,t} = k^*$ or $j_{ser,t} = k^*$. Furthermore, due to $B(t)  = \hat{\Delta}_{ser,t}(j_{ser,t},i_{ser,t}) + \alpha^M_{ser,t}(j_{ser,t},i_{ser,t})$ and $\hat{\Delta}_{ser,t}(j_{ser,t},i_{ser,t}) \le 0$, we can derive $B(t) \le \alpha^M_{ser,t}(j_{ser,t},i_{ser,t}) \le 2\alpha^M_{ser,t}(k_{ser,t})$. In the light of (\ref{17}), we can finally get
\begin{align}\label{20}
        B(t) \le \min\Big(0, -\Delta(k^*,k_{ser,t}) + 2\alpha^M_{ser,t}(k_{ser,t}) \Big) + 2\alpha^M_{ser,t}(k_{ser,t}).
\end{align}

We further consider the case when $i_{ser,t} \not= k^*$ and $j_{ser,t} \not= k^*$, then we can derive
\begin{align}
\begin{split}\label{15}
    B(t) &= \hat{\Delta}_{ser,t}(j_{ser,t},i_{ser,t}) + \alpha^M_{ser,t}(i_{ser,t},j_{ser,t})\\ & \le \Delta(j_{ser,t},i_{ser,t}) + 2\alpha^M_{ser,t}(i_{ser,t},j_{ser,t}) \\
    & = \Delta(j_{ser,t},k^*) + \Delta(k^*,i_{ser,t}) + 2\alpha^M_{ser,t}(i_{ser,t},j_{ser,t}) \\& \le \Delta(j_{ser,t},k^*) + 3\alpha^M_{ser,t}(i_{ser,t},j_{ser,t}) \\
    &\le \Delta(j_{ser,t},k^*) + 6\alpha^M_{ser,t}(k_{ser,t})\\
    & = -\Delta(k^*,j_{ser,t}) +  6\alpha^M_{ser,t}(k_{ser,t}),
\end{split}
\end{align}
where the second equality is owing to $\Delta(j_{ser,t},i_{ser,t}) = \Delta(j_{ser,t},k^*) + \Delta(k^*,i_{ser,t})$ and the second inequality is owing to 
\begin{align}
\begin{split}
\nonumber
\alpha^M_{ser,t}(i_{ser,t},j_{ser,t}) &\ge \hat{\Delta}_{ser,t}(j_{ser,t},i_{ser,t}) + \alpha^M_{ser,t}(i_{ser,t},j_{ser,t})\\ 
&\ge \hat{\Delta}_{ser,t}(k^*,i_{ser,t}) + \alpha^M_{ser,t}(i_{ser,t},k^*)\\
&\ge \Delta(k^*,i_{ser,t}).
\end{split}
\end{align}
Similar to (\ref{15}), we also can show
\begin{align}
\begin{split}\label{16}
    B(t) &= \hat{\Delta}_{ser,t}(j_{ser,t},i_{ser,t}) + \alpha^M_{ser,t}(i_{ser,t},j_{ser,t}) \\& \le   \alpha^M_{ser,t}(i_{ser,t},j_{ser,t})\\
    &\le  
 - \Delta(k^*,i_{ser,t}) + \hat{\Delta}_{ser,t}(k^*,i_{ser,t}) + \alpha^M_{ser,t}(k^*,i_{ser,t}) + \alpha^M_{ser,t}(i_{ser,t},j_{ser,t})\\
    & \le  - \Delta(k^*,i_{ser,t}) + \hat{\Delta}_{ser,t}(j_{ser,t},i_{ser,t}) + \alpha^M_{ser,t}(j_{ser,t},i_{ser,t}) + \alpha^M_{ser,t}(i_{ser,t},j_{ser,t})
    \\
    & \le -\Delta(k^*,i_{ser,t}) +  4\alpha^M_{ser,t}(k_{ser,t}).
\end{split}
\end{align}
The second inequality is due to the definition of the event $\I$ and the third inequality is due to the definition of $j_{ser,t}$. 

Combine (\ref{15}) and (\ref{16}), it yields
\begin{align}\label{18}
B(t) \le - \Delta(k^*,k_{ser,t}) + 6\alpha^M_{ser,t}(k_{ser,t})
\end{align}
when $i_{ser,t} \not = k^*$ and $j_{ser,t} \not = k^*$. In the light of (\ref{18}), we can finally get
\begin{align}\label{24}
        B(t) \le \min\Big(0,-\Delta(k^*,k_{ser,t}) + 4\alpha^M_{ser,t}(k_{ser,t}) \Big) + 2\alpha^M_{ser,t}(k_{ser,t}).
\end{align}
Combining (\ref{20}) and (\ref{24}), we finish the proof of Lemma \ref{lemmabound1}.
\end{proof}


\begin{proof} [Proof of Lemma \ref{serverlemma1}] Suppose $t_1$ and $t_2$ are two consecutive rounds in which agent $m$ communicates with the server. Then, for every round $t \in [t_1 + 1,t_2]$, since $\hat{\mu}_{m,t}(k)$ and $T_{m,t}(k)$ remain unchanged, $k_{m,t}$ does not change either (we highlight this fact in Remark \ref{remark2}). For every $k\in\A$, let $t_k \not= \tau$ denote the last round in which an agent $m$ communicates with the server and $k_{ser,t_k} = k$ (the definition of $k_{ser,t}$ is provided in Lemma \ref{lemmabound1}); that is, after round $t_k$, whenever any agent communicates with the server, $k_{ser,t} \not= k$. This implies
\begin{align}\label{22}
\begin{split}
    T_{ser,\tau}(k) \le& T_{m,t_k+1}(k) + (\gamma M) 
    \sum_{s=1}^KT_{ser,\tau}(s) \\ =& T_{ser,t_k}(k) + (\gamma M)\sum_{s=1}^K T_{ser,\tau}(s).
\end{split}
\end{align}
The inequality holds because, for $t\in[t_k + 1,\tau]$, every agent $m\in\M$ uploads $T_{m,t}^{loc}(k)>0$ to the server at most once (according to the definition of $t_k$), and $T_{m,t}^{loc}(k) \le \sum_{s=1}^K T_{m,t}^{loc}(s) \le \gamma \sum_{s=1}^K T_{ser,\tau}(s)$ by $t\le\tau$ and Lemma \ref{lemmarela1}.

 We next bound $T_{ser,t_k}(k) = T_{m,t_k+1}(k)$. Since $k_{ser,t_k} = k$ and the breaking condition (lines 14$\sim$16 of Algorithm \ref{alg3}) is not satisfied in round $t_k \not= \tau$, Lemma \ref{lemmabound1} yields
\begin{align}
\begin{split}\label{27}
    \epsilon &\le B(t_k)\\
             &\le \min\Big(0,-\Delta(k^*,k) + 4\alpha^M_{ser,t_k}(k) \Big) + 2\alpha^M_{ser,t_k}(k).
\end{split}
\end{align}
Substituting (\ref{7}) (the definition of $\alpha^M_{ser,t_k}(k)$) into (\ref{27}), we have
\begin{align}
\nonumber
 T_{ser,t_k}(k) \le \frac{2\sigma^2\log\Big(4K\Big((1+\gamma M)\sum_{s=1}^K T_{ser,t_k}(s)\Big)^2/\delta\Big)}{\max\Big(\frac{\Delta(k^*,k) + \epsilon}{3},\epsilon\Big)^2}.
\end{align}
With (\ref{22}) and $t_k<\tau$, we can finally bound $T_{ser,\tau}(k)$, i.e.
\begin{align}
\begin{split}
\nonumber
    T_{ser,\tau}(k) &\le \frac{2\sigma^2\log\Big(4K\Big((1+\gamma M)\sum_{s=1}^K T_{ser,\tau}(s)\Big)^2/\delta\Big)}{\max\Big(\frac{\Delta(k^*,k) + \epsilon}{3},\epsilon\Big)^2} + \gamma M \sum_{s=1}^K T_{ser,\tau}(s).
\end{split}
\end{align}
Here we finish the proof of Lemma \ref{serverlemma1}.
\end{proof}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\section{PROOF OF THEOREM \ref{theorem2}}\label{sectionE}

Recall that the upper confidence bounds of the agent $m$ and server are defined as $\alpha^L_{m,t}(i,j) = \Vert \y(i,j)\Vert_{\V_{m,t}^{-1}} C_{m,t}$ and $\alpha^L_{ser,t}(i,j) = \Vert \y(i,j)\Vert_{\V_{ser,t}^{-1}} C_{ser,t}$, respectively. The estimated model parameters of the agent $m$ and server are denoted as $\hat\t_{m,t}$ and $\hat\t_{ser,t}$, respectively. Besides, we also provide Remark \ref{importantremark} to specifically illustrate the relationship between some of the most important data used in the proof of Theorem \ref{theorem2}.

\begin{remark} [Global and local data of the linear case] \label{importantremark}
Since the transmitted data in the linear case are more complicated than the data in the MAB case, we here provide new notations to clarify the relations between global data and local data. The following matrix and vector denote the global data
\begin{align}
\begin{split}
\nonumber
    &\V_{t}^{all} = \lambda\bI + \sum_{s=1}^t \x_{m_s,s} \x_{m_s,s}^{\top} = \V_{ser,t} + \sum_{m=1}^M\V^{loc}_{m,t},\\ &\b_{t}^{all} = \sum_{s=1}^t \x_{m_s,s}r_{m_s,s} = \b_{ser,t} + \sum_{m=1}^M \b^{loc}_{m,t},\\ &T^{all}_t(k) = \sum_{s=1}^t\bone\{k = k_{m_s,s}\} = T_{ser,t}(k) + \sum_{m=1}^M T^{loc}_{m,t}(k),\ \forall k\in\A.
\end{split}
\end{align}
We define $N_{m,t}$ as the last round, up to the end of round $t$, in which agent $m$ communicates with the server. The collected data of agent $m$ that has not been uploaded to the server is provided as follows
\begin{align}
    \begin{split}
    \nonumber
         &\V_{m,t}^{loc} = \sum_{s=N_{m,t}+1}^{t} \bone\{m_s = m\} \x_{m_s,s} \x_{m_s,s}^{\top},\\& \b_{m,t}^{loc} = \sum_{s=N_{m,t}+1}^{t} \bone\{m_s = m\} \x_{m_s,s} r_{m_s,s}\\&
    T^{loc}_{m,t}(k) = \sum_{s=N_{m,t}+1}^{t}\bone\{m_s = m, k = k_{m_s,s}\},\ \forall k\in\A.  
    \end{split}
\end{align}
Similarly, the data that has been uploaded to the server yields
\begin{align}
    \begin{split}
    \nonumber
         &\V_{ser,t} = \lambda\bI + \sum_{m=1}^M \sum_{s=1}^{N_{m,t}} \bone\{m_s = m\} \x_{m_s,s} \x_{m_s,s}^{\top},\\& \b_{ser,t} = \sum_{m=1}^M\sum_{s=1}^{N_{m,t}} \bone\{m_s = m\} \x_{m_s,s} r_{m_s,s}\\&
         T_{ser,t}(k) = \sum_{m=1}^M\sum_{s=1}^{N_{m,t}}\bone\{m_s = m, k = k_{m_s,s}\},\ \forall k\in\A.  
    \end{split}
\end{align}
\end{remark}
According to the communication protocol, the local data of every agent $m\in\M$ can be represented by 
\begin{align}
    \begin{split}
        \nonumber
       \V_{m,t} = \V_{ser,N_{m,t}},\ \b_{m,t} = \b_{ser,N_{m,t}},\ 
        T_{m,t}(k) = T_{ser,N_{m,t}}(k),\ \forall k\in\A.
    \end{split}
\end{align}
Accordingly, we have $\V_{m,t} \preceq \V_{ser,t}$ and $\sum_{k=1}^K T_{m,t}(k) \le \sum_{k=1}^K T_{ser,t}(k)$, $\forall t\in[K+1,\tau]$. 

\paragraph{Proof sketch of Theorem \ref{theorem2}} The proof of Theorem \ref{theorem2} also consists of three main components, i.e., a) the sample complexity $\tau$; b) the communication cost $\C(\tau)$; c) the estimated best arm satisfies Eq (\ref{1}). Specifically, to upper bound the total communication cost $\C(\tau)$, we utilize the property that the agents communicate with the server only when at least one of the two events (line 11 of Algorithm \ref{alg2}) is triggered (Lemma \ref{lemmacommunication2}). To upper bound the sample complexity $\tau$, we first establish the relationship between $\sum_{k=1}^KT_{ser,t}(k)$ and $\sum_{k=1}^KT^{loc}_{m,t}(k)$ and the relationship between $\V_{ser,t}$ and $\V^{loc}_{m,t}$ based on the hybrid event triggered strategy (Lemma \ref{lemmarela2}). Then, we design the exploration bonuses based on $\sum_{k=1}^KT_{m_t,t}(k)$, $\V_{m_t,t}$, $\sum_{k=1}^K T_{ser,t}(k)$ and $\V_{ser,t}$ (Lemma \ref{lemmaprobabilitybound2}). Furthermore, we bound the matrix norms $\Vert \y(i,j)\Vert_{\V^{-1}_{m_t,t}}$ and $\Vert \y(i,j)\Vert_{\V^{-1}_{ser,t}}$, $\forall i,j\in\A$ based on the arm selection strategy (Lemma \ref{lemmamatrixbound}). Combining these results, we can bound $T_{ser,\tau}(k)$, $\forall k\in\A$ (Lemmas \ref{lemmabound2} and \ref{serverlemma2}). Finally, utilizing the knowledge of Remark \ref{importantremark}, we can bound $T^{all}_\tau(k)$, $\forall k\in\A$, and $\tau = \sum_{k=1}^K T^{all}_\tau(k)$. Similar to the MAB setting, the guarantee on finding the best arm directly follows the property of the breaking index, i.e., if $B(\tau) \le \epsilon$, then $\Delta(k^*,\hat k^*)\le\epsilon$ with probability at least $1-\delta$. 

\subsection{Upper Bound Communication Cost $\C(\tau)$}

\begin{lemma} [Communication cost of the hybrid event-triggered strategy in Algorithm \ref{alg2}] \label{lemmacommunication2}
Under the setting of Theorem \ref{theorem2}, the total triggered number of the first event (line 11 of Algorithm \ref{alg2}) can be bounded by 
\begin{align}\label{58}
    (M + 1/\gamma_1) d\log_2\Big(1+\frac{\tau}{\lambda d}\Big)
\end{align}
and the total triggered number of the second event can be bounded by 
\begin{align}
\nonumber
     (M + 1/\gamma_2)\log_2(\tau).
\end{align}
The total communication cost can be bounded by
\begin{align}
\nonumber
    \C(\tau) \le 2\Bigg((M + 1/\gamma_1) d\log_2\bigg(1+\frac{\tau}{\lambda d} \bigg) +  (M + 1/\gamma_2)\log_2(\tau) \Bigg).
\end{align}
\end{lemma}

\begin{proof} [Proof of Lemma \ref{lemmacommunication2}]
    The triggered number of the second event can be bounded by Lemma \ref{lemmacommunication1}. Besides, we can bound the triggered number of the first event similarly to \cite{He2022ASA}. The proof of (\ref{58}) can also be divided into two parts. In the first part, we divide the rounds up to the sample complexity $\tau$ into at most $d\log_2(1 + \tau/(\lambda d))$ episodes; in the second part, we analyze the upper bound of the triggered number of the first event in each episode. We define
    \begin{align}
\nonumber
    \bT_i = \min\Big\{ t\in\vert\tau\vert,\ \text{det}(\V_{ser,t}) \ge 2^i\lambda^d \Big\}
\end{align}
and partition the set of all rounds into episodes $\{ \bT_i,\bT_{i}+1,\dots,\bT_{i+1}-1 \}$, $\forall i \ge 0$. By Lemma \ref{auxlemma2}, we can bound 
\begin{align}
\nonumber
    \text{det}(\V_{ser,\tau}) \le \lambda^d \Big(1 + \frac{\tau}{\lambda d}\Big)^d.
\end{align}
Accordingly, the number of the episode can be bounded by 
\begin{align}
\nonumber
    \max\{ i \ge 0 \} 
 = \log_2\bigg(\frac{\text{det}(\V_{ser,\tau})}{\lambda^d}\bigg) \le d\log_2 \Big(1 + \frac{\tau}{\lambda d}\Big).
\end{align}

We then prove that, $\forall i \ge 0$, from round $\bT_i$ to $\bT_{i+1} - 1$, the triggered number of the first event can be bounded by $M + 1/\gamma_1$. We first define $\bN_m$ as the number of times agent $m$ triggers the first event in rounds $\bT_i$ to $\bT_{i+1}-1$, $t^m_{1},...,t^m_{\bN_m}$ as the sequence of rounds in which agent $m$ triggers the first event in rounds $\bT_i$ to $\bT_{i+1}-1$, $L$ as the total number of times the first event is triggered by all agents in rounds $\bT_i$ to $\bT_{i+1}-1$, and $t_{i,1},...,t_{i,L}$ as the sequence of rounds in which the first event is triggered in rounds $\bT_i$ to $\bT_{i+1}-1$. According to the definition of the first event, we have
\begin{align}
\nonumber
\text{det}(\V_{m_t,t} + \V_{m_t,t}^{loc}) > (1+\gamma_1)\text{det}(\V_{m_t,t}).
\end{align}
Then, $\forall m\in\M,\ j\in \vert \bN_m \vert/\{1\}$, we have 
\begin{align}\label{50}
 \text{det}(\V_{ser,\bT_i} + \V_{m,t^m_j}^{loc}) \ge  \frac{\text{det}(\V_{ser,\bT_i})}{\text{det}(\V_{m,t^m_j})} \text{det}(\V_{m,t^m_j} + \V_{m,t^m_j}^{loc}) \ge (1+\gamma_1)\text{det}(\V_{ser,\bT_i})
\end{align}
The inequality holds due to $\forall j\in \vert \bN_m \vert/\{1\}$, $\V_{ser,\bT_i} \preceq \V_{m,t^m_j}$ and Lemma \ref{auxlemma3}.
The above inequality implies $\forall t_{i,l} \ge t_2^{m_{t_{i,l}}}$
\begin{align}
\begin{split}
\nonumber
\text{det} (\V_{ser,t_{i,l}}) - \text{det}(\V_{ser,t_{i,l-1}}) & = \text{det}( \V_{ser,t_{i,l-1}} + \V_{m_{t_{i,l}},t_{i,l}}^{loc}) - \text{det}( \V_{ser,t_{i,l-1}})\\
& \ge \text{det}( \V_{ser,\bT_i} + \V_{m_{t_{i,l}},t_{i,l}}^{loc}) - \text{det}( \V_{ser,\bT_i})\\
& \ge \gamma_1\text{det}(\V_{ser,\bT_i}).
\end{split}
\end{align}
The first inequality is owing to Lemma \ref{auxlemma4} and the last inequality is owing to (\ref{50}).

Finally we can bound $L = \sum_{m=1}^M \bN_m$
\begin{align}\label{29}
    \begin{split}
       \text{det} (\V_{ser,\bT_{i+1} - 1 }) - \text{det}( \V_{ser,\bT_{i}}) &= \sum_{l=1}^{L-1}   \Big(\text{det}(\V_{ser,t_{i,l+1}}) - \text{det}(\V_{ser,t_{i,l}})\Big)\\
       &\ge \gamma_1 \sum_{m=1}^M (\bN_m - 1) \text{det}(\V_{ser,\bT_i}).
    \end{split}
\end{align}
Due to the definition of the episode, it has $2\text{det}(\V_{ser,\bT_i}) \ge \text{det}(\V_{ser,\bT_{i+1} - 1})$. We can rewrite equation (\ref{29}) as
\begin{align} 
\nonumber
      M + 1/\gamma_1 \ge \sum_{m=1}^M \bN_m . 
\end{align}
 We can then bound the total triggered number of the first event by
\begin{align}
\nonumber
    (M+1/\gamma_1) d\log_2\Big(1+ \frac{\tau}{\lambda d}\Big).
\end{align}
Since communication happens when at least one of the events is triggered, the total number of communication rounds is at most the total number of times the two events are triggered. Hence, the total communication number from $t=K+1$ to $t = \tau$ can be bounded by
\begin{align}
\nonumber
    (M + 1/\gamma_1) d\log_2\bigg(1+\frac{\tau}{\lambda d}\bigg) + (M + 1/\gamma_2)\log_2(\tau).
\end{align}
Furthermore, due to one communication includes one upload and one download, the total communication cost can be bounded by
\begin{align}\label{communication2}
       \C(\tau) \le 2\Bigg((M + 1/\gamma_1) d\log_2\bigg(1+\frac{\tau}{\lambda d} \bigg) +  (M + 1/\gamma_2)\log_2(\tau)\Bigg).
\end{align}

With (\ref{communication2}), (\ref{133}) (the upper bound of the sample complexity), and the setting of Theorem \ref{theorem2}, we can bound the communication cost
\begin{align}
\nonumber
    \C(\tau) = O\Big( \max(2MK,M^2)\Big).
\end{align}
Here we finish the proof of Lemma \ref{lemmacommunication2}.
\end{proof}

\subsection{Upper Bound Sample Complexity $\tau$}
 Combine the breaking condition of the Algorithm \ref{alg2} (line 14$\sim$16) and definition of $B(\tau)$, we have 
\begin{align}
\nonumber
\epsilon \ge \hat\Delta_{ser,\tau} (j_{ser,\tau},i_{ser,\tau}) + \alpha^L_{ser,\tau}(i_{ser,\tau},j_{ser,\tau}) = B(\tau).
\end{align}
Let's first consider the case when the empirically best arm on the server side is not the optimal arm, i.e., $i_{ser,\tau} \not= k^*$. By the definition of the $j_{ser,t}$, we have
\begin{align}
\nonumber
    \hat\Delta_{ser,\tau} (j_{ser,\tau},i_{ser,\tau}) + \alpha^L_{ser,\tau}(i_{ser,\tau},j_{ser,\tau}) \ge \hat\Delta_{ser,\tau} (k^*,i_{ser,\tau}) + \alpha^L_{ser,\tau}(i_{ser,\tau},k^*).
\end{align}
Recall that $\hat{k}^* = i_{ser,\tau}$ is the estimated best arm. Therefore, we have
\begin{align}
\nonumber
    \epsilon \ge \hat\Delta_{ser,\tau} (k^*,\hat k^*) + \alpha^L_{ser,\tau}(\hat k^*,k^*) \ge \Delta(k^*,\hat k^*),
\end{align}
where the second inequality is due to Lemma \ref{lemmaprobabilitybound2} below (proof of Lemma \ref{lemmaprobabilitybound2} is at the end of the section).

\begin{lemma} \label{lemmaprobabilitybound2} Following the setting of Theorem \ref{theorem2}, we define event
  \begin{align}
  \nonumber
       \I = \bigg\{\forall i,j\in\A,\forall t \in [K+1,\tau],\ \vert \hat\Delta_{m_t,t}(i,j) - \Delta(i,j) \vert \le \alpha^L_{m_t,t}(i,j),\  \vert \hat\Delta_{ser,t}(i,j) - \Delta(i,j) \vert \le \alpha^L_{ser,t}(i,j) \bigg\}
    \end{align}
where
\begin{align}
\begin{split}
\nonumber
    &\alpha^L_{m_t,t}(i,j) = \Bigg(\sqrt{\lambda} + \Big(\sqrt{2\gamma_1}M + \sqrt{1 + \gamma_1 M}\Big)\bigg(\sigma\sqrt{d\log\bigg(\frac{2}{\delta}\bigg(1+\frac{(1+\gamma_2 M) \sum_{k=1}^K T_{m_t,t}(k)}{\min(\gamma_1,1)\lambda}\bigg)\bigg)} \bigg)\Bigg) \Vert \y(i,j) \Vert_{\V_{m_t,t}^{-1}}\\
   &\alpha^L_{ser,t}(i,j) = \Bigg(\sqrt{\lambda} + \Big(\sqrt{2\gamma_1}M + \sqrt{1 + \gamma_1 M}\Big) \bigg(\sigma\sqrt{d\log\bigg(\frac{2}{\delta}\bigg(1+\frac{(1+\gamma_2 M) \sum_{k=1}^K T_{ser,t}(k)}{\min(\gamma_1,1)\lambda}\bigg)\bigg)} \bigg)\Bigg) \Vert \y(i,j) \Vert_{\V_{ser,t}^{-1}}.
\end{split}
\end{align}
We have $\bP(\I) \ge 1-\delta$.
\end{lemma}

Moreover, when $i_{ser,\tau} = k^*$, we can trivially derive $\Delta(k^*,\hat{k}^*) = 0 \le \epsilon$. The above discussion implies $\hat{k}^*$ output by \texttt{FALinPE} satisfies the $(\epsilon,\delta)$-condition (\ref{1}).

We now continue to bound the sample complexity $\tau$. First, we need to establish Lemma \ref{serverlemma2} below, which upper bounds $T_{ser,\tau}(k)$, the number of observation on arm $k$ that is available to the server at $\tau$.

\begin{lemma} \label{serverlemma2} 
Under the setting of Theorem \ref{theorem2} and event $\I$, we can bound 
\begin{align}
\nonumber
    T_{ser,\tau}(k) \le \max_{i,j\in\A} \frac{\rho(\y(i,j))p^*_k(\y(i,j))}{\max\big(\frac{\Delta(k^*,i) + \epsilon}{3}, \frac{\Delta(k^*,j) + \epsilon}{3}, \epsilon\big)^2} C^{2}_{ser,\tau} + \gamma_2 M \sum_{s=1}^KT_{ser,\tau}(s),\ \forall k\in\A.
\end{align}
\end{lemma}

With Lemma \ref{serverlemma2}, we can derive
\begin{align}
    \begin{split}
    \nonumber
\sum_{k=1}^K T_{ser,\tau}(k) &\le \sum_{k=1}^K \max_{i,j\in\A} \frac{\rho(\y(i,j))p^*_k(\y(i,j))}{\max\big(\frac{\Delta(k^*,i) + \epsilon}{3}, \frac{\Delta(k^*,j) + \epsilon}{3}, \epsilon\big)^2} C^{2}_{ser,\tau} + \gamma_2 KM \sum_{s=1}^K T_{ser,\tau}(s)\\
&\le \frac{1}{1-\gamma_2 KM} \sum_{k=1}^K \max_{i,j\in\A} \frac{\rho(\y(i,j))p^*_k(\y(i,j))}{\max\big(\frac{\Delta(k^*,i) + \epsilon}{3}, \frac{\Delta(k^*,j) + \epsilon}{3}, \epsilon\big)^2} C^{2}_{ser,\tau}.
    \end{split}
\end{align}
Furthermore, based on the relationship between $\tau$ and $\sum_{k=1}^K T_{ser,\tau}(k)$ (Remark \ref{importantremark}), we have
\begin{align}
\begin{split}\label{118}
    \tau & = \sum_{k=1}^K T_{ser,\tau}(k) + \sum_{m=1}^M \sum_{k=1}^K T_{m,\tau}^{loc}(k)
    \\& \le \big(1+\gamma_2 M\big) \sum_{k=1}^K T_{ser,\tau}(k)\\
    &\le \frac{1 + \gamma_2 M}{1 - \gamma_2 KM} 
\sum_{k=1}^{K}\max_{i,j\in\A}\frac{\rho(\y(i,j))p^*_k(\y(i,j))}{\max\big(\frac{\Delta(k^*,i) + \epsilon}{3}, \frac{\Delta(k^*,j) + \epsilon}{3}, \epsilon\big)^2} C^{2}_{ser,\tau}\\
    &= \frac{1 + \gamma_2 M}{1 - \gamma_2 KM} H^L_{\epsilon}C^{2}_{ser,\tau}
\end{split}
\end{align}
where the second inequality is owing to the inequality we establish above and the last equality is owing to the definition of $H^L_\epsilon$.

Recall that we suppose $\gamma_1 = 1/(M^2)$, $\gamma_2 = 1/(2MK)$ and $0 < \lambda \le \sigma^2\big(\sqrt{1 + \gamma_1 M} + \sqrt{2\gamma_1}M\big)^2\log(2/\delta)$. We first need to bound $C_{ser,\tau}$:
\begin{align}\label{128}
\begin{split}
C_{ser,\tau} & = \sqrt{\lambda} + \bigg(\sqrt{2} +  \sqrt{ 1 + \frac{1}{M}}\bigg)   \Bigg(\sigma\sqrt{d\log\Bigg(\frac{2}{\delta}\Bigg(1+\frac{(1+1/(2K)) \sum_{k=1}^K T_{ser,\tau}(k)}{\lambda/M^{2}}\Bigg)\Bigg)} \Bigg)\\
& = \sqrt{\lambda} + \bigg(\sqrt{2} +  \sqrt{ 1 + \frac{1}{M}}\bigg) \Bigg(\sigma\sqrt{d\log\Big(\frac{2}{\delta}\Big) + d\log\Bigg(1 + \frac{(1+1/(2K)) \sum_{k=1}^K T_{ser,\tau}(k)}{\lambda/M^{2}}\Bigg)} \Bigg)\\
& \le  2 \bigg(\sqrt{2} +  \sqrt{ 1 + \frac{1}{M}}\bigg) \Bigg(\sigma\sqrt{d\log\Big(\frac{2}{\delta}\Big) + d\log\Bigg(1 + \frac{(1+1/(2K)) \sum_{k=1}^K T_{ser,\tau}(k)}{\lambda/M^{2}}\Bigg)} \Bigg)\\
& \le  2 \bigg(\sqrt{2} +  \sqrt{ 1 + \frac{1}{M}}\bigg) \Bigg(\sigma\sqrt{d\log\Big(\frac{2}{\delta}\Big) + d\log\Bigg(1 + \frac{(1+1/(2K)) \tau}{\lambda/M^{2}}\Bigg)} \Bigg).
\end{split}
\end{align}
The first inequality is owing to the definition of $\lambda$ and the last inequality is owing to $\sum_{k=1}^KT_{ser,\tau}(k) \le \tau$. Substituting the last term of (\ref{128}) into (\ref{118}), we have
\begin{align}
\begin{split}
\nonumber
        \tau \le \frac{M + 1/(2K)}{M - 1/2}  \bigg(\sqrt{2} +  \sqrt{ 1 + \frac{1}{M}}\bigg)^2 H^L_\epsilon 4\sigma^2 d \Bigg(\log\Big(\frac{2}{\delta}\Big) + \log\Bigg(1 + \frac{(1+1/(2K)) \tau}{\lambda/M^{2}}\Bigg)\Bigg).
\end{split}
\end{align}
We define
\begin{align}
\begin{split}
\nonumber
        \Gamma = \frac{M + 1/(2K)}{M - 1/2} \bigg(\sqrt{2} +  \sqrt{ 1 + \frac{1}{M}}\bigg)^2 H^L_\epsilon 4 \sigma^2 d \log\Big(\frac{2}{\delta}\Big).
\end{split}
\end{align}
Let $\tau^\prime$ be a parameter that satisfies
\begin{align}
\begin{split}
\nonumber
    \tau^\prime \le \tau &= \frac{M + 1/(2K)}{M - 1/2} \bigg(\sqrt{2} +  \sqrt{ 1 + \frac{1}{M}}\bigg)^2
         H^L_\epsilon 4 \sigma^2 d \log\Bigg(1 + \frac{(1+1/(2K)) \tau^\prime}{\lambda/M^{2}}\Bigg) + \Gamma\\
         &\le \frac{M + 1/(2K)}{M - 1/2} \bigg(\sqrt{2} +  \sqrt{ 1 + \frac{1}{M}}\bigg)^2 H^L_\epsilon 4 \sigma^2 d \sqrt{1 + \frac{(1+1/(2K)) \tau^\prime}{\lambda/M^{2}}} + \Gamma.
\end{split}
\end{align}
We can further derive
\begin{align}\label{132}
\begin{split}
    \sqrt{\tau^\prime} \le \frac{M + 1/(2K)}{M - 1/2} \bigg(\sqrt{2} +  \sqrt{ 1 + \frac{1}{M}}\bigg)^2 H^L_\epsilon 4 \sigma^2 d \sqrt{1 + \frac{(1+1/(2K)) \tau^\prime}{\lambda/M^{2}}} + \Gamma = \Lambda.
\end{split}
\end{align}
In the light of (\ref{132}), we can finally bound $\tau$ by
\begin{align}\label{133}
   \tau &\le \frac{M + 1/(2K)}{M - 1/2} \bigg(\sqrt{2} +  \sqrt{ 1 + \frac{1}{M}}\bigg)^2 H^L_\epsilon 4 \sigma^2 d \sqrt{1 + \frac{(1+1/(2K)) \Lambda^2}{\lambda/M^{2}}} + \Gamma.
\end{align}

\begin{lemma}\label{lemmarela2} Under the setting of Theorem \ref{theorem2} and the communication strategy of line 11 in Algorithm \ref{alg2}, we can derive 
\begin{align}\label{54}
    \V_{ser,t} \succeq (1/\gamma_1)\V^{loc}_{m,t},\ \forall m\in\M,\ t\in[K+1,\tau].
\end{align}
Furthermore, following the results of Lemma \ref{lemmarela1}, we can also derive
\begin{align}\label{55}
   \sum_{k=1}^K T_{ser,t}(k) \ge (1/\gamma_2)\sum_{k=1}^K T^{loc}_{m,t}(k),\ \forall m\in\M,\ t\in[K+1,\tau].
\end{align}
\end{lemma}

\textbf{Proof for Lemma \ref{lemmaprobabilitybound2}, Lemma \ref{serverlemma2}, and Lemma \ref{lemmarela2}} In the following paragraphs, we provide the detailed proof for the lemmas used above.

\begin{proof} [Proof of Lemma \ref{lemmarela2}] The proof of (\ref{54}) is similar to the proof in \cite{He2022ASA}. Suppose the last round of agent $m$ communicates with the server is $t_1$ and the first event is triggered. Then, we can trivially derive
\begin{align}
\nonumber
    \V_{ser,t} \succ \bold{0} = \V^{loc}_{m,t},\ \forall t\in[K+1,\tau].
\end{align}
Otherwise, according to the definition of the first event, we have
\begin{align}
\nonumber
    \text{det}(\V_{m,t} + \V^{loc}_{m,t}) \le (1+\gamma_1) \text{det}(\V_{m,t}).
\end{align}
Based on the Lemma \ref{auxlemma5}, we have
\begin{align}
\nonumber
 1 + \gamma_1 \ge  \frac{\text{det}(\V_{m,t} + \V^{loc}_{m,t})} {\text{det}(\V_{m,t})} \ge \frac{\Vert\x\Vert^2_{\V_{m,t} + \V^{loc}_{m,t}}}{\Vert\x\Vert^2_{\V_{m,t}}} = \frac{\Vert\x\Vert^2_{\V_{m,t}} + \Vert\x\Vert^2_{\V^{loc}_{m,t}}}{\Vert\x\Vert^2_{\V_{m,t}}}  
\end{align}
and 
\begin{align}
\nonumber
    \V_{m,t} \succeq (1/\gamma_1) \V^{loc}_{m,t}.
\end{align}
With the fact that $\V_{ser,t} \succeq \V_{m,t}$, $\forall t\in[K+1,\tau]$, we can finish the proof of (\ref{54}). 

The proof of (\ref{55}) is similar to the proof of Lemma \ref{lemmarela1}. Combine the two results and we can finish the whole proof of Lemma \ref{lemmarela2}.
\end{proof}

Based on Lemma \ref{lemmarela2}, we can prove Lemma \ref{lemmaprobabilitybound2} as shown below.

\begin{proof} [Proof of Lemma \ref{lemmaprobabilitybound2}]
    Following the same argument of Lemma \ref{lemmabound1}, we only need to prove
    \begin{align}
    \nonumber
      \bP(\I) = \bP\bigg(\forall i,j\in\A,\forall t \in [K+1,\tau],\ \vert \hat\Delta_{ser,t}(i,j) - \Delta(i,j) \vert \le \alpha^L_{ser,t}(i,j)\bigg) \ge 1-\delta.
    \end{align}
Decompose $\vert \hat\Delta_{ser,t}(i,j) - \Delta(i,j) \vert$, we have
\begin{align}
    \begin{split}
    \nonumber
    \vert \Delta(i,j) - \hat{\Delta}_{ser,t}(i,j) \vert &= \vert \y(i,j)^\top \t^* - \y(i,j)^\top \hat{\t}_{ser,t} \vert\\
    & = \vert \y(i,j)^\top (\t^* - \hat\t_{ser,t}) \vert\\
    &\le \Vert \y(i,j)\Vert_{\V_{ser,t}^{-1}} \Vert \t^* - \hat\t_{ser,t} \Vert_{\V_{ser,t}}.
    \end{split}
\end{align}
Hence, according to the definition of $\alpha^L_{ser,t}(i,j)$, we can derive
\begin{align}\label{67}
     \bP(\I) \ge \bP\Big(\forall t \in [K+1,\tau],\ \Vert \t^* - \hat\t_{ser,t} \Vert_{\V_{ser,t}} \le C_{ser,t} \Big) 
    \end{align}
and only need to prove
\begin{align}
     \bP\Big(\forall t \in [K+1,\tau],\ \Vert \t^* - \hat\t_{ser,t} \Vert_{\V_{ser,t}} \le C_{ser,t} \Big) \ge 1-\delta.
\end{align}

We first decompose $\Vert \t^* - \hat\t_{ser,t} \Vert_{\V_{ser,t}}$
\begin{align}\label{68}
    \begin{split}
        \Vert \t^* - \hat\t_{ser,t} \Vert_{\V_{ser,t}} & = \Vert \t^* - \V^{-1}_{ser,t} \b_{ser,t} \Vert_{\V_{ser,t}}\\
        & = \Big\Vert \t^* - \V^{-1}_{ser,t} \Big(( \V_{ser,t} - \lambda\bI )\t^* + \sum_{m=1}^M\sum_{s=1}^{N_{m,t}} \bone\{m_s = m\}\x_{m,s}\eta_{m,s} \Big)\Big\Vert_{\V_{ser,t}}\\
        & = \Big\Vert \t^* - \t^* +  \lambda \V^{-1}_{ser,t} \t^*  - \V^{-1}_{ser,t} \sum_{m=1}^M\sum_{s=1}^{N_{m,t}} \bone\{m_s = m\}\x_{m,s}\eta_{m,s} \Big\Vert_{\V_{ser,t}}\\
        & \le \Vert \lambda \V^{-1}_{ser,t} \t^* \Vert_{\V_{ser,t}}  + \Big\Vert \V^{-1}_{ser,t} \sum_{m=1}^M\sum_{s=1}^{N_{m,t}} \bone\{m_s = m\}\x_{m,s}\eta_{m,s} \Big\Vert_{\V_{ser,t}}\\
        & = \lambda\Vert \t^* \Vert_{\V_{ser,t}^{-1}} + \Big\Vert \V^{-1}_{ser,t} \Big( \sum_{s=1}^t\x_{m_s,s}\eta_{m_s,s} - \sum_{m=1}^M\sum_{s = N_{m,t} + 1}^{t} \bone\{m_s = m\}\x_{m,s}\eta_{m,s} \Big) \Big\Vert_{\V_{ser,t}}\\
        & \le \sqrt{\lambda} + \underbrace{\Big\Vert \V^{-1}_{ser,t}  \sum_{s=1}^t\x_{m_s,s}\eta_{m_s,s}\Big\Vert_{\V_{ser,t}}}_{\Lambda} + \underbrace{\Big\Vert \V^{-1}_{ser,t}\sum_{m=1}^M\sum_{s=N_{m,t} + 1}^{t} \bone\{m_s = m\}\x_{m,s}\eta_{m,s} \Big\Vert_{\V_{ser,t}}}_{\Gamma},
    \end{split}
\end{align}
where the last inequality is owing to $\V_{ser,t} \succeq \lambda\bI$. We further decompose term $\Lambda$ and $\Gamma$. Based on the Lemma \ref{auxlemma6}, we have $\forall t\in[K+1,\tau]$
\begin{align}\label{34}
    \Big\Vert \sum_{s=1}^t\x_{m_s,s}\eta_{m_s,s} \Big\Vert_{\V^{all^{-1}}_t} 
    \le \sigma\sqrt{d\log\Big(\frac{2(1+t/\lambda)}{\delta}\Big)}
\end{align}
holds with probability at least $1-\delta/2$. Applying a union bound and the self-normalized martingale inequality again, it holds that for each $m\in\M$ and $\forall t\in[K+1,\tau]$
\begin{align}\label{35}
     \Big\Vert \sum_{s=N_{m,t} + 1}^t \bone\{m_s = m\} \x_{m_s,s}\eta_{m_s,s} \Big\Vert_{(\gamma_1\lambda\bI + \V^{loc}_{m,t})^{-1}} \le \sigma\sqrt{d\log\Big(\frac{2(1+t/(\gamma_1\lambda))}{\delta}\Big)}
\end{align}
holds with probability at least $1-\delta/2$.

According to the Lemma \ref{lemmarela2} and (\ref{34}), $\forall t\in [K+1,\tau]$, $\Lambda$ can be bounded by
\begin{align}\label{69}
\begin{split}
    &\Big\Vert \V^{-1}_{ser,t}  \sum_{s=1}^t\x_{m_s,s}\eta_{m_s,s}\Big\Vert_{\V_{ser,t}}\\  =& \Big\Vert   \sum_{s=1}^t\x_{m_s,s}\eta_{m_s,s}\Big\Vert_{\V_{ser,t}^{-1}} \\
    \le& \sqrt{1 + \gamma_1 M} \Big\Vert   \sum_{s=1}^t\x_{m_s,s}\eta_{m_s,s}\Big\Vert_{\V_t^{all^{-1}}}\\
    \le& \sqrt{1 + \gamma_1 M} \bigg(\sigma\sqrt{d\log\Big(\frac{2(1+t/\lambda)}{\delta}\Big)}\bigg)
\end{split}
\end{align}
with probability at least $1-\delta/2$. The first inequality holds due to Lemma \ref{lemmarela2} and the last inequality holds according to (\ref{34}). Besides, in the light of (\ref{35}), $\forall t\in [K+1,\tau]$, $\Gamma$ can be bounded by
\begin{align}\label{70}
    \begin{split}
        &\Big\Vert \V^{-1}_{ser,t}\sum_{m=1}^M\sum_{s=N_{m,t} + 1}^{t} \bone\{m_s = m\}\x_{m,s}\eta_{m,s} \Big\Vert_{\V_{ser,t}}\\  =& \Big\Vert \sum_{m=1}^M\sum_{s=N_{m,t} + 1}^{t} \bone\{m_s = m\}\x_{m_s,s}\eta_{m_s,s}  \Big\Vert_{\V^{-1}_{ser,t}}\\
         \le & \sum_{m=1}^M \Big\Vert \sum_{s=N_{m,t} + 1}^{t} \bone\{m_s = m\}\x_{m_s,s}\eta_{m_s,s}  \Big\Vert_{\V_{ser,t}^{-1}}\\
         \le & \sqrt{2\gamma_1} \sum_{m=1}^M \Big\Vert \sum_{s=N_{m,t} + 1}^{t} \bone\{m_s = m\}\x_{m_s,s}\eta_{m_s,s}  \Big\Vert_{(\V_{m,t}^{loc} + \gamma_1\lambda\bI)^{-1}}\\
        \le & \sqrt{2\gamma_1}M \bigg(\sigma\sqrt{d\log\Big(\frac{2(1+t/(\gamma_1\lambda))}{\delta}\Big)}\bigg)
    \end{split}
\end{align}
with probability at least $1-\delta/2$. The second inequality holds since, $\forall m\in\M$,
\begin{align}
\begin{split}
\nonumber
    \V_{ser,t} &\succeq  
 \frac{1}{\gamma_1} \V^{loc}_{m,t}\\
   \frac{1}{2}\V_{ser,t} + \frac{1}{2}\V_{ser,t} &\succeq \frac{1}{2}\lambda\bI + \frac{1}{2\gamma_1} \V^{loc}_{m,t}.
\end{split}
\end{align}

Combining (\ref{68}), (\ref{69}) and (\ref{70}), and noting that neither the server nor the agents can directly observe $t$, we can utilize $(1+\gamma_2 M) \sum_{k=1}^K T_{ser,t}(k)$ (an upper bound on $t$ by Lemma \ref{lemmarela2}) to replace $t$ and the above inequalities still hold. We can finally get $\forall t\in[K+1,\tau]$
\begin{align}
    \begin{split}
    \nonumber
    \Vert \t^* - \hat\t_{ser,t} \Vert_{\V_{ser,t}} \le \sqrt{\lambda} + \Big(\sqrt{2\gamma_1}M + \sqrt{1 + \gamma_1 M}\Big) \bigg(\sigma\sqrt{d\log\bigg(\frac{2}{\delta}\bigg(1+\frac{(1+\gamma_2 M) \sum_{k=1}^K T_{ser,t}(k)}{\min(\gamma_1,1)\lambda}\bigg)\bigg)} \bigg)
    \end{split}
\end{align}
holds with probability at least $1-\delta$. Combine this with (\ref{67}), here we finish the proof of Lemma \ref{lemmaprobabilitybound2}.
\end{proof}

Before proving Lemma \ref{serverlemma2}, we first need to establish Lemma \ref{lemmamatrixbound} and Lemma \ref{lemmabound2} below.

\begin{lemma} \label{lemmamatrixbound} Following the setting of Theorem \ref{theorem2}, $\forall t\in [K+1,\tau]$, the matrix norm $\Vert \y(i,j) \Vert_{\V_{m_t,t}^{-1}}$ can be bounded by
\begin{align}\label{72}
    \Vert \y(i,j) \Vert_{\V_{m_t,t}^{-1}} \le \sqrt{\frac{\rho(\y(i, j))}{T_{m_t,t}(i, j)}},\ \text{and}\ \Vert \y(i,j) \Vert_{\V^{-1}_{ser,t}} \le \sqrt{\frac{\rho(\y(i, j))}{T_{ser,t}(i, j)}},\ \forall i,j \in \A,
\end{align}
where
\begin{align}\label{93}
\begin{split}
    & T_{m_t,t}(i,j) = \min_{k\in \A,\ p_k^*(\y(i,j)) > 0} T_{m_t,t}(k)/p_k^*(\y(i,j))\\
    & T_{ser,t}(i,j) = \min_{k\in \A,\ p_k^*(\y(i,j)) > 0} T_{ser,t}(k)/p_k^*(\y(i,j)).
\end{split}
\end{align}
\end{lemma}

\begin{proof}[Proof of Lemma \ref{lemmamatrixbound}] According to the Lemma 2 of \cite{Xu2017AFA}, the optimal value of (\ref{programming}) (i.e., $\rho(\y(i,j))$) is equal to the optimal value of
\begin{align}\label{40}
    \begin{split}
       & \min_{p_k,w_k}\sum_{k=1}^K \frac{w_k^2}{p_k}\\
        s.t.\quad & \y(i,j) = \sum_{k=1}^K w_k\x_k\\
        & \sum_{k=1}^Kp_k = 1,\  p_k>0,\ w_k\in\R,
    \end{split}
\end{align}
for all $i,j\in\A$.

Since $\V_{m_t,t}$ and $T_{m_t,t}(k)$, $\forall k\in\A,\ t\in[K+1,\tau]$, are all downloaded from the server, $\forall t_1\in[K+1,\tau]$ there exists a $t_2\in[K+1,\tau]$ which satisfies 
\begin{align}
\nonumber
    \V_{m_{t_1},t_1} = \V_{ser,t_2}\ \text{and}\ T_{m_{t_1},t_1}(k) = T_{ser,t_2}(k),\ \forall k\in\A.
\end{align}
Therefore, we only need to prove the second inequality of (\ref{72}). We can decompose the covariance matrix $\V_{ser,t}  = \lambda\bI + \sum_{k=1}^K T_{ser,t}(k) \x_k\x_k^\top$. We define the auxiliary covariance matrix as $\tilde{\V}_{ser,t} = \lambda\bI + \sum_{k=1}^K T_{ser,t}(i,j)p_k^*(\y(i,j)) \x_k\x_k^\top$. From (\ref{93}), we have 
\begin{align}
\nonumber
    T_{ser,t}(i,j)p_k^*(\y(i,j)) \le T_{ser,t}(k),\ \forall k,i,j\in\A
\end{align}
which implies $\tilde{\V}_{ser,t} \preceq \V_{ser,t}$ and 
\begin{align}
\nonumber
    \y(i,j)^\top \V_{ser,t}^{-1} \y(i,j) \le \y(i,j)^\top \tilde{\V}_{ser,t}^{-1} \y(i,j),\ i,j \in \A.
\end{align}
We then bound $\y(i,j)^\top \tilde{\V}_{ser,t}^{-1} \y(i,j)$, according to the KKT condition of (\ref{40}), we have the following formulas
\begin{align}
\begin{split}
\nonumber
    & w_k^*(\y(i,j)) = \frac{1}{2} p_k^*(\y(i,j)) \x_k^\top\varepsilon,\ \forall k,i,j\in\A \\
    & \y(i,j) = \frac{1}{2}\sum_{k=1}^K p_k^*(\y(i,j))\x_k \x_k^\top \varepsilon,\ \forall i,j\in\A,
\end{split}
\end{align}
where $\varepsilon\in\R^d$ corresponds to the Lagrange multiplier. Hence, we can rewrite $\y(i,j)^\top \tilde{\V}_{ser,t}^{-1} \y(i,j)$ as
\begin{align}\label{99}
    \y(i,j)^\top \tilde{\V}_{ser,t}^{-1} \y(i,j) = \frac{1}{4} \bigg(\sum_{k=1}^K p_k^*(\y(i,j))\x_k \x_k^\top \varepsilon\bigg)^\top \tilde{\V}_{ser,t}^{-1} \bigg(\sum_{k=1}^K p_k^*(\y(i,j))\x_k \x_k^\top \varepsilon\bigg).
\end{align}
Besides, based on (\ref{40}), we can rewrite $\rho(\y(i,j))$ as 
\begin{align}\label{100}
    \rho(\y(i,j)) = \sum_{k=1}^K \frac{w_k^{*2}(\y(i,j))}{p_k^*(\y(i,j))} = \frac{1}{4} \varepsilon^\top \bigg( \sum_{k=1}^K p_k^*(\y(i,j))\x_k\x_k^\top \bigg) \varepsilon.
\end{align}
In the light of (\ref{99}) and (\ref{100}), we can bound $\y(i,j)^\top \tilde{\V}_{ser,t}^{-1} \y(i,j) - \rho(\y(i,j))/T_{ser,t}(i,j)$ with $0$
\begin{align}
    \begin{split}
    \nonumber
&\y(i,j)^\top \tilde{\V}_{ser,t}^{-1} \y(i,j) - \frac{\rho(\y(i,j))}{T_{ser,t}(i,j)}\\ =& \frac{1}{4}\varepsilon^\top \bigg( \bigg(\sum_{k=1}^K p_k^*(\y(i,j))\x_k\x_k^\top\bigg) - \frac{\tilde{\V}_{ser,t}}{T_{ser,t}(i,j)} \bigg)\tilde{\V}_{ser,t}^{-1}\bigg(\sum_{k=1}^K p_k^*(\y(i,j))\x_k\x_k^\top\bigg) \varepsilon\\
=& - \frac{\lambda}{4T_{ser,t}(i,j)}\varepsilon^\top \tilde{\V}_{ser,t}^{-1}\bigg(\sum_{k=1}^K p_k^*(\y(i,j))\x_k\x_k^\top\bigg) \varepsilon\\
\le & 0.
    \end{split}
\end{align}
The second equality holds due to the definition of the $\tilde{\V}_{ser,t}$, and the last inequality holds due to $\lambda >0$ and the definition of the positive definite matrix. Here we finish the proof of Lemma \ref{lemmamatrixbound}.
\end{proof}

\begin{lemma}\label{lemmabound2}
    Under the setting of Theorem \ref{theorem2} and event $\I$, $\forall t\in[K+1,\tau]$, $B(t)$ can be bounded as follows
    \begin{align}
    \nonumber
    B(t) \le \min\Big(0,-\max\Big(\Delta(k^*,i_{ser,t}),\Delta(k^*,j_{ser,t})\Big) +  2\alpha^L_{ser,t}(i_{ser,t},j_{ser,t})\Big) + \alpha^L_{ser,t}(i_{ser,t},j_{ser,t}).
    \end{align}
\end{lemma}

\begin{proof}[Proof of Lemma \ref{lemmabound2}] 
    This proof is similar to the proof of Lemma \ref{lemmabound1}. According to the definition of the event $\I$, consider the case when $i_{ser,t} = k^*$, we have
\begin{align}
\begin{split}\label{83}
    B(t) &= \hat{\Delta}_{ser,t}(j_{ser,t},i_{ser,t}) + \alpha^L_{ser,t}(i_{ser,t},j_{ser,t}) \\& \le  \Delta(j_{ser,t},i_{ser,t}) + 2\alpha^L_{ser,t}(i_{ser,t},j_{ser,t})
    \\&
    = - \Delta(k^*,j_{ser,t}) + 2\alpha^L_{ser,t}(i_{ser,t},j_{ser,t}).
\end{split}
\end{align}

Consider the case when $j_{ser,t} = k^*$, we have
\begin{align}
\begin{split}\label{84}
   B(t) & = \hat{\Delta}_{ser,t}(j_{ser,t},i_{ser,t}) + \alpha^L_{ser,t}(i_{ser,t},j_{ser,t})\\ & \le  -\hat{\Delta}_{ser,t}(j_{ser,t},i_{ser,t}) + \alpha^L_{ser,t}(i_{ser,t},j_{ser,t})
   \\& \le  -\Delta(j_{ser,t},i_{ser,t}) + 2\alpha^L_{ser,t}(i_{ser,t},j_{ser,t})
    \\&
    = - \Delta(k^*,i_{ser,t}) + 2\alpha^L_{ser,t}(i_{ser,t},j_{ser,t}),
\end{split}
\end{align}
where the first inequality is owing to $\hat{\Delta}_{ser,t}(j_{ser,t},i_{ser,t})\le 0$. 

Combine (\ref{83}) and (\ref{84}), it yields
\begin{align}\label{85}
  B(t) \le \min\Big(0,-\max\Big(\Delta(k^*,i_{ser,t}),\Delta(k^*,j_{ser,t})\Big) +  \alpha^L_{ser,t}(i_{ser,t},j_{ser,t})\Big) + \alpha^L_{ser,t}(i_{ser,t},j_{ser,t})
\end{align}
when $i_{ser,t} = k^*$ or $j_{ser,t} = k^*$. 

Consider the case when $i_{ser,t} \not= k^*$ and $j_{ser,t} \not= k^*$, then we can derive
\begin{align}\label{106}
\begin{split}
    B(t) &= \hat{\Delta}_{ser,t}(j_{ser,t},i_{ser,t}) + \alpha^L_{ser,t}(i_{ser,t},j_{ser,t})\\ & \le \Delta(j_{ser,t},k^*) + \Delta(k^*,i_{ser,t}) + 2\alpha^L_{ser,t}(i_{ser,t},j_{ser,t}) \\& \le \Delta(j_{ser,t},k^*) + 3\alpha^L_{ser,t}(i_{ser,t},j_{ser,t}) \\
    & = -\Delta(k^*,j_{ser,t}) + 3\alpha^L_{ser,t}(i_{ser,t},j_{ser,t})
\end{split}
\end{align}
where the second inequality holds is owing to 
\begin{align}
\begin{split}
\nonumber
\alpha^L_{ser,t}(i_{ser,t},j_{ser,t}) &\ge \hat{\Delta}_{ser,t}(j_{ser,t},i_{ser,t}) + \alpha^L_{ser,t}(i_{ser,t},j_{ser,t})\\ 
&\ge \hat{\Delta}_{ser,t}(k^*,i_{ser,t}) + \alpha^L_{ser,t}(i_{ser,t},k^*)\\
&\ge \Delta(k^*,i_{ser,t}).
\end{split}
\end{align} 
We also can show
\begin{align}
\begin{split}\label{89}
    B(t) &= \hat{\Delta}_{ser,t}(j_{ser,t},i_{ser,t}) + \alpha^L_{ser,t}(i_{ser,t},j_{ser,t}) \\& \le  \alpha^L_{ser,t}(i_{ser,t},j_{ser,t})\\
    &\le  
 - \Delta(k^*,i_{ser,t}) + \hat{\Delta}_{ser,t}(k^*,i_{ser,t}) + \alpha^L_{ser,t}(k^*,i_{ser,t}) + \alpha^L_{ser,t}(i_{ser,t},j_{ser,t})\\
    & \le  - \Delta(k^*,i_{ser,t}) + \hat{\Delta}_{ser,t}(j_{ser,t},i_{ser,t}) + \alpha^L_{ser,t}(i_{ser,t},j_{ser,t}) + \alpha^L_{ser,t}(i_{ser,t},j_{ser,t})
    \\
    & \le -\Delta(k^*,i_{ser,t}) +  2\alpha^L_{ser,t}(i_{ser,t},j_{ser,t}).
\end{split}
\end{align}
The second inequality is due to the definition of the event $\I$ and the third inequality is due to the definition of $j_{ser,t}$. 

Combine (\ref{106}) and (\ref{89}), it yields
\begin{align}\label{90}
        B(t) \le \min\Big(0,-\max\Big(\Delta(k^*,i_{ser,t}),\Delta(k^*,j_{ser,t})\Big) + 2\alpha^L_{ser,t}(i_{ser,t},j_{ser,t}) \Big) + \alpha^L_{ser,t}(i_{ser,t},j_{ser,t}).
\end{align}
Combine the (\ref{85}) and (\ref{90}), then we can finish the proof of Lemma \ref{lemmabound2}.
\end{proof}

\begin{proof} [Proof of Lemma \ref{serverlemma2}] The difference between this proof and Lemma \ref{serverlemma1} is Algorithm \ref{alg2} employs a different arm selection strategy. We define at round $t_k\not = \tau$, an agent $m$ communicates with the server and $k_{ser,t_k} = k$, where
\begin{align}
    k_{ser,t_k} = \arg\min_{k\in\A} \frac{T_{ser,t_k}(k)}{p^*_{k}(\y(i_{ser,t_k},j_{ser,t_k}))}.
\end{align}
And from round $t \in [t_k + 1,\tau]$, when any agent  communicating with the server, $k_{ser,t} \not= k$ . This implies
\begin{align}
\begin{split}
\nonumber
    T_{ser,\tau}(k) \le&   T_{m,t_k+1}(k) + (\gamma_2 M) \sum_{s=1}^K T_{ser,\tau}(s)\\
    =&T_{ser,t_k}(k) + (\gamma_2 M) \sum_{s=1}^K T_{ser,\tau}(s).
\end{split}
\end{align}
The inequality holds because, for $t\in[t_k + 1,\tau]$, each agent $m\in\M$ uploads $T_{m,t}^{loc}(k)>0$ to the server at most once, and $T_{m,t}^{loc}(k) \le \gamma_2 \sum_{s=1}^K T_{ser,\tau}(s)$ according to Lemma \ref{lemmarela2}.

 With Lemma \ref{lemmabound2}, we can derive
\begin{align}
\begin{split}\label{49}
    \epsilon \le& B(t_k)\\
             \le& \min\Big(0,-\max(\Delta(k^*,i_{ser,t_k}),\Delta(k^*,j_{ser,t_k})) + 2\alpha^L_{ser,t_k}(i_{ser,t_k},j_{ser,t_k}) \Big) + \alpha^L_{ser,t_k}(i_{ser,t_k},j_{ser,t_k}).
\end{split}
\end{align}
We would further bound $T_{ser,t_k}(k)$. Recalling the arm selection strategy of Algorithm \ref{alg2}, when $k$ is chosen by agent $m$ in round $t_k + 1$, this implies
\begin{align}\label{101}
    T_{ser,t_k}(i_{ser,t_k},j_{ser,t_k}) = T_{ser,t_k}(k)/p^*_k(\y(i_{ser,t_k},j_{ser,t_k})).
\end{align}
Recalling the definition of $\alpha^L_{ser,t_k}(i_{ser,t_k},j_{ser,t_k})$ and substituting (\ref{72}) and (\ref{49}) into (\ref{101}), we can derive
\begin{align}
\begin{split}
\nonumber
  T_{ser,t_k}(k)  & \le \frac{\rho(\y(i_{ser,t_k},j_{ser,t_k}))p^*_k(\y(i_{ser,t_k},j_{ser,t_k}))}{\max\big(\frac{\Delta(k^*,i_{ser,t_k}) + \epsilon}{3}, \frac{\Delta(k^*,j_{ser,t_k}) + \epsilon}{3}, \epsilon\big)^2} C^{2}_{ser,t_k}\\ & \le \max_{i,j\in\A} \frac{\rho(\y(i,j))p^*_k(\y(i,j))}{\max\big(\frac{\Delta(k^*,i) + \epsilon}{3}, \frac{\Delta(k^*,j) + \epsilon}{3}, \epsilon\big)^2} C^{2}_{ser,\tau}.
\end{split}
\end{align}
We can finally bound $T_{ser,\tau}(k)$, i.e.
\begin{align}
\begin{split}
\nonumber
    T_{ser,\tau}(k) &\le \max_{i,j\in\A} \frac{\rho(\y(i,j))p^*_k(\y(i,j))}{\max\big(\frac{\Delta(k^*,i) + \epsilon}{3}, \frac{\Delta(k^*,j) + \epsilon}{3}, \epsilon\big)^2} C^{2}_{ser,\tau} + \gamma_2 M \sum_{s=1}^K T_{ser,\tau}(s).
\end{split}
\end{align}
Here we finish the proof of Lemma \ref{serverlemma2}.
\end{proof}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{AUXILIARY LEMMAS}\label{sectionG}

\begin{lemma}[Conditionally $\sigma$-sub-Gaussian noise \citep{AbbasiYadkori2011ImprovedAF,Lattimore2020BanditA,Li2021AsynchronousUC,He2022ASA}] \label{auxlemma7} The noise $\eta_{m_t,t}$ of the linear case is drawn from a conditionally $\sigma$-sub-Gaussian distribution, which satisfies
\begin{align}
    \bE\Big[ e^{\lambda \eta_{m_t,t}} \Big\vert \x_{m_1,1},...,\x_{m_t,t},m_1,...,m_t,\eta_{m_1,1},...,\eta_{m_{t-1},t-1} \Big] \le e^{\sigma^2\lambda^2/2},\quad \forall \lambda \in \R.
\end{align}
\end{lemma}

\begin{lemma} [Hoeffding inequality] \label{auxlemma1}
    Suppose $X_1,X_2,...,X_n$ are drawn i.i.d.\ from a $\sigma$-sub-Gaussian distribution and $\bar{X} = (1/n) \sum_{s=1}^n X_s$ represents the empirical mean, then for any $a \ge 0$
\begin{align}
\nonumber
    \bP(\vert \bE[X] - \bar{X}\vert \ge a) \le 2e^{-a^2n/(2\sigma^2)}.
\end{align}
\end{lemma}

\begin{lemma} [Lemma 10 of \cite{AbbasiYadkori2011ImprovedAF}] \label{auxlemma2} The determinant of the covariance matrix can be bounded by
\begin{align} 
\nonumber
    \mbox{det} \Big(\lambda\bI + \sum_{s=1}^t\x_{m_s,s}\x_{m_s,s}^\top\Big) \le  \Big(\lambda + \frac{t}{d}\Big)^d.
\end{align}
\end{lemma}

\begin{lemma}[Lemma 2.3 of \cite{Tie2011RearrangementIF}] \label{auxlemma3}
For arbitrary positive definite matrices $A$, $B$ and $C$, it has
\begin{align} 
\nonumber
    \text{det}(A + B + C)  \text{det}(A) \le \text{det}(A + B) \text{det}(A + C).
\end{align}
\end{lemma}

\begin{lemma} [Lemma 2.2 of \cite{Tie2011RearrangementIF}] \label{auxlemma4}
For arbitrary positive definite matrices $A$, $B$ and $C$, it has
\begin{align}
\nonumber
    \text{det}(A + B + C) + \text{det}(A) \ge \text{det}(A + B) + \text{det}(A + C).
\end{align}
\end{lemma}

\begin{lemma} [Lemma 12 of \cite{AbbasiYadkori2011ImprovedAF}]\label{auxlemma5}
For arbitrary positive definite matrices $A$ and $B$ satisfying $A\succ B$, it has
\begin{align}
\nonumber
    \frac{\Vert\x\Vert^2_A}{\Vert\x\Vert^2_B} \le \frac{\text{det}(A)}{\text{det}(B)}.
\end{align}
\end{lemma}

\begin{lemma} [Theorem 1 of \cite{AbbasiYadkori2011ImprovedAF}] \label{auxlemma6} For $t\in\vert t\vert$, it has
\begin{align}
\nonumber
    \Big\Vert \sum_{s=1}^t\x_{m_s,s}\eta_{m_s,s} \Big\Vert_{(\lambda\bI + \sum_{s=1}^t\x_{m_s,s}\x_{m_s,s}^\top)^{-1}} 
    \le \sigma\sqrt{d\log\Big(\frac{1+t/\lambda}{\delta}\Big)}
\end{align}
holds with probability at least $1-\delta$.
\end{lemma}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%



