\documentclass{article}

\usepackage{neurips_2023}

\usepackage[utf8]{inputenc} % allow utf-8 input
\usepackage[T1]{fontenc}    % use 8-bit T1 fonts
\usepackage{hyperref}       % hyperlinks
\usepackage{url}            % simple URL typesetting
\usepackage{booktabs}       % professional-quality tables
\usepackage{amsfonts}       % blackboard math symbols
\usepackage{nicefrac}       % compact symbols for 1/2, etc.
\usepackage{microtype}      % microtypography
\usepackage{xcolor}         % colors

\usepackage{algorithm}
\usepackage{algorithmic}

\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % loads amsmath and adds extensions to it

\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{mathtools}
\usepackage{amsthm}
\usepackage{dsfont}

% Editorial margin flags: each macro places a short tag in the margin
% through \marginpar. Neither takes an argument.
\newcommand*{\fix}{\marginpar{FIX}}% margin tag "FIX"
\newcommand*{\new}{\marginpar{NEW}}% margin tag "NEW"

% Theorem-like environments. Every environment below is declared without a
% shared-counter argument, so each one is numbered by its own counter.

% Formal statements.
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem{proposition}{Proposition}
\newtheorem{corollary}{Corollary}
\newtheorem{claim}{Claim}

% Set-up and commentary.
\newtheorem{definition}{Definition}
\newtheorem{assumption}{Assumption}
\newtheorem{example}{Example}
\newtheorem{remark}{Remark}

% Paper-specific numbered statements.
\newtheorem{regret}{Regret}
\newtheorem{gap}{Gap}


% ---------------------------------------------------------------------------
% Notation shortcuts (math mode).
%
% \def is kept on purpose: it overrides silently, and at least one name below
% (\t, LaTeX's tie accent) is already defined, so \newcommand would error.
% NOTE(review): redefining \t disables the tie accent in text, which can
% matter for author names in the bibliography -- consider a rename once every
% use site can be updated.
% ---------------------------------------------------------------------------

% Bold symbols. \boldsymbol comes from amsmath, which this preamble loads;
% \bm would additionally need the bm package, which is not loaded here.
\def \bvarphi {\boldsymbol{\varphi}}
\def \btheta {\boldsymbol{\theta}}
\def \bsigma {\boldsymbol{\Sigma}}
\def \t {\boldsymbol{\theta}}
\def \x {\mathbf{x}}
\def \ba {\mathbf{a}}
\def \bI {\mathbf{I}}

% Sans-serif T.
\def \mt {\mathsf{T}}

% Wrappers around helper macros.
% NOTE(review): \mV, \vx, \sA, \sC, \vb and \vn are not defined anywhere in
% this file; unless another loaded file provides them, using the six macros
% below raises "Undefined control sequence" -- TODO confirm or define them.
\def \bV {\displaystyle\mV}
\def \bx {\displaystyle\vx}
\def \bA {\displaystyle\sA}
\def \bC {\displaystyle\sC}
\def \bb {\displaystyle\vb}
\def \bn {\displaystyle\vn}

% Calligraphic letters.
\def \bT {\mathcal{T}}
\def \bD {\mathcal{D}}
\def \bR {\mathcal{R}}
\def \bP {\mathcal{P}}
\def \bN {\mathcal{N}}
\def \bc {\mathcal{C}}
\def \E {\mathcal{E}}
\def \C {\mathcal{C}}
\def \A {\mathcal{A}}
\def \M {\mathcal{M}}
\def \R {\mathcal{R}}
\def \I {\mathcal{I}}

% Double-struck symbols (\mathds is provided by dsfont).
\def \bone {\mathds{1}}
\def \bE {\mathds{E}}
\def \bB {\mathbb{B}}

% Plain letter N.
\def \bbN {N}

\title{\textbf{Pure Exploration Distributed Bandits in an Asynchronous Environment}}

\author{}

\begin{document}

\maketitle


\begin{abstract}

\end{abstract}

 % FedAMABPE pseudocode (label: alg3).
 % Outline: each of the K arms is pulled once to initialise the server and
 % every agent. In each later round t, the active agent m_t pulls an arm and
 % accumulates a local reward sum S^{loc} and a local pull count T^{loc}.
 % When the trigger in the \IF below fires, the agent uploads these local
 % statistics, the server merges them and tests the stopping rule
 % B(t) <= epsilon, and then sends its statistics back to the agent.
 \begin{algorithm}[t]
\renewcommand{\algorithmicrequire}{\textbf{Input:}}
\renewcommand{\algorithmicensure}{\textbf{Output:}}
	\caption{Federated Asynchronous MAB Pure Exploration (\texttt{FedAMABPE}) }
    \label{alg3}
	\begin{algorithmic}[1]
            \STATE \textbf{Inputs:} Arm set $\A$, client set $\M$, triggered parameter $\gamma$ and $(\delta,\epsilon)$
            \STATE \textbf{Initialization:}
            \STATE From round $1$ to $K$, arms $1$ to $K$ are pulled sequentially and reward $r_{t}$ is received, $\forall t\in [K]$
            % Arm k is pulled in round k, so r_k is the single sample of arm k.
            \STATE Server sets $\hat{\mu}_{K}^{ser}(k) = r_{k}$ and $T_{K}^{ser}(k) = 1$, $\forall k\in\A$
            \FOR{$m=1:M$}
            \STATE Agent $m$ sets $\hat{\mu}_{m,K+1}(k) = r_{k}$, $T_{m,K+1}(k) = 1$ and $T_{m,K}^{loc}(k) = S_{m,K}^{loc}(k) = 0$, $\forall k\in\A$
            \ENDFOR
            \FOR {$t = K+1:\infty$}
            \STATE Agent $m_t$ sets $i_{m_t,t} = \arg\max_{k\in\A} \hat{\mu}_{m_t,t}(k) $ and $j_{m_t,t} = \arg\max_{k\neq i_{m_t,t}} \hat{\Delta}_{m_t,t}(k,i_{m_t,t}) + \beta_{m_t,t}(i_{m_t,t},k)$
            \STATE Agent $m_t$ pulls arm $k_{m_t,t} = \arg\max_{k\in\{i_{m_t,t},j_{m_t,t}\}}\beta_{m_t,t}(k)$ and receives reward $r_{m_t,t}$
            \STATE Agent $m_t$ sets $S_{m_t,t}^{loc}(k_{m_t,t}) = S_{m_t,t-1}^{loc}(k_{m_t,t}) + r_{m_t,t}$, $T_{m_t,t}^{loc}(k_{m_t,t}) = T_{m_t,t-1}^{loc}(k_{m_t,t})+1$
            % Communication trigger: equivalent to the local pull count
            % exceeding gamma times the last synchronised pull count.
           \IF {$\sum_{k=1}^K(T_{m_t,t}(k) + T_{m_t,t}^{loc}(k)) > (1+\gamma)\sum_{k=1}^KT_{m_t,t}(k)$}
            % Upload step: the data flows from the agent to the server.
            \STATE \textbf{[Agent $m_t$ $\rightarrow$ Server]} Send $S_{m_t,t}^{loc}(k)$ and $T_{m_t,t}^{loc}(k)$, $\forall k\in\A$ to the server
            \STATE Server updates $\hat{\mu}^{ser}_{t}(k) = (\hat{\mu}^{ser}_{t-1}(k)T_{t-1}^{ser}(k) + S_{m_t,t}^{loc}(k))/ (T_{t-1}^{ser}(k) + T^{loc}_{m_t,t}(k))$, $\forall k\in\A$
            \STATE Server updates $T_{t}^{ser}(k) = T_{t-1}^{ser}(k) + T_{m_t,t}^{loc}(k)$, $\forall k\in \A$
            \STATE Server sets $i^{ser}_t = \arg\max_{k\in\A} \hat{\mu}^{ser}_{t}(k)$, $j^{ser}_t = \arg\max_{k \neq i^{ser}_t} \hat{\Delta}^{ser}_{t}(k,i^{ser}_{t}) + \beta^{ser}_{t}(i^{ser}_{t},k)$
            and $B(t) = \hat{\Delta}^{ser}_{t}(j^{ser}_t,i^{ser}_{t}) + \beta^{ser}_{t}(i^{ser}_{t},j^{ser}_t)$
            \IF {$B(t) \le \epsilon$}
            \STATE Server returns $i_{t}^{ser}$ as the estimated best arm $\hat k^*$ and breaks the loop
            \ENDIF
            % Download step: the server's merged statistics replace the
            % agent's synchronised copy, and the local buffers are cleared.
            \STATE \textbf{[Server $\rightarrow$ Agent $m_t$]} Send $T^{ser}_{t}(k)$ and $\hat{\mu}^{ser}_{t}(k)$, $\forall k\in\A$ to agent $m_t$
            \STATE Agent $m_t$ sets $T_{m_t,t+1}(k) = T^{ser}_{t}(k)$ and $ \hat{\mu}_{m_t,t+1}(k) = \hat{\mu}^{ser}_{t}(k)$, $\forall k\in\A$
            \STATE Agent $m_t$ sets $T_{m_t,t}^{loc}(k) = 0$ and $S_{m_t,t}^{loc}(k) = 0$, $\forall k\in \A$
            \ELSE
            \STATE Agent $m_t$ sets $T_{m_t,t+1}(k) = T_{m_t,t}(k)$ and $ \hat{\mu}_{m_t,t+1}(k) = \hat{\mu}_{m_t,t}(k)$, $\forall k\in\A$
            \STATE Server sets $T^{ser}_{t}(k) = T^{ser}_{t-1}(k)$ and $\hat{\mu}^{ser}_{t}(k) = \hat{\mu}^{ser}_{t-1}(k)$, $\forall k\in \A$
            \ENDIF
            % Inactive agents carry both their synchronised and their local
            % statistics (T^{loc}, S^{loc}) forward unchanged.
            \STATE Agent $m\neq m_t$ sets $T_{m,t+1}(k) = T_{m,t}(k)$ and $ \hat{\mu}_{m,t+1}(k) = \hat{\mu}_{m,t}(k)$, $\forall k\in\A$
            \STATE Agent $m\neq m_t$ sets $T^{loc}_{m,t}(k) = T^{loc}_{m,t-1}(k)$ and $ S^{loc}_{m,t}(k) = S^{loc}_{m,t-1}(k)$, $\forall k\in\A$
            \ENDFOR
	\end{algorithmic}  
\end{algorithm}

% FedALinPE pseudocode (label: alg2).
% Outline: each of the K arms is pulled once to initialise the server and
% every agent. In each later round t, the active agent m_t computes its
% estimate from (V, b), pulls an arm, and accumulates the local statistics
% V^{loc}, b^{loc} and T^{loc}. When either trigger in the \IF below fires
% (determinant ratio or pull-count ratio), the agent uploads them, the server
% merges them and tests the stopping rule B(t) <= epsilon, and then sends its
% statistics back to the agent.
\begin{algorithm}[t]
\renewcommand{\algorithmicrequire}{\textbf{Input:}}
\renewcommand{\algorithmicensure}{\textbf{Output:}}
	\caption{Federated Asynchronous Linear Pure Exploration (\texttt{FedALinPE}) }
 \label{alg2}
	\begin{algorithmic}[1]
            \STATE \textbf{Inputs:} Arm set $\A$, client set $\M$, regularization parameter $\lambda$, triggered parameters $\gamma_1,\ \gamma_2$, $(\delta,\epsilon)$
            \STATE \textbf{Initialization:}
            \STATE From round $1$ to $K$, arms $1$ to $K$ are pulled sequentially and reward $r_{t}$ is received, $\forall t\in [K]$
            \STATE Server sets $V_{K}^{ser} = \lambda\bI + \sum_{t=1}^K\x_{t}\x_{t}^\top$, $b_{K}^{ser} = \sum^K_{t=1} \x_{t} r_{t}$, $T_{K}^{ser}(k) = 1,\ \forall k \in \A$
            \FOR {$m=1:M$}
            \STATE Agent $m$ sets $V_{m,K+1} = \lambda\bI + \sum_{t=1}^K\x_{t}\x_{t}^\top$, $b_{m,K+1} = \sum^K_{t=1} \x_{t} r_{t}$, $T_{m,K+1}(k) = 1$, $V_{m,K}^{loc} = \mathbf{0}$, $b_{m,K}^{loc} = \mathbf{0}$ and $T^{loc}_{m,K}(k) = 0$, $\forall k\in\A$
            \ENDFOR
            \FOR {$t = K+1:\infty$}
            \STATE Agent $m_t$ sets $\hat{\t}_{m_t,t} = V_{m_t,t}^{-1}b_{m_t,t}$, $i_{m_t,t} = \arg\max_{k\in\A} \x_k^\top\hat\t_{m_t,t}$ and $j_{m_t,t} = \arg\max_{k\neq i_{m_t,t}} \hat{\Delta}_{m_t,t}(k,i_{m_t,t}) + \alpha_{m_t,t}(i_{m_t,t},k)$
            % NOTE(review): the label select2 is not defined in this file --
            % confirm that the selection-rule equation exists elsewhere.
            \STATE Agent $m_t$ selects the most informative arm $k_{m_t,t}$ by~\eqref{select2} and receives reward $r_{m_t,t}$
            \STATE Agent $m_t$ updates $V_{m_t,t}^{loc} = V_{m_t,t-1}^{loc} + \x_{m_t,t}\x_{m_t,t}^{\top}$, $b_{m_t,t}^{loc} = b_{m_t,t-1}^{loc} + r_{m_t,t}\x_{m_t,t}$ and $T_{m_t,t}^{loc}(k_{m_t,t}) = T_{m_t,t-1}^{loc}(k_{m_t,t})+1$
            % Communication triggers: determinant growth (gamma_1) or
            % pull-count growth (gamma_2) relative to the synchronised state.
            \IF {$\det(V_{m_t,t} + V_{m_t,t}^{loc}) > (1+\gamma_1)\det(V_{m_t,t})$ \textbf{or} $\sum_{k=1}^K (T_{m_t,t}(k) + T^{loc}_{m_t,t}(k)) > (1+\gamma_2) \sum_{k=1}^K T_{m_t,t}(k)$}
            \STATE \textbf{[Agent $m_t$ $\rightarrow$ Server]} Send $V^{loc}_{m_t,t}$, $b_{m_t,t}^{loc}$ and $T_{m_t,t}^{loc}(k),\ \forall k\in\A$ to the server
            % The inverse applies to the whole matrix V^{ser}_t, not to the
            % superscript label, hence the explicit parentheses.
            \STATE Server sets $V^{ser}_{t} = V^{ser}_{t-1} + V^{loc}_{m_t,t}$, $b^{ser}_{t} = b^{ser}_{t-1} + b_{m_t,t}^{loc}$, $T_{t}^{ser}(k) = T_{t-1}^{ser}(k) + T_{m_t,t}^{loc}(k)$, $\forall k\in\A$ and $\hat{\t}^{ser}_{t} = (V_{t}^{ser})^{-1}b^{ser}_{t}$
            \STATE Server sets
            $i^{ser}_t = \arg\max_{k\in\A} \x_k^\top\hat\t^{ser}_t$, $j^{ser}_t = \arg\max_{k\neq i^{ser}_t} \hat{\Delta}_{t}^{ser}(k,i^{ser}_t) + \alpha_{t}^{ser}(i^{ser}_t,k)$ and $B(t) = \hat{\Delta}_{t}^{ser}(j^{ser}_t,i^{ser}_t) + \alpha_{t}^{ser}(i^{ser}_t,j^{ser}_t)$
            \IF {$B(t) \le \epsilon$}
            \STATE Server returns $i_t^{ser}$ as the estimated best arm $\hat k^*$ and breaks the loop
            \ENDIF
            % Download step: the server's merged statistics replace the
            % agent's synchronised copy, and the local buffers are cleared.
            \STATE \textbf{[Server $\rightarrow$ Agent $m_t$]} Send $V^{ser}_{t}$, $b^{ser}_{t}$ and $T_{t}^{ser}(k)$, $\forall k\in\A$ to agent $m_t$
            \STATE Agent $m_t$ sets $V_{m_t,t+1} = V^{ser}_{t}$, $ b_{m_t,t+1} = b^{ser}_{t}$ and $ T_{m_t,t+1}(k) = T_{t}^{ser}(k)$, $\forall k\in\A$
            \STATE Agent $m_t$ sets $V_{m_t,t}^{loc} = \mathbf{0}$, $b_{m_t,t}^{loc} = \mathbf{0}$ and $T_{m_t,t}^{loc}(k) = 0$, $\forall k\in \A$
            \ELSE
            \STATE Agent $m_t$ sets $V_{m_t,t+1} = V_{m_t,t}$, $ b_{m_t,t+1} = b_{m_t,t}$ and $ T_{m_t,t+1}(k) = T_{m_t,t}(k)$, $\forall k\in\A$
            \STATE Server sets $V^{ser}_{t} = V^{ser}_{t-1}$, $ b^{ser}_{t} = b^{ser}_{t-1}$ and $ T^{ser}_{t}(k) = T^{ser}_{t-1}(k)$, $\forall k\in\A$
            \ENDIF
            % Inactive agents carry both their synchronised and their local
            % statistics forward unchanged.
            \STATE Agent $m \neq m_t$ sets $V_{m,t+1} = V_{m,t}$, $ b_{m,t+1} = b_{m,t}$ and $ T_{m,t+1}(k) = T_{m,t}(k)$, $\forall k\in\A$
            \STATE Agent $m \neq m_t$ sets $V^{loc}_{m,t} = V^{loc}_{m,t-1}$, $ b^{loc}_{m,t} = b^{loc}_{m,t-1}$ and $ T^{loc}_{m,t}(k) = T^{loc}_{m,t-1}(k)$, $\forall k\in\A$
            \ENDFOR
	\end{algorithmic}  
\end{algorithm}

\end{document}