\section{Experts Drawn from a Finite Set}\label{sec:intro}
%branching experts problem \cite{Gofer13}. 
Consider a finite set $\hat{\mathcal{B}}$ containing $N$ experts. Either the expert set is unknown or $N \gg 1$. At each round $t \in \{1,\ldots, T\}$, the environment draws an expert uniformly at random from $\hat{\mathcal{B}}$ and presents it to the learner. An expert presented in a round could either be a repetition or a new expert. Let $n_t$ denote the number of unique experts revealed till time $t$. The new experts are indexed in the order they are revealed, i.e., $n_t = n_{t-1}+1$, if a new expert is revealed, and $n_t = n_{t-1}$, otherwise. Thus, the set of experts in round $t$ is $\mathcal{B}_t = \{1,2,\ldots, n_t\}$. 

Let $\pmb{p}_t$ be the probability mass function used by the learner for choosing an expert from $\mathcal{B}_t$, and let $\pmb{l}_t = (l_t(1),\ldots,l_t(n_t)) \in [0,1]^{n_t}$ be the loss vector revealed after choosing an expert. In round $t$, the cumulative loss of expert $i \in \mathcal{B}_t$ is $$L_t(i) = \sum_{r = 1}^tl_r(i),$$ and the expected cumulative loss of the learner is $L_t = \sum_{r = 1}^t \langle \pmb{p}_r, \pmb{l}_r \rangle$. We assume that once a new expert $n_{t}$ is revealed, the learner will have access to its cumulative loss $L_{t-1}(n_t)$. For $\hat{t} > t$, $L^{\hat{t}}_{t}(i)$ and $L^{\hat{t}}_{t}$ are the cumulative losses in the slots $\{t+1,\ldots,\hat{t}\}$ under expert $i$ and the learner, respectively, given by $$L^{\hat{t}}_{t}(i) = \sum_{r = t+1}^{\hat{t}}l_r(i) \text{ and } L^{\hat{t}}_{t} = \sum_{r = t+1}^{\hat{t}} \langle \pmb{p}_r, \pmb{l}_r \rangle.$$

Define $$L_t^* = \min_{i \in \mathcal{B}_t} L_t(i).$$ The aim is to minimize the regret $R_T = L_T - L_T^*$. Let $b_t$ denote the best expert till time slot $t$, i.e., $$b_t = \argmin_{i\in \mathcal{B}_t} L_t(i).$$ Note that $\mathcal{B}_T \subseteq \hat{\mathcal{B}}$. 


\subsection{The \algo-Hedge Algorithm}
We propose a modification to \algo~that uses \algo~only for the slots $t \leq \tau$, for some $\tau < T$, and uses the vanilla Hedge for $t > \tau$ using the experts from $\mathcal{B}_\tau$ and ignoring any newly revealed experts.

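Let $q_\tau = \mathds{P}(b_T \in \mathcal{B}_\tau)$ denote the probability that the best expert for $T$ slots belongs to the set of experts revealed in the first $\tau$ slots. We have $$q_\tau = 1- (1-\nu(b_T))^\tau,$$ where $\nu(i)$ denotes the probability that expert $i$ is drawn in a given round; under uniform sampling, $\nu(i) = 1/N$ for all $i \in \hat{\mathcal{B}}$.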
\begin{algorithm}[t]
\caption{\algo-Hedge (with parameter $\tau$)}\label{alg2}
\begin{algorithmic}[1]
\STATE For $t = 1, \ldots, \tau$, use \algo
\STATE For $t > \tau$, use the vanilla Hedge only using the experts from  $\mathcal{B}_\tau$ and resetting the weights $w_{i,\tau+1} = 1$, for all $i \in \mathcal{B}_\tau$.
\end{algorithmic}
\end{algorithm}

\begin{lemma}\label{lem2:1_to_tau}
For any $\tau \leq T$, the regret bound for \algo~is given by
    \begin{align*}
    L_{\tau} - L_{\tau}(b_{\tau}) \leq \frac{\eta \tau}{8} + \frac{e^{\eta {\tau}}}{\eta}(\ln n_{\tau}+1).
    \end{align*}
\end{lemma}
\begin{proof}
    Step 1:
    \begin{align}\label{eq1:lem1}
    \ln \frac{W_{\tau+1}}{W_1}&= \ln \sum_{i=1}^{n_\tau} e^{-\eta L_\tau(i)}\nonumber \\ 
    &\geq  \ln \max_{i \in \mathcal{B}_\tau} e^{-\eta L_\tau(i)}\nonumber \\
    &\geq \max_{i \in \mathcal{B}_\tau} \ln e^{-\eta L_\tau(i)}\nonumber \\
    &= -\eta \min_{i \in \mathcal{B}_\tau} L_\tau(i) \nonumber \\
    &= -\eta L_\tau(b_\tau).
    \end{align}
    Step 2: 
    We have
\begin{align}\label{eq2:lem1}
    \frac{w_t(n_t)}{W_{t}} &= \frac{e^{-\eta L_t(n_t)}}{\sum_{j = 1}^{n_t} e^{-\eta L_t(j)}}\nonumber\\
    &\leq \frac{1}{n_t}(e^{-\eta \sum_{j=1}^{n_t} L_t(j)})^{-1/n_t} \nonumber\\
    &\leq \frac{1}{n_t} (e^{-\eta t n_t})^{-1/n_t} \nonumber\\
    &= \frac{e^{\eta t} }{n_t}.
\end{align}
    Let $E_t$ denote the event of a new expert arrival in round $t$. For $t \leq \tau$,
    \begin{align}\label{eq3:lem1}
    &\ln \frac{W_{t+1}}{W_t} = \ln \frac{W_{t+1}}{\hat{W}_{t}} + \ln \frac{\hat{W}_{t}}{W_{t}} \nonumber\\
    &\leq -\eta \langle \pmb{p}_t, \pmb{l}_t\rangle + \frac{\eta^2}{8} + \mathbbm{1}(E_t)\ln \left(1 + \frac{w_t(n_t)}{W_{t}}\right)\nonumber\\
    &\leq -\eta \langle \pmb{p}_t, \pmb{l}_t\rangle + \frac{\eta^2}{8} + \mathbbm{1}(E_t) \frac{w_t(n_t)}{W_{t}} \nonumber\\
    &\leq -\eta \langle \pmb{p}_t, \pmb{l}_t\rangle + \frac{\eta^2}{8} + \mathbbm{1}(E_t)\frac{e^{\eta t} }{n_t}
    \end{align}
   In the last inequality above, we have used \eqref{eq2:lem1}. Therefore,
\begin{align}\label{eq4:lem1}
    \ln \frac{W_{\tau+1}}{W_1} &= \ln \prod_{t = 1}^{\tau} \frac{W_{t+1}}{W_t} \nonumber\\
    &= \sum_{t = 1}^{\tau} \ln \frac{W_{t+1}}{W_t} \nonumber\\
    &\leq -\eta  \sum_{t = 1}^{\tau} \langle \pmb{p}_t, \pmb{l}_t\rangle + \frac{\eta^2 \tau}{8} +  \sum_{t = 1}^{\tau}\mathbbm{1}(E_t)\frac{e^{\eta t} }{n_t}\nonumber \\
    &\leq -\eta L_\tau + \frac{\eta^2 \tau}{8} + e^{\eta \tau} \sum_{i = 1}^{n_\tau} \frac{1}{i} \nonumber \\
    &\leq -\eta L_\tau + \frac{\eta^2 \tau}{8} + e^{\eta \tau} (\ln n_\tau + 1).
\end{align}
The result follows from further manipulation of \eqref{eq4:lem1} and \eqref{eq1:lem1}.
\end{proof}
For $t > \tau$, \algo-Hedge uses Hedge with the set of experts from $\mathcal{B}_\tau$. Since $b_T$ belongs to $\mathcal{B}_\tau$ with probability $q_\tau$, with this probability the standard regret bound of Hedge applies to the losses $L^T_\tau$ and $L^T_\tau (b_T)$ for the horizon $T-\tau$. This is stated in the following lemma and the proof is given for completeness.
\begin{lemma}\label{lem3:tau_to_T}
For $t > \tau$, the regret bound for Hedge is given by 
    \begin{align*}
        L^T_{\tau} - L_\tau^T(b_T) \leq \frac{\eta(T-\tau)}{8} + \frac{\ln n_\tau}{\eta}, \; \text{w.p. } q_\tau.
    \end{align*}
\end{lemma}
\begin{proof}
The modified \algo~in Algorithm~\ref{alg2} uses vanilla Hedge with the experts from $\mathcal{B}_\tau$ for all $t > \tau$. Therefore, $W_t$ and $W_{t-1}$ will have the same number of weights, and using Hoeffding's inequality, we obtain 
\begin{align}\label{eq1:lem3}
    \ln \frac{W_t}{W_{t-1}} \leq  -\eta \langle \pmb{p}_t, \pmb{l}_t\rangle + \frac{\eta^2}{8},\; \forall t > \tau.
\end{align}
    \begin{align}\label{eq2:lem3}
    \ln \frac{W_T}{W_{\tau}}&= \sum_{t=\tau + 1}^{T} \ln  \frac{W_t}{W_{t-1}}\nonumber \\ 
    &\leq -\eta \sum_{t=\tau + 1}^{T} \langle \pmb{p}_t, \pmb{l}_t\rangle + \frac{(T-\tau)\eta^2}{8}\nonumber \\
    &= -\eta L^T_\tau + \frac{(T-\tau)\eta^2}{8}.
    \end{align}
    In the second step above, we have used \eqref{eq1:lem3}. Again,
    \begin{align}\label{eq3:lem3}
        \ln \frac{W_T}{W_{\tau}} &= \ln \left(\frac{\sum_{i=1}^{n_\tau} e^{-\eta L_\tau^T(i)}}{n_\tau}\right) \nonumber\\
        &\geq \ln \left(\max_{i \in \mathcal{B}_\tau} e^{-\eta L_\tau^T(i)}\right) - \ln n_\tau \nonumber\\
        &\geq \ln e^{-\eta L_\tau^T(b_T)} - \ln n_\tau, \; \text{w.p. } q_\tau \nonumber\\
        & = -\eta L_\tau^T(b_T) - \ln n_\tau, \; \text{w.p. } q_\tau.
    \end{align}
    Combining \eqref{eq2:lem3} and \eqref{eq3:lem3} and dividing by $\eta$ yields the result.
\end{proof}

\begin{theorem}
    The regret $R_T$ of the modified \algo~satisfies, 
    \begin{align*}
        R_T \leq \frac{\eta T}{8} + \frac{(e^{\eta \tau}+1)\ln n_{\tau}}{\eta} + \frac{e^{\eta \tau}}{\eta} , \; \text{w.p. } q_\tau.
    \end{align*}
    
\end{theorem}
\begin{proof}
%Recall that $b_T$ is the best expert. Since the experts are drawn uniformly at random, the probability that the best expert occurs in the first $\tau$ slots is given by $$\mathds{P}(b_T \in \mathcal{B}_\tau) = 1-\left(1-\frac{1}{N}\right)^\tau.$$
   Let $b^T_t$ denote the best expert for the duration from $t+1$ to $T$ slots, i.e.,
    \begin{align*}
        b^T_t = \argmin_{i \in \mathcal{B}_T} L^T_{t}(i).
    \end{align*}
We have
    \begin{align}\label{eq:expandRegret}
        &L_T - L^*_T = L_{\tau} - L^*_{\tau} + L^T_{\tau} - L^*_T + L^*_{\tau} \nonumber\\
        &= L_{\tau} - L_{\tau}(b_{\tau}) + L^T_{\tau} - (L_{\tau}(b_T) + L_{\tau}^T(b_T)) + L_{\tau}(b_{\tau}) \nonumber\\
        &\leq  L_{\tau} - L_{\tau}(b_{\tau}) + L^T_{\tau} - L_{\tau}^T(b_T), \; \text{w.p. } q_\tau.
    \end{align}
    In the last inequality above, we have used $L_{\tau}(b_{\tau}) \leq L_{\tau}(b_T)$, if $b_T \in \mathcal{B}_\tau$. The result follows from substituting the upper bound for $L_{\tau} - L_{\tau}(b_{\tau})$ from Lemma~\ref{lem2:1_to_tau} and the upper bound for $L^T_{\tau} - L_\tau^T(b_T)$ from Lemma~\ref{lem3:tau_to_T} in \eqref{eq:expandRegret}.
    %\begin{align*}
    %    L_T - L^*_T \leq \frac{\eta T}{8} + \frac{e^{\eta {\tau}}+1}{\eta}\ln n_{\tau}
    %\end{align*}
\end{proof}

\subsection*{Next Steps:}
\begin{enumerate}
    \item Extend the result to ``well-behaved'' distributions under which there is a minimum probability for the best expert to be chosen.
    \item Do similar bounds hold for the MAB setting when the arms are revealed randomly?
    \item What if all $N$ experts are known a priori, but we randomly choose one expert at a time without replacement to reduce computational complexity? The cumulative loss of an expert is not computed and stored until the expert is chosen. This in spirit has a connection to the setting in \cite{Cohen2017}, where the experts are sequentially added depending on whether they belong to an $\epsilon$-covering or not.
    \item Future work -- Stochastic setting: Losses are drawn i.i.d. from a fixed (and unknown) distribution \cite{Amir2020}. Consider pseudo regret.
\end{enumerate}