\subsection{Regret analysis for sampling from the combinatorial space of coordinate blocks} \label{sec:comb_regret_analysis}
In this section we provide proofs for the theorems in Sec.~\ref{ss:analysis}.

The probability $\tilde{\pi}_{t,\mathcal{I}_t}$ of selecting a coordinate block $\mathcal{I}_t\subset \mathcal{I}=\{1,\dots,D\}$ of size $|\mathcal{I}_t|=c\in\mathcal{C}$ is derived from the per-coordinate weights $w_{t,i}$ that induce $\pi_t$, as follows:
\begin{align}\label{eq:block_prob}
    \tilde{w}_{t,\mathcal{I}_t} = 
    \prod_{i\in \mathcal{I}_t}w_{t,i}^{\frac{1}{|\mathcal{I}_t|}}
    \quad ; \quad
    \tilde{W}_t = \sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in\mathcal{S}_c}\tilde{w}_{t,\mathcal{I}_t}
    \quad ; \quad
    \tilde{\pi}_{t,\mathcal{I}_t} = \frac{\tilde{w}_{t,\mathcal{I}_t}}{\tilde{W}_t}
    \quad \forall \mathcal{I}_t \in \bigcup_{c\in\mathcal{C}}\mathcal{S}_c
\end{align}
such that 
\begin{equation} \label{eq:prob_sum_to_1_2}
    \sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in\mathcal{S}_c}\tilde{\pi}_{t,\mathcal{I}_t} =1
\end{equation}

In addition, let us first modify the losses in Eq.~\ref{eq:alpha_beta_loss} to be non-negative by adding the same constant $\log(\tilde{\alpha})$ to all possible values:
\begin{align} \label{eq:alpha_beta_loss_shifted}
    \tilde{\ell}_{t,i} =
    \begin{cases}
		0 & \text{if } i \in C_t \text{ and } y_t > M_{t-1} \\
		\log(\tilde{\alpha}\tilde{\beta}) & \text{if } i \in C_t \text{ and } y_t \leq M_{t-1} \\
        \log(\tilde{\alpha}) & \text{if } i \notin C_t
    \end{cases}
\end{align}
This modification does not change the resulting distribution $\pi_t$ induced over the coordinates, as it is invariant to shifts of the losses:
\begin{align}
\notag
\pi_{t,i} = \frac{w_{t,i}}{W_t} &= \frac{e^{-\eta\sum_{\tau=1}^t\tilde{\ell}_{\tau,i}}}{\sum_{j=1}^D e^{-\eta\sum_{\tau=1}^t\tilde{\ell}_{\tau,j}}} =
\frac{e^{-\eta\sum_{\tau=1}^t(\ell_{\tau,i}+\log(\tilde{\alpha}))}}{\sum_{j=1}^D e^{-\eta\sum_{\tau=1}^t(\ell_{\tau,j}+\log(\tilde{\alpha}))}}
=
\frac{e^{-\eta t\log(\tilde{\alpha})}e^{-\eta\sum_{\tau=1}^t\ell_{\tau,i}}}{e^{-\eta t\log(\tilde{\alpha})}\sum_{j=1}^D e^{-\eta\sum_{\tau=1}^t\ell_{\tau,j}}}
\\&=
\frac{e^{-\eta\sum_{\tau=1}^t\ell_{\tau,i}}}{\sum_{j=1}^D e^{-\eta\sum_{\tau=1}^t\ell_{\tau,j}}}
\end{align}
Thus $\tilde{\pi}_{t,\mathcal{I}_t}$ remains unchanged as well, and for simplicity we refer to $\tilde{\ell}$ as $\ell$ in the following.

% \textbf{Proof} 
\subsubsection{Proof of Lemma 1}
\textbf{Lemma 1} For $\eta >0$ and non-negative losses $\ell_{t,i}\geq 0$ the update rule in (\ref{eq:multiplicative_weight_update}) satisfies for any block of coordinates $C^*$:
\begin{equation}\label{eq:lemma_1_2}
 \sum_{t=1}^T\sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in \mathcal{S}_c}\tilde{\pi}_{t,\mathcal{I}_t}\cdot\frac{1}{|\mathcal{I}_t|} \sum_{i\in\mathcal{I}_t}\ell_{t,i} 
 -\sum_{t=1}^T\frac{1}{|C^*|}\sum_{i\in C^*}\ell_{t,i}
\leq 
\eta\sum_{t=1}^T\sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in \mathcal{S}_c}\tilde{\pi}_{t,\mathcal{I}_t}\cdot \left(\frac{1}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)^2 + \frac{|\mathcal{C}|D\log(D)}{\eta}
\end{equation}

\textbf{Proof}: Set: 
\begin{equation} \label{eq:init_2}
    \tilde{w}_{0,\mathcal{I}_t} = 1 \quad \forall \mathcal{I}_t \in \bigcup_{c\in\mathcal{C}}\mathcal{S}_c
\end{equation}
Thus,
\begin{align}
    \tilde{W}_{t+1} &= 
    \sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in\mathcal{S}_c}\tilde{w}_{t+1,\mathcal{I}_t}
    =
    \sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in\mathcal{S}_c}\prod_{i\in \mathcal{I}_t}w_{t+1,i}^{\frac{1}{|\mathcal{I}_t|}}
    \\ & =
    \sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in\mathcal{S}_c}\prod_{i\in \mathcal{I}_t}w_{t,i}^{\frac{1}{|\mathcal{I}_t|}}e^{-\frac{\eta}{|\mathcal{I}_t|}\ell_{t,i}}
    =
    \sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in\mathcal{S}_c}\prod_{i\in \mathcal{I}_t}w_{t,i}^{\frac{1}{|\mathcal{I}_t|}}\cdot e^{-\frac{\eta}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}}
    \\ & = 
    \sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in\mathcal{S}_c}\tilde{w}_{t,\mathcal{I}_t}\cdot e^{-\frac{\eta}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}}
    \\ & = \label{eq:back_to_pi_hat_2}
    \tilde{W}_t\sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in\mathcal{S}_c}\tilde{\pi}_{t,\mathcal{I}_t}\cdot e^{-\frac{\eta}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}}
    \\&\leq \label{eq:exp_ineq_1_2}
    \tilde{W}_t\sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in \mathcal{S}_c}\tilde{\pi}_{t,\mathcal{I}_t}\left(1-\frac{\eta}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}+\eta^2\left(\frac{1}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)^2\right)
    \\&= \label{eq:pi_hat_sums_to_1_2}
    \tilde{W}_t\left(1+
    \sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in \mathcal{S}_c}\tilde{\pi}_{t,\mathcal{I}_t}\left(\eta^2\left(\frac{1}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)^2 -\frac{\eta}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)
    \right)
    \\&\leq \label{eq:exp_ineq_2_2}
    \tilde{W}_t e^{
    \sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in \mathcal{S}_c}\tilde{\pi}_{t,\mathcal{I}_t}\left(\eta^2\left(\frac{1}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)^2 -\frac{\eta}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)
    } 
\end{align}
Where,
\begin{itemize}
    \item (\ref{eq:back_to_pi_hat_2}) follows from (\ref{eq:block_prob}).
    \item (\ref{eq:exp_ineq_1_2}) holds since $e^{-x}\leq 1-x+x^2$ for $x\geq 0$.
    \item (\ref{eq:pi_hat_sums_to_1_2}) holds due to Eq.~\ref{eq:prob_sum_to_1_2}.
    \item (\ref{eq:exp_ineq_2_2}) holds since $1+x\leq e^x$.
\end{itemize}

Due to Eq.~\ref{eq:init_2} we have,
\begin{align} \label{eq:aggergate_block_losses}
  \tilde{w}_{T,\mathcal{I}_t} 
  = 
  \prod_{i\in \mathcal{I}_t}w_{T,i}^{\frac{1}{|\mathcal{I}_t|}} 
  =
  \prod_{i\in \mathcal{I}_t}w_{0,i}^{\frac{1}{|\mathcal{I}_t|}} e^{-\frac{\eta}{|\mathcal{I}_t|}\sum_{\tau=1}^T\ell_{\tau,i}}
  =  e^{-\frac{\eta}{|\mathcal{I}_t|}\sum_{\tau=1}^T\sum_{i\in \mathcal{I}_t}\ell_{\tau,i}}
\end{align}
And,
\begin{equation}\label{eq:big_init_2}
   \tilde{W}_0=\sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in\mathcal{S}_c}\tilde{w}_{0,\mathcal{I}_t}
   =\sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in\mathcal{S}_c}1
   =\sum_{c\in\mathcal{C}}|\mathcal{S}_c|=
   \sum_{c\in\mathcal{C}}\binom{D}{c} \leq (D!)^{|\mathcal{C}|}
\end{equation}
Given that the weight of a certain coordinate block $C^*$ is less than the total sum of all weights, together with Eq.~\ref{eq:exp_ineq_2_2},~\ref{eq:init_2} and~\ref{eq:big_init_2} we have:
\begin{align}
   e^{-\frac{\eta}{|C^*|}\sum_{t=1}^T\sum_{i\in C^*}\ell_{t,i}}
   &=  
   \tilde{w}_{T,C^*} \leq \tilde{W}_T 
   \notag \\ &\leq 
   (D!)^{|\mathcal{C}|} e^{
   \sum_{t=1}^T\sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in \mathcal{S}_c}\tilde{\pi}_{t,\mathcal{I}_t}\left(\eta^2\left(\frac{1}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)^2 -\frac{\eta}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)
  } 
\end{align}
Taking the $\log$ of both sides, we have:
\begin{align} \label{eq:recursive_ineq_2}
-\eta\sum_{t=1}^T\frac{1}{|C^*|}\sum_{i\in C^*}\ell_{t,i}
  & \leq
  \sum_{t=1}^T\sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in \mathcal{S}_c}\tilde{\pi}_{t,\mathcal{I}_t}\left(\eta^2\left(\frac{1}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)^2 -\frac{\eta}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right) 
%   \notag \\& 
  + |\mathcal{C}|\log(D!)
\end{align}
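Dividing both sides by $\eta>0$ and rearranging, the additive term becomes $\frac{|\mathcal{C}|\log(D!)}{\eta}$, and since $D!\leq D^D$ implies $\log(D!)\leq D\log(D)$,
% due to the Stirling approximation~\cite{pearson1924historical} for large $D$ 
it is at most $\frac{|\mathcal{C}|D\log(D)}{\eta}$ and the result follows.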
 
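  \textbf{Theorem 1}  Apply the update rule in (\ref{eq:multiplicative_weight_update}) with a modified $\eta=\frac{1}{\log(\tilde{\alpha}\tilde{\beta})}\sqrt{\frac{|\mathcal{C}|D\log(D)}{T}}$, then:
  \begin{equation*}
      \mathrm{Regret}_T \leq 2\log(\tilde{\alpha}\tilde{\beta})\sqrt{T|\mathcal{C}|D\log(D)}
  \end{equation*}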
%  \begin{equation*}
%      Regret_t = \mathcal{O}(\log(\alpha\beta)\sqrt{T\log(D)})
%  \end{equation*}
 
\textbf{Proof}: Since $\ell_{t,i} \leq \log(\tilde{\alpha}\tilde{\beta})$ then: 
 \begin{align}
     \left(\frac{1}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)^2
     \leq
     \left(\frac{1}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\log(\tilde{\alpha}\tilde{\beta})\right)^2
     =
     \log(\tilde{\alpha}\tilde{\beta})^2
 \end{align}
 
 Thus due to Eq.~\ref{eq:prob_sum_to_1_2}:
 \begin{align}
    \sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in \mathcal{S}_c}\tilde{\pi}_{t,\mathcal{I}_t}\cdot \left(\frac{1}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)^2
     \leq
     \sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in \mathcal{S}_c}\tilde{\pi}_{t,\mathcal{I}_t}\log(\tilde{\alpha}\tilde{\beta})^2
     =
     \log(\tilde{\alpha}\tilde{\beta})^2
 \end{align}
 And setting $\eta=\frac{1}{\log(\tilde{\alpha}\tilde{\beta})}\sqrt{\frac{|\mathcal{C}|D\log(D)}{T}}$ in Eq.~\ref{eq:lemma_1_2} yields:
 \begin{equation}
     \mathrm{Regret}_T \leq \eta T \log(\tilde{\alpha}\tilde{\beta})^2 + \frac{|\mathcal{C}|D\log(D)}{\eta}
    = 
     2 \log(\tilde{\alpha}\tilde{\beta})\sqrt{T|\mathcal{C}|D\log(D)}
 \end{equation}