In this section we provide proofs for the theorems in Section~3.1.1.%\ref{ss:analysis}.
To make the temporal losses non-negative, we shift the losses in Eq.~(3) by adding the same constant $\log(\tilde{\alpha})$ to each of them,
\begin{align} %\label{eq:alpha_beta_loss_shifted}
    \tilde{\ell}_{t,i} =
    \begin{cases}
		0 & \text{if } i \in C_t \text{ and } y_t > M_{t-1} \\
		\log(\tilde{\alpha}\tilde{\beta}) & \text{if } i \in C_t \text{ and } y_t \leq M_{t-1} \\
        \log(\tilde{\alpha}) & \text{if } i \notin C_t. \nonumber
    \end{cases}
\end{align}
This modification does not change the resulting distribution $\pi_t$ induced over the coordinates, since $\pi_{t,i} = w_{t,i}/W_t$ is invariant to shifts of the losses:
\begin{align}
\notag
\pi_{t,i} = \frac{e^{-\eta\sum_{\tau=1}^t\tilde{\ell}_{\tau,i}}}{\sum_{j=1}^D e^{-\eta\sum_{\tau=1}^t\tilde{\ell}_{\tau,j}}} 
&=
\frac{e^{-\eta\sum_{\tau=1}^t(\ell_{\tau,i}+\log(\tilde{\alpha}))}}{\sum_{j=1}^D e^{-\eta\sum_{\tau=1}^t(\ell_{\tau,j}+\log(\tilde{\alpha}))}}
\\ &=
\frac{e^{-\eta t\log(\tilde{\alpha})}e^{-\eta\sum_{\tau=1}^t\ell_{\tau,i}}}{e^{-\eta t\log(\tilde{\alpha})}\sum_{j=1}^D e^{-\eta\sum_{\tau=1}^t\ell_{\tau,j}}}
=
\frac{e^{-\eta\sum_{\tau=1}^t\ell_{\tau,i}}}{\sum_{j=1}^D e^{-\eta\sum_{\tau=1}^t\ell_{\tau,j}}}. \nonumber
\end{align}
Thus, $\tilde{\pi}_{t,i}$ and $\hat{\pi}_{t,i}$ introduced in Sections~\ref{sec:comb_regret_analysis} and~\ref{sec:regret_analysis_without_replacement} remain unchanged as well. For simplicity we refer to $\tilde{\ell}$ as $\ell$ throughout this section.

\subsection{Regret analysis for sampling from the combinatorial space of coordinate blocks} \label{sec:comb_regret_analysis}

The probability $\tilde{\pi}_{t,\mathcal{I}_t}$ of selecting a certain coordinate block $\mathcal{I}_t\subset \mathcal{I}=\{1,\cdots,D\}$ of size $|\mathcal{I}_t|=c\in\mathcal{C}$ follows sampling according to $\pi_t$ such that
\begin{align}\label{eq:block_prob}
    \tilde{w}_{t,\mathcal{I}_t} = 
    \prod_{i\in \mathcal{I}_t}w_{t,i}^{\frac{1}{|\mathcal{I}_t|}}
    \; , \quad
    \tilde{W}_t = \sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in\mathcal{S}_c}\tilde{w}_{t,\mathcal{I}_t}
    \; , \quad
    \tilde{\pi}_{t,\mathcal{I}_t} = \frac{\tilde{w}_{t,\mathcal{I}_t}}{\tilde{W}_t}
    \quad \forall \mathcal{I}_t \in \bigcup_{c\in\mathcal{C}}\mathcal{S}_c
\end{align}
with
\begin{equation} \label{eq:prob_sum_to_1_2}
    \sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in\mathcal{S}_c}\tilde{\pi}_{t,\mathcal{I}_t} =1.
\end{equation}

% \textbf{Proof} 
%\subsubsection*{Proof of Lemma~\ref{lem:regret_comb}}
\begin{lemma} \label{lem:regret_comb}
For $\eta >0$ and non-negative losses $\ell_{t,i}\geq 0$ the update rule in (3) satisfies for any block of coordinates $\mathcal{I}^*$:
\begin{align}\label{eq:lemma_1_2}
 \sum_{t=1}^T\sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in \mathcal{S}_c} & \tilde{\pi}_{t,\mathcal{I}_t}\cdot\frac{1}{|\mathcal{I}_t|} \sum_{i\in\mathcal{I}_t}\ell_{t,i} 
 -\sum_{t=1}^T\frac{1}{|\mathcal{I}^*|}\sum_{i\in \mathcal{I}^*}\ell_{t,i}
\leq   \nonumber\\
 & 
\eta\sum_{t=1}^T\sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in \mathcal{S}_c}\tilde{\pi}_{t,\mathcal{I}_t}\cdot \left(\frac{1}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)^2 + \frac{|\mathcal{C}|D\log(D)}{\eta}.
\end{align}
\end{lemma}

%\begin{proof}
\noindent \textit{Proof}: 
Set
\begin{equation} \label{eq:init_2}
    \tilde{w}_{0,\mathcal{I}_t} = 1 \quad \forall \mathcal{I}_t \in \bigcup_{c\in\mathcal{C}}\mathcal{S}_c 
\end{equation}
Thus,
\begin{align}
    \tilde{W}_{t+1} &= 
    \sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in\mathcal{S}_c}\tilde{w}_{t+1,\mathcal{I}_t}
    =
    \sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in\mathcal{S}_c}\prod_{i\in \mathcal{I}_t}w_{t+1,i}^{\frac{1}{|\mathcal{I}_t|}} \nonumber
    \\ & =
    \sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in\mathcal{S}_c}\prod_{i\in \mathcal{I}_t}w_{t,i}^{\frac{1}{|\mathcal{I}_t|}}e^{-\frac{\eta}{|\mathcal{I}_t|}\ell_{t,i}}
    =
    \sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in\mathcal{S}_c}\prod_{i\in \mathcal{I}_t}w_{t,i}^{\frac{1}{|\mathcal{I}_t|}}\cdot e^{-\frac{\eta}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}} \nonumber\\
    & = 
    \sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in\mathcal{S}_c}\tilde{w}_{t,\mathcal{I}_t}\cdot e^{-\frac{\eta}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}} \nonumber
    \\ & = \label{eq:back_to_pi_hat_2}
    \tilde{W}_t\sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in\mathcal{S}_c}\tilde{\pi}_{t,\mathcal{I}_t}\cdot e^{-\frac{\eta}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}}\\
    &\leq \label{eq:exp_ineq_1_2}
    \tilde{W}_t\sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in \mathcal{S}_c}\tilde{\pi}_{t,\mathcal{I}_t}\left(1-\frac{\eta}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}+\eta^2\left(\frac{1}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)^2\right)\\
    %         \end{align}
    % \begin{align}
    % % \\ 
    &\leq \label{eq:pi_hat_sums_to_1_2}
    \tilde{W}_t\left(1+
    \sum_{c\in\mathcal{C}}\left(\sum_{\mathcal{I}_t\in \mathcal{S}_c} \eta^2\tilde{\pi}_{t,\mathcal{I}_t}\left(\frac{1}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)^2 -\frac{\eta}{|\mathcal{I}_t|}\tilde{\pi}_{t,\mathcal{I}_t}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)
    \right)
    \\&\leq \label{eq:exp_ineq_2_2}
    \tilde{W}_t e^{
    \sum_{c\in\mathcal{C}}\left(\sum_{\mathcal{I}_t\in \mathcal{S}_c} \eta^2\tilde{\pi}_{t,\mathcal{I}_t}\left(\frac{1}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)^2 -\frac{\eta}{|\mathcal{I}_t|}\tilde{\pi}_{t,\mathcal{I}_t}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)
    },
\end{align}
where (\ref{eq:back_to_pi_hat_2}) follows from (\ref{eq:block_prob}), (\ref{eq:exp_ineq_1_2}) holds since $e^{-x}\leq 1-x+x^2$ for $x\geq 0$, (\ref{eq:pi_hat_sums_to_1_2}) holds due to Eq.~(\ref{eq:prob_sum_to_1_2}) and (\ref{eq:exp_ineq_2_2}) holds since $1+x\leq e^x$.
% \begin{itemize}
%     \item (\ref{eq:back_to_pi_hat_2}) follows from (\ref{eq:block_prob}).
%     \item (\ref{eq:exp_ineq_1_2}) holds since $e^{-x}\leq 1-x+x^2$ for $x\geq 0$.
%     \item (\ref{eq:pi_hat_sums_to_1_2}) holds due to Eq.~\ref{eq:prob_sum_to_1_2}.
%     \item (\ref{eq:exp_ineq_2_2}) holds since $1+x\leq e^x$.
% \end{itemize}

Due to Eq.~(\ref{eq:init_2}), i.e., unit initial weights $w_{0,i}=1$, we have for any fixed block $\mathcal{I} \in \bigcup_{c\in\mathcal{C}}\mathcal{S}_c$,
\begin{align} \label{eq:aggergate_block_losses}
  \tilde{w}_{T,\mathcal{I}} 
  = 
  \prod_{i\in \mathcal{I}}w_{T,i}^{\frac{1}{|\mathcal{I}|}} 
  =
  \prod_{i\in \mathcal{I}}w_{0,i}^{\frac{1}{|\mathcal{I}|}} e^{-\frac{\eta}{|\mathcal{I}|}\sum_{t=1}^T\ell_{t,i}}
  =  e^{-\frac{\eta}{|\mathcal{I}|}\sum_{t=1}^T\sum_{i\in \mathcal{I}}\ell_{t,i}}.
\end{align}
And,
\begin{equation}\label{eq:big_init_2}
   \tilde{W}_0=\sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in\mathcal{S}_c}\tilde{w}_{0,\mathcal{I}_t}
   =\sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in\mathcal{S}_c}1
   =\sum_{c\in\mathcal{C}}|\mathcal{S}_c|=
   \sum_{c\in\mathcal{C}}\binom{D}{c} \leq (D!)^{|\mathcal{C}|}.
\end{equation}
Given that the weight of a certain coordinate block $\mathcal{I}^*$ is at most the total sum of all weights, applying Eq.~(\ref{eq:exp_ineq_2_2}) recursively over $t$ together with Eq.~(\ref{eq:aggergate_block_losses}),~(\ref{eq:init_2}) and~(\ref{eq:big_init_2}) we have
\begin{align}
   e^{-\frac{\eta}{|\mathcal{I}^*|}\sum_{t=1}^T\sum_{i\in \mathcal{I}^*}\ell_{t,i}}
   &=  
   \tilde{w}_{T,\mathcal{I}^*} \leq \tilde{W}_T 
   \notag \\ 
   &%\hspace{1cm}
   \leq 
   (D!)^{|\mathcal{C}|} e^{
   \sum_{t=1}^T\sum_{c\in\mathcal{C}}\left(\sum_{\mathcal{I}_t\in \mathcal{S}_c} \eta^2\tilde{\pi}_{t,\mathcal{I}_t}\left(\frac{1}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)^2 -\frac{\eta}{|\mathcal{I}_t|}\tilde{\pi}_{t,\mathcal{I}_t}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)
  }. \nonumber
\end{align}
Taking the $\log$ of both sides, we have
\begin{align} %\label{eq:recursive_ineq_2}
-\eta\sum_{t=1}^T\frac{1}{|\mathcal{I}^*|}\sum_{i\in \mathcal{I}^*}\ell_{t,i}
  \leq &
  \sum_{t=1}^T\sum_{c\in\mathcal{C}}\left(\sum_{\mathcal{I}_t\in \mathcal{S}_c} \eta^2\tilde{\pi}_{t,\mathcal{I}_t}\left(\frac{1}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)^2 -\frac{\eta}{|\mathcal{I}_t|}\tilde{\pi}_{t,\mathcal{I}_t}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right) 
% \notag \\& 
  + |\mathcal{C}|\log(D!), \nonumber
\end{align}
which, after rearranging the terms, dividing by $\eta$, and using $D!\leq D^D$ (so that $\log(D!)\leq D\log(D)$), finishes the proof.
%\end{proof}

%  \textbf{Proof} 
\noindent \textbf{Proof of Theorem~1}:%\ref{theo:regret_comb}}:
%\begin{proof}
%\noindent \textbf{Proof}: 
Since $0 \leq \ell_{t,i} \leq \log(\tilde{\alpha}\tilde{\beta})$, we have
 \begin{align}
     \left(\frac{1}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)^2
     \leq
     \left(\frac{1}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\log(\tilde{\alpha}\tilde{\beta})\right)^2
     \leq
     \log(\tilde{\alpha}\tilde{\beta})^2.  \nonumber
 \end{align}
 
 Thus, due to Eq.~(\ref{eq:prob_sum_to_1_2}), one has
 \begin{align}
    \sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in \mathcal{S}_c}\tilde{\pi}_{t,\mathcal{I}_t}\cdot \left(\frac{1}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\ell_{t,i}\right)^2
     \leq
     \sum_{c\in\mathcal{C}}\sum_{\mathcal{I}_t\in \mathcal{S}_c}\tilde{\pi}_{t,\mathcal{I}_t}\log(\tilde{\alpha}\tilde{\beta})^2
     =
     \log(\tilde{\alpha}\tilde{\beta})^2. \nonumber
 \end{align}
 Setting $\eta=\frac{1}{\log(\tilde{\alpha}\tilde{\beta})}\sqrt{\frac{|\mathcal{C}|D\log(D)}{T}}$ in Eq.~(\ref{eq:lemma_1_2}) yields
 \begin{equation}
     \mathrm{Regret}_T \leq \eta T \log(\tilde{\alpha}\tilde{\beta})^2 + \frac{|\mathcal{C}|D\log(D)}{\eta}
    = 
     2 \log(\tilde{\alpha}\tilde{\beta})\sqrt{T|\mathcal{C}|D\log(D)}.
 \end{equation}
%\end{proof}