
\section{\texttt{SLATE-GLM-OFU}}
Let $\bm{x}^i\in \R^{d}$, we define the ``\emph{lift}''
 $\tilde{\bm{x}}^i \in \R^{dN}$, of $\bm{x}^i$ as follows,
 
\[   
\tilde{\bm{x}}^i(j) = 
     \begin{cases}
       0 &\quad \text{if } j\notin [(i-1)d, id-1]\\
       \bm{x}^i(j - (i-1)d) &\quad\text{otherwise}\\ 
     \end{cases}
\]
In other words, consider $\tilde{\bm{x}}^i$ to be a vector with $N$ slots of dimension $d$, such that the $i^{th}$ slot is $\bm{x}^i$ while the rest of the slots are assigned the zero vector. Then, for any vector $\bm{z} = (\bm{z}^1, \ldots, \bm{z}^N) \in \R^{dN}$, with $\bm{z}^i\in \R^d$, $\forall i\in [N]$, we get that $\bm{z} = \tilde{\bm{z}}^1 + \ldots +\tilde{\bm{z}}^N$.

Let $T_0 \in \mathbb{N}$ be a constant (depending on $N$ and $\rho$) such that $\forall t \geq T_0, t\geq \frac{3 + 2\rho N}{3\rho^2}(N-1)^2\log\pbrak{\frac{2d NT}{\delta}}$. We assume that the total number of rounds $T$ satisfies $T \geq T_0$. 

We now prove that the regret for \texttt{Slate-GLM-OFU} can be bounded above by the quantity mentioned in Theorem \ref{theorem: Regret OFUL} (restated and expanded below).
Define the following events:
$$\event_1 = \cbrak{\forall i,j \in [N] , i\neq j, \forall t \in [T]: \norm{\bm{W}^{i,j}_t} \leq \sqrt{\frac{t}{2N^2}\log\pbrak{\frac{d N(N-1)}{\delta}}} \text{ and } \norm{\bm{V}^{\mathcal{H},i,j}_t} \leq \sqrt{\frac{8t}{\kappa^2 N^2}\log\pbrak{\frac{d N(N-1)}{\delta}}}}$$

$$\event_2 = \cbrak{\forall i \in [N], \forall t \in [T_0, T]: \eigmin{{\bm{V}^i_t}} \geq 1 + \frac{\rho t}{2} \text{ and } \eigmin{{\bm{V}^{\mathcal{H},i}_t}}  \geq \gamma_t(\delta) + \frac{\rho t}{2}}$$

$$\event_3 = \cbrak{\forall t \in [T], \matnorm{\bm{\theta}^\star - \bm{\theta}_{t+1}}{\bm{W}_{t+1}}^2 \leq CS^2d\log(t/\delta) \text{ and }\bm{\theta}^\star \in \Theta}$$

$$\event = \event_1 \cap \event_2 \cap \event_3$$

\begin{theorem}[Regret of \slateglincb] At the end of $T (\geq T_0)$ rounds and assuming event $\event$ holds, the regret of \slateglincb\ is bounded by 
    \begin{align*}
   Regret(T) &\leq   T_0 + CS N d^{1/2}\sqrt{ \left(d\log(T/4N) + \frac{1}{2\rho}\log T \right)\log(T/\delta)} \sqrt{\sum\limits_{t\notin\mathcal{T}}{}\sens{{\bm{x}_t^\star}}{\bm{\theta}^\star}} \\ 
&+ (1 + \kappa)C S^2N^2d\log(T/\delta)\kappa\pbrak{ d \log(T/4N) + \frac{1}{\rho}\log(T)} +  C  S^6 N^2 d^2 \kappa \log(T/\delta)\log(T/\kappa N)
    \end{align*}
\label{appendix: proof_regret_oful}
\end{theorem}
\begin{proof}  Recall from Section \ref{section:main-algo} that $\mathcal{T}$ is the set of all rounds in $[T]$, where the inequality condition in \emph{Step 2} of Algorithm \ref{algo:adaptive-updates} does not hold. Using the bound on $|\mathcal{T}|$ provided in Lemma \ref{lemma: number of times condition  fails}, we get that,

\begin{align*}
    Regret(T) &\leq |\mathcal{T}| +
    \sum\limits_{t\notin\mathcal{T}}
    \mu({\bm{x}_t^\star}^\top \bm{\theta}^\star)
 - \mu(\bm{x}_t^\top \bm{\theta}^\star)
    \leq C  S^6 d^2 \kappa \log(T/\delta)\log(T/\kappa N) + R(T)
\end{align*}
where $\bm{x}_t^\star = \argmax_{\bm{x}\in \mathcal{X}_t}\mu(\bm{x}^\top \bm{\theta}^\star)$ and $R(T) = \sum_{t\notin\mathcal{T}}\mu({\bm{x}_t^\star}^\top \bm{\theta}^\star)
 - \mu(\bm{x}_t^\top \bm{\theta}^\star)$. 

Now, recall from event $\mathcal{E}$ that all our \emph{good events} are defined for $t \in [T_0 , T]$ (where $T_0$ is some constant in $N$ and $\rho$). Hence, for rounds $t \leq T_0$, we can trivially bound the regret by $T_0$.

Now, we shift our attention to $t \in [T_0 , T]$. \textbf{From here on, we assume that} $ \mathbf{t \in [T_0, T]}$.

 Now, expanding $R(T)$ using an exact Taylor expansion (Definition \ref{def:Exact_Taylor}) along with the fact that $\modulus{\ddot{\mu}(.)} \leq \dot{\mu}(.)$ gives us,

\begin{align*}
    R(T) &\leq \sum\limits_{t \notin \mathcal{T}}\dot{\mu}(\bm{x}_t^\top \bm{\theta}^\star)(\bm{x}_t^\star - \bm{x}_t)^\top \bm{\theta}^\star  +  \sum\limits_{t \notin \mathcal{T}} \tilde{\alpha}({\bm{x}_t^\star}^\top\bm{\theta}^\star, \bm{x}_t^\top \bm{\theta}^\star) ((\bm{x}_t^\star - \bm{x}_t)^\top \bm{\theta}^\star)^2
\end{align*}

So we bound $R(T)$ by bounding the two quantities $R_1(T) = \sum_{t \notin \mathcal{T}}\dot{\mu}(\bm{x}_t^\top \bm{\theta}^\star)(\bm{x}_t^\star - \bm{x}_t)^\top \bm{\theta}^\star$ and $R_2(T) = \sum_{t \notin \mathcal{T}} \tilde{\alpha}({\bm{x}_t^\star}^\top\bm{\theta}^\star, \bm{x}_t^\top \bm{\theta}^\star) ((\bm{x}_t^\star - \bm{x}_t)^\top \bm{\theta}^\star)^2$ separately.

\underline{Bounding $R_1(T)$: } To bound $R_1(T)$, we define $\mathcal{T}_1 = \{t \in [ T_0 , T] : t \notin \mathcal{T} \text{ and } \sens{\bm{x}_t}{\bm{\theta}^\star} \geq \sens{\bm{x}_t}{\bm{\theta}_{t+1}}\}$ and $\mathcal{T}_2 = \{t \in [T_0 , T] : t \notin \mathcal{T} \text{ and } \sens{\bm{x}_t}{\bm{\theta}^\star} < \sens{\bm{x}_t}{\bm{\theta}_{t+1}}\}$. Note that, $\mathcal{T}_1\cap\mathcal{T}_2 = \emptyset$, and $[T_0 , T]\setminus\mathcal{T} = \mathcal{T}_1 \cup \mathcal{T}_2$. By summing over rounds in $\mathcal{T}_1$ we obtain,

\begin{align*}
   &\sum\limits_{t \in \mathcal{T}_1}{} \sens{\bm{x}_t}{\bm{\theta}^\star}\inner{\pbrak{\bm{x}_t^\star - \bm{x}_t}}{\bm{\theta}^\star}
   \overset{(i)}{=} \sum\limits_{t \in \mathcal{T}_1}{} \sbrak{\sens{\bm{x}_t}{\bm{\theta}_{t+1}} + \ddot{\mu}\pbrak{z_t}\pbrak{\inner{\bm{x}_t}{\bm{\theta}^\star} - \inner{\bm{x}_t}{\bm{\theta}_{t+1}}}}\inner{\pbrak{\bm{x}_t^\star - \bm{x}_t}}{\bm{\theta}^\star}\\
\end{align*}
for some $z_t$ between $\bm{x}_t^\top \bm\theta^\star$ and $\bm{x}_t^\top \bm\theta_{t+1}$. Here, $(i)$ follows from the mean value theorem. 
Let $R_1(T)_1 = \sum\limits_{t \in \mathcal{T}_1}{} \sens{\bm{x}_t}{\bm{\theta}_{t+1}}\sbrak{\inner{\pbrak{{\bm{x}_t^\star} - \bm{x}_t}}{\bm{\theta}^\star}}$ and $R_1(T)_2 = \sum\limits_{t \in \mathcal{T}_1}{} \ddot{\mu}(z_t)\pbrak{\inner{\bm{x}_t}{\bm{\theta}^\star} - \inner{\bm{x}_t}{\bm{\theta}_{t+1}}}\inner{\pbrak{{\bm{x}_t^\star} - \bm{x}_t}}{\bm{\theta}^\star}$. We bound these separately.

\begin{align*}
    R_1(T)_1 & = \sum\limits_{t \in \mathcal{T}_1} \dot{\mu}(\bm{x}_t^\top \bm{\theta}_{t+1}) \left[ \left( \bm{x}_t^\star - \bm{x}_t \right)^\top \bm\theta^\star \right] \leq \sum\limits_{t \in \mathcal{T}_1}{}  \dot{\mu}(\bm{x}_t^\top \bm{\theta}_{t+1}) \dot\mu(z_t)  ({\bm{x}_t^\star}^\top \bm\theta^\star - \bm{x}_t^\top \bm\theta^\star )\\
    &\overset{(i)}{\leq} \sum\limits_{t \in \mathcal{T}_1}{}  \dot{\mu}(\bm{x}_t^\top \bm{\theta}_{t+1}) \left\{ \lvert {\bm{x}_t^\star}^\top \bm\theta^\star - {\bm{x}_t^\star}^\top \bm\theta_t \rvert + \lvert \bm{x}_t^\top \bm\theta^\star - \bm{x}_t^\top \bm\theta_t \rvert + {\bm{x}_t^\star}^\top \bm\theta_t - \bm{x}_t^\top \bm\theta_t \right\}
    \\
    &\overset{(ii)}{\leq} \sum\limits_{t \in \mathcal{T}_1}{}  \dot{\mu}(\bm{x}_t^\top \bm{\theta}_{t+1}) \left\{\lVert \bm{x}_t^\star \rVert_{\bm{W}_t^{-1}} \sqrt{\eta_t(\delta)} + \lVert \bm{x}_t \rVert_{\bm{W}_t^{-1}}\sqrt{\eta_t(\delta)} + \sum_{i=1}^N \left( \tilde{\bm{x}}^{\star,i}_t - \tilde{\bm{x}}^i_t \right)^\top \bm\theta_t \right\}
    \\
    &\overset{(iii)}{\leq} \sum\limits_{t \in \mathcal{T}_1}{}  \dot{\mu}(\bm{x}_t^\top \bm{\theta}_{t+1}) \left\{ \sum_{i=1}^N   \sqrt{\eta_t(\delta)} \left(\lVert \bm{x}_t^{\star,i}\rVert_{(\bm{W}^i_t)^{-1}}  + \lVert \bm{x}^i_t \rVert_{(\bm{W}^i_t)^{-1}} \right) + \sum_{i=1}^N \left( \tilde{\bm{x}}^{\star,i}_t - \tilde{\bm{x}}^i_t \right)^\top \bm\theta^i_t \right\}
    \\
    &\overset{(iv)}{\leq} \sum\limits_{t \in \mathcal{T}_1}{}  \dot{\mu}(\bm{x}_t^\top \bm{\theta}_{t+1}) \left\{ \sum_{i=1}^N  \sqrt{\eta_t(\delta)} \left(\lVert \bm{x}_t^{\star,i}\rVert_{(\bm{W}^i_t)^{-1}}  + \lVert \bm{x}^i_t \rVert_{(\bm{W}^i_t)^{-1}} \right) + \sum_{i=1}^N \left(\sqrt{\eta_t(\delta)} \lVert \bm{x}^i_t \rVert_{(\bm{W}^i_t)^{-1}} - \sqrt{\eta_t(\delta)}\lVert \bm{x}_t^{\star,i}\rVert_{(\bm{W}^i_t)^{-1}} \ \right)   \right\}
    \\
     &\overset{}\leq C\sqrt{\eta_T(\delta)}\sum\limits_{t\in \mathcal{T}_1}\dot{\mu}(\bm{x}_t^\top\bm\theta_{t+1})\sum\limits_{i=1}^N 2\|\bm{x}_t^i\|_{(\bm{W}_t^i)^{-1}} 
     \overset{}{\leq} C\sqrt{\eta_T(\delta)}  \sqrt{\sum\limits_{t \in \mathcal{T}_1}{}\sens{\bm{x}_t}{\bm{\theta}_{t+1}}}\sqrt{\sum\limits_{t \in \mathcal{T}_1}{}\left(\sum\limits_{i=1}^{N} \sqrt{\sens{\bm{x}_t}{\bm{\theta}_{t+1}}} \matnorm{\bm{x}^i_t}{(\bm{W}^i_t)\inv} \right)^2}\\
     &\overset{(v)}{\leq}C\sqrt{\eta_T(\delta)}  \sqrt{\sum\limits_{t \in \mathcal{T}_1}{}\sens{\bm{x}_t}{\bm{\theta}_{t+1}}}\sqrt{ Nd\log(T/4N) + M(T)}
     \overset{(vi)}{\leq}C \sqrt{\eta_T(\delta)} \sqrt{ Nd\log(T/4N) + M(T)} \sqrt{\sum\limits_{t \in \mathcal{T}_1}{}\sens{\bm{x}_t}{\bm{\theta}^\star}}\\
     &\overset{(vii)}{\leq} C\sqrt{\eta_T(\delta)}\sqrt{ Nd  \log(T/4N) + M(T)} \pbrak{\sqrt{R(T)} + \sqrt{\sum\limits_{t\notin\mathcal{T}}{}\sens{{\bm{x}_t^\star}}{\bm{\theta}^\star}}}\\
     &\overset{(viii)}{\leq} CSN^{1/2}d^{1/2}\sqrt{ Nd\log(T/4N) + M(T)} \sqrt{\log(T/\delta)} \pbrak{\sqrt{R(T)} + \sqrt{\sum\limits_{t\notin\mathcal{T}}{}\sens{{\bm{x}_t^\star}}{\bm{\theta}^\star}}}\\
\end{align*}
where $M(T) = \sum\limits_{t \in \mathcal{T}_1}{}\sum\limits_{i=1}^{N}\sum\limits_{j=1 ; j \neq i}^{N}\sens{\bm{x}_t}{\bm{\theta}_{t+1}}\matnorm{\bm{x}^i_t}{(\bm{W}^i_t)\inv}\matnorm{\bm{x}^j_t}{(\bm{W}^j_t)\inv}$. 


Here, $(i)$ follows from the fact that $\dot\mu(.) \leq 1$, $(ii)$ follows from an application of the Cauchy-Schwarz inequality and the fact that $\bm\theta_t$ and $\bm\theta^\star \in \mathcal{C}_t(\delta)$, $(iii)$ follows from a direct application of Lemma \ref{lemma: conversion of norms} and the definition of $\tilde{\bm{x}}^i$, $(iv)$ follows from the UCB rule, i.e since in slot $i$, $\bm{x}^i_t$ was chosen, we have ${\bm{x}^i_t}^\top \bm{\theta}_t^i + \sqrt{\eta_t(\delta)}\matnorm{\bm{x}^{i}_t}{(\bm{W}^i_t)\inv} \geq {\bm{x}^{\star,i}_t}^\top \bm{\theta}_t^i + \sqrt{\eta_t(\delta)}\matnorm{\bm{x}^{\star,i}_t}{(\bm{W}^i_t)\inv}$, $(v)$ is a direct application of Lemma \ref{lemma: elliptical potential lemma} on $\sqrt{\dsigmoid{\inner{\bm{x}_t}{\bm{\bm{\theta}_{t+1}}}}}\bm{x}^i_t$ and the fact that $\twonorm{\sqrt{\dsigmoid{\inner{\bm{x}_t}{\bm{\bm{\theta}_{t+1}}}}}\bm{x}^i_t} \leq \frac{1}{2\sqrt{N}}$, $(vi)$ holds due to the definition of $\mathcal{T}_1$, $(vii)$ follows from Lemma \ref{Lemma: Abielle result}, and $(viii)$ follows from $\eta_t(\delta) \leq CS^2Nd\log(T/\delta)$.

Turning to $M(T)$, we can bound the term using Rayleigh's quotient and Lemma \ref{lemma : norm of slot design matrix} (since event $\event_0$ holds) as follows:

\begin{align*}
    M(T) &= \sum\limits_{t \in \mathcal{T}_1}{}\sum\limits_{i=1}^{N}\sum\limits_{j=1 ; j \neq i}^{N}\sens{\bm{x}_t}{\bm{\theta}_{t+1}}\matnorm{\bm{x}^i_t}{(\bm{W}^i_t)\inv}\matnorm{\bm{x}^j_t}{(\bm{W}^j_t)\inv}\\
    &\overset{(i)}{\leq}  \sum\limits_{t \in \mathcal{T}_1}{}\sum\limits_{i=1}^{N}\sum\limits_{j=1 ; j \neq i}^{N}\sens{\bm{x}_t}{\bm{\theta}_{t+1}} \twonorm{\bm{x}^i_t}\twonorm{\bm{x}^j_t}\sqrt{\eigmax{(\bm{W}^i_t)\inv}\eigmax{(\bm{W}^j_t)\inv}}\\
    &\overset{(ii)}{\leq}  \sum\limits_{t \in \mathcal{T}_1}{}\sum\limits_{i=1}^{N}\sum\limits_{j=1 ; j \neq i}^{N}\frac{1}{4N}\frac{1}{\sqrt{\eigmin {\bm{W}^i_t}\eigmin{\bm{W}^j_t}}}
    \overset{(iii)}{\leq} \frac{N^2}{4N}\sum\limits_{t \in \mathcal{T}_1}{}\frac{1}{1 + \frac{\rho t}{2}}
    \overset{(iv)}{\leq} \frac{N}{2\rho}\log(T)
\end{align*}
Here, $(i)$ follows from Rayleigh's quotient, $(ii)$ follows from $\sens{\bm{x}_t}{\bm{\theta}_{t+1}} \leq\frac{1}{4}$ and $\twonorm{
\bm{x}^i_t} \leq\frac{1}{\sqrt N}$, $(iii)$ follows from a direct application of Lemma \ref{lemma : norm of slot design matrix}, and $(iv)$ follows from the sum of Harmonic Series.

Thus, we get
\[R_1(T)_1 \leq CS N d^{1/2}\sqrt{ \left(d\log(T/4N) + \frac{1}{2\rho}\log T \right)\log(T/\delta)} \pbrak{\sqrt{R(T)} + \sqrt{\sum\limits_{t\notin\mathcal{T}}{}\sens{{\bm{x}_t^\star}}{\bm{\theta}^\star}}}\]


The bound on $R_1(T)_2$ is as follows:
\begin{align*}
    R_1(T)_2 &= \sum\limits_{t \in \mathcal{T}_1}{} \ddot{\mu}(z_t)\pbrak{\inner{\bm{x}_t}{\bm{\theta}^\star} - \inner{\bm{x}_t}{\bm{\theta}_{t+1}}}\inner{\pbrak{{\bm{x}_t^\star} - \bm{x}_t}}{\bm{\theta}^\star} 
    \overset{(i)}{\leq} C\sqrt{\eta_t(\delta)}\sum\limits_{t \in \mathcal{T}_1}{}\modulus{\pbrak{\inner{\bm{x}_t}{\bm{\theta}^\star} - \inner{\bm{x}_t}{\bm{\theta}_{t+1}}}}\sum\limits_{i=1}^{N}2\matnorm{\bm{x}^i_t}{(\bm{W}^i_t)\inv}\\
    &\overset{(ii)}{\leq} C\sqrt{\eta_t(\delta)}\sum\limits_{t \in \mathcal{T}_1}{}\pbrak{\sum\limits_{i=1}^{N}\matnorm{\tilde{\bm{x}}^i_t}{\bm{W}_t\inv}\matnorm{\bm{\theta}^\star - \bm{\theta}_{t+1}}{\bm{W}_t}}\sum\limits_{i=1}^{N}2\matnorm{\bm{x}^i_t}{(\bm{W}^i_t)\inv}\\
    &\overset{(iii)}{\leq} C\sqrt{\eta_t(\delta)}\sum\limits_{t \in \mathcal{T}_1}{}\pbrak{\sum\limits_{i=1}^{N}\matnorm{\tilde{\bm{x}}^i_t}{\bm{W}_t\inv}\matnorm{\bm{\theta}^\star - \bm{\theta}_{t+1}}{\bm{W}_{t+1}}}\sum\limits_{i=1}^{N}\matnorm{\bm{x}^i_t}{(\bm{W}^i_t)\inv}
    \overset{(iv)}{\leq} C \eta_t(\delta)\sum\limits_{t \in \mathcal{T}_1}{}\pbrak{\sum\limits_{i=1}^{N}\matnorm{\bm{x}^i_t}{(\bm{W}^i_t)\inv}}^2\\
    &\overset{(v)}{\leq} C \eta_t(\delta)\kappa\sum\limits_{t \in \mathcal{T}_1}{}\pbrak{\sum\limits_{i=1}^{N}\matnorm{\sqrt{\sens{\bm{x}_t}{\bm{\theta}_{t+1}}}\bm{x}^i_t}{(\bm{W}^i_t)\inv}}^2
    \overset{(vi)}{\leq} C \eta_t(\delta)\kappa\pbrak{Nd \log(T/4N)
    + \frac{N}{2\rho} \log T}\\
    &\overset{(vii)}{\leq} C  S^2 N d \kappa \log(T/\delta) \pbrak{Nd \log(T/4N) + \frac{N}{2\rho} \log T}\\
\end{align*}
Here, $(i)$ follows in the same manner as the regret bound for rounds $t \leq T_0$, and uses the fact that $|\ddot{\mu}(.)| \leq 1$, $(ii)$ is obtained by an application of Cauchy-Schwarz followed by triangle inequality, $(iii)$ follows using the fact that $\tilde{\bm{\theta}_t^i} , \bm{\theta}^\star \in \C_t(\delta)$ and $\bm{W}_t \mleq \bm{W}_{t+1}$, $(iv)$ follows since $\bm{\theta}_{t+1},\bm{\theta}^\star \in \C_{t+1}(\delta)$ and from Lemma \ref{lemma: conversion of norms}, $(v)$ follows from the definition of $\kappa$, i.e $\kappa \geq \frac{1}{\sens{\bm{x}}{\bm{\theta}}}$, $(vi)$ follows similar to the technique used in bounding $R_1(T)_1$, and $(vii)$ follows from $\eta_t(\delta) \leq CS^2Nd\log(T/\delta)$. 

Similarly, summing over all indices in $\mathcal{T}_2$, we get:

\begin{align*}
    &\sum\limits_{t \in \mathcal{T}_2}{} \sens{\bm{x}_t}{\bm{\theta}^\star}\inner{\pbrak{{\bm{x}_t^\star} - \bm{x}_t}}{\bm{\theta}^\star}
    \overset{(i)}{\leq}\sum\limits_{t \in \mathcal{T}_2}{} \sqrt{\sens{\bm{x}_t}{\bm{\theta}^\star}}\sqrt{\sens{\bm{x}_t}{\bm{\theta}_{t+1}}}\inner{\pbrak{{\bm{x}_t^\star} - \bm{x}_t}}{\bm{\theta}^\star}\\
    &\overset{(ii)}{\leq} CS N d^{1/2}\sqrt{ \left(d\log(T/4N) + \frac{1}{2\rho}\log T \right)\log(T/\delta)} \pbrak{\sqrt{R(T)} + \sqrt{\sum\limits_{t\notin\mathcal{T}}{}\sens{{\bm{x}_t^\star}}{\bm{\theta}^\star}}}\\
\end{align*}

Here, $(i)$ follows from the definition of $\mathcal{T}_2$, $(ii)$ follows  using the same steps as followed for $R_1(T)_1$.

Combining all the bounds on $R_1(T)$, we get,
\begin{align*}
    R_1(T) &\leq CS N d^{1/2}\sqrt{ \left(d\log(T/4N) + \frac{1}{2\rho}\log T \right)\log(T/\delta)} \pbrak{\sqrt{R(T)} + \sqrt{\sum\limits_{t\notin\mathcal{T}}{}\sens{{\bm{x}_t^\star}}{\bm{\theta}^\star}}}\\
    &+  C S^2 N^2 d \kappa \log(T/\delta) \pbrak{ d \log(T/4N) + \frac{1}{\rho}\log(T)}\\
\end{align*}
We now bound $R_2(T)$.
\begin{align*}
    R_2(T) &= \sum\limits_{t\notin\mathcal{T}}{}\int_{0}^{1} (1-v)\dsigmoid{v\inner{\bm{x}_t}{\bm{\theta}^\star} + (1-v)\inner{{\bm{x}_t^\star}}{\bm{\theta}^\star}}\diff v\pbrak{\inner{\pbrak{{\bm{x}_t^\star} - \bm{x}_t}}{\bm{\theta}^\star}}^2\\
    &\overset{(i)}{\leq} \eta_t(\delta)\sum\limits_{t\notin\mathcal{T}}{}\int_{0}^{1} (1-v)\modulus{\dsigmoid{v\inner{\bm{x}_t}{\bm{\theta}^\star} + (1-v)\inner{{\bm{x}_t^\star}}{\bm{\theta}^\star}}}\diff v\pbrak{\sum\limits_{i=1}^{N} 2\matnorm{\bm{x}^i_t}{(\bm{W}^i_t)\inv}}^2\\
    &\overset{(ii)}{\leq} \eta_t(\delta) \sum\limits_{t\notin\mathcal{T}}{}\int_0^1 (1-v)\diff v \pbrak{\sum\limits_{i=1}^{N} 2\matnorm{\bm{x}^i_t}{(\bm{W}^i_t)\inv}}^2 
    \overset{}{\leq} 2 \eta_t(\delta)\sum\limits_{t \notin \mathcal{T}}{} \pbrak{\sum\limits_{i=1}^{N}\matnorm{\bm{x}^i_t}{(\bm{W}^i_t)\inv}}^2\\
    &\overset{(iii)}{\leq}  C  S^2 N^2 d \kappa \log(T/\delta) \pbrak{d \log(T/4N)
    + \frac{1}{\rho}\log T}
\end{align*}

Here, $(i)$ follows in a manner similar to the one used in bounding the regret for rounds $t \leq T_0$, $\modulus{ab} \leq \modulus{a}\modulus{b} \text{ and } \modulus{\int f(x)\diff x} \leq \int \modulus{f(x)} \diff x$, $(ii)$ follows from the fact that $\modulus{\dsigmoid{.}} \leq 1$, and $(iii)$ follows in the same manner as the final steps, $(v)$--$(vii)$, of the bound for $R_1(T)_2$.

Combining all the bounds, we get
\begin{align*}
    R(T) &\leq CS N d^{1/2}\sqrt{ \left(d\log(T/4N) + \frac{1}{2\rho}\log T \right)\log(T/\delta)} \pbrak{\sqrt{R(T)} + \sqrt{\sum\limits_{t\notin\mathcal{T}}{}\sens{{\bm{x}_t^\star}}{\bm{\theta}^\star}}}\\
    &+   C  S^2 N^2 d \kappa \log(T/\delta) \pbrak{d \log(T/4N)
    + \frac{1}{\rho}\log T}\\
\end{align*}
Applying Lemma \ref{lemma: quadratic inequality} for $R(T)$, we get that
\begin{align*}
    R(T) &\leq CS N d^{1/2}\sqrt{ \left(d\log(T/4N) + \frac{1}{2\rho}\log T \right)\log(T/\delta)} \sqrt{\sum\limits_{t\notin\mathcal{T}}{}\sens{{\bm{x}_t^\star}}{\bm{\theta}^\star}} \\
    &+ (1 + \kappa)C S^2N^2d\log(T/\delta)\kappa\pbrak{ d \log(T/4N) + \frac{1}{\rho}\log(T)}
\end{align*}

Thus, our overall Regret is
\begin{align*}
Regret(T) &\leq   T_0 + CS N d^{1/2}\sqrt{ \left(d\log(T/4N) + \frac{1}{2\rho}\log T \right)\log(T/\delta)} \sqrt{\sum\limits_{t\notin\mathcal{T}}{}\sens{{\bm{x}_t^\star}}{\bm{\theta}^\star}} \\ 
&+ (1 + \kappa)C S^2N^2d\log(T/\delta)\kappa\pbrak{ d \log(T/4N) + \frac{1}{\rho}\log(T)} +  C  S^6 N^2 d^2 \kappa \log(T/\delta)\log(T/\kappa N)
\end{align*}

\end{proof}

\subsection{Supporting Lemmas for Theorem \ref{appendix: proof_regret_oful}}

\begin{lemma}
    $\bm{U}_t^{-\frac{1}{2}}\bm{W}_t\bm{U}_t^{-\frac{1}{2}} = \bm{I}_{Nd} + \bm{A}_t$
    \\where $\bm{A}_t = 
    \begin{bmatrix}
    \bm{0}_{d} & 
    ({\bm{W}^{1}_t})^{-\frac{1}{2}} \bm{W}^{1,2}_t ({\bm{W}^{2}_t})^{-\frac{1}{2}} & 
    \ldots & 
   ({\bm{W}^{1}_t})^{-\frac{1}{2}} \bm{W}^{1,N}_t ({\bm{W}^{N}_t})^{-\frac{1}{2}}\\
    \\
   ({\bm{W}^{2}_t})^{-\frac{1}{2}} \bm{W}^{2,1}_t ({\bm{W}^{1}_t})^{-\frac{1}{2}} &
   \bm{0}_{d} &
   \ldots &
  {(\bm{W}^{2}_t})^{-\frac{1}{2}} \bm{W}^{2,N}_t ({\bm{W}^{N}_t})^{-\frac{1}{2}}\\
    \\
    \vdots & \vdots & \ldots & \vdots\\
    \\
     ({\bm{W}^{N}_t})^{-\frac{1}{2}} \bm{W}^{N,1}_t ({\bm{W}^{1}_t})^{-\frac{1}{2}} &
     ({\bm{W}^{N}_t})^{-\frac{1}{2}} \bm{W}^{N,2}_t ({\bm{W}^{2}_t})^{-\frac{1}{2}} &
     \ldots& 
    \bm{0}_{d}
    \end{bmatrix}$
    \label{lemma: design_matrix_decomp}
\end{lemma}

\begin{proof}  It is enough to show $\bm{W}_t = \bm{U}_t + \bm{U}_t^{\frac{1}{2}}\bm{A}_t\bm{U}_t^{\frac{1}{2}}$ to prove the claim. We can decompose $\bm{W}_t$ as follows:
\begin{align*}
    \bm{W}_t &= \bm{I}_{Nd} + \sum\limits_{s=1}^{t-1} \sens{\bm{x}_s}{\bm{\theta}_{s+1}} \bm{x}_s \bm{x}^\top_s\\
    &\overset{(i)}{=}\bm{I}_{Nd} + \sum\limits_{s=1}^{t-1} \sens{\bm{x}_s}{\bm{\theta}_{s+1}} \pbrak{\sum\limits_{i=1}^{N} \tilde{\bm{x}}^i_s} \pbrak{\sum\limits_{i=1}^{N} \tilde{\bm{x}}^{i ^\top}_s}\\
    & \overset{(ii)}{=} \bm{I}_{Nd} + \sum\limits_{s=1}^{t-1} \sens{\bm{x}_s}{\bm{\theta}_{s+1}} \begin{bmatrix} 
    \bm{x}^1_s \bm{x}^{1^\top}_s &  \bm{x}^1_s \bm{x}^{2^\top}_s & \ldots & \bm{x}^1_s \bm{x}^{N^\top}_s \\
    \\
     \bm{x}^2_s \bm{x}^{1^\top}_s &  \bm{x}^2_s \bm{x}^{2^\top}_s & \ldots & \bm{x}^2_s \bm{x}^{N^\top}_s \\
     \\
     \vdots & \vdots & \ldots & \vdots\\
     \\
     \bm{x}^N_s \bm{x}^{1^\top}_s &  \bm{x}^N_s \bm{x}^{2^\top}_s & \ldots & \bm{x}^N_s \bm{x}^{N^\top}_s \\
    \end{bmatrix}\\
        & \overset{(iii)}{=} \begin{bmatrix}
         \bm{W}^1_t & \bm{W}^{1,2}_t & \ldots & \bm{W}^{1,N}_t\\
         \\
         \bm{W}^{2,1}_t & \bm{W}^{2}_t & \ldots & \bm{W}^{2,N}_t\\
          \\
          \vdots & \vdots & \ldots & \vdots\\
          \\
           \bm{W}^{N,1}_t & \bm{W}^{N,2}_t & \ldots & \bm{W}^{N}_t\\
     \end{bmatrix} \overset{(iv)}{=} \bm{U}_t + \bm{B}_t
\end{align*}

Here, $(i)$ follows using the fact $\bm{x}_s = \sum\limits_{i=1}^{N} \tilde{\bm{x}}^i_s$, $(ii)$ follows from the definition of $\tilde{\bm{x}}^i_s$, $(iii)$  follows from the definitions of $\bm{W}^{i}_t$ and $\bm{W}^{i,j}_t$ and the fact that $\bm{I}_{Nd} = \textrm{diag}(\bm{I}_d , \ldots \bm{I}_d)$, and $(iv)$ follows from the definition of $\bm{U}_t$. 

We finish the claim by showing $\bm{B}_t = 
        \begin{bmatrix}
         \bm{0}_d & \bm{W}^{1,2}_t & \ldots & \bm{W}^{1,N}_t\\
         \\
         \bm{W}^{2,1}_t & \bm{0}_d & \ldots & \bm{W}^{2,N}_t\\
          \\
          \vdots & \vdots & \ldots & \vdots\\
          \\
           \bm{W}^{N,1}_t & \bm{W}^{N,2}_t & \ldots & \bm{0}_d\\
     \end{bmatrix} = 
\bm{U}_t^{\frac{1}{2}}\bm{A}_t\bm{U}_t^{\frac{1}{2}}$, i.e , $\bm{A}_t = \bm{U}_t^{-\frac{1}{2}}\bm{B}_t\bm{U}_t^{-\frac{1}{2}}$.

Note that since $\bm{U}_t$ is a diagonal block matrix, $\bm{U}_t^{-\frac{1}{2}} = \textrm{diag}\pbrak{(\bm{W}^1_t)^{-\frac{1}{2}} , \ldots , (\bm{W}^N_t)^{-\frac{1}{2}}}$. We can write the $(i,j)^{\text{th}}$ element (in this case, $d \times d$ block) of $\bm{U}_t^{-\frac{1}{2}}\bm{B}_t\bm{U}_t^{-\frac{1}{2}}$ as:
\begin{align*}
    \sbrak{\bm{U}_t^{-\frac{1}{2}} \bm{B}_t \bm{U}_t^{-\frac{1}{2}}}_{i,j} &= \sum\limits_{k=1}^{N}\sum\limits_{l=1}^{N} \sbrak{\bm{U}_t^{-\frac{1}{2}}}_{i,k} \sbrak{\bm{B}_t}_{k,l} \sbrak{\bm{U}_t^{-\frac{1}{2}}}_{l,j}\\
    &= \sum\limits_{k=1}^{N}\sum\limits_{l=1}^{N} \delta_{i,k}\bar{\delta}_{k,l}\delta_{l,j} \sbrak{\bm{U}_t^{-\frac{1}{2}}}_{i,k} \sbrak{\bm{B}_t}_{k,l} \sbrak{\bm{U}_t^{-\frac{1}{2}}}_{l,j}\\
    &=\begin{cases}
    (\bm{W}^i_t)^{-\frac{1}{2}} \bm{W}^{i,j}_t (\bm{W}^j_t)^{-\frac{1}{2}}& i\neq j\\
    \bm{0}_{d\times d} & i = j
    \end{cases}\\
    &= \sbrak{\bm{A}_t}_{i,j}
\end{align*}
where $\delta_{i,j}$ denotes the Kronecker Delta, which takes a value of 1 if $i = j$ and 0 otherwise. Likewise, $\bar{\delta}_{i,j} = 1 - \delta_{i,j}$ denotes the complement of the Kronecker Delta. The second equality follows from the fact that the off-diagonal entries in $\bm{U}_t^{-\frac{1}{2}}$ are zero matrices and likewise, the diagonal entries in $\bm{B}_t$ are zero matrices. This completes the proof.

\end{proof}

\begin{proposition}
    Let $\Lambda\pbrak{\bm{A}}$ denote the set of eigenvalues of $\bm{A}$. Then,
    \begin{align*}
        \Lambda\pbrak{\bm{A}} = \Lambda\pbrak{\begin{bmatrix}
            \mathbf{0} & \mathbf{0}\\
            \mathbf{0} & \bm{A}
        \end{bmatrix}}
    \end{align*}
    \label{prop: same_eig}
\end{proposition}

\begin{proposition}
    Let $\bm{A}$ and $\bm{B}$ be two symmetric matrices. Then,
    \begin{align*}
        \eigmax{\bm{A} + \bm{B}} \leq\eigmax{\bm{A}} + \eigmax{\bm{B}} \text{ and } \eigmin{\bm{A} + \bm{B}} \geq \eigmin{\bm{A}} + \eigmin{\bm{B}}
    \end{align*}
    \label{prop: max_min_eig}
\end{proposition}


\begin{lemma}
    Define the matrix recurrence relation as follows:
    \begin{align*}
        \bm{A}^{(k)} = \begin{bmatrix}
           \mathbf{0} & \bm{Z}_k\\
            {\bm{Z}_k}^\top & \bm{A}^{(k-1)}
        \end{bmatrix} \text{ and } \bm{A}^{(1)} = \begin{bmatrix}
            \mathbf{0} & \bm{Z}_1\\
            {\bm{Z}_1}^\top & \mathbf{0}
        \end{bmatrix}
    \end{align*}
    Then, $\eigmax{\bm{A}^{(k)}} \leq \sum\limits_{i=1}^{k}\singmax{\bm{Z}_i}$ and $\eigmin{\bm{A}^{(k)}} \geq -\sum\limits_{i=1}^{k}\singmax{\bm{Z}_i}$.
    \label{lemma: recurrence}
\end{lemma}

\begin{proof}  The proof follows by induction. For $k=1$, we see that the statement indeed holds from Lemma \ref{lemma: max_min_eig_hermitian}.

Assume that the statement holds for $k = n$, i.e., $\eigmax{\bm{A}^{(n)}} \leq \sum\limits_{i=1}^{n}\singmax{\bm{Z}_i}$ and $\eigmin{\bm{A}^{(n)}} \geq -\sum\limits_{i=1}^{n}\singmax{\bm{Z}_i}$.

Consider $\bm{A}^{(n+1)} = 
    \begin{bmatrix}
        \mathbf{0} & \bm{Z}_{n+1}\\
        {\bm{Z}_{n+1}}^\top & \bm{A}^{(n)}
    \end{bmatrix} = 
    \begin{bmatrix}
        \mathbf{0} & \bm{Z}_{n+1}\\
        {\bm{Z}_{n+1}}^\top & \mathbf{0}
    \end{bmatrix} + 
    \begin{bmatrix}
        \mathbf{0} & \mathbf{0}\\
        \mathbf{0} & \bm{A}^{(n)}
    \end{bmatrix}$

We have that,
\begin{align*}
    \eigmax{\bm{A}^{(n+1)}} &\overset{(i)}{\leq} \eigmax{\begin{bmatrix}
        \mathbf{0} & \bm{Z}_{n+1}\\
        {\bm{Z}_{n+1}}^\top & \mathbf{0}
    \end{bmatrix}} + \eigmax{\begin{bmatrix}
        \mathbf{0} & \mathbf{0}\\
        \mathbf{0} & \bm{A}^{(n)}
    \end{bmatrix}}
    \overset{(ii)}{=} \singmax{\bm{Z}_{n+1}} + \eigmax{\bm{A}^{(n)}} \\
    &\overset{(iii)}{\leq} \singmax{\bm{Z}_{n+1}} + \sum\limits_{i=1}^{n}\singmax{\bm{Z_i}} 
    = \sum\limits_{i=1}^{n+1}\singmax{\bm{Z_i}}
\end{align*}
where $(i)$ follows from Proposition \ref{prop: max_min_eig}, $(ii)$ follows from Lemma \ref{lemma: max_min_eig_hermitian} and Proposition \ref{prop: same_eig}, and $(iii)$ follows from the induction hypothesis.

Similarly,
\begin{align*}
    \eigmin{\bm{A}^{(n+1)}} &\overset{(i)}{\geq} \eigmin{\begin{bmatrix}
        \mathbf{0} & \bm{Z}_{n+1}\\
        {\bm{Z}_{n+1}}^\top & \mathbf{0}
    \end{bmatrix}} + \eigmin{\begin{bmatrix}
        \mathbf{0} & \mathbf{0}\\
        \mathbf{0} & \bm{A}^{(n)}
    \end{bmatrix}}
    \overset{(ii)}{=} -\singmax{\bm{Z}_{n+1}} + \eigmin{\bm{A}^{(n)}} \\
    &\overset{(iii)}{\geq} -\singmax{\bm{Z}_{n+1}} - \sum\limits_{i=1}^{n}\singmax{\bm{Z_i}} 
    = -\sum\limits_{i=1}^{n+1}\singmax{\bm{Z_i}}
\end{align*}
where $(i)$ follows from Proposition \ref{prop: max_min_eig}, $(ii)$ follows from Lemma \ref{lemma: max_min_eig_hermitian} and Proposition \ref{prop: same_eig}, and $(iii)$ follows from the induction hypothesis.

\end{proof}

\begin{lemma}
    The items chosen at round $t$ in two different slots, say $i$ and $j$, where $i , j \in [N]$ and $i \neq j$ are independent of one another, conditioned on $\filteration{t}$. In other words, 
    $$\E\sbrak{\bm{x}^i_t {\bm{x}^j_t}^\top | \filteration{t}} = \bm{0}_d$$
    \label{lemma: independence of cross terms}
\end{lemma}
\begin{proof}  It is easy to see that the item chosen in slot $i$ during round $t$ only depends on $\cbrak{\bm{x}_s}_{s=1}^{t-1}, \cbrak{\bm{\theta}_{s+1}}_{s=1}^{t-1}$, and $\cbrak{\bm{x}^i_s}_{s=1}^{t}$. Since, $\filteration{t}$ accounts for all of these terms, conditioned on $\filteration{t}$, the items being chosen in two different slots are independent of one another.

Because of the independence, we can say that 
$$\E\sbrak{\bm{x}^i_t \bm{x}^{j ^\top}_t | \filteration{t}} = \E\sbrak{\bm{x}^i_t | \filteration{t}}\E\sbrak{\bm{x}^{j ^\top}_t | \filteration{t}} = \bm{0}_d$$
where the last equality follows from Assumption \ref{assumption: diversity}.

\end{proof}


\begin{lemma}
    The diversity assumptions in Assumption \ref{assumption: diversity} can be extended to the set of vectors $\cbrak{\sqrt{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}}\bm{x}^i_t}_{i=1}^N$, i.e, we can show the following:
    \begin{enumerate}
    \item $\E\sbrak{\sqrt{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}}\bm{x}^i_t | \filteration{t}} = \bm{0}_d$
    \item $\E\sbrak{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}\bm{x}^i_t{\bm{x}^j_t}^\top | \filteration{t}} = \bm{0}_d$ where $i \neq j$
    \item $\E\sbrak{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}\bm{x}^i_t{\bm{x}^i_t}^\top | \filteration{t}} \mgeq \rho\bm{I}_d$ 
    \end{enumerate}
    \label{lemma: extension of diversity}
\end{lemma}
\begin{proof} We attempt to bound $\sens{\bm{x}_t}{\bm{\theta}_{t+1}}$.  

Using the Cauchy-Schwarz inequality, it is easy to see that $-S \leq \inner{\bm{x}_t}{\bm{\theta}_{t+1}} \leq S$. Since $\dsigmoid{.}$ is an increasing function on $(-\infty ,0 ]$ and a decreasing function on  $[0,\infty)$, we have that 
$$\sens{\bm{x}_t}{\bm{\theta}_{t+1}} \in 
\begin{cases}
\sbrak{\dsigmoid{S} , \frac{1}{4}} & \text{if } \inner{\bm{x}_t}{\bm{\theta}_{t+1}} \in [0,S]\\
\sbrak{\dsigmoid{-S} , \frac{1}{4}} & \text{if } \inner{\bm{x}_t}{\bm{\theta}_{t+1}} \in [-S,0]\\
\end{cases}$$
Since $\dsigmoid{-S} = \dsigmoid{S}$, we have that $\sens{\bm{x}_t}{\bm{\theta}_{t+1}} \in \sbrak{\dsigmoid{S} , \frac{1}{4}}$.

Now, we have that
\begin{align*}
    &\sqrt{\dsigmoid{S}}\E\sbrak{\bm{x}^i_t | \filteration{t}}\leq\E\sbrak{\sqrt{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}}\bm{x}^i_t | \filteration{t}} \leq \sqrt{\frac{1}{4}}\E\sbrak{\bm{x}^i_t | \filteration{t}}
    \implies \bm{0}_d \leq \E\sbrak{\sqrt{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}}\bm{x}^i_t | \filteration{t}} \leq \bm{0}_d\\
    &\implies \E\sbrak{\sqrt{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}}\bm{x}^i_t | \filteration{t}} = \bm{0}_d
\end{align*}

Similarly, from Lemma \ref{lemma: independence of cross terms},
\begin{align*}
    &\dsigmoid{S}\E\sbrak{\bm{x}^i_t {\bm{x}^{j}_t}^\top| \filteration{t}} \leq \E\sbrak{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}\bm{x}^i_t{\bm{x}^{j}_t}^\top | \filteration{t}} \leq \frac{1}{4}\E\sbrak{\bm{x}^i_t {\bm{x}^{j}_t}^\top | \filteration{t}}
    \implies \bm{0}_d \leq \E\sbrak{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}\bm{x}^i_t{\bm{x}^{j}_t}^\top| \filteration{t}} \leq \bm{0}_d\\
    &\implies \E\sbrak{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}\bm{x}^i_t{\bm{x}^{j}_t}^\top | \filteration{t}} = \bm{0}_d
\end{align*}

Finally, since $\kappa = \max\limits_{\bm{x}} \max\limits_{\bm{\theta}}\frac{1}{\dsigmoid{\inner{\bm{x}}{\bm{\theta}}}}$, we have that $\kappa \geq \frac{1}{\sens{\bm{x}_t}{\bm{\theta}_{t+1}}}$. Hence,
\begin{align*}
    \E\sbrak{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}\bm{x}^i_t{\bm{x}^i_t}^\top | \filteration{t}} \mgeq \frac{1}{\kappa} \E\sbrak{\bm{x}^i_t {\bm{x}^{i}_t}^\top | \filteration{t}} \mgeq \rho\bm{I}_d
\end{align*}
where the last inequality follows from Assumption \ref{assumption: diversity}. 

\end{proof}

\begin{lemma}
    For all $i \in [N]$ , $j \in [i+
    1,N]$, and $t \geq 0$, $\norm{\bm{W}^{i,j}_t} \leq \sqrt{\frac{t}{2N^2}\log\pbrak{\frac{d N(N-1)}{\delta}}}$ with probability at least $1-\delta$.
    \label{lemma: norm_cross_terms}
\end{lemma}
\begin{proof}  To prove this lemma, we invoke Lemma \ref{lemma: generalized norm of cross terms}. We have already shown in Lemma \ref{lemma: extension of diversity} that $\E\sbrak{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}\bm{x}^i_t{\bm{x}^j_t}^\top | \filteration{t}} = \bm{0}_d$ for $i \neq j$. Thus, invoking Lemma \ref{lemma: generalized norm of cross terms} with $\bm{x}_s = \sqrt{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}}\bm{x}^i_t$, $\bm{z}_s = \sqrt{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}}\bm{x}^j_t$, $m_1 = m_2 = \sqrt{\frac{\sens{\bm{x}_t}{\bm{\theta}_{t+1}}}{N}} \leq \frac{1}{2\sqrt{N}}$, $d_1=d_2=d$, and $\delta = \frac{2\delta}{N(N-1)}$, we get that 
$$\P\cbrak{\exists t \geq 1: \norm{\bm{W}^{i,j}_t} \geq \sqrt{\frac{t}{2N^2}\log\pbrak{\frac{2d N(N-1)}{2\delta}}}} \leq \frac{2\delta}{N(N-1)}$$
Performing a union bound over all $i \in \sbrak{N}$ and $j \in \sbrak{i+1,N}$ results in the following:
$$\P\cbrak{\forall t: \norm{\bm{W}^{i,j}_t} \leq \sqrt{\frac{t}{2N^2}\log\pbrak{\frac{d N(N-1)}{\delta}}}} \geq 1 - \delta$$
This finishes the proof.

\end{proof}


\begin{lemma}
    For all $i \in \sbrak{N}$, $\P\cbrak{\exists t \geq T_0 : \eigmin{ \bm{W}^i_t} < 1 + \frac{\rho t}{2}} \leq \delta$.
    \label{lemma : norm of slot design matrix}
\end{lemma}
\begin{proof}  To prove this claim, we invoke Lemma \ref{lemma: min_eig_design}. We have already shown in Lemma \ref{lemma: extension of diversity} that $\E\sbrak{\sqrt{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}}\bm{x}^i_t | \filteration{t}} = \bm{0}_d$ and $\E\sbrak{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}\bm{x}^i_t{\bm{x}^i_t}^\top | \filteration{t}} \mgeq \rho\bm{I}_d$. Thus, invoking Lemma \ref{lemma: min_eig_design} with $\bm{x}_t = \sqrt{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}}\bm{x}^i_t$, $m = \frac{1}{2\sqrt{N}}$, $d = d$, $\gamma = 1$, $c = \frac{1}{2}$, and $\delta = \frac{\delta}{N}$, we get that with probability at least $1 - \frac{\delta}{N}$, 
$$\eigmin{\bm{W}^i_t} \geq 1 + \frac{\rho t}{2},  \forall  t \geq \frac{3 + 2N\rho}{3\rho^2 N^2}\log\pbrak{\frac{2dNT}{\delta}}$$
% $$\P\cbrak{\forall t \geq f(T) : \eigmin\bm{W}^i_t \leq 1 + \frac{\rho t}{2}} \leq \frac{\delta}{NT}$$
Performing a union bound over all $i \in \sbrak{N}$ and using the fact that $(N-1)^2 \geq 1/N^2$ gives us:
$$\P\cbrak{\forall t \geq \frac{3 + 2\rho N}{3\rho^2}(N-1)^2\log\pbrak{\frac{2d NT}{\delta}} , \forall i \in [N] : \eigmin{\bm{W}^i_t} \geq 1 + \frac{\rho t}{2}} \geq 1 - \delta$$
Since $T_0 \geq \frac{3 + 2\rho N}{3\rho^2}(N-1)^2\log\pbrak{\frac{2d NT}{\delta}}$, the same statement holds for all $t \geq T_0$. This finishes the proof.

\end{proof}

Let us define the following events: $\event_1 = \cbrak{\forall i \in [N] , \forall j \in [i+1,N], \forall t \geq 0: \norm{\bm{W}^{i,j}_t} \leq \sqrt{\frac{t}{2N^2}\log\pbrak{\frac{d N(N-1)}{\delta}}}}$, $\event_2 = \cbrak{\forall i \in[N], \forall t \geq T_0: \eigmin{\bm{W}^i_t} \geq 1 + \frac{\rho t}{2}}$, and $\event_0 = \event_1 \cap \event_2$.

\begin{lemma}
    $\P\cbrak{\event_0} \geq 1-2\delta$
    \label{lemma: prob of events for W}
\end{lemma}
\begin{proof}  $\P\cbrak{\eventc{\event_0}} = \P\cbrak{\eventc{\event}_1 \cup \eventc{\event}_2} \leq \P\cbrak{\eventc{\event}_1} + \P\cbrak{\eventc{\event}_2} \leq 2\delta$ using a union bound. 

\end{proof}

\begin{lemma}
    Define the matrix $\bm{Z}^{(i)}_t = \begin{bmatrix}
        (\bm{W}^{i}_t)^{-\frac{1}{2}} \bm{W}^{i,i+1}_t (\bm{W}^{i+1}_t)^{-\frac{1}{2}}  , \ldots  (\bm{W}^{i}_t)^{-\frac{1}{2}} \bm{W}^{i,N}_t (\bm{W}^{N}_t)^{-\frac{1}{2}} 
    \end{bmatrix}$
    Then, under event $\event_0$, for $t \geq T_0$ and $\rho \geq \frac{12}{N}$, we have that 
    $$\norm{\bm{Z}^{(i)}_t} \leq \frac{N-i}{2N(N-1)}$$
    \label{lemma: bound on norm of Z}
\end{lemma}
\begin{proof}  The idea of the proof is borrowed from \cite{Das_2024}. We know that $\norm{\bm{Z}} = \sup\limits_{\twonorm{\bm{b}} \leq 1}\twonorm{\bm{Z}\bm{b}}$. Thus,
\begin{align*}
    \norm{\bm{Z}^{(i)}_t} &= \sup\limits_{\twonorm{\bm{b}} \leq 1} \twonorm{\bm{Z}^{(i)}_t \bm{b}} 
    =\sup\limits_{\sum\limits_{j=1}^{N-i}\twonorm{b_j}^2 \leq 1} \twonorm{\sum\limits_{j=1}^{N-i} (\bm{W}^{i}_t)^{-\frac{1}{2}}\bm{W}^{i,i+j}_t(\bm{W}^{i+j}_t)^{-\frac{1}{2}} b_j} \\ &\overset{(i)}{\leq} \sup\limits_{\sum\limits_{j=1}^{N-i}\twonorm{b_j}^2 \leq 1} \sum\limits_{j=1}^{N-i}\twonorm{(\bm{W}^{i}_t)^{-\frac{1}{2}}\bm{W}^{i,i+j}_t(\bm{W}^{i+j}_t)^{-\frac{1}{2}} b_j}
    \leq \sum\limits_{j=1}^{N-i}\sup\limits_{\twonorm{b_j} \leq 1} \twonorm{(\bm{W}^{i}_t)^{-\frac{1}{2}}\bm{W}^{i,i+j}_t(\bm{W}^{i+j}_t)^{-\frac{1}{2}} b_j}\\
    &\overset{(ii)}{\leq} \sum\limits_{j=1}^{N-i} \norm{(\bm{W}^{i}_t)^{-\frac{1}{2}}} \norm{\bm{W}^{i , i+j}_t} \norm{(\bm{W}^{i+j}_t)^{-\frac{1}{2}}}
    \overset{(iii)}{\leq} \sum\limits_{j=1}^{N-i} \frac{\norm{\bm{W}^{i , i+j}_t}}{\sqrt{\eigmin{\bm{W}^{i}_t}\eigmin{\bm{W}^{i+j}_t}}}
    \overset{(iv)}{\leq} \sum\limits_{j=1}^{N-i} \frac{\sqrt{\frac{t}{2N^2}\log\pbrak{\frac{d N(N-1)}{\delta}}}}{1 + \frac{\rho t}{2}}\\
    &\overset{(v)}{\leq} \sum\limits_{j=1}^{N-i} \sqrt{\frac{\frac{1}{2N^2}\log\pbrak{\frac{d N(N-1)}{\delta}}}{\frac{3+2\rho N}{12}(N-1)^2\log\pbrak{\frac{2d NT}{\delta}}}} 
    =\frac{1}{N(N-1)} \sum\limits_{j=1}^{N-i} \sqrt{\frac{6}{3 + 2\rho N}} \times \sqrt{\frac{\log\pbrak{\frac{d N(N-1)}{\delta}}}{\log\pbrak{\frac{2d NT}{\delta}}}}\\
    &\leq \frac{N-i}{N(N-1)}\sqrt{\frac{6}{3 + 2\rho N}}
    \leq \frac{N-i}{N(N-1)}\sqrt{\frac{3}{\rho N}}
    \overset{(vi)}{\leq} \frac{N-i}{2N(N-1)}
\end{align*}

where $(i)$ follows from the triangle inequality, $(ii)$ follows from the sub-multiplicativity of the norm, $(iii)$ follows from the fact that, for a symmetric positive definite matrix $\bm{A}$, $\norm{\bm{A}} = \eigmax{\bm{A}} \text{ and }\eigmax{\bm{A}\inv} = \frac{1}{\eigmin{\bm{A}}}$, $(iv)$ follows from the bounds guaranteed by event $\event_0$ (see Lemma \ref{lemma: norm_cross_terms} and Lemma \ref{lemma : norm of slot design matrix}), $(v)$ follows from $\frac{1}{1+\frac{\rho t }{2}} \leq \frac{1}{\frac{\rho t}{2}} \text{ and } t \geq T_0$, and $(vi)$ follows from the fact that $\rho N \geq 12$.

\end{proof}

\begin{lemma}
    Under event $\event_0$, for all $t \geq T_0$, we have
    $$ \frac{3}{4}\bm{U}_t \mleq \bm{W}_t \mleq \frac{5}{4}\bm{U}_t$$
    \label{lemma: ineq on W}
\end{lemma}
\begin{proof}  Define the matrix recurrence relation:
$$\bm{A}^{(i)}_t = \begin{bmatrix}
    \bm{0}_{d \times d} & \bm{Z}^{(i)}_t\\
    {\bm{Z}^{(i)}_t}^\top & \bm{A}^{(i+1)}_t
\end{bmatrix}$$
where $\bm{Z}^{(i)}_t = \begin{bmatrix}
        (\bm{W}^{i}_t)^{-\frac{1}{2}} \bm{W}^{i,i+1}_t (\bm{W}^{i+1}_t)^{-\frac{1}{2}}  , \ldots  (\bm{W}^{i}_t)^{-\frac{1}{2}} \bm{W}^{i,N}_t (\bm{W}^{N}_t)^{-\frac{1}{2}} 
    \end{bmatrix}$. Then, it is easy to see that $\bm{A}_t$ from Lemma \ref{lemma: design_matrix_decomp} is the same as $\bm{A}^{(1)}_t$.  From Lemma \ref{lemma: recurrence}, we have that $$\eigmax{\bm{A}_t} \leq \sum\limits_{i=1}^{N}\singmax{\bm{Z}^{(i)}_t} = \sum\limits_{i=1}^{N} \norm{\bm{Z}^{(i)}_t} \leq \sum\limits_{i=1}^{N} \frac{N-i}{2N(N-1)}= \frac{1}{4}$$
    Similarly,
    $$\eigmin{\bm{A}_t} \geq -\sum\limits_{i=1}^{N}\singmax{\bm{Z}^{(i)}_t} = -\sum\limits_{i=1}^{N} \norm{\bm{Z}^{(i)}_t} \geq -\sum\limits_{i=1}^{N} \frac{N-i}{2N(N-1)} = -\frac{1}{4}$$
    
Thus, we can write 

\begin{align*}
    -\frac{1}{4} \bm{I}_{dN} \mleq \bm{A}_t \mleq \frac{1}{4}\bm{I}_{dN}
    \implies -\frac{1}{4} \bm{I}_{dN} \mleq  \bm{U}_t^{-\frac{1}{2}}\bm{W}_t\bm{U}_t^{-\frac{1}{2}} - \bm{I}_{dN}\mleq \frac{1}{4}\bm{I}_{dN}
    \implies \frac{3}{4}\bm{U}_t \mleq  \bm{W}_t \mleq \frac{5}{4}\bm{U}_t
\end{align*}

\end{proof}

\begin{lemma}
    Let $\tilde{\bm{x}}^i$ be the lift of $\bm{x}^i$. Then, 
    $$\matnorm{\tilde{\bm{x}}^i}{\bm{W}\inv} \leq \frac{4}{3}\matnorm{\bm{x}^i}{(\bm{W}^{i})\inv}$$
    \label{lemma: conversion of norms}
\end{lemma}
\begin{proof} 
From Lemma \ref{lemma: ineq on W}, we have
\begin{align*}
    \matnorm{\tilde{\bm{x}}^i}{\bm{W}\inv} \leq \frac{4}{3} \matnorm{\tilde{\bm{x}}^i}{\bm{U}\inv} = \frac{4}{3} \matnorm{\bm{x}^i}{(\bm{W}^i)\inv}
\end{align*}
where the last equality follows from the definition of the lift of $\bm{x}^i$ and the block-diagonal structure of $\bm{U}$.

\end{proof}

\begin{lemma}
    With probability at least $1 - 2\delta$, for all $t \geq T_0$ and $\rho \geq \frac{12}{N}$, we have
    $$\frac{3}{4}\bm{U}^\H_t \mleq  \bm{V}^\H_t \mleq \frac{5}{4}\bm{U}^\H_t$$
    \label{lemma: bounds on V for failing data dependent condition}
\end{lemma}
\begin{proof}  

First, notice the similarity in structures between $\bm{V}^{\mathcal{H}}_t$ and $\bm{W}_t$, as well as between $\bm{U}^\mathcal{H}_t$ and $\bm{U}_t$. Thus, we can perform a decomposition similar to the one in Lemma \ref{lemma: design_matrix_decomp}. We first show that the diversity conditions hold. It is enough to obtain a bound on the norm of the matrices $\bm{V}^{\mathcal{H},i,j}_t$ and $\bm{V}^{\mathcal{H},i}_t$ to prove the claim.

We first show that the diversity assumptions also hold for the set of vectors $\cbrak{\frac{1}{\sqrt\kappa}\bm{x}^i_t}_{i=1}^N$. For this, we show that $\frac{1}{\sqrt{\kappa}}$ is bounded.

From the proof of Lemma \ref{lemma: extension of diversity}, we have shown that $\sens{\bm{x}}{\bm{\theta}} \in \sbrak{\dsigmoid{S} , \frac{1}{4}}$. Since, $\kappa = \max\limits_{\bm{x}}\max\limits_{\bm{\theta}}\frac{1}{\sens{\bm{x}}{\bm{\theta}}}, \kappa \in \sbrak{4 , \frac{1}{\dsigmoid{S}}}$. Hence, $\frac{1}{\kappa} \in \sbrak{\dsigmoid{S} , \frac{1}{4}}$ and we can show:
\begin{align*}
    &\sqrt{\dsigmoid{S}}\E\sbrak{\bm{x}^i_s | \filteration{s}} \leq \E\sbrak{\frac{1}{\sqrt\kappa}\bm{x}^i_s|\filteration{s}} \leq \frac{1}{2}\E\sbrak{\bm{x}^i_s | \filteration{s}}\\
    &\implies \bm{0}_d \leq \E\sbrak{\frac{1}{\sqrt\kappa}\bm{x}^i_s|\filteration{s}}\leq \bm{0}_d\\
    &\implies \E\sbrak{\frac{1}{\sqrt\kappa}\bm{x}^i_s|\filteration{s}} = \bm{0}_d
\end{align*} 
Similarly, from Lemma \ref{lemma: independence of cross terms},
\begin{align*}
    &\dsigmoid{S}\E\sbrak{\bm{x}^i_s{\bm{x}^{j}_s}^\top | \filteration{s}} \leq \E\sbrak{\frac{1}{\kappa} \bm{x}^i_s{\bm{x}^{j}_s}^\top|\filteration{s}}  \leq \frac{1}{4}\E\sbrak{\bm{x}^i_s{\bm{x}^{j}_s}^\top | \filteration{s}}\\
    &\implies\bm{0}_d \leq \E\sbrak{\frac{1}{\kappa} \bm{x}^i_s{\bm{x}^{j}_s}^\top|\filteration{s}}
    \leq \bm{0}_d\\
    &\implies \E\sbrak{\frac{1}{\kappa} \bm{x}^i_s{\bm{x}^{j}_s}^\top|\filteration{s}} = \bm{0}_d
\end{align*} 
Finally,
\begin{align*}
    &\E\sbrak{\bm{x}^i_s{\bm{x}^{i}_s}^\top | \filteration{s}} \mgeq \rho \kappa \bm{I}_d
    \implies \E\sbrak{\frac{1}{\kappa}\bm{x}^i_s{\bm{x}^{i}_s}^\top | \filteration{s}}\mgeq \rho \bm{I}_d
\end{align*}


 Using an idea similar to Lemma \ref{lemma: norm_cross_terms}, we can define the event 
$$\event_1^\prime = \cbrak{\forall i \in [N] , \forall j \in [i+1,N], \forall t \geq T_0: \norm{\bm{V}^{\mathcal{H},i,j}_t} \leq \sqrt{\frac{8t}{\kappa^2N^2}\log\pbrak{\frac{d N(N-1)}{\delta}}}}$$


Similarly, using an idea similar to Lemma \ref{lemma : norm of slot design matrix}, we can define the event
$$\event_2^\prime = \cbrak{\forall i \in [N], \forall t \geq \frac{48 + 8\kappa N\rho}{3\rho^2\kappa^2}(N-1)^2\log\pbrak{\frac{2d NT}{\delta}}: \eigmin{\bm{V}^{\mathcal{H},i}_t} \geq \gamma_t(\delta) + \frac{\rho t}{2}}$$
Since $\kappa \geq 4$, we have that $T_0 \geq \frac{3 + 2 N\rho}{3\rho^2}(N-1)^2\log\pbrak{\frac{2d NT}{\delta}} \geq \frac{48 + 8\kappa N\rho}{3\rho^2\kappa^2}(N-1)^2\log\pbrak{\frac{2d NT}{\delta}} $, and hence, we may take 
$$\event_2^\prime = \cbrak{\forall i \in [N], \forall t \geq T_0: \eigmin{\bm{V}^{\mathcal{H},i}_t} \geq \gamma_t(\delta) + \frac{\rho t}{2}}$$


Define $\event_0^\prime = \event_1^\prime \cap \event_2^\prime$. Then, it is easy to see $\P\cbrak{\event_0^\prime} \geq 1 - 2\delta$.

Finally, following the same line of thought as Lemma \ref{lemma: bound on norm of Z} and Lemma \ref{lemma: ineq on W}, and using the fact that $\frac{1}{\kappa} \leq \frac{1}{4}$, we obtain 
$$\frac{3}{4}\bm{U}^\H_t \mleq  \bm{V}^\H_t \mleq \frac{5}{4}\bm{U}^\H_t$$

\end{proof}


\begin{lemma}
    (\cite{Faury2022}, Proposition 7) Let $\delta \in \pbrak{0,1}$ and $\cbrak{\pbrak{\bm{\theta}_t , \bm{W}_t , \Theta_t}}_t$ be maintained by the ada-OFU-ECOLog algorithm. Then, 
    $$\P\cbrak{\forall t \geq 1: \bm{\theta}^\star \in \Theta_t \text{ and } \matnorm{\bm{\theta}^\star - \bm{\theta}_{t+1}}{\bm{W}_{t+1}}^2 \leq CS^2d\log(t/\delta)} \geq 1 - 2\delta$$
    \label{lemma: ada_ofu_ecolog_confidence_bound}
\end{lemma}


\begin{lemma}
    Define the following events:
    $$\event^\prime = \cbrak{\forall t \geq 1, \matnorm{\bm{\theta}^\star - \bm{\theta}_{t+1}}{\bm{W}_{t+1}}^2 \leq CS^2d\log(t/\delta) \text{ and }\bm{\theta}^\star \in \Theta}$$
    $$\event = \event_0 \cap \event_0^\prime \cap \event^\prime$$
    Then, we have that $\P\cbrak{\event} \geq 1 - 6\delta$.
\end{lemma}
\begin{proof}  
\begin{align*}
    \P\cbrak{\eventc{\event}} &= \P\cbrak{\eventc{\event_0 \cap \event_0^\prime \cap \event^\prime}} 
    = \P\cbrak{\eventc{\event_0} \cup \eventc{\event_0^\prime} \cup \eventc{\event^\prime}} 
    \leq \P\cbrak{\eventc{\event_0}} +  \P\cbrak{\eventc{\event_0^\prime}} +  \P\cbrak{\eventc{\event^\prime}}
    \leq 2\delta + 2\delta + 2\delta
    =6\delta
\end{align*}

where the last inequality follows from Lemma \ref{lemma: prob of events for W} , \ref{lemma: bounds on V for failing data dependent condition}, and \ref{lemma: ada_ofu_ecolog_confidence_bound} respectively.

\end{proof}


\begin{lemma}
    (\cite{Abeille2021}, Theorem 1) $\sum\limits_{t=1}^{T}\sens{\bm{x}_t}{\bm{\theta}^\star} \leq R(T) + \sum\limits_{t=1}^{T}\sens{{\bm{x}_t^\star}}{\bm{\theta}^\star}$, where $R(T) = \sum\limits_{t=1}^{T}\pbrak{\sigmoid{\inner{{\bm{x}_t^\star}}{\bm{\theta}^\star}} - \sigmoid{\inner{\bm{x}_t}{\bm{\theta}^\star}}}$.
    \label{Lemma: Abielle result}
\end{lemma}
\begin{proof}  We provide a brief proof for the sake of completeness
\begin{align*}
    \sum\limits_{t=1}^{T}\sens{\bm{x}_t}{\bm{\theta}^\star} &= \sum\limits_{t=1}^{T}\sens{{\bm{x}_t^\star}}{\bm{\theta}^\star} +  \sum\limits_{t=1}^{T}\int_0^{1}  \ddot{\mu}\pbrak{\inner{\bm{x}_t}{\bm{\theta}^\star} + v\inner{({\bm{x}_t^\star} - \bm{x}_t)}{\bm{\theta}^\star}}\; \diff v \inner{\pbrak{\bm{x}_t  -{\bm{x}_t^\star}}}{\bm{\theta}^\star}\\
    &\leq \sum\limits_{t=1}^{T}\sens{{\bm{x}_t^\star}}{\bm{\theta}^\star} + \sum\limits_{t=1}^{T}\modulus{ \int_0^{1}  \ddot{\mu}\pbrak{\inner{\bm{x}_t}{\bm{\theta}^\star} + v\inner{({\bm{x}_t^\star} - \bm{x}_t)}{\bm{\theta}^\star}}\; \diff v \inner{\pbrak{\bm{x}_t  -{\bm{x}_t^\star}}}{\bm{\theta}^\star}}\\
    &\overset{(i)}{\leq} \sum\limits_{t=1}^{T}\sens{{\bm{x}_t^\star}}{\bm{\theta}^\star} +  \sum\limits_{t=1}^{T}\int_0^{1}  \modulus{\ddot{\mu}\pbrak{\inner{\bm{x}_t}{\bm{\theta}^\star} + v\inner{({\bm{x}_t^\star} - \bm{x}_t)}{\bm{\theta}^\star}}}\; \diff v \modulus{\inner{\pbrak{\bm{x}_t  -{\bm{x}_t^\star}}}{\bm{\theta}^\star}}\\
    &\overset{(ii)}{\leq} \sum\limits_{t=1}^{T}\sens{{\bm{x}_t^\star}}{\bm{\theta}^\star} +  \sum\limits_{t=1}^{T}\int_0^{1}  \modulus{\ddot{\mu}\pbrak{\inner{\bm{x}_t}{\bm{\theta}^\star} + v\inner{({\bm{x}_t^\star} - \bm{x}_t)}{\bm{\theta}^\star}}}\; \diff v \inner{\pbrak{{\bm{x}_t^\star}- \bm{x}_t}}{\bm{\theta}^\star}\\
    &\overset{(iii)}{\leq} \sum\limits_{t=1}^{T}\sens{{\bm{x}_t^\star}}{\bm{\theta}^\star} +  \sum\limits_{t=1}^{T}\int_0^{1}  \dsigmoid{\inner{\bm{x}_t}{\bm{\theta}^\star} + v\inner{({\bm{x}_t^\star} - \bm{x}_t)}{\bm{\theta}^\star}}\; \diff v \inner{\pbrak{{\bm{x}_t^\star}- \bm{x}_t}}{\bm{\theta}^\star} \\
    &\overset{(iv)}{=} \sum\limits_{t=1}^{T}\sens{{\bm{x}_t^\star}}{\bm{\theta}^\star} + \sum\limits_{t=1} ^{T}\sigmoid{\inner{{\bm{x}_t^\star}}{\bm{\theta}^\star}} - \sigmoid{\inner{\bm{x}_t}{\bm{\theta}^\star}}\\
    &= \sum\limits_{t=1}^{T}\sens{{\bm{x}_t^\star}}{\bm{\theta}^\star} + R(T)
\end{align*}

Here, $(i)$ follows from $\modulus{\int f(x) \; \diff x} \leq \int \modulus{f(x)}\; \diff x$, $(ii)$ follows from $\inner{{\bm{x}_t^\star}}{\bm{\theta}^\star} \geq \inner{\bm{x}_t}{\bm{\theta}^\star}$, $(iii)$ follows since $\modulus{\ddot{\mu}(.)} \leq \dsigmoid{.}$, and $(iv)$ follows from applying the Mean-Value Theorem on the expression for $R(T)$. 

\end{proof}

\begin{lemma}
    Let $\mathcal{T}$ represent the set of all time instances where the data-dependent condition fails, i.e., $\forall t \in \mathcal{T}$, $\sens{\bm{x}_t}{\bar{\bm{\theta}}_t} \geq 2\sens{\bm{x}_t}{\bm{\theta}^u_t}$ for all $u \in \cbrak{0,1}$. Then,
    \begin{align*}
        \modulus{\mathcal{T}} \leq C  S^6 N^2 d^2 \kappa \log(T/\delta)\log(T/\kappa N)
    \end{align*}
    \label{lemma: number of times condition  fails}
\end{lemma}
\begin{proof}  The proof follows along the lines of \cite{Faury2022}.

By the self-concordance property of the logistic function, we know that
$$\sens{\bm{x}_t}{\bar{\bm{\theta}}_t} \leq \sens{\bm{x}_t}{\bm{\theta}^u_{t}}\exp\pbrak{\modulus{\inner{\bm{x}_t}{(\bar{\bm{\theta}}_t - \bm{\theta}^u_t)}}}$$

Thus, if $t \in \mathcal{T}$, we have that $\modulus{\inner{\bm{x}_t}{(\bar{\bm{\theta}}_t - \bm{\theta}^u_t)}} \geq \log 2$.

Summing this over all indices in $\mathcal{T}$, we get that
\begin{align*}
\sum\limits_{t \in \mathcal{T}}{} \log^2 2  &= \modulus{\mathcal{T}} \log^2 2 \leq \sum\limits_{t \in \mathcal{T}}{}\modulus{\inner{\bm{x}_t}{(\bar{\bm\theta}_t - \bm\theta^u_t)}}^2
\overset{(i)}{\leq} \sum\limits_{t \in \mathcal{T}}{} \matnorm{\bm{x}_t}{(\bm{V}^{\H}_{t})\inv}^2\matnorm{\bar{\bm\theta}_t - \bm\theta^u_t}{\bm{V}^\H_{t}}^2
\overset{(ii)}{\leq} 4\beta_T(\delta)\sum\limits_{t \in \mathcal{T}}{}\matnorm{\bm{x}_t}{(\bm{V}^{\H}_{t})\inv}^2\\
&\overset{(iii)}{\leq} C\beta_T(\delta) \sum\limits_{t\in\mathcal{T}}{}\matnorm{\bm{x}_t}{(\bm{U}^{\H}_{t})\inv}^2
\overset{(iv)}{\leq} C\beta_T(\delta) \sum\limits_{t\in\mathcal{T}}{}\matnorm{\sum\limits_{i=1}^{N}\tilde{\bm{x}}^i_t}{(\bm{U}^{\H}_{t})\inv}^2\overset{(v)}{\leq} C\beta_T(\delta) \sum\limits_{i=1}^{N}\sum\limits_{t\in\mathcal{T}}{}\matnorm{\tilde{\bm{x}}^i_t}{(\bm{U}^{\H}_{t})\inv}^2\\
&\leq C\beta_T(\delta) \sum\limits_{i=1}^{N}\sum\limits_{t\in\mathcal{T}}{}\matnorm{\bm{x}^i_t}{(\bm{V}^{\mathcal{H},i}_t)\inv}^2
\overset{(vi)}{\leq} C N d\beta_T(\delta) \kappa  \log\pbrak{T/\kappa N} 
\overset{(vii)}{\leq} C  S^6 N^2 d^2 \kappa \log(T/\delta)\log(T/\kappa N)
\end{align*}

Here $(i)$ follows from the Cauchy-Schwarz inequality, $(ii)$ follows from the fact that $\bm{\theta}^u_t , \bar{\bm{\theta}}_t \in \Theta_t$ and $(a+b)^2 \leq 2a^2+2b^2$, $(iii)$ follows due to event $\event_0^\prime$, $(iv)$ follows from the definition of the lift of $\bm{x}^i_t$, i.e., $\bm{x}_t = \sum\limits_{i=1}^{N}\tilde{\bm{x}}^i_t$, $(v)$ follows from the triangle inequality, $(vi)$ follows from a direct application of Lemma \ref{lemma: elliptical potential lemma} on $\frac{1}{\sqrt\kappa}\bm{x}^i_t$ and the fact that $\twonorm{\frac{1}{\sqrt\kappa}\bm{x}^i_t} \leq \frac{1}{\sqrt{N\kappa}}$, and $(vii)$ follows from the bound $\beta_{T}(\delta) \leq CS^6Nd\log(T/\delta)$.

\end{proof}
