
\section{\texttt{SLATE-GLM-TS} and \texttt{SLATE-GLM-TS-FIXED}}
\label{appendix:ts-algos}

\subsection{Algorithm in a fixed-arm setting}
We present a Thompson Sampling based algorithm \slateglincbtsfixed\ in the non-contextual (fixed-arm) setting in Algorithm \ref{algo:TS-Fixed}. Following this, we analyze the regret of this algorithm in Theorem \ref{theorem:TS}. Since we are in the non-contextual setting, we directly use the minimum eigenvalue bound in Assumption \ref{assumption_TS}. (See Remarks on Assumption \ref{assumption: diversity} in Section \ref{section:preliminaries}). 

\begin{algorithm}[H] 
\caption{\texttt{Slate-GLM-TS-Fixed}}
\label{algo:TS-Fixed} 
\begin{algorithmic}[1]
\STATE \textbf{Inputs:} Number of rounds $T$, Failure probability $\delta$ , Distribution $\mathcal{D}^{TS}$, warm-up length $\tau$
\STATE Initialize $\bm{V}^{\mathcal{H},i}_0 = \lambda \bm{I}_d \; \forall i \in [N]$ and $\bm{V}^{\mathcal{H}}_0 = \lambda \bm{I}_{Nd}$
\STATE Obtain the set of items $\mathcal{X}^i, \forall i \in [N]$
\FOR{each round $t$ in $[1,\tau]$}
    \STATE For each slot $i \in [N]$, choose $\bm{x}^i_t = \argmax_{\bm{x}\in\mathcal{X}^i}\matnorm{\bm{x}}{(\bm{V}^{\mathcal{H},i}_{t-1})\inv}$, select slate $\bm{x}_t = (\bm{x}^1_t, \ldots, \bm{x}^N_t)$, and get reward $y_t$.
    \STATE Update $\bm{V}^{\mathcal{H}}_t \gets \bm{V}^{\mathcal{H}}_{t-1} + \frac{1}{\kappa} \bm{x}_t\bm{x}_t^\top$ and $\bm{V}^{\mathcal{H},i}_t \gets \bm{V}^{\mathcal{H},i}_{t-1} + \frac{1}{\kappa}\bm{x}^i_t{\bm{x}^{i}_t}^\top$, $\forall i\in [N]$
\ENDFOR
\STATE Compute $\widehat{\bm{\theta}}_{\tau} = \argmin \summation{s=1}{\tau}l_{s+1}(\bm{\theta}) + \frac{\lambda}{2}\twonorm{\bm{\theta}}^2$ and set $\Theta = \cbrak{\matnorm{\bm{\theta} - \widehat{\bm{\theta}}_{\tau}}{\bm{V}^{\mathcal{H}}_{\tau}} \leq \beta_\tau(\delta)}$
\STATE Initialize $\bm{W}_\tau = \bm{I}_{dN}, \bm{W}^{i}_\tau = \bm{I}_{d}, \forall i \in [N]$ and $\bm{\theta}_{\tau+1} \in \Theta$
\FOR{each round $t \in [\tau+1 , T]$}
    \STATE Set reject = True
    \WHILE{reject}
        \STATE For each slot $i \in [N]$, sample $\bm{\eta}^{i}\overset{\mathrm{iid}}{\sim} \mathcal{D}^{TS}$, and set  $\tilde{\bm{\theta}}^{i}_t = \bm{\theta}^{i}_t + \sqrt{\sigma_t(\delta)}(\bm{W}_t^i)^{-1/2}\bm{\eta}^{i}$
    \STATE If $\tilde{\bm{\theta}}_t = (\tilde{\bm{\theta}}^{1}_t , \ldots , \tilde{\bm{\theta}}^{N}_t) \in \Theta$, set reject = False
    \ENDWHILE
    \STATE For each slot $i\in [N]$, choose $\bm{x}^i_t = \argmax_{\bm{x} \in \X^i} \inner{\bm{x}}{\tilde{\bm{\theta}}^{i}_t}$, select slate $\bm{x}_t = (\bm{x}^1_t, \ldots, \bm{x}^N_t)$, and get reward $y_t$

    \STATE Let $\bm{\theta}_{t+1}$ be the solution of Equation~\eqref{equation:optimization} up to precision $1/t$.
    
    \STATE Update $\bm{W}_{t+1} = \bm{W}_t + \dot{\mu}(\bm{x}_t^\top\bm{\theta}_{t+1})\bm{x}_{t}\bm{x}_{t}^\top$, and $\bm{W}^i_{t+1} = \bm{W}^i_{t} + \dot{\mu}({\bm{x}_t}^\top \bm{\theta}_{t+1})\bm{x}^i_t{\bm{x}^i_t}^\top$, $\forall i\in [N]$
\ENDFOR
\end{algorithmic}
\end{algorithm}

\begin{assumption}
    The minimum eigenvalue of the design matrices grows linearly, i.e.,
$$\eigmin{\bm{V}^{(i)}_{t}} = \eigmin{\bm{W}^{(i)}_{t}} \geq \rho t \text{ and } \eigmin{\bm{V}^{\mathcal{W} \; (i)}_t} \geq \rho t$$
\label{assumption_TS}
\end{assumption}


Define $T_0 = \max\cbrak{\frac{(N-1)^2}{2\rho^2}\log\frac{dN(N-1)}{\delta} , \frac{8(N-1)^2}{\kappa^2\rho^2}\log\frac{dN(N-1)}{\delta}} = \frac{(N-1)^2}{2\rho^2}\log\frac{dN(N-1)}{\delta}$ since $\kappa > 4$.

\begin{theorem}
\label{theorem:TS}
    (Regret of \texttt{Slate-GLM-TS-Fixed}) At the end of $T \geq T_0$ rounds, the regret of \texttt{Slate-GLM-TS-Fixed} is bounded by
    $$Regret(T) \leq \max\{CS^6N^2d^2\kappa\log(T/\delta)^2 , T_0\} + CSN^{3/2}d^{3/2} \sqrt{\log(T/\delta)\log(T/2)}\sqrt{T\dsigmoid{\inner{\bm{x}_\star}{\thetastar}}} + CN^3d^3S^2\log(T/\delta)\log(T/2) $$
    \label{appendix: regret_proof_TS}
\end{theorem}
\begin{proof} 
We have that the \emph{good events} are defined for $t \in [T_0  ,T]$. Since the first $|\mathcal{T}| = \tau$ rounds constitute a warm-up (\emph{Steps 4-7} in Algorithm \ref{algo:TS-Fixed}), we can trivially bound the regret of these rounds (warm-up as well as the first $T_0$ rounds) by $1 \cdot \max\{\tau , T_0\}$. Going forward, let $\max\{\tau , T_0\} = T^\prime$. Hence, we have
\begin{align*}
    Regret(T) &\leq \max\{\tau , T_0 \} + \summation{t=T^\prime +1}{T} \sigmoid{\inner{\bm{x}_\star}{\thetastar}} - \sigmoid{\inner{\bm{x}_t}{\thetastar}}\\
    &\leq \max\{CS^6N^2d^2\kappa\log(T/\delta)^2 , T_0 \} + \summation{t=T^\prime+1}{T} \cbrak{\sigmoid{\inner{\bm{x}_\star}{\thetastar}} - \sigmoid{\inner{\bm{x}_t}{\tilde{\bm{\theta}}_t}}} + \summation{t=T^\prime+1}{T}\cbrak{\sigmoid{\inner{\bm{x}_t}{\tilde{\bm{\theta}}_t}} - \sigmoid{\inner{\bm{x}_t}{\thetastar}}}\\
    &= \max\{CS^6N^2d^2\kappa\log(T/\delta)^2 , T_0 \} + R^{TS}(T) + R^{PRED}(T) = \max\{CS^6N^2d^2\kappa\log(T/\delta)^2 , T_0 \} + R(T)
\end{align*}
where $R(T) = R^{TS}(T) + R^{PRED}(T)$, $R^{TS}(T) = \summation{t=T^\prime+1}{T} \sigmoid{\inner{\bm{x}_\star}{\thetastar}} - \sigmoid{\inner{\bm{x}_t}{\tilde{\bm{\theta}}_t}}$ , and $R^{PRED}(T) = \summation{t=T^\prime+1}{T} \sigmoid{\inner{\bm{x}_t}{\tilde{\bm{\theta}}_t}} - \sigmoid{\inner{\bm{x}_t}{\thetastar}}$. The first inequality follows from Lemma \ref{lemma: warmup}.

We first bound $R^{PRED}(T)$ as follows:
\begin{align*}
    R^{PRED}(T) &= \summation{t=T^\prime+1}{T} \sigmoid{\inner{\bm{x}_t}{\tilde{\bm{\theta}}_t}} - \sigmoid{\inner{\bm{x}_t}{\thetastar}}
    \leq \summation{t=T^\prime+1}{T} \dsigmoid{\inner{\bm{x}_t}{\thetastar}} \modulus{\inner{\bm{x}_t}{\pbrak{\tilde{\bm{\theta}}_t - \thetastar}}}\\
    &\overset{(i)}{\leq} \summation{t=T^\prime+1}{T}\sqrt{ \dsigmoid{\inner{\bm{x}_t}{\thetastar}}}\sqrt{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}\exp\pbrak{\modulus{\inner{\bm{x}_t}{\pbrak{\thetastar - \bm{\theta}_{t+1}}}}}} \matnorm{\bm{x}_t}{\bm{W}_t\inv}\matnorm{\thetastar - \tilde{\bm{\theta}}_{t}}{\bm{W}_t}\\
    &\overset{(ii)}{\leq} C\sqrt{e}\sqrt{\sigma_t(\delta)}\sqrt{Nd}\summation{t=T^\prime+1}{T}\sqrt{ \dsigmoid{\inner{\bm{x}_t}{\thetastar}}} \sqrt{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}} \matnorm{\bm{x}_t}{\bm{W}_t\inv}\\
    &\overset{(iii)}{\leq} C\sqrt{\sigma_t(\delta)}\sqrt{Nd}\sqrt{\summation{t=T^\prime+1}{T} \dsigmoid{\inner{\bm{x}_t}{\thetastar}}} \sqrt{\summation{t=T^\prime+1}{T}\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}} \matnorm{\bm{x}_t}{\bm{W}_t\inv}^2}\\
    &\overset{(iv)}{\leq} C\sqrt{\sigma_t(\delta)} Nd \sqrt{\log(T/2)}\pbrak{\sqrt{R(T)} + \sqrt{T\dsigmoid{\inner{\bm{x}_\star}{\thetastar}}}}\\
     &\overset{(v)}{\leq} CS N^{3/2} d^{3/2} \sqrt{\log(T/\delta)\log(T/2)}\pbrak{\sqrt{R(T)} + \sqrt{T\dsigmoid{\inner{\bm{x}_\star}{\thetastar}}}}\\
\end{align*}

where $(i)$ follows from the Self-Concordance result and uses Cauchy-Schwarz, $(ii)$ follows from the fact that $\modulus{\bm{x}_t^\top\pbrak{\thetastar - \bm{\theta}_{t+1}}}\leq\diam{\mathcal{X}}{\Theta} \leq 1$ (Lemma \ref{lemma: warmup}) and Lemma \ref{lemma: TS_concentration}, $(iii)$ follows from Cauchy-Schwarz, $(iv)$ follows from Lemma \ref{lemma: elliptical potential lemma} on $\sqrt{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}}\bm{x}_t$ and Lemma \ref{Lemma: Abielle result}, and $(v)$ follows from the fact that $\sigma_t(\delta) \leq C S^2 Nd \log(T/\delta)$. 

We now turn to bounding $R^{TS}(T)$. Define $J(\bm{\theta}) = \max\limits_{\bm{x}\in\mathcal{X}} \inner{\bm{x}}{\bm{\theta}}$. Then, it is easy to see that $J(\thetastar) = \bm{x}_\star^\top\thetastar$. Also, note that
$$J(\tilde{\bm{\theta}}_{t}) = \max\limits_{\bm{x}\in\mathcal{X}}\summation{i=1}{N}{\bm{x}^i}^\top \tilde{\bm{\theta}}^i_{t} = \summation{i=1}{N} \max\limits_{\bm{x}\in\mathcal{X}^i}\bm{x}^\top\tilde{\bm{\theta}}^i_{t} = \summation{i=1}{N} {\bm{x}^i_t}^\top\tilde{\bm{\theta}}^{i}_{t} = \bm{x}_t^\top\tilde{\bm{\theta}}_{t}$$
which uses the fact that the selection of the item in each slot is independent of the rest of the slots.

Hence, we have
\begin{align*}
    R^{TS}(T) &= \summation{t=T^\prime+1}{T} \sigmoid{\inner{\bm{x}_\star}{\thetastar}} - \sigmoid{\inner{\bm{x}_t}{\tilde{\bm{\theta}}_t}}
    = \summation{t=T^\prime+1}{T} \alpha\pbrak{\inner{\bm{x}_\star}{\thetastar} , \inner{\bm{x}_t}{\tilde{\bm{\theta}}_t}}\pbrak{\inner{\bm{x}_\star}{\thetastar} - \inner{\bm{x}_t}{\tilde{\bm{\theta}}_t}}
    = \summation{t=T^\prime+1}{T} \alpha(J(\thetastar) , J(\tilde{\bm{\theta}}_t)) \pbrak{J(\thetastar) -J(\tilde{\bm{\theta}}_{t})} 
\end{align*}

Similar to Section D.2 of the Appendix in \cite{Faury2022} and Section C of \cite{Abeille2017}, using the convexity of $J$ gives us:
\begin{align*}
    \modulus{J(\thetastar) - J(\tilde{\bm{\theta}}_{t})} &\leq \max\cbrak{\modulus{\inner{\nabla J(\thetastar)}{\pbrak{\thetastar - \tilde{\bm{\theta}}_{t}}}} , \modulus{\inner{\nabla J(\tilde{\bm{\theta}}_{t})} {\pbrak{\thetastar - \tilde{\bm{\theta}}_{t}}}}}
    \overset{(i)}{\leq} \max\cbrak{\modulus{\inner{\bm{x}_\star}{\pbrak{\thetastar - \tilde{\bm{\theta}}_{t}}}} , \modulus{\inner{\bm{x}_t} {\pbrak{\thetastar - \tilde{\bm{\theta}}_{t}}}}}\\
    &\leq \diam{\mathcal{X}}{\Theta}
    \overset{(ii)}{\leq}1
\end{align*}
where $(i)$ follows from the fact that $\nabla J(\bm{\theta}) = \argmax\limits_{\bm{x}\in\mathcal{X}}\bm{x}^\top\bm{\theta}$ (\cite{Abeille2017}), and $(ii)$ follows from Lemma \ref{lemma: warmup}. Thus, we have that 
\begin{align*}
    \alpha(J(\thetastar) , J(\tilde{\bm{\theta}}_t))  &= \int\limits_{0}^1 \dsigmoid{J(\thetastar) + v\pbrak{J(\tilde{\bm{\theta}}_t) - J(\thetastar)}} \diff v
    \leq \dsigmoid{J(\thetastar)} \int\limits_{0}^1 \exp\pbrak{v\modulus{J(\thetastar) - J(\tilde{\bm{\theta}}_t)}} \diff v\\
    &\leq \dsigmoid{J(\thetastar)} \int\limits_{0}^1
    \exp\pbrak{v} \diff v
    \leq 2\dsigmoid{J(\thetastar)}
    = 2\dsigmoid{\inner{\bm{x}_\star}{\thetastar}}
\end{align*}
where the first inequality follows from self-concordance. Substituting this into the original bound, we get
$$R^{TS}(T) \leq 2\dsigmoid{\inner{\bm{x}_\star}{\thetastar}} \summation{t=T^\prime+1}{T} J(\thetastar) - J(\tilde{\bm{\theta}}_t)$$

Following the same steps as the proof in \cite{Abeille2017} and referring to Section D.2 in \cite{Faury2022}, we get that 
$$\summation{t=T^\prime+1}{T} J(\thetastar) - J(\tilde{\bm{\theta}}_t) \lesssim C\sqrt{Nd}\sqrt{\sigma_t(\delta)}\summation{t=T^\prime+1}{T}\matnorm{\bm{x}_t}{\bm{W}_t\inv} + \sqrt{T}$$
Substituting this into the original equation, we get that:
\begin{align*}
    R^{TS}(T) &\leq 2\dsigmoid{\inner{\bm{x}_\star}{\thetastar}}\pbrak{C\sqrt{Nd}\sqrt{\sigma_t(\delta)}\summation{t=T^\prime+1}{T}\matnorm{\bm{x}_t}{\bm{W}_t\inv} + \sqrt{T}}\\
    &\overset{(i)}{\leq} C\sqrt{Nd}\sqrt{\sigma_t(\delta)}\summation{t=T^\prime+1}{T}\sqrt{\dsigmoid{\inner{\bm{x}_\star}{\thetastar}}} \sqrt{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}\exp\pbrak{\modulus{\inner{\bm{x}_t}{\bm{\theta}_{t+1}} - \inner{\bm{x}_\star}{\thetastar}}}}\matnorm{\bm{x}_t}{\bm{W}_t\inv} + 2\dsigmoid{\inner{\bm{x}_\star}{\thetastar}}\sqrt{T}\\
    &\overset{(ii)}{\leq} C\sqrt{Nd}\sqrt{\sigma_t(\delta)}\summation{t=T^\prime+1}{T}\sqrt{\dsigmoid{\inner{\bm{x}_\star}{\thetastar}}} \sqrt{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}}\matnorm{\bm{x}_t}{\bm{W}_t\inv} + 2\dsigmoid{\inner{\bm{x}_\star}{\thetastar}}\sqrt{T}\\
     &\overset{(iii)}{\leq} C\sqrt{Nd}\sqrt{\sigma_t(\delta)}\sqrt{\summation{t=T^\prime+1}{T}\dsigmoid{\inner{\bm{x}_\star}{\thetastar}}} \sqrt{\summation{t=T^\prime+1}{T}\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}\matnorm{\bm{x}_t}{\bm{W}_t\inv}^2} + 2\dsigmoid{\inner{\bm{x}_\star}{\thetastar}}\sqrt{T}\\
     &\overset{(iv)}{\leq} CNd\sqrt{\sigma_t(\delta)}\sqrt{T\dsigmoid{\inner{\bm{x}_\star}{\thetastar}}} \sqrt{\log(T/2)} + 2\dsigmoid{\inner{\bm{x}_\star}{\thetastar}}\sqrt{T}\\
     &\overset{(v)}{\leq} CN^{3/2}d^{3/2}S\sqrt{\log(T/2)\log(T/\delta)}\sqrt{T\dsigmoid{\inner{\bm{x}_\star}{\thetastar}}}  + 2\dsigmoid{\inner{\bm{x}_\star}{\thetastar}}\sqrt{T}\\
\end{align*}
where $(i)$ follows from self-concordance, $(ii)$ follows from the fact that $\modulus{\inner{\bm{x}_t}{\bm{\theta}_{t+1}} - \inner{\bm{x}_\star}{\thetastar}} \leq 2\diam{\mathcal{X}}{\Theta}$, $(iii)$ follows from Cauchy-Schwarz, $(iv)$ follows from Lemma \ref{lemma: elliptical potential lemma} on $\sqrt{\dsigmoid{\inner{\bm{x}_t}{\bm{\theta}_{t+1}}}}\bm{x}_t$, and $(v)$ follows from the fact that $\sigma_t(\delta) \leq C S^2 Nd \log (T/\delta)$.

Combining the bounds on $R(T)$, we get
$$R(T) \leq C SN^{3/2}d^{3/2} \sqrt{\log(T/\delta)\log(T/2)}\pbrak{\sqrt{R(T)} + \sqrt{T\dsigmoid{\inner{\bm{x}_\star}{\thetastar}}}}$$

Using Lemma \ref{lemma: quadratic inequality}, we get

$$R(T) \leq  C SN^{3/2}d^{3/2} \sqrt{\log(T/\delta)\log(T/2)}\sqrt{T\dsigmoid{\inner{\bm{x}_\star}{\thetastar}}} + C N^3d^3S^2\log(T/\delta)\log(T/2)$$

Finally, combining the bound for $Regret(T)$ gives us:
$$Regret(T) \leq \max\{CS^6N^2d^2\kappa\log(T/\delta)^2 , T_0\} + CSN^{3/2}d^{3/2} \sqrt{\log(T/\delta)\log(T/2)}\sqrt{T\dsigmoid{\inner{\bm{x}_\star}{\thetastar}}} + CN^3d^3S^2\log(T/\delta)\log(T/2) $$

\end{proof}


\subsection{Supporting Lemmas for Theorem \ref{appendix: regret_proof_TS}}
\label{appendix: general_lemmas_ts}

\begin{lemma}
    Let $\delta \in (0,1)$, then, setting $\tau  =CS^6N^2d^2\kappa \log(T/\delta)^2$ ensures that $\Theta$ returned after the warm-up phase satisfies the following:
    \begin{enumerate}
        \item $\P\cbrak{\thetastar \in \Theta} \geq 1 - \delta$
        \item $\diam{\mathcal{X}}{\Theta} \leq 1$
    \end{enumerate}
    \label{lemma: warmup}
\end{lemma}

\begin{proof} The proof for the first part is the same as the proof for the first part in Proposition 5 in \cite{Faury2022} since the proof does not depend on the manner in which the arm is selected.

For the second part notice that:
\begin{align*}
\diam{\mathcal{X}}{\Theta} &= \max\limits_{\bm{x}\in\mathcal{X}}\max\limits_{\bm{\theta}_1,\bm{\theta}_2\in\Theta} \modulus{\inner{\bm{x}}{\pbrak{\bm{\theta}_1 - \bm{\theta}_2}}}
\overset{(i)}{\leq}\max\limits_{\bm{x}\in\mathcal{X}}\matnorm{\bm{x}}{(\bm{V}^{\mathcal{H}}_\tau)\inv}\max\limits_{\bm{\theta}_1,\bm{\theta}_2\in\Theta}\matnorm{\bm{\theta}_1-\bm{\theta}_2}{\bm{V}^{\mathcal{H}}_\tau}
\overset{(ii)}{\leq}\sqrt{\beta_t(\delta)}\max\limits_{\bm{x}\in\mathcal{X}}\matnorm{\bm{x}}{(\bm{V}^{\mathcal{H}}_\tau)\inv}\\
&\leq\sqrt{\beta_t(\delta)}\sqrt{\max\limits_{\bm{x}\in\mathcal{X}} \matnorm{\bm{x}}{(\bm{V}^{\mathcal{H}}_\tau)\inv}^2}
\leq\sqrt{\beta_t(\delta)}\frac{1}{\sqrt{\tau}}\sqrt{\summation{t=1}{\tau}\max\limits_{\bm{x}\in\mathcal{X}} \matnorm{\bm{x}}{(\bm{V}^{\mathcal{H}}_\tau)\inv}^2}
\overset{(iii)}{\leq}\sqrt{\beta_t(\delta)}\frac{1}{\sqrt{\tau}}\sqrt{2\summation{t=1}{\tau}\max\limits_{\bm{x}\in\mathcal{X}} \matnorm{\bm{x}}{(\bm{U}^{\mathcal{H}}_\tau)\inv}^2}\\
&\leq\sqrt{\beta_t(\delta)}\frac{1}{\sqrt{\tau}}\sqrt{2\summation{t=1}{\tau}\summation{i=1}{N}\max\limits_{\bm{x}\in\mathcal{X}} \matnorm{\tilde{\bm{x}}^i}{(\bm{U}^{\mathcal{H}}_\tau)\inv}^2}
\leq\sqrt{\beta_t(\delta)}\frac{1}{\sqrt{\tau}}\sqrt{2\summation{t=1}{\tau}\summation{i=1}{N}\max\limits_{\bm{x}\in\mathcal{X}^i} \matnorm{\bm{x}}{(\bm{V}^{\mathcal{H},i}_\tau)\inv}^2}\\
&\overset{(iv)}{\leq}\sqrt{\beta_t(\delta)}\frac{1}{\sqrt{\tau}}\sqrt{2\summation{t=1}{\tau}\summation{i=1}{N}\matnorm{\bm{x}^i_t}{(\bm{V}^{\mathcal{H},i}_\tau)\inv}^2}
\leq\sqrt{\beta_t(\delta)}\frac{1}{\sqrt{\tau}}\sqrt{\kappa}\sqrt{2\summation{t=1}
{\tau}\summation{i=1}{N}\matnorm{\frac{1}{\sqrt\kappa}\bm{x}^i_t}{(\bm{V}^{\mathcal{H},i}_\tau)\inv}^2}\\
&\overset{(v)}{\leq}\sqrt{\beta_t(\delta)}\frac{1}{\sqrt{\tau}}\sqrt{\kappa}\sqrt{2\summation{i=1}{N}d\log(T/\kappa N)}
\leq C\sqrt{\frac{Nd\beta_t(\delta)\kappa\log(T/\kappa N)}{\tau}}
\end{align*}
where $(i)$ follows from an application of Cauchy-Schwarz, $(ii)$ follows from the definition of $\Theta$, $(iii)$ follows from Lemma \ref{lemma: TS_bounds_on_warmup}, $(iv)$ follows from how items in each slot are selected,  $(v)$ follows from Lemma \ref{lemma: elliptical potential lemma} on $\frac{1}{\sqrt\kappa}\bm{x}^i_t$.

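Thus, any $\tau \geq C Nd\beta_t(\delta)\kappa\log(T/\kappa N)$ ensures $\diam{\mathcal{X}}{\Theta} \leq 1$. Since $Nd\beta_t(\delta)\kappa\log(T/\kappa N) \leq CS^6 N^2 d^2 \kappa \log (T/\kappa N) \log (T/\delta) \leq CS^6 N^2 d^2 \kappa \log (T/\delta)^2$, the choice $\tau = CS^6N^2d^2\kappa\log(T/\delta)^2$ suffices.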

\end{proof}


\begin{lemma}
\label{lemma:multiplicative-equivalence}
    For $t \geq \frac{(N-1)^2}{2\rho^2}\log\frac{dN(N-1)}{\delta}$, we have
    $$\frac{1}{2}\bm{U}_t\mleq \bm{W}_t \mleq \frac{3}{2}\bm{U}_t$$
    \label{lemma: TS_bound_W}
\end{lemma}
\begin{proof} Following the same line of thought as Lemma \ref{lemma: independence of cross terms}, Lemma \ref{lemma: extension of diversity}, and Lemma \ref{lemma: norm_cross_terms}, we have that
$$\norm{\bm{W}^{i,j}_t} \leq \sqrt{\frac{t}{2N^2}\log\frac{dN(N-1)}{\delta}}$$

Following the same line of thought as Lemma \ref{lemma: bound on norm of Z} and making use of Assumption \ref{assumption_TS}, we can derive
\begin{align*}
    \norm{\bm{Z}^{(i)}_t} &\leq \summation{j=1}{N-i} \frac{\norm{\bm{W}^{i,j}_t}}{\sqrt{\eigmin{\bm{W}^i_t} \eigmin{\bm{W}^j_t} }}
    \leq \summation{j=1}{N-i} \frac{\sqrt{\frac{t}{2N^2}\log\frac{dN(N-1)}{\delta}}}{\rho t}
    \leq \frac{(N-i)}{N(N-1)}
\end{align*}
where the last inequality follows from the fact that $t \geq \frac{(N-1)^2}{2\rho^2}\log\frac{dN(N-1)}{\delta}$.

Finally, using the same line of thought as Lemma \ref{lemma: ineq on W}, we get
$$\frac{1}{2}\bm{U}_t\mleq \bm{W}_t \mleq \frac{3}{2}\bm{U}_t$$

\end{proof}


\begin{lemma}
    For $t \geq \frac{8(N-1)^2}{\kappa^2\rho^2}\log\frac{dN(N-1)}{\delta}$, we have
    $$\frac{1}{2}\bm{U}^{\mathcal{H}}_t \mleq \bm{V}^\mathcal{H}_t \mleq \frac{3}{2}\bm{U}^\mathcal{H}_t$$
    \label{lemma: TS_bounds_on_warmup}
\end{lemma}
\begin{proof} Following the same line of thought as Lemma \ref{lemma: bounds on V for failing data dependent condition} and making use of Assumption \ref{assumption_TS}, we get

$$\norm{\bm{Z}^{(i)}_t} \leq \summation{j=1}{N-i} \frac{\sqrt{\frac{8t}{\kappa^2N^2}\log\pbrak{\frac{d N(N-1)}{\delta}}}}{\rho t} \leq \frac{(N-i)}{N(N-1)}$$    
where the last inequality follows from the fact that $t \geq \frac{8(N-1)^2}{\kappa^2\rho^2}\log\frac{dN(N-1)}{\delta}$.

Finally, we can show that
$$\frac{1}{2}\bm{U}^{\mathcal{H}}_t \mleq \bm{V}^\mathcal{H}_t \mleq \frac{3}{2}\bm{U}^\mathcal{H}_t$$
\end{proof}

\begin{lemma}
    Define the distribution $\mathcal{D} = \bigtimes\limits_{i=1}^N \mathcal{D}^{TS}$ where $\mathcal{D}^{TS}$ is a multivariate distribution that satisfies the properties given in Definition \ref{def: D_TS}. Then, $\mathcal{D}$ also satisfies the properties given in Definition \ref{def: D_TS}, making it a suitable distribution for Thompson Sampling.
    \label{Lemma: TS_distribution_suitability}
\end{lemma}
\begin{proof} Define $\bm{\eta} = \pbrak{\bm{\eta}^1 , \ldots , \bm{\eta}^N} \in \R^{Nd}$ where $\bm\eta^i \sim \mathcal{D}^{TS}$. Then, it is easy to see that sampling $\bm{\eta}^i , i \in [N]$ in an iid fashion from $\mathcal{D}^{TS}$ is the same as sampling $\bm{\eta}$ from $\mathcal{D}$.

We begin by showing the Concentration property, i.e $\exists C , C^\prime$ such that 
$$\P_{\bm{\eta} \sim \mathcal{D}} \cbrak{\twonorm{\bm{\eta}} \leq \sqrt{C(Nd)\log\frac{C^\prime (Nd)}{\delta^\prime}}} \geq 1 - \delta^\prime$$

Since $\mathcal{D}^{TS}$ satisfies the concentration property, we know that $\twonorm{\bm{\eta}^i} \geq \sqrt{cd\log\frac{c^\prime d}{\delta}}$ with probability at most $\delta$. Hence, it is easy to see that 
$$\twonorm{\bm{\eta}} =\sqrt{\summation{i=1}{N} \twonorm{\bm{\eta}^i}^2} \geq \sqrt{cNd\log\frac{c^\prime d}{\delta}}$$
with probability at most $\delta^N$. Setting $C = \frac{c}{N} , C^\prime = \frac{(c^\prime)^N d^{N-1}}{N}$ and $\delta^\prime = \delta^N$, we get that
$$\twonorm{\bm{\eta}} \leq \sqrt{CN^2d\log\pbrak{\frac{C^\prime Nd}{\delta^\prime}}^{1/N}} = \sqrt{C(Nd)\log\frac{C^\prime (Nd)}{\delta^\prime}}$$
with probability at least $1 - \delta^\prime$. This proves that $\mathcal{D}$ satisfies the concentration property.

We now show that $\mathcal{D}$ satisfies the Anti-Concentration property, i.e., $\exists P \in (0,1)$ such that $\forall \bm{u} \in \R^{Nd}$:
$$\P_{\bm{\eta} \sim \mathcal{D}}\cbrak{\bm{u}^\top{\bm{\eta}} \geq \twonorm{\bm{u}}} \geq P$$

Assume $\bm{u} = \pbrak{\bm{u}^1 , \ldots , \bm{u}^N}$ such that $\twonorm{\bm{u}} = 1$. This implies that $\summation{i=1}{N}\twonorm{\bm{u}^i}^2 = 1$ which in turn implies that $\twonorm{\bm{u}^i} \leq 1$.

Since, $\twonorm{\bm{u}^i} \leq 1$, we have that $\twonorm{\bm{u}^i}^2 \leq \twonorm{\bm{u}^i}$, and since $\bm{\eta}^i \sim \mathcal{D}^{TS}$, we have that
$$\P\cbrak{{\bm{u}^i}^\top\bm{\eta}^i \leq \twonorm{\bm{u}^i}^2} \leq \P\cbrak{{\bm{u}^i}^\top\bm{\eta}^i \leq \twonorm{\bm{u}^i}} \leq 1- p$$

Hence, we have that
\begin{align*}
    \P\cbrak{\bm{u}^\top\bm{\eta} \leq \twonorm{\bm{u}}} &= \P\cbrak{\bm{u}^\top\bm{\eta} \leq \twonorm{\bm{u}}^2}
    =\P\cbrak{\summation{i=1}{N}{\bm{u}^i}^\top\bm{\eta}^i \leq \summation{i=1}{N} \twonorm{\bm{u}^i}^2}
    =\P\cbrak{\bigcap\limits_{i=1}^{N}\cbrak{{\bm{u}^i}^\top\bm{\eta}^i \leq \twonorm{\bm{u}^i}^2}}\\
    &=\prod\limits_{i=1}^{N}\P\cbrak{{\bm{u}^i}^\top\bm{\eta}^i \leq \twonorm{\bm{u}^i}^2}
    \leq (1-p)^N
\end{align*}

Thus, we have that $ \P\cbrak{\bm{u}^\top\bm{\eta} \geq \twonorm{\bm{u}}} \geq 1 - (1-p)^N$, and setting $P = 1 - (1-p)^N$ finishes the claim.

\end{proof}

\begin{lemma}
    At round $t \geq T_0$, let $\tilde{\bm{\theta}}^i_t = \bm{\theta}^i_t + \sqrt{\sigma_t(\delta)}(\bm{W}^i_t)^{-\frac{1}{2}} \bm{\eta}^i$ for all $i \in [N]$, where $\bm{\eta}^i \sim \mathcal{D}^{TS}$, as given in \emph{Steps 13-14} of Algorithm \ref{algo:TS-Fixed}. Define $\tilde{\bm{\theta}}_t = \pbrak{\tilde{\bm{\theta}}^1_t , \ldots , \tilde{\bm{\theta}}^N_t}$. Assuming event $\event$ holds, we have that,
    $$\matnorm{\tilde{\bm{\theta}}_t - \bm{\theta}_t}{\bm{W}_t} \leq C\sqrt{\sigma_t(\delta)} \sqrt{Nd}$$
\label{lemma: TS_concentration}
\end{lemma}
\begin{proof} We can write $\tilde{\bm{\theta}}_t = \pbrak{\tilde{\bm{\theta}}^1_t , \ldots , \tilde{\bm{\theta}}^N_t}$ as the following:
$$\tilde{\bm{\theta}}_t = \bm{\theta}_t + \sqrt{\sigma_t(\delta)}
\begin{bmatrix}
    (\bm{W}^1_t)^{-\frac{1}{2}} \bm{\eta}^1\\
    \\
    \vdots\\
    \\
    (\bm{W}^N_t)^{-\frac{1}{2}} \bm{\eta}^N
\end{bmatrix} = \bm{\theta}_t + \sqrt{\sigma_t(\delta)}\textrm{diag}((\bm{W}^1_t)^{-\frac{1}{2}} ,  \ldots , (\bm{W}^N_t)^{-\frac{1}{2}}) \bm{\eta} = \bm{\theta}_t +  \sqrt{\sigma_t(\delta)} \bm{U}_t^{-\frac{1}{2}}\bm{\eta}$$
where $\bm{\eta} = \pbrak{\bm{\eta}^1 , \ldots , \bm{\eta}^N}$.

Thus, we get
\begin{align*}
    \matnorm{\tilde{\bm{\theta}}_t - \bm{\theta}_t}{\bm{W}_t} &= \sqrt{\sigma_t(\delta)}\matnorm{\bm{U}_t^{-\frac{1}{2}}\bm{\eta}}{\bm{W}_t}
    \overset{(i)}{\leq}\frac{3}{2}\sqrt{\sigma_t(\delta)}  \matnorm{\bm{U}_t^{-\frac{1}{2}}\bm{\eta}}{\bm{U}_t}
    =  \frac{3}{2}\sqrt{\sigma_t(\delta)} \twonorm{\bm{\eta}}
    \overset{(ii)}{\leq} C\sqrt{\sigma_t(\delta)} \sqrt{Nd}
\end{align*}
where $(i)$ follows from Lemma \ref{lemma: TS_bound_W} and $(ii)$ follows from the concentration property shown in Lemma \ref{Lemma: TS_distribution_suitability}.

\end{proof}


