
\section{Concentration results for Random Matrices and Vectors}
\label{appendix:general}

\begin{lemma}
    (\cite{Chatterji2020}, Generalization of Lemma 7) Let $\cbrak{\bm{x}_s}_{s=1}^T$ be a stochastic process in $\R^d$ such that for filtration $\filteration{t}$, we have that $\E\sbrak{\bm{x}_s|\filteration{s-1}} = \bm{0}_{d}$ and $\E\sbrak{\bm{x}_s\bm{x}_s^\top | \filteration{s-1}} \mgeq \rho\bm{I}_d$. Further, let $\twonorm{\bm{x}_s} \leq m$ for all $s \geq 1$. Also, define the matrix 
    $$\bm{Q}_t = \gamma\bm{I}_d + \summation{s=1}{t}\bm{x}_s\bm{x}_s^\top$$
    Then, with probability at least $1 - \delta$, we have that
    $$\eigmin{\bm{Q}_t} \geq \gamma + c\rho t$$
    for $0 \leq c < 1$ and for all $t$ such that $\frac{12m^4 + 4m^2\rho(1-c)}{3(1-c)^2\rho^2}\log\pbrak{\frac{2dT}{\delta}} \leq t \leq T$.
    \label{lemma: min_eig_design}
\end{lemma}
\begin{proof} The proof follows along the same lines as that of~\cite{Chatterji2020}.

Assume $\E\sbrak{\bm{x}_s\bm{x}_s^\top | \filteration{s-1}} = \bm{\Sigma}_c \mgeq \rho\bm{I}_d$. Define the matrix martingale $\bm{Z}_t = \summation{s=1}{t}\sbrak{\bm{x}_s\bm{x}_s^\top - \bm{\Sigma}_c}$ with $\bm{Z}_0 = \bm{0}$ and the corresponding martingale difference sequence $\bm{X}_s = \bm{Z}_s - \bm{Z}_{s-1} = \bm{x}_s\bm{x}_s^\top - \bm{\Sigma}_c$ for all $s \geq 1$. 

We have that $\twonorm{\bm{x}_s} \leq m$. Also, by Jensen's inequality, $\norm{\bm{\Sigma}_c} = \norm{\E\sbrak{\bm{x}_s\bm{x}_s^\top | \filteration{s-1}}} \leq \E\sbrak{\twonorm{\bm{x}_s}^2 | \filteration{s-1}} \leq m^2$.

Therefore, using triangle inequality, $\norm{\bm{X}_s} = \norm{\bm{x}_s\bm{x}_s^\top - \bm{\Sigma}_c} \leq \norm{\bm{x}_s\bm{x}_s^\top} + \norm{\bm{\Sigma}_c} \leq 2m^2$

Finally, we have that 
\begin{align*}
\summation{s=1}{t}\norm{\E\sbrak{\bm{X}_s\bm{X}_s^\top | \filteration{s-1}}} &=  \summation{s=1}{t}\norm{\E\sbrak{\bm{X}_s^\top\bm{X}_s| \filteration{s-1}}} \\
&=  \summation{s=1}{t}\norm{\E\sbrak{\bm{x}_s\bm{x}_s^\top \bm{x}_s\bm{x}_s^\top - \bm{x}_s\bm{x}_s^\top\bm{\Sigma}_c^\top -\bm{\Sigma}_c \bm{x}_s\bm{x}_s^\top + \bm{\Sigma}_c\bm{\Sigma}_c^\top|\filteration{s-1}}}\\
&\leq  \summation{s=1}{t}\norm{\E\sbrak{\pbrak{\bm{x}_s^\top \bm{x}_s}\bm{x}_s \bm{x}_s^\top + \bm{\Sigma}_c\bm{\Sigma}_c^\top|\filteration{s-1}}}\\
&\leq 2m^4t
\end{align*}
Thus, applying the Matrix Freedman Inequality (Lemma~\ref{lemma: freedman inequality}) with $R = 2m^2$, $\omega^2 = 2m^4t$, $d_1 = d_2 = d$ and $u = (1-c)\rho t$, we get
\begin{align*}
    \P\cbrak{\norm{\summation{s=1}{t}\sbrak{\bm{x}_s \bm{x}_s^\top - \bm{\Sigma}_c}} \geq (1-c)\rho t} \leq 2d \exp\pbrak{-\frac{(1-c)^2\rho^2t^2/2}{2m^4t + 2m^2(1-c)\rho t/3}}
\end{align*}

Choosing $t \geq \frac{12m^4 + 4m^2\rho(1-c)}{3(1-c)^2\rho^2}\log\pbrak{\frac{2dT}{\delta}}$, we get that with probability at least $1 - \frac{\delta}{T}$, 
\begin{align*}
    (1-c)\rho t \geq \norm{\summation{s=1}{t}\sbrak{\bm{x}_s\bm{x}_s^\top - \bm{\Sigma}_c}} 
\end{align*}

Now, recall that for a symmetric matrix $\bm{A}$ the operator norm satisfies $\norm{\bm{A}} =  \sup\limits_{\twonorm{\bm{y}} = 1}\modulus{\bm{y}^\top\bm{A}\bm{y}}$. Since $\summation{s=1}{t}\bm{x}_s\bm{x}_s^\top - t\bm{\Sigma}_c$ is symmetric, substituting this characterization results in:
$$ (1-c)\rho t \geq \sup\limits_{\twonorm{\bm{y}} = 1}\modulus{\bm{y}^\top\pbrak{\summation{s=1}{t}\bm{x}_s\bm{x}_s^\top}\bm{y} - t\,\bm{y}^\top\bm{\Sigma}_c\bm{y}} \geq \modulus{\inf\limits_{\twonorm{\bm{y}} = 1}\bm{y}^\top\pbrak{\summation{s=1}{t}\bm{x}_s\bm{x}_s^\top}\bm{y} - t\cdot \inf\limits_{\twonorm{\bm{y}} = 1} \bm{y}^\top\bm{\Sigma}_c\bm{y}}$$

which uses the inequality $\sup\limits_{A} \modulus{f-g} \geq \modulus{\inf\limits_{A} f - \inf\limits_{A} g}$. Now, using Rayleigh's quotient, we also know that for a symmetric matrix $\bm{A}$, $\inf\limits_{\twonorm{\bm{y}} = 1} \bm{y}^\top\bm{A}\bm{y} = \eigmin{\bm{A}}$. Thus, 

$$ (1-c)\rho t \geq \modulus{\eigmin{\summation{s=1}{t}\bm{x}_s\bm{x}_s^\top} - t\eigmin{\bm{\Sigma}_c}} \implies \eigmin{\summation{s=1}{t}\bm{x}_s\bm{x}_s^\top} \geq c\rho t$$

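using the fact that $\bm{\Sigma}_c \mgeq \rho\bm{I}$, so that $\eigmin{\bm{\Sigma}_c} \geq \rho$. This holds with probability at least $1 - \frac{\delta}{T}$ for each fixed $t$. Since $\eigmin{\bm{Q}_t} = \gamma + \eigmin{\summation{s=1}{t}\bm{x}_s\bm{x}_s^\top}$, performing a union bound over all time indices $t \leq T$ finishes the proof.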

\end{proof}

\begin{lemma}
    (\cite{Das_2024}, Lemma 17) Let $\delta \in (0,1)$, $\bm{x}_s \in \R^{d_1}$ and $\bm{z}_s \in \R^{d_2}$ such that $\E\sbrak{\bm{x}_s\bm{z}_s^\top|\filteration{s-1}} = \bm{0}_{d_1\times d_2}$. Define $\bm{M}_t = \summation{s=1}{t}\bm{x}_s\bm{z}_s^\top$. Further, assume that $\twonorm{\bm{x}_s} \leq m_1$ and $\twonorm{\bm{z}_s} \leq m_2$. Then, with probability at least $1 - \delta$
    $$\norm{\bm{M}_t} \leq 2(m_1\wedge m_2)^2\sqrt{2t\log\pbrak{\frac{d_1+d_2}{\delta}}}$$
    \label{lemma: generalized norm of cross terms}
\end{lemma}
\begin{proof} Denote $\bm{X}_s = \bm{x}_s\bm{z}_s^\top$. Since $\E\sbrak{\bm{X}_s|\filteration{s-1}} = \bm{0}_{d_1\times d_2}$, $\cbrak{\bm{X}_s}_{s \geq 1}$ is a martingale difference sequence. Further, $\bm{M}_t = \summation{s=1}{t}\bm{X}_s$ is the partial sum of this martingale difference sequence, i.e., a matrix martingale.

Consider the square of the Hermitian Dilation (see Definition \ref{def:HermitianDilation}) of $\bm{X}_s$
\begin{align*}
    \H(\bm{X}_s)^2 = \begin{bmatrix}
        \bm{0}_{d_1\times d_1} & \bm{X}_s\\
        \bm{X}_s^\top & \bm{0}_{d_2\times d_2}
    \end{bmatrix}^2 &= \begin{bmatrix}
        \bm{X}_s\bm{X}_s^\top & \bm{0}_{d_1\times d_2}\\
        \bm{0}_{d_2\times d_1} & \bm{X}_s^\top\bm{X}_s
    \end{bmatrix} \\
    &= \begin{bmatrix}
        \twonorm{\bm{z}_s}^2\bm{x}_s\bm{x}_s^\top & \bm{0}_{d_1\times d_2}\\
        \bm{0}_{d_2\times d_1} & \twonorm{\bm{x}_s}^2\bm{z}_s\bm{z}_s^\top
    \end{bmatrix} \\
    &\mleq (m_1 \wedge m_2)^2\begin{bmatrix}
        \bm{x}_s\bm{x}_s^\top & \bm{0}_{d_1\times d_2}\\
        \bm{0}_{d_2\times d_1} & \bm{z}_s\bm{z}_s^\top
        \end{bmatrix}\\
    & \mleq (m_1 \wedge m_2)^4\bm{I}_{d_1+d_2} 
\end{align*}

Applying the Matrix Azuma inequality (Lemma \ref{lemma: Matrix Azuma}) with $\bm{A}_s = (m_1 \wedge m_2)^2\bm{I}_{d_1+d_2}$, we have that $\sigma_t^2 = (m_1\wedge m_2)^4t$ and thus,
$$\P\cbrak{\singmax{\bm{M}_t} \geq \epsilon} \leq (d_1+d_2)\exp\pbrak{-\frac{\epsilon^2}{8 (m_1 \wedge m_2)^4t}} $$

Choosing $\epsilon = \sqrt{8 (m_1 \wedge m_2)^4t\log\pbrak{\frac{d_1+d_2}{\delta}}}$ finishes the proof.

\end{proof}
