\section{Rough Notes}

\begin{lemma}[Concentration of sums of weakly dependent random matrices]\label{lemma:concentration_weakly_dependent} Let $\left\{A_{i}\right\}_{i \in [n]}$ be symmetric independent matrices satisfying $\E\bbb{A_i} = \Sigma$ and $\norm{\E\bbb{\bb{A_i-\Sigma}^{2}}}_{2} \leq \mathcal{V}$. Let, 
\bas{
    \forall i \in [n], \;\; X_{i}^{n} &:= \vp\lambp^{n-i-1}\vp^{\top}\bb{A_{i}-\Sigma}v_{1} \\
    Y_{n} &:= \sum_{j \in [n]}X_{j}^{n}
}
Then, for any $\epsilon > 0$,
\bas{
    \mathbb{P}\bb{\norm{\frac{1}{n-t}\sum_{i=t+1}^{n}\bb{Y_{i}Y_{i}^{\top}-\E\bbb{Y_{i}Y_{i}^{\top}}}}_{2} \geq \epsilon} \leq 
}
\end{lemma}
\begin{proof}
    % Let $S_{t,n} := \sum_{i=t+1}^{n}\bb{Y_{i}Y_{i}^{\top}-\E\bbb{Y_{i}Y_{i}^{\top}}}$. We have, 
    % \bas{
    %     S_{t,n} &= \sum_{i=t+1}^{n}\bb{\sum_{j\in[i]}X_{j}^{i}}\bb{\sum_{j\in[i]}X_{j}^{i}}^{\top} - \E\bbb{\bb{\sum_{j\in[i]}X_{j}^{i}}\bb{\sum_{j\in[i]}X_{j}^{i}}^{\top}} \\
    %     &= \sum_{i=t+1}^{n}\sum_{j\in[i]}\bb{X_{j}^{i}X_{j}^{i\top} - \E\bbb{X_{j}^{i}X_{j}^{i\top}}} + \sum_{i=t+1}^{n}\sum_{p,q\in[i], p \neq q}\bb{X_{p}^{i}X_{q}^{i\top} - \E\bbb{X_{p}^{i}X_{q}^{i\top}}} \\
    %     &= \underbrace{\sum_{j\in[n]}\sum_{i=max\left\{j,t+1\right\}}^{n}\bb{X_{j}^{i}X_{j}^{i\top}-\E\bbb{X_{j}^{i}X_{j}^{i\top}}}}_{:= T_1} + \underbrace{\sum_{p,q \in [n], p \neq q}\;\sum_{i=\max\left\{p,q,t+1\right\}}^{n}\bb{X_{p}^{i}X_{q}^{i\top} - \E\bbb{X_{p}^{i}X_{q}^{i\top}}}}_{:= T_2}
    % }
    % Let $B_{p,q}^{i} := X_{p}^{i}X_{q}^{i\top}$ for conciseness of notation. Note that $\E\bbb{B_{p,q}^{i}} = 0$ for $p \neq q$.

    We have, 
    \bas{
        Y_{n} &= \vp\lambp\vp^{\top}Y_{n-1} + \vp\lambp^{-1}\vp^{\top}\bb{A_{n}-\Sigma}v_{1}
    }
    Let $G_{n} := Y_{n}Y_{n}^{\top}$ and $H := \E\bbb{\bb{A_{n}-\Sigma}v_{1}v_{1}^{\top}\bb{A_{n}-\Sigma}}$. Then, 
    \bas{
        G_{n} &= \vp\lambp\vp^{\top}G_{n-1}\vp\lambp\vp^{\top} + \vp\lambp\vp^{\top}Y_{n-1}v_{1}^{\top}\bb{A_{n}-\Sigma}\vp\lambp^{-1}\vp^{\top} \\
        & + \vp\lambp^{-1}\vp^{\top}\bb{A_{n}-\Sigma}v_{1}Y_{n-1}^{\top}\vp\lambp\vp^{\top} + \vp\lambp^{-1}\vp^{\top}\bb{A_{n}-\Sigma}v_{1}v_{1}^{\top}\bb{A_{n}-\Sigma}\vp\lambp^{-1}\vp^{\top}
    }
    Therefore, 
    \bas{
        \E\bbb{G_{n}|\mathcal{F}_{n-1}} &= \vp\lambp\vp^{\top}G_{n-1}\vp\lambp\vp^{\top} + \vp\lambp^{-1}\vp^{\top}H\vp\lambp^{-1}\vp^{\top}
    }
\end{proof}

\begin{lemma}\label{lemma:expectation_calculation}
    For $j \in [d]$, let $\beta_{k} := e_{j}^{\top}Y_{k}$ and $v_{k} := \frac{1}{k-t}\sum_{i=t+1}^{k}\beta_i^{2}$. Then for $t := n - \frac{1}{2\eta\lambda_{1}}$, 
    \bas{
        \E\bbb{\beta_{n}^{2}} & = e_{j}^{\top}\vp R_n \vp^{\top}e_j, \;\;\E\bbb{v_n} = e_{j}^\top\vp G\vp^{\top}e_j 
    }
    where, for $\eta := \frac{c\log\bb{n}}{n\bb{\lambda_1-\lambda_2}}$, the matrices $R_n$ and $G$ satisfy, for all $k,l \in [d]$,
    \bas{
        0 \leq \frac{\bbb{R_n}_{kl} - G_{kl}}{\bbb{R_n}_{kl}} \leq \frac{2\bb{ \bb{3-e}2\eta\lambda_{1} + e}}{n^{2c}}
    }
\end{lemma}
\begin{proof}
    We have, using Lemma~\ref{lemma:second_moment_matrix},
    \bas{
        \E\bbb{v_n} &= \frac{1}{n-t}e_{j}^{\top}\bb{\sum_{i = t+1}^{n}\E\bbb{Y_iY_i^\top}}e_{j} \\
        &= \frac{1}{n-t}e_{j}^{\top}\vp \bb{\sum_{i=t+1}^{n}R_i}\vp^{\top}e_j
    }
    where 
    \bas{
        \forall k,l \in [d], \;\; \bbb{R_i}_{k,l} &= \frac{\widetilde{M}_{kl}}{d_kd_l}\bb{\frac{\bb{d_kd_l}^{i} - 1}{\bb{d_kd_l} - 1}} 
    }
    Let $G := \frac{1}{n-t}\bb{\sum_{i=t+1}^{n}R_i}$. Then, 
    \bas{
        G_{k, l} &= \frac{1}{n-t}\frac{\widetilde{M}_{kl}}{d_kd_l}\sum_{i=t+1}^{n}{\frac{\bb{d_kd_l}^{i} - 1}{\bb{d_kd_l} - 1}} \\
        &= \frac{1}{n-t}\frac{\widetilde{M}_{kl}}{d_kd_l\bb{d_kd_l - 1}}\bb{\sum_{i = t+1}^{n}\bb{d_kd_l}^{i} - \bb{n-t}} \\
        &= \frac{1}{n-t}\frac{\widetilde{M}_{kl}}{d_kd_l\bb{d_kd_l - 1}}\bb{ \bb{d_kd_l}^{t+1}\frac{\bb{d_kd_l}^{n-t} - 1}{d_kd_l - 1} - \bb{n-t}} \\
        &= \frac{\widetilde{M}_{kl}}{d_kd_l\bb{1 - d_kd_l}}\bb{1 -  \frac{\bb{d_kd_l}^{t+1} - \bb{d_kd_l}^{n+1}}{\bb{1 - d_kd_l}\bb{n-t}} }
    }
    Note that $\E\bbb{v_{n}} = e_{j}^{\top}\vp G\vp^{\top}e_{j}$. Furthermore, from Lemma~\ref{lemma:second_moment_matrix}, 
    \bas{
        \E\bbb{\beta_{n}^{2}} &= e_{j}^\top\vp R_{n}\vp^{\top}e_{j}, \;\; \bbb{R_n}_{kl} = \frac{\widetilde{M}_{kl}}{d_kd_l\bb{{1 - \bb{d_kd_l}}}}\bb{1 - \bb{d_kd_l}^{n} }
    }
    Comparing $G$ and $R_n$, we have, 
    \bas{
        \bb{\bbb{R_n}_{kl} - G_{kl}}\frac{d_kd_l\bb{{1 - \bb{d_kd_l}}}}{\widetilde{M}_{kl}} &= \frac{\bb{d_kd_l}^{t+1} - \bb{d_kd_l}^{n+1}}{\bb{1 - d_kd_l}\bb{n-t}} - \bb{d_kd_l}^{n} \\
        &= \bb{d_kd_l}^{n}\bb{\frac{1}{n-t}\frac{\bb{d_kd_l}^{-\bb{n-t}} - 1}{\bb{d_kd_l}^{-1}-1} - 1}
    }
    Note that 
    \ba{
        d_kd_l &= \bb{\frac{1+\eta \lambda_{k+1}}{1+\eta\lambda_1}}\bb{\frac{1+\eta \lambda_{l+1}}{1+\eta\lambda_1}} \notag \\
        &\approx 1 + \eta\bb{\lambda_{k+1} - \lambda_{1} + \lambda_{l+1}-\lambda_{1}} \notag \\
        &\leq 1 - 2c\frac{\log\bb{n}}{n}, \;\; \text{ using } \eta := \frac{c\log\bb{n}}{n\bb{\lambda_1-\lambda_2}} \label{eq:dkdl_ub}
    }
    and similarly, 
    \ba{
        d_kd_l &\geq 1-2\eta\lambda_{1}, \text{ since in the worst case, } \lambda_{k+1} = \lambda_{l+1} = 0 \label{eq:dkdl_lb}
    }
    Using \eqref{eq:dkdl_lb} and $n-t = \frac{1}{2\eta\lambda_{1}}$,
    \bas{
         \bb{\bbb{R_n}_{kl} - G_{kl}}\frac{d_kd_l\bb{{1 - \bb{d_kd_l}}}}{\widetilde{M}_{kl}} &\leq \bb{d_kd_l}^{n}\bb{\frac{1}{\bb{n-t}\bb{1-2\eta\lambda_1}^{n-t-1}}\frac{1 - \bb{1-2\eta\lambda_1}^{n-t}}{2\eta\lambda_1} - 1} \\
         &= \bb{d_kd_l}^{n}\bb{\frac{1 - \bb{1-2\eta\lambda_1}^{n-t}}{\bb{1-2\eta\lambda_1}^{n-t-1}} - 1},
    }
    Therefore,
    \bas{
        &\bb{\bbb{R_n}_{kl} - G_{kl}}\frac{d_kd_l\bb{{1 - \bb{d_kd_l}}}}{\widetilde{M}_{kl}} \\
        &\leq \bb{d_kd_l}^{n}\bb{\frac{1}{\bb{1-2\eta\lambda_1}^{ \frac{1}{2\eta\lambda_{1}} - 1}} - \bb{1-2\eta\lambda_1} - 1} \\
        &\leq \bb{d_kd_l}^{n}\bb{ \bb{2-e}2\eta\lambda_{1} + e - \bb{1-2\eta\lambda_1} - 1}, \text{ using for } x \in \bb{0,1}, \bb{1-x}^{1-\frac{1}{x}} \leq \bb{2-e}x + e \\
        &= \bb{d_kd_l}^{n}\bb{ \bb{3-e}2\eta\lambda_{1} + e - 2} \leq \bb{d_kd_l}^{n}\bb{ \bb{3-e}2\eta\lambda_{1} + e}
    }
    Therefore, 
    \bas{
        \frac{\bb{\bbb{R_n}_{kl} - G_{kl}}}{\bbb{R_n}_{kl}} &\leq \frac{\bb{d_kd_l}^{n}}{1-\bb{d_kd_l}^{n}}\bb{ \bb{3-e}2\eta\lambda_{1} + e} \\
        &\leq \frac{2\bb{ \bb{3-e}2\eta\lambda_{1} + e}}{n^{2c}}, \text{ using } \eqref{eq:dkdl_ub}
    }
\end{proof}

\begin{lemma}\label{lemma:fourth_moment_calculation}
     For $j \in [d]$, let $\beta_{k} := e_{j}^{\top}Y_{k}$ and assume $p \leq q$. Then, 
     \bas{
        \E\bbb{\beta_p^{2}\beta_q^{2}} 
    &\leq \mathcal{M}^{2}\bb{1-\mu}^{2\bb{q-p}}\E\bbb{e_{j}^{\top}Y_pY_p^{\top}e_j}\bbb{\bb{\sum_{i = 1}^{p}\bb{1-\mu}^{p-i-1}}^{2} + \sum_{i=p+1}^{q}\bb{1-\mu}^{2\bb{p-i-1}}} 
     }
     where 
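    $\widetilde{M} := \E\bbb{\vp^{\top}\bb{A_{i}-\Sigma}v_{1}v_{1}^\top\bb{A_{i}-\Sigma}\vp}$ and $1-\mu := \frac{1+\eta\lambda_{2}}{1+\eta\lambda_1}$.
    % NOTE(review): \mathcal{M} is not defined in this section; presumably an almost-sure bound on \norm{\bb{A_i-\Sigma}v_1}_2 -- confirm and state it here.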
\end{lemma}
\begin{proof}
%     Let us first consider $p = q$. Then, 
%     \bas{
%         \E\bbb{\beta_p^{4}} &= e_{j}^{\top}\E\bbb{Y_pY_p^{\top}e_{j}e_{j}^{\top}Y_{p}Y_p^{\top}}e_{j} \\
%         &= e_{j}^{\top}\E\bbb{\bb{\sum_{r, s \in [p]}X_r^pX_s^{p \top}}e_{j}e_{j}^{\top}\bb{\sum_{u, v \in [p]}X_u^pX_v^{p \top}} }e_j \\
%         &= \sum_{r,s,u,v\in[p]}e_{j}^{\top}\E\bbb{X_r^pX_s^{p \top}e_{j}e_{j}^{\top}X_u^pX_v^{p \top}}e_j \\
%         &= \sum_{r,s,u,v\in[p]}\E\bbb{\bb{e_{j}^{\top}X_r^p}\bb{e_{j}^{\top}X_s^p}\bb{e_{j}^{\top}X_u^p}\bb{e_{j}^{\top}X_v^p}}
%     }
%     Let $h_{r} := \bb{e_{j}^{\top}X_r^p}$. We first calculate $\E\bbb{h_{r}h_{s}h_{u}h_{v}}$. Note that if any one of $r,s,u,v$ occurs an odd number of times, then the expectation would be zero because of symmetry. Therefore, we are only left with the following cases - 
% \begin{enumerate}
%     \item $\bb{r = s = u = v}$ - $\E\bbb{h_{r}^4}$
%     \item $\bb{r = s, u = v, r \neq u}$ - $\E\bbb{h_{r}^{2}h_{u}^{2}}$
% \end{enumerate}
% Then we have
% \bas{
%      \E\bbb{\beta_p^4}
%                   &= \underbrace{\sum_{r}\E\bbb{h_{r}^{4}}}_{p \text{ terms}} + \underbrace{\sum_{r \neq s}\E\bbb{h_{r}^{3}h_{j}}}_{8{p \choose 2} \text{ terms}} + \underbrace{\sum_{r = s, u \neq v, i}\E\bbb{h_{r}^{2}h_{s}h_{u}}}_{36{p \choose 3} \text{ terms}} \\
%                   & \;\;\;\; + \underbrace{\sum_{r = s, u = v, r \neq u}\E\bbb{h_{r}^{2}h_{u}^{2}}}_{6{p \choose 2} \text{ terms}} + \underbrace{\sum_{r \neq s \neq u \neq v}\E\bbb{h_{r}h_{s}h_{u}h_{v}}}_{ 24{p \choose 4} \text{ terms } }
% }
% Therefore, 
% \ba{
%     \E\bbb{\beta_p^{4}} &= \sum_{r=1}^{p}\E\bbb{\bb{e_j^{\top}X_{r}^{p}}^{4}} + 3\sum_{1 \leq r \neq u \leq p}\E\bbb{\bb{e_j^{\top}X_{r}^{p}}^{2}\bb{e_j^{\top}X_{u}^{p}}^{2}} \\
%     &= \sum_{r=1}^{p}\E\bbb{\bb{e_j^{\top}X_{r}^{p}}^{4}} + 6\sum_{1 \leq r < u \leq p}\E\bbb{\bb{e_j^{\top}X_{r}^{p}}^{2}}\E\bbb{\bb{e_j^{\top}X_{u}^{p}}^{2}} \label{eq:fourth_moment_equal_index}
% }
Note that 
\bas{
    Y_{q} &= \vp\lambp^{q-p}\vp^{\top}Y_p + \sum_{i=p+1}^{q}X_{i}^{q}
}
Then, 
\bas{
    & \E\bbb{\beta_p^{2}\beta_q^{2}} \\
    &= e_{j}^{\top}\E\bbb{Y_pY_p^{\top}e_{j}e_{j}^{\top}Y_{q}Y_q^{\top}}e_{j} \\
    &= \underbrace{\E\bbb{e_{j}^{\top}Y_pY_p^{\top}e_{j}e_{j}^{\top}\vp\lambp^{q-p}\vp^{\top}Y_pY_p^{\top}\vp\lambp^{q-p}\vp^{\top}e_{j}}}_{:= T_1} + \underbrace{e_{j}^{\top}\E\bbb{Y_pY_p^{\top}}e_{j}e_{j}^{\top}\E\bbb{\bb{\sum_{i=p+1}^{q}X_{i}^{q}}\bb{\sum_{i=p+1}^{q}X_{i}^{q\top}}}e_{j}}_{:= T_2}
}
% Let $W_r := \vp^{\top}\bb{A_r-\Sigma}v_1$.
Let $\frac{1+\eta\lambda_{2}}{1+\eta\lambda_1} := 1-\mu$. 

Note that 
\bas{
    \norm{e_{j}^{\top}\vp\lambp^{q-p}\vp^{\top}Y_p}_{2} &\leq \norm{\sum_{i \in [p]}\vp\lambp^{q-i-1}\vp^{\top}\bb{A_i-\Sigma}v_{1}}_{2} \\
    &\leq \sum_{i \in [p]}\norm{\vp\lambp^{q-i-1}\vp^{\top}\bb{A_i-\Sigma}v_{1}}_{2} \\
    &\leq \mathcal{M}\sum_{i \in [p]}\bb{1-\mu}^{q-i-1}
}

% Note that 
% \bas{
%     \norm{Y_p}_{2} &= \norm{\sum_{j \in [p]}\vp\lambp^{p-j-1}\vp^{\top}\bb{A_j-\Sigma}v_{1}}_{2} \\
%     &\leq \sum_{j\in[p]}\norm{\vp\lambp^{p-j-1}\vp^{\top}\bb{A_j-\Sigma}v_{1}}_{2} \\
%     &\leq \mathcal{M}\sum_{j\in[p]}\bb{1-\mu}^{p-j-1} = \frac{\mathcal{M}}{1-\mu}\frac{1-\bb{1-\mu}^{p}}{1 - \bb{1-\mu}} = \frac{\mathcal{M}\bb{1 - \bb{1-\mu}^{p}}}{\mu\bb{1-\mu}}
% }
Therefore, 
\bas{
    T_{1} &= \E\bbb{\bb{e_j^{\top}Y_p}^{2}\bb{e_j^{\top}\vp\lambp^{q-p}\vp^{\top}Y_p}^{2}} \leq \mathcal{M}^{2}\E\bbb{e_{j}^{\top}Y_pY_p^{\top}e_j}\bb{\sum_{j \in [p]}\bb{1-\mu}^{q-j-1}}^{2} \\
    &= \mathcal{M}^{2}\E\bbb{e_{j}^{\top}Y_pY_p^{\top}e_j}\bb{1-\mu}^{2\bb{q-p}}\bb{\sum_{j \in [p]}\bb{1-\mu}^{p-j-1}}^{2}
}
For $T_2$, we have,
\bas{
    e_{j}^{\top}\E\bbb{\bb{\sum_{i=p+1}^{q}X_{i}^{q}}\bb{\sum_{i=p+1}^{q}X_{i}^{q\top}}}e_{j} &= \sum_{i=p+1}^{q}e_{j}^{\top}\E\bbb{X_{i}^{q}X_{i}^{q\top}}e_{j} \\
    &= e_{j}^{\top}\vp\bb{\sum_{i=p+1}^{q}\lambp^{q-i-1}\E\bbb{\vp^{\top}\bb{A_i-\Sigma}v_1v_1^{\top}\bb{A_i-\Sigma}\vp}\lambp^{q-i-1}}\vp^{\top}e_j \\
    &= e_{j}^{\top}\vp\bb{\sum_{i=p+1}^{q}\lambp^{q-i-1}\widetilde{M}\lambp^{q-i-1}}\vp^{\top}e_j \\
    &\leq \mathcal{M}^{2}\sum_{i=p+1}^{q}\bb{1-\mu}^{2\bb{q-i-1}} \\
    &= \mathcal{M}^{2}\bb{1-\mu}^{2\bb{q-p}}\sum_{i=p+1}^{q}\bb{1-\mu}^{2\bb{p-i-1}}
    % &\leq \mathcal{M}^{2}\sum_{i=p+1}^{q}\bb{1-\mu}^{2\bb{q-i-1}} 
}
Therefore, 
\bas{
    \E\bbb{\beta_p^{2}\beta_q^{2}} &\leq \mathcal{M}^{2}\bb{1-\mu}^{2\bb{q-p}}\E\bbb{e_{j}^{\top}Y_pY_p^{\top}e_j}\bbb{\bb{\sum_{i = 1}^{p}\bb{1-\mu}^{p-i-1}}^{2} + \sum_{i=p+1}^{q}\bb{1-\mu}^{2\bb{p-i-1}}}
}
which completes our proof.
\end{proof}

% \begin{lemma}[Theorem 5 \cite{boucheron2003concentration}]\label{lemma:exponential_efron_stein}
%     Let $\left\{A_{i}\right\}_{i \in [n]}$ be $n$ $\iid$ random variables in some measurable space, $\mathcal{X}$,  and $\left\{A_{i}'\right\}_{i \in [n]}$ be their corresponding $\iid$ copies. Let $f : \mathcal{X}^{n} \rightarrow \mathbb{R}$ and define the random variables, $v_{n} := f\bb{A_{1}, A_{2}, \cdots, A_{n}}$, $v_{n}^{(i)} := f\bb{A_{1}, A_{2}, \cdots, A_{i}', \cdots, A_{n}}$ and 
%     \bas{
%         V_{+} &:= \E\bbb{\sum_{i \in [n]}\bb{v_{n} - v_{n}^{(i)}}^{2}\mathbbm{1}\bb{v_{n} \geq v_{n}^{(i)}}|\left\{A_{i}\right\}_{i \in [n]}}
%     }
%     such that for $a, b > 0$,
%     \bas{
%         V_{+} \leq av_{n} + b 
%     }
%     Then for $\delta \in \bb{0,1}$, with probability atleast $1-\delta$,  
%     \bas{
%         v_{n} - \E\bbb{v_n} \leq  2\sqrt{\bb{a\E\bbb{v_{n}}+b}\log\bb{\frac{1}{\delta}}} + a\log\bb{\frac{1}{\delta}}
%     }
% \end{lemma}

% Then, 
% \bas{
%     \E\bbb{\beta_p^{2}\beta_q^{2}} &= e_{j}^{\top}\E\bbb{Y_pY_p^{\top}e_{j}e_{j}^{\top}Y_{q}Y_q^{\top}}e_{j} \\
%     &= e_{j}^{\top}\E\bbb{\bb{\sum_{r, s \in [p]}X_r^pX_s^{p \top}}e_{j}e_{j}^{\top}\bb{\sum_{u, v \in [q]}X_u^qX_v^{q \top}} }e_j \\
%     &= \sum_{r,s \in [p], u,v\in[q]}e_{j}^{\top}\E\bbb{X_r^pX_s^{p \top}e_{j}e_{j}^{\top}X_u^qX_v^{q \top}}e_j \\
%     &= \sum_{r,s \in [p], u,v\in[q]}\E\bbb{\bb{e_{j}^{\top}X_r^p}\bb{e_{j}^{\top}X_s^p}\bb{e_{j}^{\top}X_u^q}\bb{e_{j}^{\top}X_v^q}} 
% }
% Recall $h_{r,p} := e_{j}^{\top}X_r^p$. Note that if any one of $r,s,u,v$ occurs an odd number of times, then the expectation would be zero because $\E\bbb{h_{r,p}} = 0$ and $h_{r,p} \indep h_{s,q}$ for any $r \neq s$. Then, we have,
% \bas{
%     \E\bbb{\beta_p^{2}\beta_q^{2}} &= \sum_{r,s \in [p], u,v\in[q]}\E\bbb{h_rh_sg_ug_v} \\
%     &= \underbrace{\sum_{r \in [p]}\E\bbb{h_{r,p}^{2}h_{r,q}^{2}}}_{:= T_1} + \underbrace{\sum_{r \in [p], s \in [q], r \neq s}\E\bbb{h_{r,p}^{2}}\E\bbb{h_{s,q}^{2}}}_{:=T_2} + 2\underbrace{\sum_{r, s\in[p], r \neq s}\E\bbb{h_{r,p}h_{r,q}}\E\bbb{h_{s,p}h_{s,p}}}_{:=T_3}
%     % &\leq \sum_{r \in [p]}\sqrt{\E\bbb{h_{r,p}^{4}}}\sqrt{\E\bbb{h_{r,q}^{4}}} + \sum_{r \in [p], s \in [q], r \neq s}\E\bbb{h_{r,p}^{2}}\E\bbb{h_{s,q}^{2}} + 2\sum_{r, s\in[p], r \neq s}\E\bbb{h_{r,p}h_{r,q}}\E\bbb{h_{s,p}h_{s,p}} 
%     % &\leq \sum_{r \in [p]}\sqrt{\E\bbb{h_{r,p}^{4}}}\sqrt{\E\bbb{h_{r,q}^{4}}} + \sum_{r \in [p], s \in [q], r \neq s}\E\bbb{h_{r,p}^{2}}\E\bbb{h_{s,q}^{2}} + 2\sum_{r, s\in[p], r \neq s}\E\bbb{h_{r,p}h_{r,q}}\E\bbb{h_{s,p}h_{s,p}} 
%     % &= \underbrace{e_{j}^{\top}\sum_{r \in [p]}\E\bbb{X_{r}^{p}X_{r}^{p\top}e_je_j^{\top}X_{r}^{q}X_{r}^{q \top}}e_j}_{:= T_1} + \underbrace{e_{j}^\top\sum_{r \in [p], s \in [q], r \neq s}\E\bbb{X_{r}^{p}X_{r}^{p\top}}e_je_j^{\top}\E\bbb{X_{s}^{q}X_{s}^{q\top}}e_j}_{:= T_2} \\
%     % & \;\; + 2\underbrace{e_{j}^\top\sum_{r, s\in[p], r \neq s}\E\bbb{X_r^p X_r^q} e_je_j^{\top} \E\bbb{X_s^p X_s^q}e_j}_{:= T_3} 
% }
% where we used $h_{r,p} \indep h_{s,q}$ for any $r \neq s$. Let $\frac{1+\eta\lambda_{2}}{1+\eta\lambda_1} := 1-\mu$ and $W_r := \vp^{\top}\bb{A_r-\Sigma}v_1v_1^{\top}\bb{A_r-\Sigma}\vp$ for convenience of notation. We start with $T_1$. 
% \bas{
%     \E\bbb{h_{r,p}^{2}h_{r,q}^{2}} &= \E\bbb{e_j^{\top}\vp\lambp^{p-r-1}W_r\lambp^{q-r-1}\vp^{\top}e_{j}e_j^{\top}\vp\lambp^{q-r-1}W_r\lambp^{p-r-1}\vp^{\top}e_{j}} \\
%     &\leq \bb{1-\mu}^{2\bb{q-r-1}}e_j^{\top}\vp\lambp^{p-r-1}\E\bbb{W_r^{2}}\lambp^{p-r-1}\vp^{\top}e_{j}
% }
% We have, 
% \bas{
%     \E\bbb{W_r^{2}} &= \E\bbb{\vp^{\top}\bb{A_r-\Sigma}v_1\underbrace{v_1^{\top}\bb{A_r-\Sigma}\vp\vp^{\top}\bb{A_r-\Sigma}v_1}_{\leq \mathcal{M}^{2}}v_1^{\top}\bb{A_r-\Sigma}\vp} \leq \mathcal{M}^{2}\E\bbb{W_r}
% }
% % \bas{
% %     e_{j}^{\top}X_{r,p} &\leq \norm{X_{r,p}}_{2} = \norm{\vp\lambp^{p-r-1}\vp^{\top}\bb{A_{r}-\Sigma}v_1}_{2} \leq \bb{\frac{1+\eta\lambda_{2}}{1+\eta\lambda_1}}^{p-r-1}\mathcal{M}
% % }
% Therefore, 
% \bas{
%     \E\bbb{h_{r,p}^{2}h_{r,q}^{2}} &\leq \bb{1-\mu}^{2\bb{q-r-1}}\mathcal{M}^{2}e_j^{\top}\vp\lambp^{p-r-1}\widetilde{M}\lambp^{p-r-1}\vp^{\top}e_{j}
% }
% which implies
% \bas{
%     T_1 &\leq \mathcal{M}^{2} \times e_j^{\top}\vp\underbrace{\bb{\sum_{r \in [p]}\bb{1-\mu}^{2\bb{q-r-1}}\lambp^{p-r-1}\widetilde{M}\lambp^{p-r-1}}}_{:=R_p}\vp^{\top}e_{j} 
% }
% Let us consider the $\bb{k,l}^{\text{th}}$ of $R_p$. We have, 
% \bas{
%     \bbb{R_p}_{kl} &= \sum_{r \in [p]}\bb{1-\mu}^{2\bb{q-r-1}}\bb{d_kd_l}^{\bb{p-r-1}}\widetilde{M}_{kl} \\
%     &= \bb{1-\mu}^{2\bb{q-p}}\widetilde{M}_{kl}\sum_{r \in [p]}\bb{\bb{1-\mu}^{2}d_kd_l}^{p-r-1} \\
%     &= \bb{1-\mu}^{2\bb{q-p}}\frac{\widetilde{M}_{kl}}{\bb{1-\mu}^{2}d_kd_l}\sum_{r \in [p]}\bb{\bb{1-\mu}^{2}d_kd_l}^{p-r} \\
%     &= \bb{1-\mu}^{2\bb{q-p}}\frac{\widetilde{M}_{kl}}{\bb{1-\mu}^{2}d_kd_l}\frac{1 - \bb{1-\mu}^{2p}\bb{d_kd_l}^{p}}{1 - \bb{1-\mu}^{2}d_kd_l} \\
%     &= \widetilde{M}_{kl}\frac{\bb{1-\mu}^{2\bb{q-p-1}}}{d_kd_l}\frac{1 - \bb{1-\mu}^{2p}\bb{d_kd_l}^{p}}{1 - \bb{1-\mu}^{2}d_kd_l}
% }
% Similarly, 
% \bas{
%     T_{2} &= \sum_{r \in [p], s \in [q], r \neq s}\E\bbb{h_{r,p}^{2}}\E\bbb{h_{s,q}^{2}} \\
%     &= \sum_{r \in [p], s \in [q], r \neq s}e_j^{\top}\vp\lambp^{p-r-1}\widetilde{M}\lambp^{p-r-1}\vp^{\top}e_{j}e_j^{\top}\vp\lambp^{q-s-1}\widetilde{M}\lambp^{q-s-1}\vp^{\top}e_{j}
% }



% Starting with $T_1$, we have
% \bas{
%     T_1 &= e_{j}^{\top}\sum_{r \in [p]}\E\bbb{X_{r}^{p}X_{r}^{p\top}e_je_j^{\top}X_{r}^{q}X_{r}^{q \top}}e_j \\
%     &= \sum_{r \in [p]}
% }

% Note that 
% \bas{
%     Y_q &= \vp\lambp^{q-p}\vp^{\top}\bb{Y_p + \sum_{k=p+1}^{q}X_{k}^{p}}
% }
% Therefore, 
% \bas{
%     \E\bbb{\beta_p^{2}\beta_q^{2}} &= e_{j}^{\top}\E\bbb{Y_pY_p^{\top}e_{j}e_{j}^{\top}Y_{q}Y_q^{\top}}e_{j} \\
%     &= e_{j}^{\top}\E\bbb{Y_pY_p^{\top}e_{j}e_{j}^{\top}\vp\lambp^{q-p}\vp^{\top}\bb{Y_pY_p^{\top} + \sum_{k=p+1}^{q}X_{k}^{p}X_{k}^{p \top}} }\vp\lambp^{q-p}\vp^{\top}e_{j} \\
%     &=  \E\bbb{\bb{e_{j}^{\top}\vp\lambp^{q-p}\vp^{\top}Y_p}^{2}\bb{e_j^{\top}Y_p}^{2}} +  \bb{e_{j}^{\top}\E\bbb{Y_pY_p^\top} e_j}\bb{\sum_{k=p+1}^{q} e_{j}^{\top}\E\bbb{X_{k}^{q}X_{k}^{q \top}}e_j} \\
%     &= 
% }
% \end{proof}

\begin{lemma}\label{lemma:efron_stein_concentration_result_1}
    For $\iid$ samples $\left\{A_i\right\}_{i \in [n]}$, let $\left\{A_{i}'\right\}_{i \in [n]}$ be $\iid$ copies of $\left\{A_i\right\}_{i \in [n]}$. For $j \in [d]$, let $\beta_{k} := e_{j}^{\top}Y_{k}$ and $v_{n} := \frac{1}{n-t}\sum_{k=t+1}^{n}\beta_k^{2} =: f\bb{A_{1}, A_{2}, \cdots, A_{n}}$ for some function $f : \R^{n \times d} \rightarrow \R$ and let $V_{+}$ be defined as
    \bas{
        V_{+} &:= \E\bbb{\sum_{i \in [n]}\bb{v_{n} - v_{n}^{(i)}}^{2}\mathbbm{1}\bb{v_{n} \geq v_{n}^{(i)}}|\left\{A_{i}\right\}_{i \in [n]}}
    }
    where $v_{n}^{(i)} := f\bb{A_{1}, A_{2}, \cdots, A_{i}', \cdots, A_{n}}$. For all $i \in [n]$, let $\widetilde{M}_i := \vp^{\top}\E\bbb{\bb{A_i-A_i'}v_{1}v_{1}^{\top}\bb{A_i-A_i'}|A_i}\vp$ and define 
    \bas{
        S_{n} := \frac{1}{n-t}\sum_{k=t+1}^{n}e_{j}^{\top}\vp\bb{\sum_{i=1}^{k}\lambp^{k-i-1}\widetilde{M}_i\lambp^{k-i-1}}\vp^{\top}e_j
    }
    Then, $V_{+} \leq 4S_{n}v_{n}$.
\end{lemma}
\begin{proof}
    Define the $\sigma$-algebra $\mathcal{F}_{n} := \sigma\bb{\left\{A_i\right\}_{i \in [n]}}$
    % define the function $f$ as,
    % \bas{
    %      f\bb{A_{1}, A_{2}, \cdots, A_{n}} := v_n
    % }
    % Furthermore, let $\left\{A_{i}'\right\}_{i \in [n]}$ be $\iid$ copies of $\left\{A_i\right\}_{i \in [n]}$ and define 
    % \bas{
    %     f_{i}\bb{A_{1}, A_{2}, \cdots A_{i}', \cdots, A_{n}} &= f\bb{A_{1}, A_{2}, \cdots, A_{i}', \cdots A_{n}}
    % }
    For convenience of notation denote, 
    \bas{
        Y_{k,i}
        &= Y_{k} + \mathbbm{1}\bb{i \leq k}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i'-A_i}v_{1}
    }
    Let us first consider the case of $i < t+1$. Define 
    \bas{
        T_i &:= \bb{\sum_{k=t+1}^{n}e_{j}^{\top}Y_kY_k^{\top}e_j} - \bb{\sum_{k=t+1}^{n}e_{j}^{\top}Y_{k,i}Y_{k,i}^{\top}e_j}
    }
    Then, 
    \bas{
        \E\bbb{\bb{v_{n} - v_{n}^{(i)}}^{2}\mathbbm{1}\bb{v_{n} \geq v_{n}^{(i)}}\bigg|\mathcal{F}_{n}} &= \E\bbb{\bb{\bb{\frac{1}{n-t}\sum_{k=t+1}^{n}e_{j}^{\top}Y_kY_k^{\top}e_j} - \bb{\frac{1}{n-t}\sum_{k=t+1}^{n}e_{j}^{\top}Y_{k,i}Y_{k,i}^{\top}e_j}}^{2}\mathbbm{1}\bb{T_i \geq 0}\bigg|\mathcal{F}_{n}} \\
        &= \frac{1}{\bb{n-t}^{2}}\E\bbb{\bb{\underbrace{\sum_{k=t+1}^{n}e_{j}^{\top}\bb{Y_k-Y_{k,i}}\bb{Y_k + Y_{k,i}}^{\top}e_j}_{= T_i}}^{2}\mathbbm{1}\bb{T_i \geq 0}\bigg|\mathcal{F}_{n}}
    }
    We now simplify $T_i$ as, 
    \bas{
        T_i &= \sum_{k=t+1}^{n}e_{j}^{\top}\bb{Y_k-Y_{k,i}}\bb{Y_k + Y_{k,i}}^{\top}e_j \\
        &= \sum_{k=t+1}^{n}e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}\bb{2Y_k^{\top} + v_{1}^{\top}\bb{A_i'-A_i}\vp\lambp^{k-i-1}\vp^{\top}}e_j \\
        &= 2\sum_{k=t+1}^{n}e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}Y_{k}^{\top}e_{j} - \sum_{k=t+1}^{n}\bb{e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}}^{2} 
    }
    Then, 
    \bas{
        T_{i}\mathbbm{1}\bb{T_{i} \geq 0} \leq 2\left|\sum_{k=t+1}^{n}e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}Y_{k}^{\top}e_{j}\right|
    }
    % Therefore, 
    % \bas{
    %     \left|T_i\right| &\leq 2\left|\sum_{k=t+1}^{n}e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}Y_{k}^{\top}e_{j}\right| + \sum_{k=t+1}^{n}\bb{e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}}^{2} 
    % }
    Therefore, 
    \bas{
        T_{i}^{2}\mathbbm{1}\bb{T_{i} \geq 0} &\leq 4\bb{\sum_{k=t+1}^{n}e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}Y_{k}^{\top}e_{j}}^{2}
    }
    Using the Cauchy-Schwarz inequality,
    \bas{
& \bb{\sum_{k=t+1}^{n}e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}Y_{k}^{\top}e_{j}}^{2} \\
&\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\leq \bb{\sum_{k=t+1}^{n}\bb{Y_{k}^{\top}e_{j}}^{2} }\bb{\sum_{k=t+1}^{n}\bb{e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}}^{2}}
    }
Recall 
\bas{
    \widetilde{M}_i &:= \vp^{\top}\E\bbb{\bb{A_i-A_i'}v_{1}v_{1}^{\top}\bb{A_i-A_i'}|A_i}\vp
}
Then, 
\bas{
     \E\bbb{T_i^{2}\mathbbm{1}\bb{T_i \geq 0}|\mathcal{F}_{n}} &\leq 4\bb{\sum_{k=t+1}^{n}\bb{Y_{k}^{\top}e_{j}}^{2}}\E\bbb{\sum_{k=t+1}^{n}\bb{e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}}^{2}|\mathcal{F}_n} \\
     &= 4\bb{\sum_{k=t+1}^{n}\bb{Y_{k}^{\top}e_{j}}^{2}}\bb{e_{j}^{\top}\vp\bb{\sum_{k=t+1}^{n}\lambp^{k-i-1}\widetilde{M}_i\lambp^{k-i-1}}\vp^{\top}e_j}
}
Therefore, for $i < t+1$,
\ba{
    \frac{1}{\bb{n-t}^{2}}\sum_{i\in[t]}\E\bbb{T_i^{2}\mathbbm{1}\bb{T_i \geq 0}|\mathcal{F}_{n}} &\leq 4\bb{\frac{\sum_{k=t+1}^{n}\bb{Y_{k}^{\top}e_{j}}^{2}}{n-t}}\bb{\frac{1}{n-t}\sum_{i=1}^{t}e_{j}^{\top}\vp\bb{\sum_{k=t+1}^{n}\lambp^{k-i-1}\widetilde{M}_i\lambp^{k-i-1}}\vp^{\top}e_j} \notag \\
    &= 4\bb{\frac{\sum_{k=t+1}^{n}\bb{Y_{k}^{\top}e_{j}}^{2}}{n-t}}\bb{\frac{1}{n-t}\sum_{k=t+1}^{n}e_{j}^{\top}\vp\bb{\sum_{i=1}^{t}\lambp^{k-i-1}\widetilde{M}_i\lambp^{k-i-1}}\vp^{\top}e_j} \label{eq:efron_stein_case_1}
}
We next analyze the case for $n\geq i \geq t+1$. Then, we simplify $T_i$ as, 
\bas{
    T_i &= \sum_{k=t+1}^{n}e_{j}^{\top}\bb{Y_k-Y_{k,i}}\bb{Y_k + Y_{k,i}}^{\top}e_j \\
    &= \sum_{k=i}^{n}e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}\bb{2Y_k^{\top} + v_{1}^{\top}\bb{A_i'-A_i}\vp\lambp^{k-i-1}\vp^{\top}}e_j \\
    &= 2\sum_{k=i}^{n}e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}Y_{k}^{\top}e_{j} - \sum_{k=i}^{n}\bb{e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}}^{2} 
}
Therefore, proceeding similarly as before, we have for $i \geq t+1$,
\ba{
    & \frac{1}{\bb{n-t}^{2}}\sum_{i\in[t+1, n]}\E\bbb{T_i^{2}\mathbbm{1}\bb{T_i \geq 0}|\mathcal{F}_{n}} \notag \\
    &\;\;\;\;\;\; \leq 4\sum_{i=t+1}^{n}\bb{\frac{\sum_{k=i}^{n}\bb{Y_{k}^{\top}e_{j}}^{2}}{n-t}}\bb{\frac{1}{n-t}e_{j}^{\top}\vp\bb{\sum_{k=i}^{n}\lambp^{k-i-1}\widetilde{M}_i\lambp^{k-i-1}}\vp^{\top}e_j} \notag \\
    &\;\;\;\;\;\; \leq 4\bb{\frac{\sum_{k=t+1}^{n}\bb{Y_{k}^{\top}e_{j}}^{2}}{n-t}}\bb{\frac{1}{n-t}\sum_{i=t+1}^{n}e_{j}^{\top}\vp\bb{\sum_{k=i}^{n}\lambp^{k-i-1}\widetilde{M}_i\lambp^{k-i-1}}\vp^{\top}e_j} \notag \\
    &\;\;\;\;\;\; = 4\bb{\frac{\sum_{k=t+1}^{n}\bb{Y_{k}^{\top}e_{j}}^{2}}{n-t}}\bb{\frac{1}{n-t}\sum_{k=t+1}^{n}e_{j}^{\top}\vp\bb{\sum_{i=t+1}^{k}\lambp^{k-i-1}\widetilde{M}_i\lambp^{k-i-1}}\vp^{\top}e_j} \label{eq:efron_stein_case_2}
}
Therefore, combining \eqref{eq:efron_stein_case_1} and \eqref{eq:efron_stein_case_2}, 
\ba{
    \frac{1}{\bb{n-t}^{2}}\sum_{i\in[ n]}\E\bbb{T_i^{2}\mathbbm{1}\bb{T_i \geq 0}|\mathcal{F}_{n}} &\leq 4\bb{\frac{\sum_{k=t+1}^{n}\bb{Y_{k}^{\top}e_{j}}^{2}}{n-t}}\bb{\frac{1}{n-t}\sum_{k=t+1}^{n}e_{j}^{\top}\vp\bb{\sum_{i=1}^{k}\lambp^{k-i-1}\widetilde{M}_i\lambp^{k-i-1}}\vp^{\top}e_j} \label{eq:efron_stein_bound_random_variable}
}
\eqref{eq:efron_stein_bound_random_variable} shows that $V_{+} \leq 4v_{n}S_{n}$, completing our proof. 

% Next, let $1-\mu := \frac{1+\eta\lambda_{2}}{1+\eta\lambda_{1}}$. Then, 
% \bas{
%     \frac{1}{\bb{n-t}^{2}}\sum_{i\in[ n]}\E\bbb{T_i^{2}\mathbbm{1}\bb{T_i \geq 0}|\mathcal{F}_{n}} &\leq 4v_{n}\bb{\frac{1}{n-t}\sum_{k=t+1}^{n}e_{j}^{\top}\vp\bb{\sum_{i=1}^{k}\lambp^{k-i-1}\widetilde{M}_i\lambp^{k-i-1}}\vp^{\top}e_j} \\
%     &\leq 4v_{n}\bb{\frac{\mathcal{L}}{n-t}\sum_{k=t+1}^{n}\bb{\sum_{i=1}^{k}\bb{1-\mu}^{2\bb{k-i-1}}}} \\
%     &= 4v_{n}\bb{\frac{\mathcal{L}}{\bb{n-t}\bb{1-\mu}}\sum_{k=t+1}^{n}\bb{\frac{1-\bb{1-\mu}^{2k}}{1-\bb{1-\mu}^{2}}}} \\
%     &\leq 2v_{n}\bb{\frac{\mathcal{L}}{\bb{n-t}\bb{1-\mu}}\sum_{k=t+1}^{n}\bb{\frac{1}{\mu}}} \\
%     &= \frac{2\mathcal{L}}{\mu\bb{1-\mu}}v_{n}
% }
% The result then follows using Lemma~\ref{lemma:exponential_efron_stein}.
\end{proof}

\begin{lemma}\label{lemma:variance_estimator_upper_tail_bound}
    In the setting of Lemma~\ref{lemma:efron_stein_concentration_result_1}, we have for $\delta \in \bb{0,1}$, if 
    \bas{
        r \gtrsim \E\bbb{v_n}\log\bb{\frac{1}{\delta}} + \sqrt{\frac{\mathcal{L}^{2}}{\mu}\log^{3}\bb{\frac{1}{\delta}}}
    }
    then $\mathbb{P}\bb{v_{n} > \E\bbb{v_n} + r} \leq \delta$.
\end{lemma}
\begin{proof}
    We have,
    \bas{
        S_{n} &= \frac{1}{n-t}\sum_{k=t+1}^{n}e_{j}^{\top}\vp\bb{\sum_{i=1}^{k}\lambp^{k-i-1}\widetilde{M}_i\lambp^{k-i-1}}\vp^{\top}e_j \\
        &= \frac{1}{n-t}\sum_{i=1}^{n}\sum_{k=\max\left\{t+1, i\right\}}^{n}e_{j}^{\top}\vp\lambp^{k-i-1}\widetilde{M}_i\lambp^{k-i-1}\vp^{\top}e_j
    }
    Consider the random variable $z_{t,i} := \sum_{k=\max\left\{t+1, i\right\}}^{n}e_{j}^{\top}\vp\lambp^{k-i-1}\widetilde{M}_i\lambp^{k-i-1}\vp^{\top}e_j$. Let $1-\mu := \frac{1+\eta\lambda_{2}}{1+\eta\lambda_{1}}$. Then, 
    \bas{
        z_{t,i} &= \sum_{k=\max\left\{t+1, i\right\}}^{n}e_{j}^{\top}\vp\lambp^{k-i-1}\widetilde{M}_i\lambp^{k-i-1}\vp^{\top}e_j \\
        &= \sum_{k=\max\left\{t+1, i\right\}}^{n}\E\bbb{\left(e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}\right)^{2}|A_i} \\
        &\leq 4\mathcal{L}\sum_{k=\max\left\{t+1, i\right\}}^{n}\bb{1-\mu}^{2\bb{k-i-1}} \\
        &= \frac{4\mathcal{L}}{\bb{1-\mu}^{2}}\sum_{k=\max\left\{t+1, i\right\}}^{n}\bb{1-\mu}^{2\bb{k-i}} \\
        &= \begin{cases}
            \frac{4\mathcal{L}}{\bb{1-\mu}^{2}}\frac{1 - \bb{1-\mu}^{2\bb{n-i+1}}}{1 - \bb{1-\mu}^{2}}, \; \text{ for } i \geq t+1 \\ \\
            \frac{4\mathcal{L}\bb{1-\mu}^{2\bb{t+1-i}}}{\bb{1-\mu}^{2}}\frac{1 - \bb{1-\mu}^{2\bb{n-t}}}{1 - \bb{1-\mu}^{2}}, \; \text{ for } i < t+1
        \end{cases} \\
        &= \begin{cases}
            \frac{4\mathcal{L}}{\bb{1-\mu}^{2}}\frac{1 - \bb{1-\mu}^{2\bb{n-i+1}}}{1 - \bb{1-\mu}^{2}}, \; \text{ for } i \geq t+1 \\ \\
            4\mathcal{L}\bb{1-\mu}^{2\bb{t-i}}\frac{1 - \bb{1-\mu}^{2\bb{n-t}}}{1 - \bb{1-\mu}^{2}}, \; \text{ for } i < t+1
        \end{cases}
    }
    Now, $S_{n} = \sum_{i = 1}^{n}\frac{z_{t,i}}{n-t}$. Therefore, it is a sum of independent random variables (the $i^{\text{th}}$ summand depends only on $A_i$), where each summand is upper bounded by 
    \bas{
        b_{i} := \begin{cases}
            \frac{4\mathcal{L}}{\bb{1-\mu}^{2}\bb{n-t}}\frac{1 - \bb{1-\mu}^{2\bb{n-i+1}}}{1 - \bb{1-\mu}^{2}}, \; \text{ for } i \geq t+1 \\ \\
            \frac{4\mathcal{L}\bb{1-\mu}^{2\bb{t-i}}}{\bb{n-t}}\frac{1 - \bb{1-\mu}^{2\bb{n-t}}}{1 - \bb{1-\mu}^{2}}, \; \text{ for } i < t+1
        \end{cases}
    }
    and lower bounded by $0$. By Hoeffding's lemma, 
    \ba{
        \E\bbb{\exp\bb{\frac{\lambda \bb{S_n-\E\bbb{S_n}} }{\theta}}} \leq \exp\bb{\frac{1}{8}\bb{\frac{\lambda}{\theta}}^{2} \sum_{i \in [n]}b_i^{2}} \label{eq:hoeffdings_lemma_bound}
    }
    We have, 
    \ba{
        \sum_{i \in [n]}b_i^{2} &= \sum_{i \geq t+1}b_i^{2} + \sum_{i < t+1}b_{i}^{2} \notag \\
        &\leq \frac{16\mathcal{L}^{2}}{\bb{n-t}^{2}\mu^{2}}\bb{\frac{1}{\bb{1-\mu}^{4}} \sum_{i \geq t+1}\bb{1 - \bb{1-\mu}^{2\bb{n-i+1}}}^{2}  + \bb{1 - \bb{1-\mu}^{2\bb{n-t}}}^{2}\sum_{i < t+1}\bb{1-\mu}^{4\bb{t-i}} } \notag \\
        &\leq \frac{16\mathcal{L}^{2}}{\bb{n-t}^{2}\mu^{2}}\bb{\frac{n-t}{\bb{1-\mu}^{4}}   + \bb{1 - \bb{1-\mu}^{2\bb{n-t}}}^{2}\frac{1 - \bb{1-\mu}^{4t}}{1 - \bb{1-\mu}^{4}} } \notag \\
        &\leq \frac{16\mathcal{L}^{2}}{\bb{n-t}^{2}\mu^{2}}\bb{\frac{n-t}{\bb{1-\mu}^{4}}   + \frac{1}{\mu} } \notag \\
        &\leq \frac{C\mathcal{L}^{2}}{\mu}, \text{ using } \mu\bb{n-t} = 1 \label{eq:hoeffding_variance_bound}
    }
    and 
    \ba{
        \E\bbb{S_{n}} &= \frac{1}{n-t}\sum_{k=t+1}^{n}e_{j}^{\top}\vp\bb{\sum_{i=1}^{k}\lambp^{k-i-1}\E\bbb{\widetilde{M}_i}\lambp^{k-i-1}}\vp^{\top}e_j \notag \\
        &=\frac{1}{n-t}\sum_{k=t+1}^{n}e_{j}^{\top}\vp\bb{\sum_{i=1}^{k}\lambp^{k-i-1}\E\bbb{\vp^{\top}\E\bbb{\bb{A_i-A_i'}v_{1}v_{1}^{\top}\bb{A_i-A_i'}|A_i}\vp}\lambp^{k-i-1}}\vp^{\top}e_j \notag \\
        &=\frac{1}{n-t}\sum_{k=t+1}^{n}e_{j}^{\top}\vp\bb{\sum_{i=1}^{k}\lambp^{k-i-1}\vp^{\top}\E\bbb{\bb{A_i-A_i'}v_{1}v_{1}^{\top}\bb{A_i-A_i'}}\vp\lambp^{k-i-1}}\vp^{\top}e_j \notag \\
        &= \frac{2}{n-t}\sum_{k=t+1}^{n}e_{j}^{\top}\vp\bb{\sum_{i=1}^{k}\lambp^{k-i-1}\vp^{\top}\E\bbb{\bb{A_i-\Sigma}v_{1}v_{1}^{\top}\bb{A_i-\Sigma}}\vp\lambp^{k-i-1}}\vp^{\top}e_j \notag \\
        &= \frac{2}{n-t}\sum_{k=t+1}^{n}\E\bbb{\bb{e_{j}^{\top}Y_{k}}^{2}} \notag \\
        &= 2\E\bbb{v_n} \label{eq:E_Sn}
    }
    Substituting \eqref{eq:hoeffding_variance_bound} and \eqref{eq:E_Sn} in \eqref{eq:hoeffdings_lemma_bound}, 
    \bas{
        \E\bbb{\exp\bb{\frac{\lambda S_{n}}{\theta}}} \leq \exp\bb{\frac{2\lambda}{\theta}\E\bbb{v_n} + \frac{C}{8}\bb{\frac{\lambda}{\theta}}^{2}\frac{\mathcal{L}^{2}}{\mu}}
    }
    
    Using Lemma~\ref{lemma:efron_stein_concentration_result_1}, $V_{+} \leq 4S_{n}v_{n}$. Therefore, from Theorem 8 from \cite{boucheron2003concentration}, we have that for all $\theta > 0$ and $\lambda \in \bb{0, \frac{1}{\theta}}$, 
    \bas{
       \mathbb{P}\bb{v_{n} > \E\bbb{v_n} + r} \leq \E\bbb{\exp\bb{\frac{\lambda S_n}{\theta}}}^{\frac{\lambda\theta}{1-\lambda\theta}}\exp\bb{-\lambda x}
    }
    where $x := \sqrt{\E\bbb{v_n} + r} - \sqrt{\E\bbb{v_{n}}}$. Therefore, setting $\lambda := \frac{1}{2\theta}$, 
    \bas{
    \mathbb{P}\bb{v_{n} > \E\bbb{v_n} + r} &\leq \exp\bb{\frac{\lambda\theta}{1-\lambda\theta}\bb{\frac{2\lambda}{\theta}\E\bbb{v_n} + \frac{C}{8}\bb{\frac{\lambda}{\theta}}^{2}\frac{\mathcal{L}^{2}}{\mu}}}\exp\bb{-\lambda x} \\
    &= \exp\bb{\frac{C\mathcal{L}^{2}\lambda^{4}}{2\mu} + 4\lambda^{2}\E\bbb{v_n} - \lambda x} \\
    &\leq \exp\bb{-\min\left\{\frac{3x^{2}}{128\E\bbb{v_n}}, \; \frac{3}{8}\bb{\frac{2\mu x^{4}}{C\mathcal{L}^{2}}}^{\frac{1}{3}} \right\}}, \text{ using Lemma~\ref{lemma:quartic_minimization}}
    }
    Setting the RHS smaller than $\delta$, we have that $\mathbb{P}\bb{v_{n} > \E\bbb{v_n} + r} \leq \delta$ for  
    \bas{
        x \gtrsim \bb{\E\bbb{v_n}\log\bb{\frac{1}{\delta}}}^{\frac{1}{2}} + \bb{\frac{\mathcal{L}^{2}}{\mu}\log^{3}\bb{\frac{1}{\delta}}}^{\frac{1}{4}}
    } 
    Substituting the value of $x$, we have
    \bas{
        \sqrt{\E\bbb{v_n} + r} - \sqrt{\E\bbb{v_{n}}} \gtrsim \bb{\E\bbb{v_n}\log\bb{\frac{1}{\delta}}}^{\frac{1}{2}} + \bb{\frac{\mathcal{L}^{2}}{\mu}\log^{3}\bb{\frac{1}{\delta}}}^{\frac{1}{4}}
    }
    Squaring both sides, it suffices to ensure 
    \bas{
        r \gtrsim \E\bbb{v_n}\log\bb{\frac{1}{\delta}} + \sqrt{\frac{\mathcal{L}^{2}}{\mu}\log^{3}\bb{\frac{1}{\delta}}}
    }
\end{proof}

\begin{lemma}\label{lemma:quartic_minimization}
    For $\alpha, \beta, x > 0$, let $f\bb{\lambda} := \alpha\lambda^{4} + \beta\lambda^{2} - x\lambda$. Then, 
    \bas{
        \min_{\lambda \geq 0}f\bb{\lambda} \leq 
\begin{cases}
    -\frac{3x^{2}}{32\beta}, \;\; 0 \leq x \leq 8\sqrt{\frac{\beta^{3}}{\alpha}} \\
    -\frac{3x^{\frac{4}{3}}}{8\alpha^{\frac{1}{3}}}, \;\; x > 8\sqrt{\frac{\beta^{3}}{\alpha}}
\end{cases}
    }
    or equivalently, $\min_{\lambda \geq 0}f\bb{\lambda} \leq -\min\left\{\frac{3x^{2}}{32\beta}, \frac{3x^{\frac{4}{3}}}{8\alpha^{\frac{1}{3}}}\right\}$.
\end{lemma}
\begin{proof}
    Define $f_{1}\bb{\lambda} := 2\beta\lambda^{2} - x\lambda$ and $f_{2}\bb{\lambda} := 2\alpha\lambda^{4} - x\lambda$. Then, 
    \bas{
        f\bb{\lambda} \leq \begin{cases}
            f_1\bb{\lambda}, \; 0 \leq \lambda \leq \sqrt{\frac{\beta}{\alpha}}, \\
            f_2\bb{\lambda}, \; \lambda > \sqrt{\frac{\beta}{\alpha}}
        \end{cases}
    }
    Therefore, 
    \bas{
        \min_{\lambda \geq 0}f\bb{\lambda} \leq \min\left\{\min_{0 \leq \lambda \leq \sqrt{\frac{\beta}{\alpha}}}f_{1}\bb{\lambda}, \;\; \min_{\lambda > \sqrt{\frac{\beta}{\alpha}}}f_{2}\bb{\lambda} \right\}
    }
    Let us first consider $\min_{0 \leq \lambda \leq \sqrt{\frac{\beta}{\alpha}}}f_{1}\bb{\lambda}$. Define $\lambda_{1}^{*} := \frac{x}{8\beta}$. Then, $f_{1}'\bb{\lambda_{1}^{*}} < 0$. If $\lambda_{1}^{*} \leq \sqrt{\frac{\beta}{\alpha}}$, $\min_{0 \leq \lambda \leq \sqrt{\frac{\beta}{\alpha}}}f_{1}\bb{\lambda} \leq f_{1}\bb{\lambda_{1}^{*}}$. Otherwise, $\min_{0 \leq \lambda \leq \sqrt{\frac{\beta}{\alpha}}}f_{1}\bb{\lambda} = f_{1}\bb{\sqrt{\frac{\beta}{\alpha}}}$. Equivalently, 
    \ba{
        \min_{0 \leq \lambda \leq \sqrt{\frac{\beta}{\alpha}}}f_{1}\bb{\lambda} \leq \begin{cases}
            -\frac{3x^{2}}{32\beta}, \;\; x \leq 8\sqrt{\frac{\beta^{3}}{\alpha}}, \\
            2\frac{\beta^{2}}{\alpha} - x\sqrt{\frac{\beta}{\alpha}}, \; \text{ otherwise }
        \end{cases}\label{eq:f_1_cases}
    }
    Next, consider $\min_{\lambda > \sqrt{\frac{\beta}{\alpha}}}f_{2}\bb{\lambda}$. Define $\lambda_{2}^{*} := \bb{\frac{x}{8\alpha}}^{\frac{1}{3}}$. Then, $f_{2}'\bb{\lambda_{2}^{*}} = 0$. If $\lambda_{2}^{*} \geq \sqrt{\frac{\beta}{\alpha}}$, $\min_{\lambda > \sqrt{\frac{\beta}{\alpha}}}f_{2}\bb{\lambda} = f_{2}\bb{\lambda_{2}^{*}}$. Otherwise, $\min_{\lambda > \sqrt{\frac{\beta}{\alpha}}}f_{2}\bb{\lambda} = f_{2}\bb{\sqrt{\frac{\beta}{\alpha}}}$. Equivalently, 
    \ba{
        \min_{\lambda > \sqrt{\frac{\beta}{\alpha}}}f_{2}\bb{\lambda} \leq \begin{cases}
            -\frac{3x^{\frac{4}{3}}}{8\alpha^{\frac{1}{3}}}, \;\; x \geq 8\sqrt{\frac{\beta^{3}}{\alpha}}, \\
            2\frac{\beta^{2}}{\alpha} - x\sqrt{\frac{\beta}{\alpha}}, \; \text{ otherwise }
        \end{cases}\label{eq:f_2_cases}
    }
    
    Combining \eqref{eq:f_1_cases} and \eqref{eq:f_2_cases}, we note that 
    \bas{
        \min\left\{\min_{0 \leq \lambda \leq \sqrt{\frac{\beta}{\alpha}}}f_{1}\bb{\lambda}, \;\; \min_{\lambda > \sqrt{\frac{\beta}{\alpha}}}f_{2}\bb{\lambda} \right\} \leq \begin{cases}
    -\frac{3x^{2}}{32\beta}, \;\; 0 \leq x \leq 8\sqrt{\frac{\beta^{3}}{\alpha}} \\
    -\frac{3x^{\frac{4}{3}}}{8\alpha^{\frac{1}{3}}}, \;\; x > 8\sqrt{\frac{\beta^{3}}{\alpha}}
\end{cases}
    }
    which completes our proof.
\end{proof}

\begin{lemma}\label{lemma:efron_stein_concentration_2}
    In the setting of Lemma~\ref{lemma:efron_stein_concentration_result_1}, we have for $\delta \in \bb{0,1}$, if 
    \bas{
        r \gtrsim \E\bbb{v_n}\log\bb{\frac{1}{\delta}} + \sqrt{\frac{\mathcal{L}^{2}}{\mu}\log^{3}\bb{\frac{1}{\delta}}}
    }
    then $\mathbb{P}\bb{v_{n} > \E\bbb{v_n} + r} \leq \delta$.
\end{lemma}
\begin{proof}
    Using Lemma~\ref{lemma:efron_stein_concentration_result_1}, we have
    \bas{
        V_{+} \leq 4v_{n}S_{n}
    }
    Define the $\sigma$-algebra $\mathcal{F}_{n} := \sigma\bb{\left\{A_i\right\}_{i \in [n]}}$. For convenience of notation denote, 
    \bas{
        Y_{k,i}
        &= Y_{k} + \mathbbm{1}\bb{i \leq k}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i'-A_i}v_{1}
    }
    Define 
    \bas{
        v_{n} - v_{n}^{(i)} := T_i := \bb{\sum_{k=\max\left\{t+1, i\right\} }^{n}e_{j}^{\top}Y_kY_k^{\top}e_j} - \bb{\sum_{k=\max\left\{t+1, i\right\}}^{n}e_{j}^{\top}Y_{k,i}Y_{k,i}^{\top}e_j}
    }
    Then, 
    \bas{
    T_i &= \sum_{k=\max\left\{t+1, i\right\}}^{n}e_{j}^{\top}\bb{Y_k-Y_{k,i}}\bb{Y_k + Y_{k,i}}^{\top}e_j \\
        &= \sum_{k=\max\left\{t+1, i\right\}}^{n}e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}\bb{2Y_k^{\top} + v_{1}^{\top}\bb{A_i'-A_i}\vp\lambp^{k-i-1}\vp^{\top}}e_j \\
        &= 2\sum_{k=\max\left\{t+1, i\right\}}^{n}e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}Y_{k}^{\top}e_{j} - \sum_{k=\max\left\{t+1, i\right\}}^{n}\bb{e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}}^{2}
    }
    Therefore, 
    \bas{
        |T_i| &\leq 2\left|\sum_{k=\max\left\{t+1, i\right\}}^{n}e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}Y_{k}^{\top}e_{j}\right| + \sum_{k=\max\left\{t+1, i\right\}}^{n}\bb{e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}}^{2} \\
        &\leq 2\sqrt{\sum_{k=\max\left\{t+1, i\right\}}^{n}\bb{Y_{k}^{\top}e_{j}}^{2}}\sqrt{\sum_{k=\max\left\{t+1, i\right\}}^{n}\bb{e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}}^{2}} + \underbrace{\sum_{k=\max\left\{t+1, i\right\}}^{n}\bb{e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}}^{2}}_{:= b_{t,i}} 
    }
    Note that,  
    \bas{
        b_{t,i} &= \sum_{k=\max\left\{t+1, i\right\}}^{n}e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}v_{1}^{\top}\bb{A_i-A_i'}\vp\lambp^{k-i-1}\vp^{\top}e_j \\
        &= \sum_{k=\max\left\{t+1, i\right\}}^{n}\left(e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_i-A_i'}v_{1}\right)^{2}
        }
        \bas{
        &\leq 4\mathcal{L}\sum_{k=\max\left\{t+1, i\right\}}^{n}\bb{1-\mu}^{2\bb{k-i-1}} \\
        &= \frac{4\mathcal{L}}{\bb{1-\mu}^{2}}\sum_{k=\max\left\{t+1, i\right\}}^{n}\bb{1-\mu}^{2\bb{k-i}} \\
        &= \begin{cases}
            \frac{4\mathcal{L}}{\bb{1-\mu}^{2}}\frac{1 - \bb{1-\mu}^{2\bb{n-i+1}}}{1 - \bb{1-\mu}^{2}}, \; \text{ for } i \geq t+1 \\ \\
            \frac{4\mathcal{L}\bb{1-\mu}^{2\bb{t+1-i}}}{\bb{1-\mu}^{2}}\frac{1 - \bb{1-\mu}^{2\bb{n-t}}}{1 - \bb{1-\mu}^{2}}, \; \text{ for } i < t+1
        \end{cases} \\
        &= \begin{cases}
            \frac{4\mathcal{L}}{\bb{1-\mu}^{2}}\frac{1 - \bb{1-\mu}^{2\bb{n-i+1}}}{1 - \bb{1-\mu}^{2}}, \; \text{ for } i \geq t+1 \\ \\
            4\mathcal{L}\bb{1-\mu}^{2\bb{t-i}}\frac{1 - \bb{1-\mu}^{2\bb{n-t}}}{1 - \bb{1-\mu}^{2}}, \; \text{ for } i < t+1
        \end{cases}
    }
    Therefore, $\exists C > 0$ such that for all $i$, $|b_{t,i}| \leq \frac{C\mathcal{L}}{\mu}$. Following a similar bound for $\sum_{k=\max\left\{t+1, i\right\}}^{n}\bb{Y_{k}^{\top}e_{j}}^{2}$, we have $\forall i \in [n]$, $|T_{i}| \lesssim \frac{\mathcal{L}}{\mu}$. Define a new set of random variables, $D_{i} := \frac{A_{i}\mu}{\mathcal{L}}$ so that 
\end{proof}


\begin{lemma}[Concentration of the mean]
    For $j \in [d]$, let $\beta_{k} := e_{j}^{\top}Y_k$ and $w_{n} := \frac{1}{n-t}\sum_{k=t+1}^{n}\beta_{k}$. Then, for any $\epsilon > 0$, 
    \bas{
        \mathbb{P}\bb{|w_{n}| \geq \epsilon} \leq 
    }
\end{lemma}
\begin{proof}
    We have, 
    \bas{
        w_{n} &= \frac{1}{n-t}\sum_{k=t+1}^{n}\beta_{k} \\
        &= \frac{1}{n-t}\sum_{k=t+1}^{n}e_{j}^{\top}Y_{k} \\
        &= \frac{1}{n-t}\sum_{k=t+1}^{n}\sum_{i=1}^{k}e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}\bb{A_{i}-\Sigma}v_{1} \\
        &= \sum_{i=1}^{n}\bb{\underbrace{\frac{1}{n-t}\sum_{k=\max\left\{t+1, i\right\}}^{n}e_{j}^{\top}\vp\lambp^{k-i-1}\vp^{\top}}_{:= Z_{t,i}^{\top}}}\bb{A_{i}-\Sigma}v_{1} \\
        &= \sum_{i=1}^{n}\underbrace{Z_{t,i}^{\top}\bb{A_{i}-\Sigma}v_{1}}_{:= w_{t,i}}
    }
    Since the $A_{i}$ are independent with $\E\bbb{A_i} = \Sigma$ and each $Z_{t,i}$ is deterministic, the $w_{t,i}$ are independent mean-zero random variables. 
\end{proof}



