\subsubsection{$\Ethree{n}$ tail bound}
\label{appendix:Ethree_bound}

% \rd This is built for union bound. But I think we can also do a variance bound. \bk
% \rd $c_k$ and $c_n$ are different.\bk
\begin{lemma}\label{lemma:en3_norm}
Let $\Ethree{n}$ be as defined in Lemma~\ref{lemma:oja_error_decomposition} with $u_0 = g/\norm{g}_{2}$, where $g \sim \mathcal{N}(0, \id_d)$. Let $\eta_n$ be set according to Lemma~\ref{lemma:learning_rate_choice}. Then for any $\delta \in \bb{0,1}$,
we have with probability at least $1-\delta$,
% \bas{
% &\Prob\bb{|\Ethree{n}|\geq \epsilon\sqrt{s_n}\norm{\vp^{\top}e_{k}}_{2}\bigg|\mathcal{G}} \\
% &\quad\quad \leq 
%   \frac{1}{\Prob(\mathcal{G})}\bb{\enthreeerror}
% }
\bas{
    &\norm{\Ethree{n}}_{2} \lesssim \\ & \quad\quad \sqrt{s_n}\bb{\bb{\frac{ d\exp\bb{-2\eta_{n}n\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}^{2}n\bb{\lambda_{1}^{2}+\Mtwo^{2}}} + \frac{e\eta_{n}^{3}n\Mtwo^{4}\bb{1+2\log\bb{d}}}{2\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}\bb{\lambda_{1}^{2}-\lambda_{2}^{2}-\Mtwo^{2}}}  }{4\delta^4 \log^{-1}(1/\delta)}}^{\frac{1}{2}} + \frac{\eta_n\sqrt{n}\Mtwo\log\bb{d}}{\delta^{\frac{1}{2}}}}
}
where $s_n := \frac{C\log\bb{\frac{1}{\delta}}}{\delta^{3}}\frac{\eta_n \Mtwo^{2}}{\bb{\eigengap}}$ for a universal constant $C > 0$.
\end{lemma}
\begin{proof}
Let $c_n=(1+\eta_{n}\lambda_1)^n |u_0^Tv_1|$. We first note that 
\ba{
    \norm{\Ethree{n}}_2 &= \norm{\vp\vp^{\top}B_{n}u_{0}\bb{\frac{1}{\norm{B_{n}u_0}_{2}} - \frac{1}{c_{n}}}}_2 = \norm{\frac{\vp\vp^{\top}B_{n}u_{0}}{\norm{B_{n}u_0}_{2}}\bb{1 - \frac{\norm{B_{n}u_0}_{2}}{c_{n}}}}_2 \notag \\
    &\leq \norm{\frac{\vp\vp^{\top}B_{n}u_{0}}{\norm{B_{n}u_0}_{2}}}_{2}\Abs{\frac{\norm{B_{n}u_0}_{2}}{c_{n}} - 1} \label{eq:en3_error_decomposition_1}
}
Observe that 
\bas{
    \norm{\frac{\vp\vp^{\top}B_{n}u_{0}}{\norm{B_{n}u_0}_{2}}}_{2}^{2} = 1 - \frac{\bb{v_{1}^{\top}B_{n}u_0}^{2}}{\norm{B_{n}u_0}_{2}^{2}} = \sin^{2}\bb{\voja, v_{1}}
} 
Define the event $\mathcal{G}_1 := \left\{\sin^{2}\bb{\voja, v_{1}} \leq s_n\right\}$. Then, by Corollary 1 of~\cite{lunde2021bootstrapping},
\ba{\Prob\bb{\mathcal{G}_1} = \Prob\bb{\norm{\frac{\vp\vp^{\top}B_{n}u_{0}}{\norm{B_{n}u_0}_{2}}}_{2}^{2} \leq s_n} = \Prob\bb{\sin^{2}\bb{\voja, v_{1}} \leq s_n} \geq 1 - \delta  \label{eq:sinsquared_scaling}
}
Next, define the event $\mathcal{G}_{2} := \left\{\frac{\|B_n V_\perp V_\perp^Tg\|^2}{|v_1^Tg|^2(1+\eta_{n}\lambda_1)^{2n}} \leq  \  \frac{\log(1/\delta) \mathrm{trace}(V_\perp B_n B_n V_\perp^T)}{\delta^3  (1+\eta_{n}\lambda_1)^{2n}}\right\}$ over the randomness of $g$.
Using the trace trick (Proposition B.6 in~\cite{lunde2021bootstrapping}), % Cite trace lemma \bk
we have 
\ba{
   \Prob\bb{\mathcal{G}_2} = \Prob\bb{\frac{\|B_n V_\perp V_\perp^Tg\|^2}{|v_1^Tg|^2(1+\eta_{n}\lambda_1)^{2n}} \leq  \  \frac{\log(1/\delta) \mathrm{trace}(V_\perp B_n B_n V_\perp^T)}{\delta^3  (1+\eta_{n}\lambda_1)^{2n}}} \geq 1 - \delta \label{eq:good_event_prob}
}
For $\epsilon > 0$ and $\mathcal{G} := \mathcal{G}_1 \cap \mathcal{G}_2$, we therefore have, using \eqref{eq:en3_error_decomposition_1},
\ba{
 \Prob\bb{\norm{\Ethree{n}}_{2} \geq \epsilon\sqrt{s_n}} &\leq \Prob\bb{\mathcal{G}^{\complement}} +  \Prob\bb{\norm{\frac{\vp\vp^{\top}B_{n}u_{0}}{\norm{B_{n}u_0}_{2}}}_{2}\Abs{\frac{\norm{B_{n}u_0}_{2}}{c_{n}} - 1} \geq \epsilon\sqrt{s_n}\bigg|\mathcal{G}} \notag \\
 &\leq \Prob\bb{\mathcal{G}_1^{\complement}} + \Prob\bb{\mathcal{G}_2^{\complement}} +  \Prob\bb{\Abs{\frac{\norm{B_{n}u_0}_{2}}{c_{n}} - 1} \geq \epsilon\bigg|\mathcal{G}} \label{eq:en3_union_bound}
}
We now adapt the proof of Lemma B.2 in~\cite{lunde2021bootstrapping}. Let $a_1=\Abs{v_1^{\top}u_0}$. We have:
\begin{align}
 \left|\frac{\|B_nu_0\|}{c_n}-1\right|  & \leq  \left|\frac{\|B_n v_1 a_1\| - \|a_1 (I+\eta_{n}\Sigma)^n v_1\|}{c_n} \right| +   \frac{\|B_n V_\perp V_\perp^T u_0 \|}{c_n}  \label{eq:en3_error_decomposition_2} 
\end{align}
We will start by bounding the second term. Recall that $c_n=|v_{1}^{\top}u_0|(1+\eta_{n}\lambda_1)^n $.
% the $\norm{B_n u_0}_2$ with $\norm{B_n V_\perp V_\perp^T u_0}_2 + \norm{B_n v_1 v_2^T u_0}_2 $
% Since $u_0 = g/\norm{g}$, where $g \sim N(0,I_d)$, we have that:
% \begin{align*}
% \frac{\|B_nu_0\|_2}{|a_1|(1+\eta_n\lambda_1/n)^n} = \frac{\|B_n g\|}{(|v_1^Tg| \ (1+\eta_n\lambda_1/n)^n} 
% \end{align*}
% We will first bound the contribution of $V_\perp$.
Markov's inequality conditioned on $\mathcal{G}$, together with Lemma 5.2 of~\cite{jain2016streaming} with $\mathcal{V}_n \leq \Mtwo^{2}$, yields that:
\begin{align}
 \Prob\left(\frac{\|B_n V_\perp V_\perp^Tg\|}{|v_1^{\top}g|(1+\eta_{n}\lambda_1)^{n}} \geq \frac{\epsilon}{2} \bigg| \mathcal{G} \right) &\leq \Prob\bb{\frac{\log(1/\delta) \mathrm{trace}(V_\perp B_n B_n V_\perp^T)}{\delta^3  (1+\eta_{n}\lambda_1)^{2n}} \geq \frac{\epsilon^{2}}{4} \bigg|\mathcal{G}} \notag \\
 & \leq \frac{1}{\Prob(\mathcal{G})}\frac{ d\exp\bb{-2\eta_{n}n\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}^{2}n\bb{\lambda_{1}^{2}+\Mtwo^{2}}} + \frac{e\eta_{n}^{3}n\Mtwo^{4}\bb{1+2\log\bb{d}}}{2\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}\bb{\lambda_{1}^{2}-\lambda_{2}^{2}-\Mtwo^{2}}} }{4\delta^3 \log^{-1}(1/\delta) \ \epsilon^2  } \label{eq:en3_bound_second_summand}
\end{align}
Now we bound the first summand.
% \begin{align*}
% & \ P\left(\left| \frac{\|B_nv_1\|_2}{(1+\eta_n\lambda_1/n)^n} -1 \right| > \epsilon_n \right) \\ 
% % \leq & \ P\left(\frac{\|B_nu_0\|}{|a_1|(1+\eta_n\lambda_1/n)^n} > \epsilon_n \right) \\ 
% \leq &  \  P\left( \frac{\|(B_n - T_0)v_1\|_2}{ (1+\eta_n\lambda_1/n)^n}  > \epsilon_n/2 \right) 
%  \end{align*}
 %\rd CITE LEMMA \bk
%Now, by Corollary 5.4 (5.12) in \cite{huang2020matrix} with  $m_i = I + \frac{\eta_n \lambda_1}{n}$, $\nu = \eta_n^2 M_d/n$, 
Using Eq.~(5.6) of~\cite{huang2022matrix} together with Markov's inequality, we have
\begin{align}
\Prob\left( \frac{\normop{B_n - \E\bbb {B_n}}}{ (1+\eta_{n}\lambda_1)^{n}}  > \frac{\epsilon}{2} \bigg|\mathcal{G}\right) \leq   \ \frac{2^{q}\,\E\bbb{\normop{B_n - \E\bbb{B_n}}^q}}{\Prob(\mathcal{G})(1+\eta_{n}\lambda_1)^{qn}\epsilon^q} \leq \frac{(C\eta_n^2 n\Mtwo^{2})^{q/2}(1+\log d)^q}{\Prob(\mathcal{G})\epsilon^q} \label{eq:en3_bound_first_summand}
\end{align}
We set $q = 2$ in \eqref{eq:en3_bound_first_summand}. Now, let 
\ba{
    \epsilon := \bb{\frac{ d\exp\bb{-2\eta_{n}n\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}^{2}n\bb{\lambda_{1}^{2}+\Mtwo^{2}}} + \frac{e\eta_{n}^{3}n\Mtwo^{4}\bb{1+2\log\bb{d}}}{2\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}\bb{\lambda_{1}^{2}-\lambda_{2}^{2}-\Mtwo^{2}}}  }{4\delta^4 \log^{-1}(1/\delta)}}^{\frac{1}{2}} + \frac{\eta_n\sqrt{n}\Mtwo\log\bb{d}}{\delta^{\frac{1}{2}}} \label{eq:choice_of_epsilon}
}
Then, substituting $\epsilon$ in \eqref{eq:en3_bound_second_summand} and \eqref{eq:en3_bound_first_summand}, and combining with \eqref{eq:en3_error_decomposition_2}, we have
\ba{
    \Prob\bb{\left|\frac{\|B_nu_0\|}{c_n}-1\right| \geq \epsilon \,\bigg|\, \mathcal{G}} \leq \frac{\delta}{\Prob\bb{\mathcal{G}}} \label{eq:en3_decomposition_term_1_bound}
}
Therefore, using \eqref{eq:en3_union_bound}, and combining \eqref{eq:en3_decomposition_term_1_bound} with \eqref{eq:sinsquared_scaling}, \eqref{eq:good_event_prob}, the result follows.
\end{proof}

\begin{lemma}\label{lemma:en3_tail_bound} Let $\Ethree{n}$ be defined as in Lemma~\ref{lemma:oja_error_decomposition} for $u_{0} = g/\norm{g}_{2}$ with $g \sim \mathcal{N}(0, \id_d)$. Let $\eta_n$ be set according to Lemma~\ref{lemma:learning_rate_choice}. Let $\left\{\Ethree{n}^{(i)}\right\}_{i \in [m]}$ and $\left\{g^{(i)}\right\}_{i \in [m]}$ be $m$ $\iid$ instances of $\Ethree{n}$ and $g$ respectively. Then for any $\delta \in \bb{0,1}$, with probability at least $1-\delta$,
\bas{
    & \frac{\sum_{i \in [m]}\bb{e_{k}^{\top}\Ethree{n}^{(i)}}^{2}}{m} \\
    & \lesssim s_nb_{k}^{2}\bb{m^{4}\bb{\frac{  d\exp\bb{-2\eta_{n}n\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}^{2}n\bb{\lambda_{1}^{2}+\Mtwo^{2}}} + \frac{e\eta_{n}^{3}n\Mtwo^{4}\bb{1+2\log\bb{d}}}{2\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}\bb{\lambda_{1}^{2}-\lambda_{2}^{2}-\Mtwo^{2}}}  }{4\delta^4 \log^{-1}(1/\delta) }} + m\frac{\eta_n^{2}n\Mtwo^{2}\log^{2}\bb{d}}{\delta}}
}
for all $k \in [d]$, where $b_k := \norm{\vp^{\top}e_{k}}_{2}$ and $s_n := \frac{C\log\bb{\frac{1}{\delta}}}{\delta^{3}}\frac{\eta_n \Mtwo^{2}}{\bb{\eigengap}}$ for a universal constant $C > 0$.
\end{lemma}
\begin{proof}
Using Lemma~\ref{lemma:en3_norm}, for any fixed $i \in [m]$, with probability at least $1-\delta$,
\ba{
    &\norm{\Ethree{n}^{(i)}}_{2} \lesssim \\ & \quad\quad \sqrt{s_n}\bb{\bb{\frac{ d\exp\bb{-2\eta_{n}n\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}^{2}n\bb{\lambda_{1}^{2}+\Mtwo^{2}}} + \frac{e\eta_{n}^{3}n\Mtwo^{4}\bb{1+2\log\bb{d}}}{2\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}\bb{\lambda_{1}^{2}-\lambda_{2}^{2}-\Mtwo^{2}}}  }{4\delta^4 \log^{-1}(1/\delta)}}^{\frac{1}{2}} + \frac{\eta_n\sqrt{n}\Mtwo\log\bb{d}}{\delta^{\frac{1}{2}}}} \label{eq:en3_norm_i_bound}
}
Furthermore, note that 
\ba{
\Abs{e_{k}^{\top}\Ethree{n}^{(i)}}_{2} &= \Abs{e_{k}^{\top}\vp\vp^{\top}B_{n}u_{0}\bb{\frac{1}{\norm{B_{n}u_0}_{2}} - \frac{1}{c_{n}}}}_{2} \notag \\
&= \Abs{e_{k}^{\top}\vp\vp^{\top}\vp\vp^{\top}B_{n}u_{0}\bb{\frac{1}{\norm{B_{n}u_0}_{2}} - \frac{1}{c_{n}}}}_{2} \notag \\
&\leq \norm{e_{k}^{\top}\vp\vp^{\top}}_{2}\norm{\vp\vp^{\top}B_{n}u_{0}\bb{\frac{1}{\norm{B_{n}u_0}_{2}} - \frac{1}{c_{n}}}}_{2} \notag \\
&= b_k\norm{\vp\vp^{\top}B_{n}u_{0}\bb{\frac{1}{\norm{B_{n}u_0}_{2}} - \frac{1}{c_{n}}}}_{2} = b_{k}\norm{\Ethree{n}^{(i)}}_{2} \label{eq:en3_ek_to_norm_conversion}
}
The result then follows by a union bound over all $i \in [m]$ for the event in \eqref{eq:en3_norm_i_bound} and using \eqref{eq:en3_ek_to_norm_conversion}.

% Define the events, $\left\{\mathcal{G}_i\right\}_{i \in [m]}$ as
% \bas{
%   \mathcal{G}_{i} := \left\{\frac{\|B_n V_\perp V_\perp^Tg\|^2}{|v_1^Tg|^2(1+\eta\lambda_1)^{2n}} \leq  \  \frac{\log(1/\delta) \mathrm{trace}(V_\perp B_n B_n V_\perp^T) )}{\delta^3  (1+\eta\lambda_1)^{2n}}\right\} 
% }
% Using Lemma~\ref{lemma:en3}, we have with probability at least $1-\frac{\delta}{\Prob\bb{\mathcal{G}_i}}$, for any $i \in [m]$, conditioned on $\mathcal{G}_i$, 
% \bas{
% \frac{\Abs{e_{k}^{\top}\Ethree{n}^{(i)}}}{\sqrt{s_n}b_k} \leq  \bb{\frac{ d\exp\bb{-2\eta_{n}n\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}^{2}n\bb{\lambda_{1}^{2}+\Mtwo^{2}}} + \frac{e\eta_{n}^{3}n\Mtwo^{4}\bb{1+2\log\bb{d}}}{2\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}\bb{\lambda_{1}^{2}-\lambda_{2}^{2}-\Mtwo^{2}}}  }{4\delta^4 \log^{-1}(1/\delta)}}^{\frac{1}{2}} + \bb{\frac{\Mtwo^{6}}{\delta n^3(\lambda_1-\lambda_2)^6}}^{\frac{1}{6}} 
% }
% Using the trace trick (prop B.6 in~\cite{lunde2021bootstrapping}), % Cite trace lemma \bk
% we have with probability at least $1 - C \delta$, 
% \begin{align*}
%   & \ \frac{\|B_n V_\perp V_\perp^Tg\|^2}{|v_1^Tg|^2(1+\eta\lambda_1)^{2n}} \leq  \  \frac{\log(1/\delta) \mathrm{trace}(V_\perp B_n B_n V_\perp^T) )}{\delta^3  (1+\eta\lambda_1)^{2n}} 
% \end{align*}
% Therefore, for all $\left\{\mathcal{G}_i\right\}_{i \in [m]}$, $\mathbb{P}\bb{\mathcal{G}_i} \geq 1-\delta$. Therefore, using a union bound, for any fixed $i \in [m]$, with probability at least $1-\delta$,  
% \bas{
% \frac{\bb{e_{k}^{\top}\Ethree{n}^{(i)}}^{2}}{s_{n}b_k^{2}} \leq  2\bb{\frac{ d\exp\bb{-2\eta_{n}n\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}^{2}n\bb{\lambda_{1}^{2}+\Mtwo^{2}}} + \frac{e\eta_{n}^{3}n\Mtwo^{4}\bb{1+2\log\bb{d}}}{2\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}\bb{\lambda_{1}^{2}-\lambda_{2}^{2}-\Mtwo^{2}}}  }{4\delta^4 \log^{-1}(1/\delta)}} + 2\bb{\frac{\Mtwo^{6}}{\delta n^3(\lambda_1-\lambda_2)^6}}^{\frac{1}{3}} 
% }
% The conclusion then follows by another union bound over all $i \in [m]$.
\end{proof}


