\subsubsection{$\Ethree{n}$ tail bound}
\label{appendix:Ethree_bound}

% \rd This is built for union bound. But I think we can also do a variance bound. \bk
% \rd $c_k$ and $c_n$ are different.\bk
\begin{lemma}\label{lemma:en3_norm}
Let $\Ethree{n}$ be as defined in Lemma~\ref{lemma:oja_error_decomposition} with $u_0 = g/\norm{g}_{2}$, where $g \sim \mathcal{N}(0, \id_d)$. Let $\eta_n$ be set according to Lemma~\ref{lemma:learning_rate_choice}. Fix $\delta \in \bb{0,1}$. Then,
with probability at least $1-\delta$,
% \bas{
% &\Prob\bb{|\Ethree{n}|\geq \epsilon\sqrt{s_n}\norm{\vp^{\top}e_{k}}_{2}\bigg|\mathcal{G}} \\
% &\quad\quad \leq 
%   \frac{1}{\Prob(\mathcal{G})}\bb{\enthreeerror}
% }
\bas{
    \norm{\Ethree{n}}_{2} \lesssim \sqrt{s_n} \bb{ \frac{ d\exp\bb{-2\eta_{n}n\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}^{2}n\bb{\lambda_{1}^{2}+\Mtwo^{2}}} +  \frac{\eta_{n}\Mtwo^2}{\bb{\lambda_{1}-\lambda_{2}}  } }{\delta^3 (1-\delta) \log^{-1}(1/\delta)}}^{\frac{1}{2}} + \sqrt{s_n} \frac{\eta_n\sqrt{n}\Mtwo\log\bb{d}}{\delta^{\frac{1}{2}}},
}
where $s_n := \frac{C\log\bb{\frac{1}{\delta}}}{\delta^{3}}\frac{\eta_n \Mtwo^{2}}{\bb{\eigengap}}$ for a universal constant $C > 0$.
\end{lemma}
\begin{proof}
Let $c_n=(1+\eta_{n}\lambda_1)^n |u_0^Tv_1|$. We first note that 
\ba{
    \norm{\Ethree{n}}_2 &= \norm{\vp\vp^{\top}B_{n}u_{0}\bb{\frac{1}{\norm{B_{n}u_0}_{2}} - \frac{1}{c_{n}}}}_2 = \norm{\frac{\vp\vp^{\top}B_{n}u_{0}}{\norm{B_{n}u_0}_{2}}\bb{1 - \frac{\norm{B_{n}u_0}_{2}}{c_{n}}}}_2 \notag \\
    &\leq \norm{\frac{\vp\vp^{\top}B_{n}u_{0}}{\norm{B_{n}u_0}_{2}}}_{2}\Abs{\frac{\norm{B_{n}u_0}_{2}}{c_{n}} - 1}. \label{eq:en3_error_decomposition_1}
}
We bound each of the two factors separately. The square of the first factor is the $\sin^{2}$ error between $\voja$ and $v_1$:
\bas{
    \norm{\frac{\vp\vp^{\top}B_{n}u_{0}}{\norm{B_{n}u_0}_{2}}}_{2}^{2} = 1 - \frac{\bb{v_{1}^{\top}B_{n}u_0}^{2}}{\norm{B_{n}u_0}_{2}^{2}} = \sin^{2}\bb{\voja, v_{1}}.
} 
By Corollary 1 of~\cite{lunde2021bootstrapping}, 
\ba{
\Prob\bb{\norm{\frac{\vp\vp^{\top}B_{n}u_{0}}{\norm{B_{n}u_0}_{2}}}_{2}^{2} > s_n} = \Prob\bb{\sin^{2}\bb{\voja, v_{1}} > s_n} \leq \delta. \label{eq:sinsquared_scaling}
}
 % Next, define the event $\mathcal{G}_{2} := \left\{\frac{\|B_n V_\perp V_\perp^Tg\|^2}{|v_1^Tg|^2(1+\eta_{n}\lambda_1)^{2n}} \leq  \  \frac{\log(1/\delta) \mathrm{trace}(V_\perp B_n B_n V_\perp^T) )}{\delta^2  (1+\eta_{n}\lambda_1)^{2n}}\right\}$ over the randomness of $g$.
% Using the trace trick (Proposition B.6 in~\cite{lunde2021bootstrapping}), % Cite trace lemma \bk
% we have 
% \ba{
%    \Prob\bb{\mathcal{G}} = \Prob\bb{\frac{\|B_n V_\perp V_\perp^Tg\|^2}{|v_1^Tg|^2(1+\eta_{n}\lambda_1)^{2n}} \leq  \  \frac{\log(1/\delta) \mathrm{trace}(V_\perp B_n B_n V_\perp^T) )}{\delta^3  (1+\eta_{n}\lambda_1)^{2n}}} \geq 1 - \delta \label{eq:good_event_prob}
% }

It follows that for any $\eps > 0$,
%and $\mathcal{G} := \left\{\mathcal{G}_1 \cap \mathcal{G}\right\}$, we therefore have using \eqref{eq:en3_error_decomposition_1}, 
\ba{
 \Prob\bb{\norm{\Ethree{n}}_{2} > \epsilon\sqrt{s_n}} & \leq \Prob\bb{\norm{\frac{\vp\vp^{\top}B_{n}u_{0}}{\norm{B_{n}u_0}_{2}}}_{2}^{2} > s_n} +\Prob\bb{\Abs{\frac{\norm{B_{n}u_0}_{2}}{c_{n}} - 1} > \epsilon} \\
 &\leq \delta + \Prob\bb{\Abs{\frac{\norm{B_{n}u_0}_{2}}{c_{n}} - 1} > \epsilon}. %\\
% \Prob\bb{\mathcal{G}^{\complement}} +  \Prob\bb{\norm{\frac{\vp\vp^{\top}B_{n}u_{0}}{\norm{B_{n}u_0}_{2}}}_{2}\Abs{\frac{\norm{B_{n}u_0}_{2}}{c_{n}} - 1} \geq \epsilon\sqrt{s_n}\bigg|\mathcal{G}} \notag \\
 %&\leq \Prob\bb{\mathcal{G}_1^{\complement}} + \Prob\bb{\mathcal{G}^{\complement}} +  \Prob\bb{\Abs{\frac{\norm{B_{n}u_0}_{2}}{c_{n}} - 1} \geq \epsilon\bigg|\mathcal{G}} 
 \label{eq:en3_union_bound}
}
To bound the second factor in~\eqref{eq:en3_error_decomposition_1}, we adapt the proof of Lemma B.2 in~\cite{lunde2021bootstrapping}. Letting $a_1=\Abs{v_1^{\top}u_0}$,
\begin{align}
 \left|\frac{\|B_nu_0\|}{c_n}-1\right|  & \leq  \left|\frac{\|B_n v_1 a_1\| - \|a_1 (I+\eta_{n}\Sigma)^n v_1\|}{c_n} \right| +   \frac{\|B_n V_\perp V_\perp^T u_0 \|}{c_n} \notag \\
 &= \left|\frac{\|B_n v_1\| - \| (I+\eta_{n}\Sigma)^n v_1\|}{(1+\eta_{n} \lambda_1)^{n}} \right| + \frac{\|B_n V_\perp V_\perp^T u_0 \|}{c_n} \notag \\
 &\le \frac{\normop{B_n - \E [B_n]}}{(1+\eta_n \lambda_1)^n} + \frac{\|B_n V_\perp V_\perp^T u_0 \|}{c_n}. 
 \label{eq:en3_error_decomposition_2} 
\end{align}
For the first summand, using Eq.~5.6 of~\cite{huang2022matrix} with $q=2$ and Markov's inequality, 
\begin{align}
\Prob\left( \frac{\normop{B_n - \E\bbb {B_n}}}{ (1+\eta_n\lambda_1)^{n}}  > \frac{\epsilon}{2} \right) \leq   \ \frac{4\E\bbb{\normop{B_n - \E\bbb{B_n}}^2}}{(1+\eta_{n}\lambda_1)^{2n}\epsilon^2} \leq \frac{C\eta_n^2 n\Mtwo^{2}(1+\log d)^2}{\epsilon^2}. \label{eq:en3_bound_first_summand}
\end{align}

For the second summand of equation~\eqref{eq:en3_error_decomposition_2}, 
% the $\norm{B_n u_0}_2$ with $\norm{B_n V_\perp V_\perp^T u_0}_2 + \norm{B_n v_1 v_2^T u_0}_2 $
% Since $u_0 = g/\norm{g}$, where $g \sim N(0,I_d)$, we have that:
% \begin{align*}
% \frac{\|B_nu_0\|_2}{|a_1|(1+\eta_n\lambda_1/n)^n} = \frac{\|B_n g\|}{(|v_1^Tg| \ (1+\eta_n\lambda_1/n)^n} 
% \end{align*}
% We will first bound the contribution of $V_\perp$.
define the event
\bas{
\mathcal{G}=\left\{\frac{\|B_n \vp\vp^T u_0\|^2}{|v_1^T u_0|^2}\leq \frac{C\log (1/\delta)}{\delta^2}\tr{\vp^T B_n^T B_n \vp}\right\}.
}
By Proposition B.6 of~\cite{lunde2021bootstrapping}, $\Prob\bb{\mathcal{G}} \ge 1-\delta$, where $C > 0$ is some universal constant. Since $\Prob\bb{A \mid B}\Prob\bb{B} = \Prob\bb{A\cap B} \leq \Prob\bb{A}$ for any events $A$ and $B$,
Markov's inequality % conditioned on $\mathcal{G}$,  
together with Lemma 5.2 of~\cite{jain2016streaming} with $\mathcal{V} \leq \Mtwo^{2}$ yields

%\rd Jain's lemma applies to $B_nB_n^T$ but since $A_i$ are IID and symmetric they should apply to $B_n^T B_n$?\bk

\begin{align}
 %\Prob\left(\frac{\|B_n V_\perp V_\perp^Tg\|}{|v_1^{\top}g|(1+\eta_{n}\lambda_1)^{n}} \geq \frac{\epsilon}{2} \bigg| \mathcal{G} \right) &\leq 
 &\Prob\bb{\frac{\|B_n \vp\vp^T u_0\|}{c_n}\geq \frac{\eps}{2}\,\bigg|\,\mathcal{G}}\\
 &\leq 
 \frac{1}{1-\delta}\Prob\bb{\frac{\tr{\vp^T B_n^T B_n \vp}}{(1+\eta_{n}\lambda_1)^{2n}}  \geq \frac{\epsilon^2}{4} \cdot \frac{\delta^{2}}{C\log (1/\delta)} } \notag \\
 & \leq \frac{1}{1-\delta}C\frac{ d\exp\bb{-2\eta_{n}n\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}^{2}n\bb{\lambda_{1}^{2}+\Mtwo^{2}}} +  \frac{\eta_{n}\Mtwo^2\exp(n\eta_n^2(2\lambda_1^2+\Mtwo^2))}{2\bb{\lambda_{1}-\lambda_{2}}  }  }{ \epsilon^2 \delta^{2}\log^{-1}\bb{1/\delta}  } \\
 & \leq \frac{1}{1-\delta}C\frac{ d\exp\bb{-2\eta_{n}n\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}^{2}n\bb{\lambda_{1}^{2}+\Mtwo^{2}}} +  \frac{e \eta_{n}\Mtwo^2 }{2\bb{\lambda_{1}-\lambda_{2}}}  }{ \epsilon^2 \delta^{2}\log^{-1}\bb{1/\delta}},
 \label{eq:en3_bound_second_summand}
\end{align}
where the last bound follows from Lemma~\ref{lemma:learning_rate_choice}.
% \begin{align}
%  %\Prob\left(\frac{\|B_n V_\perp V_\perp^Tg\|}{|v_1^{\top}g|(1+\eta_{n}\lambda_1)^{n}} \geq \frac{\epsilon}{2} \bigg| \mathcal{G} \right) &\leq 
%  \Prob\bb{\mathrm{trace}(V_\perp B_n^T B_n V_\perp^T)  \geq \frac{\epsilon^2}{4} } 
%  & \leq 4\frac{ d\exp\bb{-2\eta_{n}n\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}^{2}n\bb{\lambda_{1}^{2}+\Mtwo^{2}}} + \rd \frac{\eta_{n}\Mtwo^2\exp(n\eta_n^2(2\lambda_1^2+\Mtwo^2))}{2\bb{\lambda_{1}-\lambda_{2}}  }}{ \epsilon^2  } \label{eq:en3_bound_second_summand}
% \end{align}

% \rd Jain does not have a log(d) so I am really confused about the term in red in the numerator. Also I am getting $\eta_n(\bar{\Nu})$ in the denominator of the term in red.\bk 

% Now we bound the first summand. {\rd how is this related to the first summand...}

% Using Eq 5.6 of~\cite{huang2022matrix} with $q=2$ and by Markov's inequality, 
% \begin{align}
% \Prob\left( \frac{\normop{B_n - \E\bbb {B_n}}}{ (1+\eta_n\lambda_1/n)^{n}}  > \frac{\epsilon}{2} \right) \leq   \ \frac{\E\bbb{\normop{B_n - \E\bbb{B_n}}^2}}{\Prob(\mathcal{G})(1+\eta_{n}\lambda_1)^{n}\epsilon^2} \leq \frac{(C\eta_n^2 n\Mtwo^{2})^{1/2}(1+\log d)^2}{\epsilon^2} \label{eq:en3_bound_first_summand}
% \end{align}

Finally, define the error $\eps$ as
\ba{
    \epsilon := \bb{C \frac{ d\exp\bb{-2\eta_{n}n\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}^{2}n\bb{\lambda_{1}^{2}+\Mtwo^{2}}} +  \frac{\eta_{n}\Mtwo^2}{\bb{\lambda_{1}-\lambda_{2}}  }}{\delta^3 (1-\delta) \log^{-1}(1/\delta)}}^{\frac{1}{2}} + \frac{\eta_n\sqrt{n}\Mtwo\log\bb{d}}{\delta^{\frac{1}{2}}}. \label{eq:choice_of_epsilon}
}
Substituting $\epsilon$ in equations~\eqref{eq:en3_bound_second_summand} and~\eqref{eq:en3_bound_first_summand}, and combining with equation~\eqref{eq:en3_error_decomposition_2},
\ba{
    \Prob\bb{\left|\frac{\|B_nu_0\|}{c_n}-1\right| > \epsilon} &\leq \Prob\bb{\frac{\|B_n \vp\vp^T u_0\|}{c_n} >  \frac{\eps}{2}} +  \Prob\bb{ \frac{\normop{B_n - \E\bbb {B_n}}}{ (1+\eta_n\lambda_1)^{n}}  > \frac{\epsilon}{2}} \\
    &\leq \Prob\bb{\frac{\|B_n \vp\vp^T u_0\|}{c_n} >  \frac{\eps}{2} \,\bigg|\, \mathcal{G}} + \Prob(\mathcal{G}^{\complement}) +  \Prob\bb{ \frac{\normop{B_n - \E\bbb {B_n}}}{ (1+\eta_n\lambda_1)^{n}}  > \frac{\epsilon}{2}} \le 3\delta.
    \label{eq:en3_decomposition_term_1_bound}
}
From equations~\eqref{eq:en3_union_bound} and~\eqref{eq:en3_decomposition_term_1_bound}, we conclude
\bas{
\Prob\bb{\norm{\Ethree{n}}_{2} > \epsilon\sqrt{s_n}} \le 4\delta.
}
The statement of the lemma follows after rescaling $\delta$ by a constant factor.
\end{proof}

\begin{lemma}\label{lemma:en3_tail_bound} Let $\Ethree{n}$ be defined as in Lemma~\ref{lemma:oja_error_decomposition} for $u_{0} = g/\norm{g}_{2}$ with $g \sim \mathcal{N}(0, \id_d)$. Let $\eta_n$ be set according to Lemma~\ref{lemma:learning_rate_choice}. Let $\left\{\Ethree{n}^{(i)}\right\}_{i \in [m]}$ and $\left\{g^{(i)}\right\}_{i \in [m]}$ be $m$ $\iid$ instances of $\Ethree{n}$ and $g$ respectively. Then for any $\delta \in \bb{0,1}$, with probability at least $1-\delta$,
\bas{
    & \frac{\sum_{i \in [m]}\bb{e_{k}^{\top}\Ethree{n}^{(i)}}^{2}}{m} \\
    & \lesssim s_nb_{k}^{2}\bb{m^{3}\bb{ \frac{ d\exp\bb{-2\eta_{n}n\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}^{2}n\bb{\lambda_{1}^{2}+\Mtwo^{2}}} +  \frac{\eta_{n}\Mtwo^2}{\bb{\lambda_{1}-\lambda_{2}}  } }{\delta^3 (1-\delta/m) \log^{-1}(m/\delta)}} + m\frac{\eta_n^{2}n\Mtwo^{2}\log^{2}\bb{d}}{\delta}}
}
for all $k \in [d]$, where $b_k := \norm{\vp^{\top}e_{k}}_{2}$ and $s_n := \frac{C\log\bb{\frac{1}{\delta}}}{\delta^{3}}\frac{\eta_n \Mtwo^{2}}{\bb{\eigengap}}$ for a universal constant $C > 0$.
\end{lemma}
\begin{proof}
Using Lemma~\ref{lemma:en3_norm}, for any fixed $i \in [m]$, with probability at least $1-\delta$,
\ba{
    \norm{\Ethree{n}^{(i)}}_{2} \lesssim \sqrt{s_n}\bb{\bb{ \frac{ d\exp\bb{-2\eta_{n}n\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}^{2}n\bb{\lambda_{1}^{2}+\Mtwo^{2}}} +  \frac{\eta_{n}\Mtwo^2}{\bb{\lambda_{1}-\lambda_{2}}  } }{\delta^3 (1-\delta) \log^{-1}(1/\delta)}}^{\frac{1}{2}} + \frac{\eta_n\sqrt{n}\Mtwo\log\bb{d}}{\delta^{\frac{1}{2}}}}. \label{eq:en3_norm_i_bound}
}
Furthermore, note that 
\ba{
\Abs{e_{k}^{\top}\Ethree{n}^{(i)}} &= \Abs{e_{k}^{\top}\vp\vp^{\top}B_{n}u_{0}\bb{\frac{1}{\norm{B_{n}u_0}_{2}} - \frac{1}{c_{n}}}} \notag \\
&= \Abs{e_{k}^{\top}\vp\vp^{\top}\vp\vp^{\top}B_{n}u_{0}\bb{\frac{1}{\norm{B_{n}u_0}_{2}} - \frac{1}{c_{n}}}} \notag \\
&\leq \norm{e_{k}^{\top}\vp\vp^{\top}}_{2}\norm{\vp\vp^{\top}B_{n}u_{0}\bb{\frac{1}{\norm{B_{n}u_0}_{2}} - \frac{1}{c_{n}}}}_{2} \notag \\
&= b_k\norm{\vp\vp^{\top}B_{n}u_{0}\bb{\frac{1}{\norm{B_{n}u_0}_{2}} - \frac{1}{c_{n}}}}_{2} = b_{k}\norm{\Ethree{n}^{(i)}}_{2}. \label{eq:en3_ek_to_norm_conversion}
}
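% NOTE(review): the statement defines s_n at level \delta, while the step below applies the norm bound at level \delta/m -- confirm the intended dependence of s_n on m.
The result then follows by invoking \eqref{eq:en3_norm_i_bound} with $\delta/m$ in place of $\delta$, taking a union bound over all $i \in [m]$, and combining with \eqref{eq:en3_ek_to_norm_conversion} and the inequality $(a+b)^{2} \leq 2a^{2} + 2b^{2}$.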

% Define the events, $\left\{\mathcal{G}_i\right\}_{i \in [m]}$ as
% \bas{
%   \mathcal{G}_{i} := \left\{\frac{\|B_n V_\perp V_\perp^Tg\|^2}{|v_1^Tg|^2(1+\eta\lambda_1)^{2n}} \leq  \  \frac{\log(1/\delta) \mathrm{trace}(V_\perp B_n B_n V_\perp^T) )}{\delta^3  (1+\eta\lambda_1)^{2n}}\right\} 
% }
% Using Lemma~\ref{lemma:en3}, we have with probability at least $1-\frac{\delta}{\Prob\bb{\mathcal{G}_i}}$, for any $i \in [m]$, conditioned on $\mathcal{G}_i$, 
% \bas{
% \frac{\Abs{e_{k}^{\top}\Ethree{n}^{(i)}}}{\sqrt{s_n}b_k} \leq  \bb{\frac{ d\exp\bb{-2\eta_{n}n\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}^{2}n\bb{\lambda_{1}^{2}+\Mtwo^{2}}} + \frac{e\eta_{n}^{3}n\Mtwo^{4}\bb{1+2\log\bb{d}}}{2\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}\bb{\lambda_{1}^{2}-\lambda_{2}^{2}-\Mtwo^{2}}}  }{4\delta^4 \log^{-1}(1/\delta)}}^{\frac{1}{2}} + \bb{\frac{\Mtwo^{6}}{\delta n^3(\lambda_1-\lambda_2)^6}}^{\frac{1}{6}} 
% }
% Using the trace trick (prop B.6 in~\cite{lunde2021bootstrapping}), % Cite trace lemma \bk
% we have with probability at least $1 - C \delta$, 
% \begin{align*}
%   & \ \frac{\|B_n V_\perp V_\perp^Tg\|^2}{|v_1^Tg|^2(1+\eta\lambda_1)^{2n}} \leq  \  \frac{\log(1/\delta) \mathrm{trace}(V_\perp B_n B_n V_\perp^T) )}{\delta^3  (1+\eta\lambda_1)^{2n}} 
% \end{align*}
% Therefore, for all $\left\{\mathcal{G}_i\right\}_{i \in [m]}$, $\mathbb{P}\bb{\mathcal{G}_i} \geq 1-\delta$. Therefore, using a union bound, for any fixed $i \in [m]$, with probability at least $1-\delta$,  
% \bas{
% \frac{\bb{e_{k}^{\top}\Ethree{n}^{(i)}}^{2}}{s_{n}b_k^{2}} \leq  2\bb{\frac{ d\exp\bb{-2\eta_{n}n\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}^{2}n\bb{\lambda_{1}^{2}+\Mtwo^{2}}} + \frac{e\eta_{n}^{3}n\Mtwo^{4}\bb{1+2\log\bb{d}}}{2\bb{\lambda_{1}-\lambda_{2}} + \eta_{n}\bb{\lambda_{1}^{2}-\lambda_{2}^{2}-\Mtwo^{2}}}  }{4\delta^4 \log^{-1}(1/\delta)}} + 2\bb{\frac{\Mtwo^{6}}{\delta n^3(\lambda_1-\lambda_2)^6}}^{\frac{1}{3}} 
% }
% The conclusion then follows by another union bound over all $i \in [m]$.
\end{proof}


