\section{Entrywise Error Bounds}\label{appendix:entrywise_error_bounds}

\newcommand{\yij}{Y_{j,i}}

\begin{lemma}\label{lemma:oja_error_hajek_tail_bound}
Let the learning rate, $\eta_n$, be set according to Lemma~\ref{lemma:learning_rate_choice}. Further, for $X_i \sim \mathcal{P}, A_i = X_iX_i^{\top}$, let $\normop{A_i - \Sigma} \leq \Mone$ almost surely. Then, for $\delta \in \bb{0,1}$, with probability at least $1-\delta$, we have for all $k \in [d]$, 
\bas{
    \Abs{e_{k}^{\top} \Eone{n}} &\lesssim  \sqrt{\eta_{n}\bb{e_k^{\top}\vp R_0\vp^{\top}e_{k}}\log\bb{\frac{d}{\delta}}} +  \eta_{n}b_k\bb{\Mone\log\bb{\frac{d}{\delta}} + \Mtwo\sqrt{\frac{\lambda_{1}}{\eigengap}}\sqrt{\log\bb{\frac{d}{\delta}}}}
}
where $\Eone{n}$ is defined in Lemma~\ref{lemma:oja_error_decomposition}, $b_k := \norm{\vp^{\top}e_k}_{2}$, $\widetilde{M} := \E\bbb{\vp^{\top}\bb{A_j-\Sigma}v_1v_1^{\top}\bb{A_j-\Sigma}^{\top}\vp}$ and $R_0 \in \R^{(d-1) \times (d-1)}$ has entries
\bas{
    R_0(k,\ell) := \frac{\widetilde{M}_{k\ell}}{2\lambda_1-\lambda_{k+1}-\lambda_{\ell+1}}, \;\; \forall k, \ell \in [d-1].
}
% Let $S_k = e_{k}^{\top}Y_n = \sum_{j=1}^n e_{k}^{\top}X_j^{n}$, where $X_j^{n}=\vp\Lambda_{\perp}^{n-j}\vp^{\top}(A_j-\Sigma)v_1$,  defined in Lemma~\ref{lemma:hajek_decomposition}. Then, we have, 
%     \bas{
% P\left(\forall i\in[d], |S_i|\leq t_i\right)\geq  1-\delta.
% }
\end{lemma}
\begin{proof}
Using Lemma~\ref{lemma:hajek_decomposition}, we have 
\bas{
e_{k}^{\top}\Eone{n} = \eta_{n}e_{k}^{\top}Y_n = \sum_{j=1}^n \eta_{n}e_{k}^{\top}X_j^{n}, \text{ where  } X_j^{n} := \vp\Lambda_{\perp}^{n-j}\vp^{\top}(A_j-\Sigma)v_1
}
Let $\alpha_{j} := \eta_{n}e_{k}^{\top}X_j^{n}$. Then, note that $\E\bbb{\alpha_j} = 0$. Furthermore, 
\bas{
    \E\bbb{\alpha_j^{2}} &= \eta_{n}^{2}e_{k}^{\top}\vp\lambp^{n-j}\E\bbb{\vp^{\top}\bb{A_j-\Sigma}v_1v_1^{\top}\bb{A_j-\Sigma}^{\top}\vp}\lambp^{n-j}\vp^{\top}e_{k} = \eta_{n}^{2}e_{k}^{\top}\vp\lambp^{n-j}\widetilde{M}\lambp^{n-j}\vp^{\top} e_{k} =: \sigma_{jk}^{2}, \\
    \Abs{\alpha_j} &= \Abs{\eta_{n}e_k^{\top}\vp\Lambda_{\perp}^{n-j}\vp^{\top}(A_j-\Sigma)v_1} \leq \eta_{n}b_k\normop{\lambp^{n-j}}\Mone \leq \eta_{n}b_k\Mone
}
Therefore, using the fact that the $\alpha_{j}$ are independent, along with Bernstein's inequality (see, e.g., Proposition 2.14 and the subsequent discussion in~\cite{wainwright2019high}), we have for each fixed $k \in [d]$, with probability at least $1-\delta$, 
\bas{
    \Abs{e_{k}^{\top}\Eone{n}} &\lesssim \sqrt{\bb{\sum_{j=1}^{n}\sigma_{jk}^{2}}\log\bb{\frac{1}{\delta}}} + \eta_{n}\Mone b_k\log\bb{\frac{1}{\delta}}
}
Furthermore, considering a union bound over $k \in [d]$, we have for all $k \in [d]$, 
\bas{
   \Abs{e_{k}^{\top}\Eone{n}} &\lesssim \sqrt{\bb{\sum_{j=1}^{n}\sigma_{jk}^{2}}\log\bb{\frac{d}{\delta}}} + \eta_{n}\Mone b_k\log\bb{\frac{d}{\delta}} 
}
Finally, using Lemma~\ref{lemma:second_moment_matrix}, we have
\bas{
\sum_{j=1}^{n}\sigma_{jk}^{2} &= \eta_{n}^{2}e_{k}^{\top}\bb{\sum_{j=1}^{n}\vp\lambp^{n-j}\widetilde{M}\lambp^{n-j}}\vp^{\top} e_{k} \\
&= \eta_{n}^{2}e_{k}^{\top}\E\bbb{Y_{n}{Y_n}^{\top}} e_{k} \\
&= \eta_{n}^{2}e_{k}^{\top}\vp\bb{R^{(n)}} \vp^{\top}e_{k} \\
&= \eta_{n}^{2}e_{k}^{\top}\vp\bb{\frac{R_0}{\eta_n} + \bb{R^{(n)} - \frac{R_0}{\eta_n}}}\vp^{\top} e_{k} \\
&\leq \eta_{n}e_{k}^{\top}\vp R_0 \vp^{\top}e_{k} + \eta_{n}^{2}b_{k}^{2}\norm{R^{(n)} - \frac{R_0}{\eta_n}}_{F} \\
&\leq \eta_{n}e_{k}^{\top}\vp R_0 \vp^{\top}e_{k} + \frac{\eta_n^{2}b_{k}^{2}\lambda_1\Mtwo^{2}}{\bb{\eigengap}}
}
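Substituting this bound into the union bound above and using $\sqrt{a+b} \leq \sqrt{a} + \sqrt{b}$ for $a, b \geq 0$ completes our proof.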
%     Define 
%  Let $Y^{(i)}_j=u_i^TX_j$. Let $E[Y_j]=0$. 
% Let
% \bas{
% E[\yij^2]=u_i^T E[X_jX_j^T]u_i=u_i^T\Lambda_{\perp}^{n-j}\underbrace{\E[(A_i-\Sigma)v_1v_1^T(A_i-\Sigma)^T]}_{H}\Lambda_{\perp}^{n-j} u_i=:\sigma_{ij}^2
% }

% We have $|\yij|\leq \|u_i\|\|\Lambda_{\perp}\|^{n-j}M=:b_{ij}$.
% Now we apply the Azuma-Bernstein inequality~\cite{wainwright2019high}. We note that $\{\yij\}$ for $j\in [n]$ form a martingale sequence with $\yij|\mathcal{H}_{i-1}$ subexponential with parameters $\sigma_{ij}^2,b_{ij}$. Define $b_{i*}=\|u_i\|M$.
% So we have, for $t_i=2\left(\sqrt{\sum_i\sigma_{ij}^2\log(1/\delta)}+b_{i*}\log(1/\delta)\right)$,
% \bas{
% P\left(|S_i|\geq t_i\right)\leq  2\exp\left(-\frac{t^2}{2(\sum_j\sigma_{ij}^2+b_{i*}t)}\right)\leq \delta/d.
% }
% \rd We need to write a proof for the above. \bk
% %Now note that, for $u_i=e_i$, $t_*=\max_i t_i$,

% \bas{
% P\left(\forall i\in[d], |S_i|\leq t_i\right)\geq  1-\delta.
% }

\end{proof}

\begin{lemma}\label{lemma:oja_error_decomposition_higher_order_tail_bounds}
    Let the learning rate, $\eta_n$, be set according to Lemma~\ref{lemma:learning_rate_choice}. Then, for $\delta \in \bb{0,1}$, with probability at least $1-\delta$, we have
    \bas{
         \norm{\Etwo{n} + \Ethree{n} + \Efour{n}}_{2} &\lesssim \frac{ \eta_{n}^2n \Mtwo^2 \log d} {\sqrt{\delta}} + \frac{\sqrt{s_n}\eta_n\sqrt{n}\Mtwo\log\bb{d}}{\sqrt{\delta}} \\
         &\quad\quad + \frac{\log\bb{\frac{1}{\delta}}}{\delta^{3}} \bb{\sqrt{d}\exp\bb{-\eta_{n}n\bb{\lambda_{1}-\lambda_{2}}} + \frac{\sqrt{\eta_{n}^{3}n}\Mtwo^{2}\log\bb{d}}{\sqrt{\lambda_{1}-\lambda_{2}}}} 
    }
    and for all $k \in [d]$,
    \bas{
         \Abs{e_{k}^{\top}\bb{ \Etwo{n} + \Ethree{n} + \Efour{n}}} &\leq b_{k}\norm{\Etwo{n} + \Ethree{n} + \Efour{n}}_{2} \\
         &\lesssim \frac{b_k \eta_{n}^2n \Mtwo^2 \log d} {\sqrt{\delta}} + \frac{b_k\sqrt{s_n}\eta_n\sqrt{n}\Mtwo\log\bb{d}}{\sqrt{\delta}} \\
         &\quad\quad + b_{k}\frac{\log\bb{\frac{1}{\delta}}}{\delta^3}\bb{\sqrt{d}\exp\bb{-\eta_{n}n\bb{\lambda_{1}-\lambda_{2}}} + \frac{\sqrt{\eta_{n}^3 n}\Mtwo^{2}\log\bb{d}}{\sqrt{\lambda_{1}-\lambda_{2}}}} 
    }
    where $\Etwo{n}, \Ethree{n}, \Efour{n}$ are as defined in Lemma~\ref{lemma:oja_error_decomposition}, $b_k := \norm{\vp^{\top}e_k}_{2}$ and $s_n := \frac{C\log\bb{\frac{1}{\delta}}}{\delta^{3}}\frac{\eta_n\Mtwo^{2}}{\bb{\eigengap}}$ for a universal constant $C > 0$.
\end{lemma}
\begin{proof}
    By the triangle inequality, we have 
    \ba{
        \norm{\Etwo{n} + \Ethree{n} + \Efour{n}}_{2} \leq  \norm{\Etwo{n}}_{2} + \norm{\Ethree{n}}_{2} + \norm{\Efour{n}}_{2} \label{eq:E2_E3_E4_triangle_ineq}
    }
    Using Lemma~\ref{lemma:en2_norm}, we have, with probability at least $1-\frac{\delta}{3}$, 
    \ba{
        \norm{\Etwo{n}}_{2} \leq \frac{12\eta_{n}^2 \mathcal{M}_2^2 n \log d} {\sqrt{\delta/3}} \leq  \frac{21\eta_{n}^2 \mathcal{M}_2^2 n \log d} {\sqrt{\delta}}. \label{eq:residual_en2_bound}
    }
    Using Lemma~\ref{lemma:en3_norm}, along with the definition of $\eta_n$ in Lemma~\ref{lemma:learning_rate_choice}, with probability at least $1-\frac{\delta}{3}$,
    \ba{
    \norm{\Ethree{n}}_{2} &\lesssim \frac{\sqrt{s_n}\sqrt{\log\bb{\frac{1}{\delta}}}}{\delta^{\frac{3}{2}}}  \bb{\sqrt{d}\exp\bb{-\eta_{n}n\bb{\lambda_{1}-\lambda_{2}}} + \frac{\sqrt{\eta_{n}}\Mtwo}{\sqrt{\lambda_{1}-\lambda_{2}}}} + \sqrt{s_n}\frac{\eta_n\sqrt{n}\Mtwo\log\bb{d}}{\sqrt{\delta}} \notag \\
    &\lesssim \frac{\sqrt{\log\bb{\frac{1}{\delta}}}}{\delta^{\frac{3}{2}}}  \bb{\sqrt{d}\exp\bb{-\eta_{n}n\bb{\lambda_{1}-\lambda_{2}}} + \sqrt{\frac{C \log (1/\delta)}{\delta^3} \frac{\eta_{n} \Mtwo^2}{\eigengap}} \cdot \frac{\sqrt{\eta_{n}}\Mtwo \log d}{\sqrt{\lambda_{1}-\lambda_{2}}}} \notag \\
    &\lesssim \frac{\log\bb{\frac{1}{\delta}}}{\delta^3} 
    \bb{\sqrt{d}\exp\bb{-\eta_{n}n\bb{\lambda_{1}-\lambda_{2}}} + \frac{\sqrt{\eta_{n}^3 n}\Mtwo^{2}\log\bb{d}}{\sqrt{\lambda_{1}-\lambda_{2}}}},
    \label{eq:residual_en3_bound}
    }
    where the second inequality used $s_n \le 1$. 
    Using Lemma~\ref{lemma:en4_norm}, along with the definition of $\eta_n$ in Lemma~\ref{lemma:learning_rate_choice}, with probability at least $1-\frac{\delta}{3}$,
    \ba{
    \norm{\Efour{n}}_{2} \lesssim \frac{1}{\delta^{\frac{3}{2}}}  \bb{\sqrt{d}\exp\bb{-\eta_{n}n\bb{\lambda_{1}-\lambda_{2}}} + \frac{\sqrt{\eta_{n}^{3}n}\Mtwo^{2}\log\bb{d}}{\sqrt{\lambda_{1}-\lambda_{2}}}} \label{eq:residual_en4_bound}
    }
    The first result follows by a union bound over \eqref{eq:residual_en2_bound}, \eqref{eq:residual_en3_bound}, \eqref{eq:residual_en4_bound} and substituting in \eqref{eq:E2_E3_E4_triangle_ineq}.
    Finally, note that using Lemma~\ref{lemma:oja_error_decomposition}, $\exists x_{n}, y_{n}, z_{n} \in \R^{d}$ such that $\Etwo{n} = \vp\vp^{\top}x_{n}$, $\Ethree{n} = \vp\vp^{\top}y_{n}$, $\Efour{n} = \vp\vp^{\top}z_{n}$. Therefore,  
    \bas{
        \Abs{e_{k}^{\top}\bb{\Etwo{n} + \Ethree{n} + \Efour{n}}} &= \Abs{e_{k}^{\top}\vp\vp^{\top}\bb{x_n + y_n + z_n}} \\
        &= \Abs{e_{k}^{\top}\vp\vp^{\top}\vp\vp^{\top}\bb{x_n + y_n + z_n}} \\
        &\leq \norm{e_{k}^{\top}\vp\vp^{\top}}_{2}\norm{\vp\vp^{\top}\bb{x_n + y_n + z_n}}_{2} \\
        &= b_k \norm{\Etwo{n} + \Ethree{n} + \Efour{n}}_{2}
    }
    which completes the proof of the second result.
\end{proof}
Now we are ready to prove a detailed version of Theorem~\ref{thm:main:entrywise_concentration_bound}.
\begin{lemma}\label{lemma:entrywise_concentration_bound} Let the learning rate, $\eta_n$, be set according to Lemma~\ref{lemma:learning_rate_choice}. Further, for $X_i \sim \mathcal{P}, A_i = X_iX_i^{\top}$, let $\normop{A_i - \Sigma} \leq \Mone$ almost surely. Define $\roja := \voja - \bb{v_1^{\top}\voja}v_1$. Then, for $\delta \in \bb{0,1}$, with probability at least $1-\delta$, for all $k \in [d]$, 
\bas{
    \Abs{e_k^{\top}\roja} &\lesssim \sqrt{\eta_{n}\bb{e_k^{\top}\vp R_0\vp^{\top}e_{k}}\log\bb{\frac{d}{\delta}}} +  \eta_{n}b_k\bb{\Mone\log\bb{\frac{d}{\delta}} + \Mtwo\sqrt{\frac{\lambda_{1}}{\eigengap}}\sqrt{\log\bb{\frac{d}{\delta}}}} \\
    &\quad\quad + b_{k}\frac{\log\bb{\frac{1}{\delta}}}{\delta^3}\bb{\sqrt{d}\exp\bb{-\eta_{n}n\bb{\lambda_{1}-\lambda_{2}}} + \frac{\sqrt{\eta_{n}^{3}n}\Mtwo^{2}\log\bb{d}}{\sqrt{\lambda_{1}-\lambda_{2}}}} \\
    &\quad\quad + \frac{b_k \eta_{n}^2n \Mtwo^2 \log d} {\sqrt{\delta}} + \frac{b_k\sqrt{s_n}\eta_n\sqrt{n}\Mtwo\log\bb{d}}{\sqrt{\delta}} 
}
where $b_k := \norm{\vp^{\top}e_k}_{2}$, $s_n := \frac{C\log\bb{\frac{1}{\delta}}}{\delta^{3}}\frac{\eta_n\Mtwo^{2}}{\bb{\eigengap}}$,  $\widetilde{M} := \E\bbb{\vp^{\top}\bb{A_j-\Sigma}v_1v_1^{\top}\bb{A_j-\Sigma}^{\top}\vp}$ and $R_0 \in \R^{(d-1) \times (d-1)}$ has entries
    \bas{
    R_0(k,\ell) = \frac{\widetilde{M}_{k\ell}}{2\lambda_1-\lambda_{k+1}-\lambda_{\ell+1}}, \;\; \forall k, \ell \in [d-1].
    }
\end{lemma}
\begin{proof}
    Using Lemma~\ref{lemma:oja_error_decomposition}, we have
    \bas{
        e_{k}^{\top}\roja = e_{k}^{\top}\Eone{n} + e_{k}^{\top}\Etwo{n} + e_{k}^{\top}\Ethree{n} + e_{k}^{\top}\Efour{n}
    }
    Therefore, 
    \bas{
        \Abs{e_{k}^{\top}\roja} &\leq \Abs{e_{k}^{\top}\Eone{n}} + \Abs{e_{k}^{\top}\Etwo{n} + e_{k}^{\top}\Ethree{n} + e_{k}^{\top}\Efour{n}}
    }
    The result then follows by a union bound over the events defined in Lemma~\ref{lemma:oja_error_hajek_tail_bound} and Lemma~\ref{lemma:oja_error_decomposition_higher_order_tail_bounds}, each applied with $\delta/2$ in place of $\delta$.
\end{proof}

% \begin{lemma}
%     Let $r=\hat{v}_1-(\hat{v}_1^Tv_1)v_1$, 
%     \bas{
% P\left(\forall i\in[d], |r_i|\leq ??\right)\geq  1-\delta.
% }
% \end{lemma}
% \begin{proof}
% Let $\eta=\eta_n/n$
%     First, note that:
%     \bas{
%     r&=\hat{v}_1-(\hat{v}_1^Tv_1)v_1 = \vp\vp^T \frac{B_n u_0}{\|B_n u_0\|}
%     }
%     Define the event $\mathcal{E}=\left\{\frac{\left\|\vp\vp^T B_n \vp\vp^T u_0\right\|}{(1+\eta\lambda_1)^n|u_0^Tv_1|}\leq \right\}$
%     Using Lemma B.2 from~\cite{lunde2021bootstrapping}, we have, for $i\in[d]$, \rd with probability $1-C\delta$ and the choice $\epsilon = 1/2$,\bk:
%     \ba{
%      |r_i|&\leq \frac{\left|e_i^T\vp\vp^T B_n u_0\right|}{(1+\eta\lambda_1)^n(1-\epsilon)|u_0^Tv_1|}\leq 2\frac{|u_0^Tv_1|\left|e_i^T\vp\vp^T B_n v_1\right|+\left|e_i^T\vp\vp^T B_n \vp\vp^T u_0\right|}{(1+\eta\lambda_1)^n|u_0^Tv_1|}\notag\\
%      &\leq 2\frac{\left|e_i^T\vp\vp^T B_n v_1\right|}{(1+\eta\lambda_1)^n}+2\frac{\left|e_i^T\vp\vp^T B_n \vp\vp^T u_0\right|}{(1+\eta\lambda_1)^n|u_0^Tv_1|}\label{eq:ri}%\leq 2\frac{\left|e_i^T\vp\vp^T B_n v_1\right|}{(1+\eta\lambda_1)^n}+2
%     }
%     Using Lemma B.3 from~\cite{lunde2021bootstrapping} we have:
%     \begin{align*}
% & \  P\left(\sqrt{\frac{n}{\eta_n}} \  \frac{\norm{V_\perp V_\perp^T B_n V_\perp V_\perp^T u_0}}{|v_1^Tu_0|(1+\frac{\eta_n \lambda_1}{n})^n} \geq \epsilon \right) \\
% \leq & \  \frac{nd \log(1/\delta) \exp\bigl\{-2\eta_n(\lambda_1-\lambda_2)+\eta_n^2(\lambda_1^2+M_d)/n\bigr\} }{\eta_n \epsilon^2 \delta^{2}} +\frac{eM_d^2(1+2\log d)\eta_n^2 \epsilon^{-2} \log(1/\delta) \delta^{-2}}{n2(\lambda_1-\lambda_2)+\eta_n^2(\lambda_1^2-\lambda_2^2-M_d)} + C \delta   
% \end{align*}
% Combining this with Eq~\ref{eq:ri}, we have:
%     \ba{
%      |r_i|%&\leq \frac{\left|e_i^T\vp\vp^T B_n u_0\right|}{(1+\eta\lambda_1)^n(1-\epsilon)|u_0^Tv_1|}%\leq 2\frac{|u_0^Tv_1|\left|e_i^T\vp\vp^T B_n v_1\right|+\left|e_i^T\vp\vp^T B_n \vp\vp^T u_0\right|}{(1+\eta\lambda_1)^n|u_0^Tv_1|}\notag\\
%      &\leq 2\frac{\left|e_i^T\vp\vp^T B_n v_1\right|}{(1+\eta\lambda_1)^n}+2\frac{\left|e_i^T\vp\vp^T B_n \vp\vp^T u_0\right|}{(1+\eta\lambda_1)^n|u_0^Tv_1|}\\
%      &\stackrel{(i)}{\leq} 2\frac{\left|e_i^T\vp\vp^T T_1 v_1\right|}{(1+\eta\lambda_1)^n}+2\epsilon\sqrt{\frac{\eta_n}{n}}+2\sqrt{\frac{\eta_n^3}{n^2\delta}}\label{eq:ri}%\leq 2\frac{\left|e_i^T\vp\vp^T B_n v_1\right|}{(1+\eta\lambda_1)^n}+2
%     }
% Lemma B.2 and B.4 establish step (i).
% Now note that $\vp\vp^T$
% \end{proof}