\subsubsection{$\Eone{n}$ (Hajek Projection) Concentration}
\label{appendix:Eone_bound}

\begin{lemma}\label{lemma:en1_concentration_bound} Let $\Eone{n}$ be defined as in Lemma~\ref{lemma:oja_error_decomposition} for $u_{0} = g/\norm{g}_{2}$ with $g \sim \mathcal{N}(0, \id_d)$. Let $\left\{\Eone{n}^{(i)}\right\}_{i \in [m]}$ and $\left\{g^{(i)}\right\}_{i \in [m]}$ be $m$  $\iid$ instances of $\Eone{n}$ and $g$ respectively. Then, for any $\delta \in \bb{0,1}$ and $k \in [d]$, with probability at least $1-\delta$,
\bas{
    \Abs{\frac{\sum_{i \in [m]}\bb{e_{k}^{\top}\Eone{n}^{(i)}}^{2}}{m} - \E\bbb{\bb{e_{k}^{\top}\Eone{n}}^{2}}} \le \frac{\sqrt{2} \E\bbb{\bb{e_k^\top \Eone{n}}^2} + \eta_n^2 b_k^2 \mathcal{M}_4^2 \sqrt{n}}{\sqrt{m\delta}},
}
where $b_k := \norm{\vp^{\top}e_k}_{2}$.
\end{lemma}

\begin{proof}
Recall the notations $X_j^n = \vp\lambp^{n-j}\vp^{\top}\bb{A_{j}-\Sigma}v_{1}$ and $Y_n = \sum_{j=1}^n X_j^n$ from Lemma~\ref{lemma:second_moment_matrix}. Since $\vp \vp^\top X_j^n = X_j^n$ and $\vp \vp^\top T_{n,1} v_1 = \eta_n (1+\eta_n \lambda_1)^{n-1} \sum_{j=1}^n X_j^n$, $e_k^\top \Eone{n}$ can be written as
\ba{
e_k^\top \Eone{n} = \frac{e_k^\top \vp \vp^\top T_{n,1} v_1 \sign(v_1^\top u_0)}{(1+\eta_n \lambda_1)^n} = \frac{\eta_n \sign(v_1^\top u_0)}{(1+\eta_n \lambda_1)} \sum_{j=1}^n e_k^\top \vp \vp^\top X_j^n = \frac{\eta_n \sign(v_1^\top u_0)}{1+\eta_n \lambda_1} e_k^\top Y_n. \label{eq:en1-Y-relation}
}
Next, we bound the variance of $(e_k^{\top}Y_n)^2$.
\bas{
(e_k^{\top} Y_n)^2 = \sum_{j=1}^n \bb{e_k^{\top} X_j^n}^2 + 2\sum_{j < j'} \bb{e_k^{\top} X_j^n}\bb{e_k^{\top} X_{j'}^n}.
}
All pairs of distinct terms in this expansion are uncorrelated, since the $X_j^n$ are independent with mean zero:
\begin{itemize}
\item $\cov\bb{(e_k^\top X_j^n)^2, (e_k^\top X_{j'}^n)^2} = 0$ for any distinct $j, j' \in [n]$.

\item $\cov\bb{(e_k^\top X_{\ell}^n)^2, (e_k^\top X_j^n)(e_k^\top X_{j'}^n)} = 0$ for any $\ell \in [n]$ and $1 \le j < j' \le n$.

\item $\cov\bb{(e_k^\top X_{j}^n)(e_k^\top X_{j'}^n), (e_k^\top X_{\ell}^n)(e_k^\top X_{\ell'}^n)} = 0$ for any $1 \le j < j' \le n$ and $1 \le \ell < \ell' \le n$ such that $(j,j') \neq (\ell, \ell')$.
\end{itemize}
It follows that
\ba{
\Var\bb{(e_k^{\top} Y_n)^2} = \sum_{j=1}^n \Var\bb{(e_k^\top X_{j}^n)^2} + 4\sum_{j < j'} \Var\bb{(e_k^\top X_{j}^n)(e_k^\top X_{j'}^n)}. \label{eq:en1_error_decomposition}
}
We bound both terms separately. By Lemma~\ref{lemma:second_moment_matrix}, the second term can be bounded as
\ba{
4\sum_{j < j'} \Var\bb{(e_k^\top X_{j}^n)(e_k^\top X_{j'}^n)} &= 4 \sum_{j < j'} \E\bbb{(e_k^\top X_{j}^n)^2}\E\bbb{(e_k^\top X_{j'}^n)^2} \nonumber \\
&\le 2 \sum_{j=1}^n \sum_{j'=1}^n \E\bbb{(e_k^\top X_{j}^n)^2}\E\bbb{(e_k^\top X_{j'}^n)^2} = 2 \E\bbb{(e_k^\top Y_n)^2}^2. \label{eq:en1_error_decomposition_1}
}
Next, we bound the first term of Equation~\eqref{eq:en1_error_decomposition}. For any $j \in [n]$,
\bas{
\Abs{e_k^\top X_j^n} &= \Abs{e_k^\top \vp \lambp^{n-j} \vp^\top (A_j-\Sigma) v_1} \le \norm{e_k^\top \vp} \norm{\lambp^{n-j}} \norm{\vp^\top (A_j-\Sigma) v_1} \le b_k \norm{A_j-\Sigma},
}
which implies
\ba{
\sum_{j=1}^n \Var\bb{(e_k^\top X_{j}^n)^2} \le \sum_{j=1}^n \E\bbb{(e_k^\top X_{j}^n)^4} \le \sum_{j=1}^n \E\bbb{b_k^4 \norm{A_j-\Sigma}^4} \le b_k^4 \mathcal{M}_4^4 n. \label{eq:en1_error_decomposition_2}
}
Combining Equations~\eqref{eq:en1_error_decomposition},~\eqref{eq:en1_error_decomposition_1}, and~\eqref{eq:en1_error_decomposition_2}, and using the identity~\eqref{eq:en1-Y-relation} together with $\sign(v_1^\top u_0)^2 = 1$ (almost surely), we obtain
\bas{
\Var\bb{(e_k^\top \Eone{n})^2} \leq 2\E\bbb{\bb{(e_k^\top \Eone{n})^2}}^2 + \frac{\eta_n^4}{(1+\eta_n \lambda_1)^4}b_k^4 \mathcal{M}_4^4 n.
}
% We can also show an upper bound on the second term in terms of the parameters. For any $i$, we have from Lemma~\ref{lemma:second_moment_matrix} that
% \bas{
% \E\bbb{(e_k^\top X_{i}^n)^2} \le \mathcal{V} (1-\eta_n \mu)^{2(n-i-1)}.
% }
% \textcolor{red}{Assuming $\eta_n \mu \le 1/2$}, we conclude
% \bas{
% 4\sum_{i < j} \Var((e_k^\top X_{i}^n)(e_k^\top X_{j}^n)) &\le 2 \mathcal{V}^2 \bb{\sum_{i=1}^n (1-\eta_n \mu)^{2(n-i-1)}}^2 \le 2 \mathcal{V}^2 \bb{\frac{(1-\eta_n \mu)^{-2}}{1-(1-\eta_n \mu)^2}}^2 \le \frac{32 \mathcal{V}^2}{\eta_n^2 \mu^2}.
% }

% Next, we bound the first term. We have
% \bas{
% \Var((e_k^\top Y_n)^2) &= \sum_{i=1}^n \Var((e_k^\top X_{i}^n)^2) \le \sum_{i=1}^n \E\bbb{(e_k^\top X_i^n)^4} \\
% &= \sum_{i=1}^n \E\bbb{e_k^\top \vp\lambp^{n-i-1}\vp^{\top}\bb{A_{i}-\Sigma}v_{1}v_1^\top \bb{A_{i}-\Sigma} \vp \lambp^{n-i-1} \vp^{\top} e_k (e_k^\top X_i^n)^2} \\
% &\le \sum_{i=1}^n \E\bbb{\mathcal{M}^2 e_k^\top \vp \lambp^{2(n-i-1)} \vp^{\top} e_k (e_k^\top X_i^n)^2)} \\
% &= \mathcal{M}^2 \sum_{i=1}^n e_k^\top \vp \lambp^{2(n-i-1)} \vp^{\top} e_k \E\bbb{(e_k^\top X_i^n)^2} \\
% &\le \mathcal{M}^2 \sum_{i=1}^n \bb{(1-\eta_n \mu)^{2(n-i-1)} \E\bbb{(e_k^\top X_i^n)^2}} \\
% &\le \mathcal{M}^2 \mathcal{V} \sum_{i=1}^n (1-\eta_n \mu)^{4(n-i-1)} \le \mathcal{M}^2 \mathcal{V} \frac{(1-\eta_n \mu)^{-4}}{1-(1-\eta_n \mu)^4} \le \frac{16 \mathcal{M}^2 \mathcal{V}}{\eta_n \mu}.
% }
% % \textcolor{red}{not sure how to relate this to the expectation squared directly}.
% Overall, we obtain the following upper bound on the variance:
% \bas{
% \Var((e_k^\top Y_n)^2) \le \frac{32 \mathcal{V}^2}{\eta_n^2 \mu^2} + \frac{16 \mathcal{M}^2 \mathcal{V}}{\eta_n \mu}.
% }

By Chebyshev's inequality, for any $t > 0$,
\bas{
P\bb{\Abs{\frac{1}{m} \sum_{i=1}^m \bb{e_k^\top \Eone{n}^{(i)}}^2 - \E\bbb{\bb{e_k^\top \Eone{n}}^2}} \ge t} &\le \frac{\Var\bb{\bb{e_k^\top \Eone{n}}^2}}{mt^2} \\
&\le \frac{ 2\E\bbb{\bb{(e_k^\top \Eone{n})^2}}^2 + \frac{\eta_n^4}{(1+\eta_n \lambda_1)^4}b_k^4 \mathcal{M}_4^4 n}{mt^2}.\label{eq:en2_inequality}
}
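The result follows by setting $t = \frac{\sqrt{2} \E\bbb{\bb{(e_k^\top \Eone{n})^2}} + \eta_n^2 b_k^2 \mathcal{M}_4^2 \sqrt{n}}{\sqrt{m\delta}}$: since $(a+b)^2 \ge a^2 + b^2$ for $a, b \ge 0$ and $(1+\eta_n \lambda_1)^{-4} \le 1$, the right-hand side of the Chebyshev bound above is then at most $\delta$.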
% Now, 
% \bas{
% \norm{\Eonetwo{n}} &= \norm{\frac{\vp \vp^\top \sum_{k \ge 1} T_{n,k} v_1 \sign\bb{v_1^\top u_0}}{(1+\eta_n \lambda_1)^n}} \\
% &\le \frac{\norm{\vp \vp^\top} \norm{\sum_{k \ge 1} T_{n,k}} \norm{v_1} |\sign(v_1^\top u_0)|}{(1+\eta_n \lambda_1)^n} \le \frac{\norm{\sum_{k \ge 1} T_{n,k}}}{(1+\eta_n \lambda_1)^n}.
% }
% As a result, 
% \bas{
% \E\bbb{\bb{e_k^\top \Eonetwo{n}^{(1)}}^4} &\le \E\bbb{\norm{\Eonetwo{n}}^4} \le \E\bbb{\frac{\norm{\sum_{k \ge 1} T_{n,k}}^4}{(1+\eta_n \lambda_1)^{4n}}} \le 32\bb{\frac{2\eta_n M_4 \sqrt{n \log d}}{1+\eta_n \lambda_1}}^4 \le 512 \eta_n^4 n^2 M_4^4 \log^2 d,
% }
% where the last inequality uses Lemma~\ref{lemma:higher-order-opnorm}. Plugging this bound in Equation~\eqref{eq:en12_inequality} and setting $t = \frac{23 \eta_n^2 n M_4^2 \log d}{\sqrt{m\delta}}$, we get
% \bas{
% P\bb{\Abs{\frac{1}{m} \sum_{i=1}^m \bb{e_k^\top \Eonetwo{n}^{(i)}}^2 - \E\bbb{\bb{e_k^\top \Eonetwo{n}^{(1)}}^2}} \ge \frac{23 \eta_n^2 n M_4^2 \log d}{\sqrt{m\delta}}} \le \delta.
% }
% Next, we bound the expectation $\E\bbb{\bb{e_k^\top \Eonetwo{n}^{(1)}}^2}$.
% \bas{
% \E\bbb{\bb{e_k^\top \Eonetwo{n}^{(1)}}^2} \le 2\E\bbb{\bb{e_k^\top \Eone{n}^{(1)}}^2} + 2\E\bbb{\bb{e_k^\top \Etwo{n}^{(1)}}^2} \le 2\E\bbb{\bb{e_k^\top \Eone{n}^{(1)}}^2} + 2\E\bbb{\norm{\Etwo{n}}^4}^{1/2}.
% }
% By Lemma~\ref{lemma:higher-order-opnorm},
% \bas{
% \E\bbb{\norm{\Etwo{n}}^4}^{1/2} \le \bb{\frac{1}{(1+\eta_n \lambda_1)^{4n}} 32(1+\eta_n \lambda)^{4n} \bb{\frac{2\eta_n M_4 \sqrt{n \log d}}{1+\eta_n \lambda_1}}^{8}}^{1/2} \le 100 \eta_n^4 n^2 M_4^4 \log^2 d.
% }
% To bound $\E\bbb{\bb{e_k^\top \Eone{n}^{(1)}}^2}$, we first decompose $\Eone{n}$ as follows:
% \bas{
% (1+\eta_n \lambda_1)^n \Eone{n} &= \vp \vp^\top T_{n,1} v_1 \sign(v_1^\top u_0) \\
% &= \eta_n \vp \vp^\top \bb{\sum_{j=1}^n (I+\eta_n \Sigma)^{n-j} (A_j - \Sigma) (I+\eta_n \Sigma)^{j-1}} v_1 \sign(v_1^\top u_0) \\
% &= \eta_n \sign(v_1^\top u_0) (1+\eta_n \lambda_1)^{j-1} \vp \vp^\top \bb{\sum_{j=1}^n (I+\eta_n \Sigma)^{n-j} (A_j - \Sigma)} v_1 \\
% &= \eta_n \sign(v_1^\top u_0) (1+\eta_n \lambda_1)^{n-1} \vp \bb{\sum_{j=1}^n \vp^\top \bb{\frac{I+\eta_n \Sigma}{1+\eta_n \lambda_1}}^{n-j} (A_j - \Sigma)} v_1 \\
% &= \eta_n \sign(v_1^\top u_0) (1+\eta_n \lambda_1)^{n-1} \sum_{j=1}^n \vp  \lambp^{n-j} \vp^\top (A_j - \Sigma) v_1.
% }
% Therefore, with $R^{(n)}$ defined as in Lemma~\ref{lemma:second_moment_matrix},
% \bas{
% \E\bbb{e_k^\top \Eone{n}\Eone{n}^\top e_k} &= \frac{\eta_n^2}{(1+\eta_n \lambda_1)^2} \sum_{j=1}^n e_k^\top \vp \lambp^{n-j} \E\bbb{\vp^\top (A_j - \Sigma) v_1 v_1^\top (A_j - \Sigma) \vp} \lambp^{n-j} \vp^\top e_k \\
% &= \frac{\eta_n^2}{(1+\eta_n \lambda_1)^2} e_k^\top \vp R^{(n)} \vp^\top e_k.
% }

\end{proof}

\begin{remark} 
Note that in Lemma~\ref{lemma:en1_concentration_bound}, one can always provide a uniform bound on all elements using a Bernstein-type tail inequality rather than a Chebyshev bound. This is possible because we can use our concentration inequality in Lemma~\ref{lemma:oja_error_hajek_tail_bound}. However, this approach has two pitfalls. First, for failure probability $\delta$, the errors of the lower order terms ($\Etwo{n}, \Ethree{n}, \Efour{n}$) still depend polynomially on $1/\delta$ (see Lemmas~\ref{lemma:en2_tail_bound}, \ref{lemma:en3_tail_bound}, and~\ref{lemma:en4_tail_bound}), which forces the sample complexity of our estimator to carry a $\poly(1/\delta)$ factor. Second, Lemma~\ref{lemma:oja_error_hajek_tail_bound} requires a stronger almost-sure upper bound on $A_i-\Sigma$ for $i\in [n]$. Nevertheless, we can obtain both a uniform bound over all coordinates $k \in [d]$ and a $\log(1/\delta)$ dependence in the sample complexity by using our median-of-means based algorithm (Algorithm~\ref{alg:variance_estimation}).
\end{remark}