\section{Proof Techniques}\label{sec:proof_techiniques}

% \rd
% Put a plan that remainder are small in norm. And Psi-1 requires careful bounding --- Put Lemma A.9 here.

% Say T-n-k requires strong tail bounds -- Put Martingale decomposition and T-n-k, k>2
% Lemma A.11. It follows from p,q martingale - mention the martingale structure+rachel's smooth blah.
% \bk
Let $\voja \sim \Oja\bb{\mathcal{D}_{\ell,j}, \eta_{n}, u_0}$ for uniform unit vector $u_0$ and $\tilde{v} \sim \Oja\bb{\mathcal{D}_{\ell,j}, \eta_{N}, u_0}$. To estimate the uncertainty of the estimator, the residual vector $\troja \defeq \voja - (\vmain^{\top} \voja) \vmain$ is decomposed as the sum of five terms, as stated in Lemma~\ref{lemma:oja_error_decomposition}.
% \begin{restatable}{lemma}{ojahoeffdingdecomposition}[Hoeffding decomposition for Oja's Algorithm]\label{lemma:oja_hoeffding_decomposition} 
Proposition A.1 in~\cite{lunde2021bootstrapping} shows that $B_n$, defined in \eqref{definition:Bn}, can be written as
\ba{\label{eq:hoeffding-main}
    B_{n} &= \sum_{k=0}^{n} T_{n,k},
}
where
\ba{
T_{n,k} &\defeq \sum_{S \subseteq [n], |S| = k}\ \ \prod_{i=1}^{n}M_{S,n+1-i}, \label{eq:tnk} \text{ and} \\
M_{S,i} &\defeq \begin{cases}
        \eta_n\bb{X_{i}X_{i}^{\top}-\Sigma} \; \text{ if } i \in S, \\
        I + \eta_n\Sigma \; \text{ if } i \notin S.
    \end{cases}
}

The term $T_{n,1}$ is called the Hájek projection of the random variable $B_n$ on the random variables $X_1, \dots, X_n$. $T_{n,1}$ is the best approximation to $B_n$ among the estimators that can be written as the sum of independent random vectors and satisfy certain integrability conditions. Moreover,
\begin{itemize}
    \item $T_{n,k}$ and $T_{n,j}$ are uncorrelated for all $k \neq j$, and
    \item the summands in $T_{n,k}$ are also pairwise uncorrelated.
\end{itemize}

We exploit this structure of the Hoeffding decomposition to decompose the residual vector $\troja$. 
%\end{restatable}

\begin{restatable}{lemma}{ojaerrordecomposition}[Error Decomposition of $\voja$]\label{lemma:oja_error_decomposition} Let $\voja, \tilde{v}$ be defined as in \eqref{eq:voja_def} and \eqref{eq:vtilde} respectively. Then, 
\ba{\label{eq:ojadecomp}
    \voja - (\vmain^{\top} \voja) \vmain = \Ezero{n} + \Eone{n} + \Etwo{n} + \Ethree{n} + \Efour{n},
}
where
\begin{align}\label{eq:ojadecomperror}
    &\Ezero{n} := (v_{1}^{\top}\voja)v_{1}-(\vmain^{\top} \voja) \vmain, \notag \\ &\Eone{n} := \frac{\vp\vp^{\top}T_{n,1}v_{1}\sign(v_{1}^{\top}u_{0})}{(1+\eta_n\lambda_{1})^{n}}, \notag \\ &\Etwo{n} := \frac{\vp\vp^{\top}(\sum_{k\geq 2}T_{n,k})v_{1}\sign(v_{1}^{\top}u_{0})}{(1+\eta_n\lambda_{1})^{n}}, \notag \\
    &\Ethree{n} := \vp\vp^{\top}B_{n}u_{0}\bb{\frac{1}{\norm{B_{n}u_0}_{2}} - \frac{1}{\Abs{v_{1}^{\top}u_{0}}(1+\eta_n\lambda_{1})^{n}}}, \notag\\
    &\Efour{n} :=  \frac{\vp\vp^{\top}B_{n}\vp\vp^{\top}u_{0}}{\Abs{v_{1}^{\top}u_{0}}(1+\eta_n\lambda_{1})^{n}}.
\end{align}
\end{restatable}
We bound the variance of each of these terms separately. The dominating term $\Eone{n}$ corresponding to the Hájek projection $T_{n,1}$ has the largest variance. Recall from Lemma~\ref{lemma:second_moment_matrix} that
\bas{
\Abs{\E\bbb{\bb{e_{k}^{\top}\Eone{n}}^{2}}- \eta_n \lambda_1 \V_{kk}} \le \tilde{O}\bb{\frac{1}{n^2}}.
}
% {\rd Without an $M_4$ assumption we can get a constant factor approximation? \bk}
A finer analysis is needed for this term than for the other residual terms in~\eqref{eq:ojadecomperror}. To do this, we bound the variance of $\bb{e_k^{\top} \Eone{n}}^2$. Lemma~\ref{lemma:en1_variance_bound} shows that $\sqrt{\Var((e_k^\top \Eone{n})^2)}$ is within a constant factor of $\E[(e_k^\top \Eone{n})^2] = \tilde{O}(1/n)$, up to an additive error term $\tilde{O}(1/n^{3/2})$ that depends polynomially on model parameters.
% Note that $\E[\bb{e_k^{\top} \Eone{n}}^2]$ scales as $\tilde{O}(1/n)$. 
\begin{lemma}[Variance of the Hájek projection]\label{lemma:en1_variance_bound} Let $\Eone{n}$ be defined as in Lemma~\ref{lemma:oja_error_decomposition}. Then, 
\bas{
\sqrt{\Var\bb{(e_k^\top \Eone{n})^2}} \leq \sqrt{2} \E\bbb{\bb{e_k^\top \Eone{n}}^2} + \tilde{O}\bb{\frac{1}{n^{3/2}}}.
}
\end{lemma}
The three terms $\Etwo{n}, \Ethree{n},$ and $\Efour{n}$ are lower order terms.
\begin{lemma}[Bound on lower order terms]\label{lemma:en234_variance_bound} Let $\Etwo{n}$, $\Ethree{n}$, and $\Efour{n}$ be defined as in Lemma~\ref{lemma:oja_error_decomposition}. Then, 
\bas{
\E\bbb{\bb{e_k^\top \Etwo{n}}^2+\bb{e_k^\top \Ethree{n}}^2+\bb{e_k^\top \Efour{n}}^2} = \tilde{O}\bb{\frac{1}{n^{2}}}.
}
\end{lemma}
The bound on the error term $e_k^{\top} \Etwo{n}$ stems from a more general analysis of the terms $T_{n,k}$ in the Hoeffding decomposition of $B_n$. Lemma~\ref{lemma:higher-order-norm-main} is shown by exploiting the martingale structure of $T_{n,k}$ and using norm inequalities~\citep{huang2022matrix} to compare the operator norm with the $\vertiii{.}_{p,q}$ norm.

\begin{lemma}
\label{lemma:higher-order-norm-main} Let $T_{n,k}$ be as defined in equation~\eqref{eq:tnk}. For any $2 \le q \le 4 \log d$, let $\mathcal{M}_q$ be such that $\E\bbb{\norm{A_i -\Sigma}^q}^{1/q} \le \mathcal{M}_q$ and $\eta_n \mathcal{M}_q \sqrt{n \log d} \lesssim 1$. Then, for any $j \in [n]$ and $\delta \in (0,1)$, with probability at least $1-\delta$,
\bas{
\norm{\sum_{k \ge j} T_{n,k}} \le 
\frac{3 (1+\eta_n \lambda_1)^n \bb{\eta_n \mathcal{M}_q \sqrt{4n \log d}}^j}{\delta^{\frac{1}{4\log d}}}.
}
\end{lemma}

\begin{proof}[Proof sketch]
    Let $\mathcal{S}_{n,k}$ be the set of subsets of $[n]$ of size $k$. 
    % Consider a general product of $n$ matrices, where all but $k$ of the matrices are constant, and $k$ indexed by the subset $S$ are mean zero independent random matrices. With slight abuse of notation, let $M_{S,i}$ denote a constant matrix $M_i$ with $\|M_i\|=:m_i$ when $i\not\in S$ and $W_i$ when $i\in S$, $EW_i=0$, $W_i,i=1,\dots,n$ are mutually independent.
    % \begin{align}
    % T_{n,k}:=\sum_{S\in \mathcal{S}_{n,k}}\prod_{i=1}^n M_{S,n+1-i}
    % \end{align}
    \bas{
    T_{n,k} %&= \sum_{S \in \mathcal{S}_{n,k}} \prod_{i=1}^n M_{S,n+1-i} \\
    &= (I+\eta_n \Sigma)T_{n-1,k} + \eta_n(A_n - \Sigma)T_{n-1,k-1}.
    }
    Proposition 4.3 of~\cite{huang2022matrix} implies
    \bas{
    \vertiii{T_{n,k}}_{p,q}^2 & \le \vertiii{(I+\eta_n \Sigma) T_{n-1,k}}_{p,q}^2 \\
    &\;\; + (p-1)\vertiii{\eta_n(A_n-\Sigma) T_{n-1,k-1}}_{p,q}^2
    }
    as long as $\E\bbb{\eta_n(A_n - \Sigma)T_{n-1,k-1} \mid (I+\eta_n \Sigma)T_{n-1,k}} = 0$, which is true due to $A_1, A_2, \dots, A_n$ being mutually independent. Solving the recurrence shows the bound.
\end{proof}

The term $\Ezero{n}$ arises in the decomposition~\eqref{eq:ojadecomperror} because we use $\vmain$ as a proxy to $v_1$ in Algorithm~\ref{alg:variance_estimation}.
\begin{lemma}[Variance of Approximating $v_1$]\label{lemma:enzero_variance_bound} Let $\Ezero{n}$ be defined as in Lemma~\ref{lemma:oja_error_decomposition}. Then, $
\E\bbb{\bb{e_k^\top \Ezero{n}}^2} = \tilde{O}\bb{\frac{1}{N}},
$ where $\tilde{v}$ (Eq.~\eqref{eq:vtilde}) uses $N$ samples.
\end{lemma}
% \rd
% TODO: Elaborate on how these error terms connect to Theorem 3.2.
% \bk
Theorem~\ref{thm:high_prob_error_bound} follows by combining all these bounds. See Appendix~\ref{sub_appendix:uncertainty} for a complete argument.
% \rd write something about Martingale stuff? Not sure how to write it naturally. \bk






