\section{Utility Results}\label{appendix:utility_results}

\begin{lemma}\label{lemma:square_expansion_cs}
    For any integer $n \ge 2$, real $\eps \in (0,1)$, and reals $\left\{a_i\right\}_{i \in [n]}$, 
    \bas{
        (1-\epsilon)a_1^2 - \frac{n-1}{\epsilon} \sum_{i=2}^n a_i^2
        \le \Bigl(\sum_{i=1}^n a_i\Bigr)^2
        \le (1+\epsilon)a_1^2 + \frac{2(n-1)}{\epsilon} \sum_{i=2}^n a_i^2.
    }
\end{lemma}
\begin{proof}
    We begin by writing
    \ba{
        \Bigl(a_1+ \sum_{i=2}^n a_i\Bigr)^2
        = a_1^2 + 2a_1\Bigl(\sum_{i=2}^n a_i\Bigr) + \Bigl(\sum_{i=2}^n a_i\Bigr)^2. \label{eq:square_expansion}
    }
    By the Cauchy--Schwarz inequality,
    \ba{
         0 \le \Bigl(\sum_{i=2}^n a_i\Bigr)^2 \le (n-1)\sum_{i=2}^n a_i^2. \label{eq:cs_applicaition}
    }
    The cross-term can be bounded using the inequality
    \[
        -\epsilon x^2-\frac{1}{\epsilon}\,y^2 \le 2xy \le \epsilon x^2+\frac{1}{\epsilon}\,y^2
    \]
    with \(x=a_1\) and \(y=\sum_{i=2}^n a_i\) to get
    \[
        2a_1\Bigl(\sum_{i=2}^n a_i\Bigr)
        \ge -\epsilon a_1^2 - \frac{1}{\epsilon}\Bigl(\sum_{i=2}^n a_i\Bigr)^2 \geq -\epsilon a_1^2 - \frac{n-1}{\epsilon}\sum_{i=2}^n a_i^2,
    \]
    and
    \[
        2a_1\Bigl(\sum_{i=2}^n a_i\Bigr)
        \le \epsilon a_1^2 + \frac{1}{\epsilon}\Bigl(\sum_{i=2}^n a_i\Bigr)^2 \leq \epsilon a_1^{2} + \frac{n-1}{\epsilon}\sum_{i=2}^n a_i^2.
    \]
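    The claim follows by substituting the above inequalities into \eqref{eq:square_expansion} and bounding the remaining squared sum via \eqref{eq:cs_applicaition}: for the lower bound we use $\bigl(\sum_{i=2}^n a_i\bigr)^2 \ge 0$, and for the upper bound we use $\bigl(\sum_{i=2}^n a_i\bigr)^2 \le (n-1)\sum_{i=2}^n a_i^2$ together with $1 \le 1/\epsilon$, which gives $(n-1)\bigl(1 + \tfrac{1}{\epsilon}\bigr) \le \tfrac{2(n-1)}{\epsilon}$.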
\end{proof}

\begin{lemma}\label{lemma:entrywise_to_sin_squared} Let $\mathbb{V}$ be the asymptotic variance matrix defined in Lemma~\ref{lemma:second_moment_matrix}, and let $\voja$ be the Oja vector as defined in \eqref{eq:voja_def}. If the coordinate-wise bound
\bas{
    \Abs{e_i^{\top}\bb{\voja - \bb{v_1^{\top}\voja}v_1}} \lesssim C_{d,n}\sqrt{\frac{\mathbb{V}_{ii}}{n}}
}
holds for every $i \in [d]$, where $C_{d,n}^2$ hides logarithmic factors in $d, n$, then
\bas{
    \sin^{2}\bb{\voja, v_1} = \sum_{i \in [d]}\bb{e_i^{\top}\bb{\voja - \bb{v_1^{\top}\voja}v_1}}^{2} \lesssim C_{d,n}^2\frac{\Nu}{\bb{\eigengap}^{2}n},
}
where $\Nu$ is the matrix variance statistic defined in Assumption~\ref{assumption:bounded_moments}. 
\end{lemma}
\begin{proof}
    By the definitions of $\mathbb{V}$ and $R_0$ as in Lemma~\ref{lemma:second_moment_matrix},
    \bas{
    \sum_{i \in [d]}\bb{e_i^{\top}\bb{\voja - \bb{v_1^{\top}\voja}v_1}}^{2} &\lesssim C_{d,n}^2\frac{\Tr\bb{\mathbb{V}}}{n} \leq \bb{\frac{C_{d,n}^2}{\eigengap}}\frac{\Tr\bb{R_0}}{n} = \bb{\frac{C_{d,n}^2}{\eigengap}}\frac{1}{n}\sum_{2 \le k \le d}\frac{\widetilde{M}_{kk}}{2\bb{\lambda_{1}-\lambda_{k}}} \\
    &\leq \frac{C_{d,n}^2}{\bb{\eigengap}^{2}}\frac{\Tr\bb{\E\bbb{\vp\bb{A-\Sigma}v_1v_1^{\top}\bb{A-\Sigma}\vp^{\top}}}}{n} \\
    &=  \frac{C_{d,n}^2}{\bb{\eigengap}^{2}}\frac{\E\bbb{\Tr\bb{\vp\bb{A-\Sigma}v_1v_1^{\top}\bb{A-\Sigma}\vp^{\top}}}}{n} \\
    &=  \frac{C_{d,n}^2}{\bb{\eigengap}^{2}}\frac{v_{1}^{\top}\E\bbb{\bb{A-\Sigma}\vp\vp^{\top}\bb{A-\Sigma}}v_{1}}{n} \\
    &\leq  \frac{C_{d,n}^2}{\bb{\eigengap}^{2}}\frac{v_{1}^{\top}\E\bbb{\bb{A-\Sigma}^{2}}v_{1}}{n} \leq C_{d,n}^2\frac{\Nu}{\bb{\eigengap}^{2}n}.
    }
\end{proof}

\begin{lemma}[Choice of learning rate]\label{lemma:learning_rate_choice} Let $\eta_{n} := \frac{\alpha\log\bb{n}}{n\bb{\lambda_1-\lambda_2}}$ for $\alpha > 1$. Then, under Assumptions~\ref{assumption:bounded_moments} and~\ref{assumption:sample_size} 
% and $\frac{B}{\log\bb{B}} \geq \max\left\{\frac{2\lambda_{1}}{\eigengap}, \frac{2\mathcal{M}_{2}^{2}}{\bb{\eigengap}^{2}}\right\}$,
\begin{enumerate}
    \item $nd\exp\bb{-\eta_n n \bb{\eigengap}} = o\bb{1}$.
    \item $\max\left\{\eta_{n}, \frac{\log\bb{d}}{\eigengap}\right\}\frac{\Mtwo^{4}}{\eigengap}\eta_n^{2} = o\bb{1}$.
    \item $n\eta_n^2(2\lambda_1^2+\Mtwo^2) \leq 1$.
    % \item $\eta_B \le \frac{1}{2\lambda_1} + \frac{\eigengap}{2 \Mtwo^2}$ where $B$ represents the batch size in Algorithm~\ref{alg:variance_estimation}.
\end{enumerate}
\end{lemma}
\begin{proof}
The above conditions on $\eta_n$ imply Corollary~1 in~\cite{lunde2021bootstrapping}. We start with the first condition. We have
\bas{
    nd\exp\bb{-\eta_n n \bb{\eigengap}} \leq nd\exp\bb{-\alpha\log\bb{n}} = \frac{d}{n^{\alpha-1}} = o(1), \text{ using the bound on } d.
}
For the second condition, we first note that, since $n \geq \alpha\log\bb{n}$ as guaranteed by Assumption~\ref{assumption:sample_size},
\bas{
    \eta_n \leq \frac{\log\bb{d}}{\eigengap}.
}
Consequently, the maximum in the second condition is attained by $\frac{\log\bb{d}}{\eigengap}$, and it suffices to require
\bas{
\frac{\alpha^{2}\Mtwo^{4}\log^{2}\bb{n}\log\bb{d}}{n^2\bb{\eigengap}^{4}} = o(1)
}
which is again ensured by the condition on $n$ in Assumption~\ref{assumption:sample_size}.
\end{proof}

% \rd[PS:] match with shourya's notation in lemma A.11
% \bk

% \ojahoeffdingdecomposition*
% \begin{proof}
%     The proof follows from Corollary A.1 of \cite{lunde2021bootstrapping}.
% \end{proof}

% \begin{lemma}[Concentration of the norm] Let $B_{n}$ be as defined in \eqref{definition:Bn} and $u_{0} \in \mathcal{S}^{d-1}$ be any unit vector. Then, for some $C > 0$, and any $\epsilon > 0$, $\delta \in \bb{0,1}$, 
% \bas{
%     & \mathbb{P}\bb{\left|\frac{\norm{B_n u_0}_{2}}{|v_{1}^{\top}u_0|\bb{1+\eta\lambda_{1}}^{n}} - 1\right| \geq \epsilon} \\
%     & \;\;\;\;\;\;\;\;\;\; \leq \frac{\log\bb{\frac{1}{\delta}}}{\delta^{2}}\frac{d\exp\bb{-\eta n\bb{\lambda_{1}-\lambda_{2}} + \eta^{2}n\bb{\mathcal{M}+\lambda_{1}^{2}}} + \eta^{2}n\mathcal{M}\exp\bb{\eta^{2}n}}{4\epsilon^{2}\bb{1+\eta^{2}n\lambda_{1}^{2}}} + \frac{e^{2}\eta^{2}n\mathcal{V}\bb{1+\log\bb{d}}}{\epsilon^{2}} + C\delta
% }
% where $\mathcal{M} := \E\bbb{\norm{XX^{\top}-\Sigma}_{2}^{2}}$.
% \end{lemma}
% \begin{proof}
% The proof follows from Lemma B.2 of \cite{lunde2021bootstrapping}.
% \end{proof}

% \begin{lemma}[Negligibility of projection of $\vp$] Let $B_{n}$ be as defined in \eqref{definition:Bn} and $u_{0} \in \mathcal{S}^{d-1}$ be any unit vector. Then, for some $C > 0$, and any $\epsilon > 0$, $\delta \in \bb{0,1}$,
% \bas{
%     & \mathbb{P}\bb{\frac{1}{\sqrt{\eta}}\frac{\norm{\vp\vp^{\top}B_{n}\vp\vp^{\top}u_{0}}_{2}}{|v_{1}^{\top}u_0|\bb{1+\eta\lambda_{1}}^{n}} \geq \epsilon} \\
%     & \;\;\;\; \leq \enfourerror %\frac{\log\bb{\frac{1}{\delta}}}{\epsilon^{2}\delta^{2}}\bb{\frac{d\bb{\exp\bb{-2\eta\bb{\lambda_{1}-\lambda_{2}} + \eta^{2}n\bb{\lambda_{1}^{2}+\mathcal{M}}}} }{\eta} + \frac{e\eta^{2}n^{2}\mathcal{M}^{2}\bb{1+2\log\bb{d}}}{2n\bb{\lambda_{1}-\lambda_{2}} + \eta^{2}n^{2}\bb{\lambda_{1}^{2}-\lambda_{2}^{2}-\mathcal{M}^{2}}}} + C\delta
% }
% \end{lemma}
% \begin{proof}
%     The proof follows from Lemma B.3 of \cite{lunde2021bootstrapping}.
% \end{proof}



% \begin{lemma}[Negligibility of higher-order Hoeffding projection] Let $\beta_n := \eta^{2}n\mathcal{M}$ and suppose $\beta_{n} \in [0,1]$. Then, for some $C > 0$ and any $\epsilon > 0$, 
% \bas{
%     \mathbb{P}\bb{\frac{1}{\sqrt{\eta}}\frac{\norm{\vp\vp^{\top}\sum_{k > 1}T_{k}v_{1}}}{\bb{1+\eta\lambda_{1}}^{n}} > \epsilon} \leq \frac{C\beta_n n\eta}{\bb{1-\beta_{n}}\epsilon^{2}}
% }
% where $\left\{T_{k}\right\}_{k \in [n]}$ are defined as in Lemma~\ref{lemma:oja_hoeffding_decomposition}.
% \end{lemma}
% \begin{proof}
%     The proof follows from Lemma B.4 of \cite{lunde2021bootstrapping}.
% \end{proof}

% \begin{lemma}\label{lemma:Bn_moment_bound}
%     \rd Write down bound on $\E\|B_n-\E B_n\|^q$. Use it in different places. \bk
% \end{lemma}
% \begin{proof}
    
% \end{proof}

\begin{lemma}\label{lemma:MoM}
Let $t$ be a positive integer, $\delta \in (0,1)$, and let $I$ be an interval in $\mathbb{R}$. Suppose $a_1, a_2, \dots, a_t$ are independent random variables such that $P\bb{a_i \in I} \ge 3/4$ for every $i \in [t]$. Then, for $t \ge 8 \log\bb{1/\delta}$,
\bas{
P\bb{\median\bb{\left\{a_{i} \right\}_{i \in [t]}} \in I} \ge 1-\delta.
}
\end{lemma}

\begin{proof}
Since $I$ is an interval, the median lies in $I$ whenever more than half of the $a_i$ are in $I$. Let $b_i$ be the indicator that $a_i \notin I$, and let $B = \sum_{i \in [t]} b_i$. Then, $b_1, b_2, \dots, b_t$ are independent Bernoulli random variables each with mean at most $1/4$, so that $\E\bbb{B} \le t/4$. By Hoeffding's inequality and the assumption $t \ge 8\log\bb{1/\delta}$,
\bas{
P\bb{\median\bb{\left\{a_{i} \right\}_{i \in [t]}} \notin I} \le P\bb{B \ge t/2} \le \exp\bb{-2(t/2 - \E\bbb{B})^2/t} \le \exp\bb{-t/8} \le \delta. 
}
\end{proof}

% \begin{lemma}[Azuma-Bernstein Inequality]\label{lemma:azuma_bernstein_inequality}
% 	Let \( \{X_i\}_{i=1}^n \) be a martingale difference sequence with respect to the filtration \( \{\mathcal{F}_i\}_{i=0}^n \). Assume that for each \( i \):
% 	\begin{enumerate}
% 		\item \( \mathbb{E}[X_i \mid \mathcal{F}_{i-1}] = 0 \) (martingale difference property),
% 		\item \( |X_i| \leq M \) almost surely for some constant \( M > 0 \),
% 		\item \( \sum_{i=1}^n \mathbb{E}[X_i^2 \mid \mathcal{F}_{i-1}] \leq \sigma^2 \) almost surely.
% 	\end{enumerate}
% 	Then, for any \( \delta \in (0,1) \), with probability at least \( 1 - \delta \),
% 	\[
% 	\left| \sum_{i=1}^n X_i \right| \leq \sigma \sqrt{2 \ln\left(\frac{2}{\delta}\right)} + \frac{2}{3} M \ln\left(\frac{2}{\delta}\right).
% 	\]
% \end{lemma}

% \begin{proof}
% 	Let \( S_n = \sum_{i=1}^n X_i \) and fix \( \delta \in (0,1) \). We aim to bound \( |S_n| \) by bounding \( \mathbb{P}(|S_n| \geq t) \) for a suitable threshold \( t \).

% 	First, observe that:
% 	\[
% 	\mathbb{P}\left( |S_n| \geq t \right) = \mathbb{P}\left( S_n \geq t \right) + \mathbb{P}\left( S_n \leq -t \right).
% 	\]
% 	We will bound each probability separately and then apply the union bound.

% 	For any \( \lambda > 0 \),
% 	\[
% 	\mathbb{P}(S_n \geq t) = \mathbb{P}\left( e^{\lambda S_n} \geq e^{\lambda t} \right) \leq e^{-\lambda t} \mathbb{E}\left[ e^{\lambda S_n} \right].
% 	\]
% 	Similarly,
% 	\[
% 	\mathbb{P}(S_n \leq -t) = \mathbb{P}\left( e^{-\lambda S_n} \geq e^{\lambda t} \right) \leq e^{-\lambda t} \mathbb{E}\left[ e^{-\lambda S_n} \right].
% 	\]
% 	Thus,
% 	\[
% 	\mathbb{P}\left( |S_n| \geq t \right) \leq e^{-\lambda t} \left( \mathbb{E}\left[ e^{\lambda S_n} \right] + \mathbb{E}\left[ e^{-\lambda S_n} \right] \right).
% 	\]

% 	Next, we bound \( \mathbb{E}\left[ e^{\lambda S_n} \right] \) and \( \mathbb{E}\left[ e^{-\lambda S_n} \right] \). Consider \( \mathbb{E}\left[ e^{\lambda S_n} \right] \). Since \( S_n = \sum_{i=1}^n X_i \), we have:
% 	\[
% 	e^{\lambda S_n} = \prod_{i=1}^n e^{\lambda X_i}.
% 	\]
% 	Using the tower property of conditional expectation and the martingale difference property \( \mathbb{E}[X_i \mid \mathcal{F}_{i-1}] = 0 \),
% 	\[
% 	\mathbb{E}\left[ e^{\lambda S_n} \right] = \mathbb{E}\left[ \prod_{i=1}^n \mathbb{E}\left[ e^{\lambda X_i} \mid \mathcal{F}_{i-1} \right] \right].
% 	\]
	
% 	For each \( i \), since \( |X_i| \leq M \) and \( \mathbb{E}[X_i \mid \mathcal{F}_{i-1}] = 0 \), we can bound the conditional expectation using the Taylor series expansion of the exponential function:
% 	\[
% 	e^{\lambda X_i} \leq 1 + \lambda X_i + \frac{\lambda^2 X_i^2}{2} + \frac{\lambda^3 |X_i|^3}{6} e^{\lambda |X_i|}.
% 	\]
% 	Taking the conditional expectation and noting that \( \mathbb{E}[X_i \mid \mathcal{F}_{i-1}] = 0 \),
% 	\[
% 	\mathbb{E}\left[ e^{\lambda X_i} \mid \mathcal{F}_{i-1} \right] \leq 1 + \frac{\lambda^2 \mathbb{E}[X_i^2 \mid \mathcal{F}_{i-1}]}{2} + \frac{\lambda^3 M \mathbb{E}[|X_i|^3 \mid \mathcal{F}_{i-1}]}{6}.
% 	\]
% 	Since \( |X_i| \leq M \), \( \mathbb{E}[|X_i|^3 \mid \mathcal{F}_{i-1}] \leq M \mathbb{E}[X_i^2 \mid \mathcal{F}_{i-1}] \). Therefore,
% 	\[
% 	\mathbb{E}\left[ e^{\lambda X_i} \mid \mathcal{F}_{i-1} \right] \leq 1 + \mathbb{E}[X_i^2 \mid \mathcal{F}_{i-1}] \left( \frac{\lambda^2}{2} + \frac{\lambda^3 M}{6} \right).
% 	\]
	
% 	Using the inequality \( 1 + a \leq e^{a} \) for \( a \geq 0 \),
% 	\[
% 	\mathbb{E}\left[ e^{\lambda X_i} \mid \mathcal{F}_{i-1} \right] \leq \exp\left( \frac{\lambda^2 \mathbb{E}[X_i^2 \mid \mathcal{F}_{i-1}]}{2} + \frac{\lambda^3 M \mathbb{E}[X_i^2 \mid \mathcal{F}_{i-1}]}{6} \right).
% 	\]
	
% 	To simplify, assume \( \lambda M \leq 1 \). Then,
% 	\[
% 	\frac{\lambda^3 M}{6} \leq \frac{\lambda^2 M^2}{6},
% 	\]
% 	and since \( \sum_{i=1}^n \mathbb{E}[X_i^2 \mid \mathcal{F}_{i-1}] \leq \sigma^2 \),
% 	\[
% 	\mathbb{E}\left[ e^{\lambda S_n} \right] \leq \exp\left( \frac{\lambda^2 \sigma^2}{2(1 - \lambda M /3)} \right).
% 	\]
	
% 	Similarly, for \( \mathbb{E}\left[ e^{-\lambda S_n} \right] \),
% 	\[
% 	\mathbb{E}\left[ e^{-\lambda S_n} \right] \leq \exp\left( \frac{\lambda^2 \sigma^2}{2(1 - \lambda M /3)} \right).
% 	\]
	
% 	Substituting these bounds into the probability inequality,
% 	\[
% 	\mathbb{P}\left( |S_n| \geq t \right) \leq 2 \exp\left( -\lambda t + \frac{\lambda^2 \sigma^2}{2(1 - \lambda M /3)} \right).
% 	\]
	
% 	To minimize the exponent \( -\lambda t + \frac{\lambda^2 \sigma^2}{2(1 - \lambda M /3)} \), choose
% 	\[
% 	\lambda = \frac{3t}{2\sigma^2 + Mt}.
% 	\]
% 	This choice satisfies \( \lambda M = \frac{3tM}{2\sigma^2 + Mt} \leq \frac{3}{2} \) provided \( t > 0 \).

% 	Substituting \( \lambda \) back into the exponent,
% 	\[
% 	-\lambda t + \frac{\lambda^2 \sigma^2}{2(1 - \lambda M /3)} = -\frac{t^2}{2\sigma^2 + \frac{2}{3}Mt}.
% 	\]
	
% 	Thus,
% 	\[
% 	\mathbb{P}\left( |S_n| \geq t \right) \leq 2 \exp\left( -\frac{t^2}{2\sigma^2 + \frac{2}{3}Mt} \right).
% 	\]
	
% 	To ensure \( \mathbb{P}\left( |S_n| \geq t \right) \leq \delta \), set
% 	\[
% 	2 \exp\left( -\frac{t^2}{2\sigma^2 + \frac{2}{3}Mt} \right) \leq \delta.
% 	\]
% 	Taking natural logarithms,
% 	\[
% 	-\frac{t^2}{2\sigma^2 + \frac{2}{3}Mt} \leq \ln\left( \frac{\delta}{2} \right),
% 	\]
% 	which implies
% 	\[
% 	t^2 \geq \left( 2\sigma^2 + \frac{2}{3}Mt \right) \ln\left( \frac{2}{\delta} \right).
% 	\]
	
% 	This is a quadratic inequality in \( t \). Solving for \( t \), we obtain
% 	\[
% 	t \geq \sigma \sqrt{2 \ln\left( \frac{2}{\delta} \right)} + \frac{2}{3} M \ln\left( \frac{2}{\delta} \right).
% 	\]
	
% 	Hence, with probability at least \( 1 - \delta \),
% 	\[
% 	|S_n| \leq \sigma \sqrt{2 \ln\left( \frac{2}{\delta} \right)} + \frac{2}{3} M \ln\left( \frac{2}{\delta} \right).
% 	\]
% 	This completes the proof.
% \end{proof}