\section{Omitted proofs and results}
\subsection{Section~\ref*{sec:general}}
\subsubsection{Proof of Lemma~\ref*{lem:kmeans}}

\begin{proof}[Proof of Lemma~\ref{lem:kmeans}]
%Let $A_1,\dots,A_k$ be the partition of $V$ returned by ~\ref*{alg:spectral} and $S_$ 
% which means
% \[
% \sum_{j=1}^k \sum_{u \in A_j} d(u) \| \Tilde{F}(u,.) -  m_j \|^2 \le U.
% \]
Let $c_i = \frac{1}{\vol(A_i)} \sum_{u \in A_i} d(u) \Tilde{F}(u,.)$ be the (degree-weighted) centroid of $A_i$ for $i=1,\dots,k$. Let $c \colon V \to \mathbb{R}^{k}$ be a map from a vertex to its corresponding centroid, i.e., $c(u) = c_i$ if $u \in A_i$.

Since $A_1,\dots,A_k$ is a $(1+\alpha)$-approximation of the optimal $k$-means partition, we have that
\begin{align}
\sum_{j = 1}^k \sum_{u \in A_j}d(u) \|\Tilde{F}(u,.) - c_j\|^2 
& = \sum_{j = 1}^k \sum_{u \in S_j} d(u)\|\Tilde{F}(u,.) -  c(u)\|^2 \nonumber \\
& \leq (1+\alpha) \sum_{j = 1}^k \sum_{u \in S_j}d(u) \|\Tilde{F}(u,.) - \mu_j\|^2 \nonumber \\
& \leq (1+\alpha) \sum_{j = 1}^k \sum_{u \in S_j} \|F(u,.) - G(u,.)\|^2 \leq (1+\alpha)U. \label{eq:ubound}
\end{align}

By applying the triangle inequality and the simple inequality $(x-y)^2 \ge \frac{x^2}{2} - y^2$, we obtain the following:
 \begin{align*}
\sum_{j = 1}^k \sum_{u \in S_j} d(u)\|\Tilde{F}(u,.) - c(u)\|^2 
& \geq \sum_{j = 1}^k \sum_{u \in S_j}d(u) (\|\mu_j - c(u)\| - \|\Tilde{F}(u,.) - \mu_j\|)^2 \\
& \geq \sum_{j = 1}^k \sum_{u \in S_j} \frac{1}{2}d(u)\|\mu_j - c(u)\|^2 - \sum_{j = 1}^k \sum_{u \in S_j} d(u)\|\Tilde{F}(u,.) -  \mu_j\|^2 \\
& \geq \sum_{j = 1}^k \sum_{u \in S_j} \frac{1}{2}d(u)\|\mu_j - c(u)\|^2 - U,
\end{align*}
where the last inequality follows from Equation~\ref{eq:ubound}. 

% Rearranging, we have that
% \begin{equation} \label{eq:means_bound}
% \sum_{j = 1}^k \sum_{u \in S_j} d(u)\|\mu_j - c(u)\|^2 \leq 4U.
% \end{equation}

By the assumption \(\|\mu_i - \mu_j\| \geq \mathcal{D}\) for all \(i \ne j\), it holds that \(\|c_i - c_j\| \geq \mathcal{D}/2\). Otherwise, there would be \(\mu_\ell\) such that \(\|\mu_\ell - c_i\| \geq \mathcal{D}/2\) for any \(i\), but this would violate our assumption on $U$. Indeed, assume by contradiction that \(\|\mu_\ell - c_i\| \geq \mathcal{D}/2\) for any \(i\). Then,
\begin{align*}
(1+\alpha) U &\ge \sum_{j = 1}^k \sum_{u \in S_j} d(u)\|\tilde{F}(u,.) - c(u)\|^2 \\ 
&\ge \frac{1}{2}\sum_{u \in S_\ell} d(u)\|\mu_\ell - c(u)\|^2 - U \\
&\ge \frac{\mathcal{D}^2}{8} \cdot \vol(S_\ell) - U.
\end{align*}

Therefore, $U \ge \frac{\mathcal{D}^2 \cdot \vol(S_\ell)}{8(2+\alpha)}$, contradicting our assumption on $U$.
\color{black}


We say that a vertex $u \in S_j$ is \emph{misclassified} if $\|c(u) - \mu_j\| \ge \mathcal{D}/2$. Let $\sigma \colon [k] \to [k]$ be the permutation of the indices minimising the volume of the symmetric difference $\sum_{i=1}^k \vol(S_i \setminus A_{\sigma(i)} \cup A_{\sigma(i)} \setminus S_i)$. Notice that we can obtain an upper bound on this volume by bounding the volume of misclassified vertices. We can upper bound the latter by noticing that, whenever we misclassify a vertex, we pay a price of $\Omega(\mathcal{D}^2)$ in the $k$-means cost. Noting this, we obtain the following bound:
\begin{align*}
\sum_{j = 1}^k \sum_{u \in S_j} d(u)\|\Tilde{F}(u,.) - c(u)\|^2 
& \geq \sum_{j = 1}^k \sum_{u \in S_j} \frac{1}{2}d(u)\|\mu_j - c(u)\|^2 - U \\
& \geq \frac{\mathcal{D}^2}{8}  \text{vol}(\text{misclassified vertices}) - U.
\end{align*}


Together with Equation~\ref{eq:ubound}, this implies that
\[
\min_{\sigma} \sum_{i=1}^k \vol(S_i \setminus A_{\sigma(i)} \cup A_{\sigma(i)} \setminus S_i) \le \text{vol}(\text{misclassified vertices}) \leq \frac{8(2+ \alpha)U}{\mathcal{D}^2}.
\]
\end{proof}


In this section we prove the results from Section~\ref*{sec:general}.
For the following proofs, we must first define the matrices \(Q,R \in \mathbb{C}^{N \times N}\) which map between \(F\) and \(G\). Precisely,
\begin{align*}
    G = FQ \\
    F = GR.
\end{align*}
In the following lemma we prove some basic properties of \(Q\) and \(R\).
\begin{lem}\label{lem:PropertiesOfQ}
    We have the following basic properties of \(Q\):
\begin{enumerate}
    \item \(Q\) is orthogonal.
    \item \(R = Q^*\).
\end{enumerate}
\end{lem}
\begin{proof}
    As \(F\) is orthogonal, we have that \(Q = F^*G\). Noting that \(G\) is also orthogonal, we have that
    \[ Q^* Q = (F^*G)^*(F^*G) = G^*FF^*G = I\]
    and similarly, \(QQ^* = I\) so \(Q\) is orthogonal.

    For 2., clearly,
    \[R = G^*F = Q^*\]
    as \(G\) is orthogonal.
\end{proof}

\subsubsection{Proof of Theorem~\ref*{thm:general} and Corollary~\ref*{cor:structure}}
Below we use Lemma~\ref{lem:PropertiesOfQ} to prove our Theorem~\ref{thm:general}.% We restate the result for convenience.
%\general*

\begin{proof}[Proof of Theorem~\ref*{thm:general}]

    As \(G = F Q\), this implies that \(g_i = \sum_{j=1}^N Q_{ij} f_j\). We choose \(\hat{f}_i = \sum_{j=1}^k Q_{ij} f_j\).  Now notice that,
    
    \[\gamma_i = \bar{g}_i^* M \bar{g}_i = \sum_{j=1}^N |Q_{ij}|^2 \lambda_j.\]
    As \(Q\) is orthogonal, \(\sum_{j=1}^N |Q_{ij}|^2 = 1\). This coupled with the assumption that \(\lambda_1 \leq \lambda_2 \leq \hdots \leq \lambda_N\) provides the following bound:
    
    \begin{align*}
        \gamma_i = \sum_{j=1}^N |Q_{ij}|^2 \lambda_j & \geq \lambda_1  \sum_{j=1}^k |Q_{ij}|^2 + \lambda_{k+1} (1- \sum_{j=1}^k |Q_{ij}|^2)
        \\  \Rightarrow \gamma_i & \geq \sum_{j=1}^k |Q_{ij}|^2 (\lambda_1 - \lambda_{k+1}) + \lambda_{k+1}
    \end{align*}
    Rearranging for \(\sum_{j=1}^k|Q_{ij}|^2\), we get the following:
    \[\sum_{j=1}^k |Q_{ij}|^2 \geq \frac{ \lambda_{k+1} - \gamma_i}{\lambda_{k+1}- \lambda_1}\]
    
    It follows that 
    \[1 - \sum_{j=1}^k |Q_{ij}|^2 \leq \frac{\gamma_i - \lambda_1}{\lambda_{k+1} - \lambda_1}.\]
    As \(F = G R\), \(f_i = \sum_{j=1}^N R_{ij} g_j\). We choose \(\hat{g}_i = \sum_{j=1}^k R_{ij} g_j\).
    Now notice that,
    \[\|f_i - \hat{g}_i\|^2 = 1 - \sum_{j=1}^k |R_{ij}|^2.\]
    Summing over \(i=1, \hdots, k\) gives the following.
    \[\sum_{i=1}^k \|f_i - \hat{g}_i\|^2 = k - \sum_{i=1}^k \sum_{j=1}^k |R_{ij}|^2\]
    Recall from Lemma~\ref{lem:PropertiesOfQ} that \(Q^* = R\) so \( \sum_{i=1}^k \sum_{j=1}^k |R_{ij}|^2 = \sum_{i=1}^k \sum_{j=1}^k |Q_{ij}|^2\). So, it follows that:
    \[\sum_{i=1}^k \|f_i - \hat{g}_i\|^2 \leq \sum_{i=1}^k \frac{\gamma_i - \lambda_1}{\lambda_{k+1} - \lambda_1} .\]

\end{proof}

By a particular choice of \(\{g_i\}_{i=1}^k\) and \(M\) in Theorem~\ref{thm:general}, we obtain the structure theorem from \cite{macgregor2022tighter}.
%\structure*

\begin{proof}[Proof of Corollary~\ref*{cor:structure}]
     With the choice of \(g_i = \frac{D^{1/2}\chi_i}{\|D^{1/2}\chi_i\|}\) for \(i=1,\hdots,k\), we have the following:
    \begin{align*}
        \gamma_i & = g_i^* \mathcal{L} g_i \\
        & = \frac{\chi_i^*(D-A)\chi_i}{\chi_i^*D \chi_i} \\
        & = \frac{E(S_i, V - S_i)}{\text{vol}(S_i)} \\
        & \leq \max_{i=1, \hdots, k} \frac{E(S_i, V - S_i)}{\text{vol}(S_i)} = \rho(k)
    \end{align*}
Applying Theorem~\ref{thm:general} with \(M = \mathcal{L}\) and noting that \(\lambda_1 = 0\) completes the proof. 
\end{proof}


\subsubsection{Proof of Theorem~\ref*{thm:rec} and Corollary~\ref*{cor:RemoveFirstEvec}}
%\recursive*
\begin{proof}[Proof of Theorem~\ref*{thm:rec}]
     Since \(f_1, \hdots, f_N\) form an orthonormal basis, we can write that \(g_i = \sum\limits_{j=1}^N Q_{ij}f_j\) and we choose \(\hat{f}_i = \sum\limits_{j=1}^q Q_{ij}f_j\) for \(i = 1, \hdots, q\) (summing only up to \(q\) by assumption) and \(\hat{f}_i = \sum\limits_{j=1}^k Q_{ij}f_j\) for \(i = q+1, \hdots, k\). Now let \(i\) be some index between \(q+1\) and \(k\),
    \begin{align*}
        \gamma_i = g_i^*Mg_i = \sum_{j=1}^N \lambda_{j} |Q_{ij}|^2 & \geq \lambda_1 \sum_{j=1}^{q} |Q_{ij}|^2 + \lambda_{q + 1} \sum_{j = q + 1}^{k} |Q_{ij}|^2 + \lambda_{k+1} \sum_{j = k+1}^N |Q_{ij}|^2 \\
        & = \lambda_1 \sum_{j=1}^{q} |Q_{ij}|^2  + \lambda_{q + 1} \sum_{j = q + 1}^{k} |Q_{ij}|^2 + \lambda_{k + 1}\left(1 -  \sum_{j = q + 1}^{k} |Q_{ij}|^2 -  \sum_{j=1}^{q} |Q_{ij}|^2 \right)
    \end{align*}
Rearranging for \(\sum\limits_{j=q + 1}^{k} |Q_{ij}|^2\), we get:

\begin{align*}
\sum\limits_{j=q + 1}^{k} |Q_{ij}|^2 & \geq \frac{\lambda_{k+ 1}\left( 1 - \sum\limits_{j=1}^{q} |Q_{ij}|^2 \right) - \gamma_i  + \lambda_1 \sum\limits_{j=1}^{q} |Q_{ij}|^2}{\lambda_{k + 1} - \lambda_{q + 1}} \\
& \geq \frac{\lambda_{k + 1}\left( 1 - \sum\limits_{j=1}^{q} |Q_{ij}|^2 \right) - \gamma_i}{\lambda_{k + 1} - \lambda_{q + 1}}.
\end{align*}

Next, we sum over \(i=q+1, \hdots, k\) so we have that:
\begin{align*}
\sum\limits_{i= q + 1}^{k}\sum\limits_{j=q + 1}^{k} |Q_{ij}|^2
& \geq \frac{\lambda_{k + 1}\left( (k-q) - \sum\limits_{i= q + 1}^{k}\sum\limits_{j=1}^{q} |Q_{ij}|^2 \right) - \sum\limits_{i= q + 1}^{k}\gamma_i}{\lambda_{k + 1} - \lambda_{q + 1}}.
\end{align*}
Since \(\hat{f}_i \in \spn\{f_1, \hdots, f_q\}\) for \(i \in \{1, \hdots, q\}\), the sum in the brackets can be bounded as follows:
\[\sum\limits_{i= q + 1}^{k}\sum\limits_{j=1}^{q} |Q_{ij}|^2 \leq  \sum\limits_{i= q + 1}^{N}\sum\limits_{j=1}^{q} |Q_{ij}|^2 = q - \sum\limits_{i= 1}^{q}\sum\limits_{j=1}^{q} |Q_{ij}|^2 = \sum_{i=1}^q \|\hat{f}_i - g_i\|^2 \]
So,
\[
\sum\limits_{i= q + 1}^{k}\sum\limits_{j=q + 1}^{k} |Q_{ij}|^2
\geq \frac{\lambda_{k + 1}\left( (k-q) -  \sum\limits_{i=1}^{q} \|\hat{f}_i - g_i\|^2 \right) - \sum\limits_{i= q + 1}^{k}\gamma_i}{\lambda_{k + 1} - \lambda_{q + 1}}
\]
Now, noting that \(Q\) is orthogonal and therefore its rows are unit length, it follows that:
\[\sum\limits_{i= q + 1}^{k} \|\hat{f}_i - g_i\|^2 = 
(k-q) - \sum\limits_{i= q + 1}^{k}\sum\limits_{j= 1}^{k}|Q_{ij}|^2 \leq (k-q) - \sum\limits_{i= q + 1}^{k}\sum\limits_{j= q+1}^{k}|Q_{ij}|^2\]
Therefore,
\[\sum\limits_{i= q + 1}^{k} \|\hat{f}_i - g_i\|^2 \leq \frac{\sum \limits_{i=q+1}^k \gamma_i - (k-q)\lambda_{q+1} + \lambda_{k+1}\sum\limits_{i=1}^{q} \|\hat{f}_i - g_i\|^2 }{\lambda_{k + 1} - \lambda_{q + 1}}.\]
Now, we show the same bound for \(\sum\limits_{i= q + 1}^{k} \|f_i - \hat{g}_i\|^2\). Choosing \(\hat{g}_i = \sum\limits_{j=1}^{q} R_{ij}g_j\) for \(i = 1, \hdots, q\), Lemma~\ref{lem:PropertiesOfQ} implies that
\[\sum\limits_{i=1}^{q} \|\hat{f}_i - g_i\|^2 = \sum\limits_{i=1}^{q} \|f_i - \hat{g}_i\|^2\]
as \(Q = R^*\).
Similarly, choosing \(\hat{g}_i = \sum\limits_{j=1}^{k} R_{ij}g_j\) for \(i = q+1, \hdots, k\) implies that
\[\sum\limits_{i=q+1}^{k} \|\hat{f}_i - g_i\|^2 = \sum\limits_{i=q+1}^{k} \|f_i - \hat{g}_i\|^2\]
which completes the proof.
\end{proof}


%\RemoveFirstEvec*


\begin{proof}[Proof of Corollary~\ref*{cor:RemoveFirstEvec}]
    Choosing \(g_1 = f_1\) and remaining indicator vectors \(g_2, \hdots, g_k\) all orthonormal means the conditions for Theorem~\ref{thm:rec} are satisfied with \(q=1\). 
    \(\hat{g}_1 = g_1 = f_1\)  means that:
    \begin{enumerate}
        \item \(\|\hat{g}_1 - f_1\|^2 = 0\)
        \item \(\sum\limits_{i=2}^k \|f_i - \hat{g}_i\|^2 \leq \frac{\sum_{i=2}^k (\gamma_i - \lambda_2)}{\lambda_{k+1} - \lambda_2} \)
    \end{enumerate}
    
    which completes the proof.
\end{proof}





\subsubsection{Spectral Clustering performance on SBMs}

Using Corollary~\ref*{cor:RemoveFirstEvec} we have a result regarding the performance of Spectral Clustering on SBMs.

We generate $\mathcal{G} \sim SBM(n,k,p,q)$ as follows: we divide $kn$ vertices into $k$ communities $S_1,\dots,S_k$ of equal size $n$. For any $u \in S_i, v \in S_j$, we place an edge between $u$ and $v$ with probability $p$ if $i=j$, with probability $q$ otherwise. We recover the following well-known result.
\newline
\begin{corollary}
\label{cor:sbm}
Let \(\mathcal{G} \sim SBM(n,k,p,q)\) for some \(k \geq 2\). Choose \(M = L\) and let \(g_1, \hdots, g_k \in \mathbb{C}^{nk}\) be the first \(k\) eigenvectors of \(\mathbb{E}(L)\). Assume  $p-q \ge 40 \sqrt{pk\log(kn)/n}$. Then, with high probability,

\[
\sum_{i=2}^k \|f_i - \hat{g}_i\|^2 = \mathcal{O}\left(\frac{k}{p-q} \sqrt{\frac{pk\log(kn)}{n}} \right).
\]
\end{corollary}

Corollary~\ref{cor:sbm} together with Lemma~\ref{lem:kmeans} implies Spectral Clustering %using the bottom $k$ eigenvectors of \(\mathcal{G} \sim SBM(n,k,p,q)\) 
misclassifies vertices with a volume of at most $\mathcal{O}\left(\frac{k^2}{p-q} \sqrt{p(kn)\log(kn)} \right)$. We note that whilst this is not an improvement on what can be achieved with results from perturbation analysis such as the Davis-Kahan theorem \citep{davis1970rotation}, the original structure theorem achieves merely a constant bound for SBMs. 

In order to prove Corollary~\ref*{cor:sbm}, we require some prerequisite results.
\newline
\begin{thm} [\cite{chung2011spectra}] \label{thm:Chung bound}
    Let \(X_1, X_2,\hdots, X_m\) be independent random \(d \times d\) Hermitian matrices. Moreover, assume that \(\|X_j - \mathbb{E}(X_j) \| \leq M\) for all \(j\), and let \(\sigma^2  = \|\sum_{j=1}^m \mathbb{E}((X_j - \mathbb{E}(X_j))^2)\|\). Let \(X = \sum_{j=1}^m X_j\). Then, for any \(a>0\), it holds that:
    \[
    \mathbb{P}(\|X - \mathbb{E}(X)\| >a) < 2d \exp\left(\frac{-a^2}{2 \sigma^2 + 2Ma/3}\right)
    \]
\end{thm}

\begin{thm}[Courant-Fischer \citep{horn2012matrix}] \label{Courant-Fischer}
Let \( A \) be a Hermitian matrix in \( \mathbb{C}^{N \times N} \). The eigenvalues \( \lambda_1, \lambda_2, \ldots, \lambda_N \) of \( A \), arranged in non-increasing order (\( \lambda_1 \geq \lambda_2 \geq \ldots \geq \lambda_N \)), can be characterized by the Courant-Fischer min-max principle as follows:

For \( k = 1, 2, \ldots, N \):
\[
\lambda_k = \max_{\substack{S \subseteq \mathbb{C}^N \\ \dim(S) = k}} \min_{\substack{x \in S \\ x \neq 0}} \frac{\langle x, Ax \rangle}{\langle x, x \rangle}
\]
and
\[
\lambda_k = \min_{\substack{T \subseteq \mathbb{C}^N \\ \dim(T) = N-k+1}} \max_{\substack{x \in T \\ x \neq 0}} \frac{\langle x, Ax \rangle}{\langle x, x \rangle}
\]

where \( \langle \cdot, \cdot \rangle \) denotes the standard inner product in \( \mathbb{C}^N \).
\end{thm}


\begin{corollary}[Courant-Fischer Corollary] \label{CF Corollary}
Let \( A \) and \( B \) be Hermitian matrices in \( \mathbb{C}^{N \times N} \), and let \( \lambda_k(A) \) and \( \lambda_k(B) \) denote their \( k \)-th eigenvalues, respectively. Then
\[
|\lambda_k(A) - \lambda_k(B)| \leq \|A - B\|.
\]
\end{corollary}


In the following lemma, we apply Theorem~\ref{thm:Chung bound} to obtain a bound on  $\|L - \mathbb{E}(L)\|$ for SBMs.





\begin{lem} \label{lem:bound on L for SBMs}
Suppose that \(G \sim SBM(n,k,p,q)\) for some \(k \geq 2\). Let \(L\) be the Laplacian of \(G\). Then, with high probability,
\[
     \|L - \mathbb{E}(L)\| \leq 18 \sqrt{pkn \log (kn)}.\label{statement1}
\]
\end{lem}
\begin{proof}
    Firstly, let \[
    M_{uv} = (e_u e_u^T + e_v e_v^T) - (e_u e_v^T + e_v e_u^T),
    \]

    which has precisely four non-zero entries: $M_{uv}(u,u) = M_{uv}(v,v) = 1$ and $M_{uv}(u,v) = M_{uv}(v,u) = -1$. %It also holds that \(M_{uv}^2 = 2 M_{uv}\). 
    Now we define the random matrix
\[X_{uv} = \begin{cases}
    M_{uv} \ \text{ if } u \sim v \\
    0 \ \text{ otherwise.}
\end{cases}\]
Notice that \(L= \sum_{(u,v) \in E} X_{uv}\).
Moreover, we have that \(\|X_{uv} - \mathbb{E}(X_{uv})\| \leq 2\). 


By the identity
\[
 \mathbb{E}((X_{uv} - \mathbb{E}(X_{uv}))^2) = \mathbb{E}((X_{uv})^2) - (\mathbb{E}(X_{uv}))^2, 
 \]
 and the fact that $M_{uv}^2 = 2 M_{uv}$,
 we obtain that
\[
 \mathbb{E}((X_{uv})^2) - (\mathbb{E}(X_{uv}))^2  = \begin{cases}
     2p(1-p)M_{uv} & \ \text{if } u \text{ and } v \text{ are in the same block} \\
     2q(1-q)M_{uv} & \ \text{otherwise.}
 \end{cases}
\]
Summing over all \(u,v \in V\) and taking the norm, we obtain the following:
\[\sigma^2 = \Bigl\lVert\sum_{\{u,v\} \in \binom{V}{2}} \Bigl(\mathbb{E}((X_{uv})^2) - (\mathbb{E}(X_{uv}))^2\Bigr)\Bigr\rVert \leq 4(p+(k-1)q)n.\]

Applying Theorem~\ref{thm:Chung bound} with  \(a = 18 \sqrt{pkn \log (kn)}\) yields the statement.
% To prove statement \ref{statement2}, notice that:
%     \begin{equation*}
%     \lVert \mathbb{E}(A) - A \rVert  = \max_{x \in \mathbb{C}^N, \|x\| = 1} x^*(\mathbb{E}(A) - A)x 
% \end{equation*}

% We can rewrite this maximum as \(\max_{x,y \in \mathbb{C}^N, \|x\|=\|y\| = 1} x^*(\mathbb{E}(A) - A)y\). This term can be bounded below by restricting our choices of \(x\) and \(y\). We choose \(y=\frac{1}{\sqrt{N}}1\) to get the inequality
% \[\max_{x,y \in \mathbb{C}^N, \|x\|=\|y\| = 1} x^*(\mathbb{E}(A) - A)y \geq \max_{x} x^*(\mathbb{E}(A) - A)1\]
% Finally, we restrict \(x\) to the set of \(2N\) vectors \(\{\pm e_i\}_{i=1}^N\) (where \(e_i\) is the standard basis vector with \(1\) in the \(i\)th slot and \(0\) everywhere else). This results in the following inequality:
% \begin{align*}
%  \max_{x \in \mathbb{C}^N} x^*(\mathbb{E}(A) - A)1 & \geq \max_{x \in\{\pm e_i\}_{i=1}^N } x^*(\mathbb{E}(A) - A)1 \\
%  & = \max_{i=1, \hdots, N} |\mathbb{E}(D)_{ii} - D_{ii}|.
% \end{align*}
    

% As \(|\mathbb{E}(D)_{ii} - D_{ii}| = \|D -\mathbb{E}(D)\|\), this proves the statement.
% \[ \begin{pmatrix}
%     1 & -1 \\
%     -1 & 1
% \end{pmatrix} \begin{pmatrix}
%     1 & -1 \\
%     -1 & 1
% \end{pmatrix} = \begin{pmatrix}
%     1^2 + (-1)^2 & -1 \\
%     -1 & 1
% \end{pmatrix}\]
\end{proof}

The following result states the eigenvalues of the expected Laplacian in stochastic block models. Its proof is by elementary calculations.

\begin{lem} \label{eigenvalues expected laps SBMs}
Suppose that \(G \sim SBM(n,k,p,q)\) for some \(k \geq 2\). Let \(L = D - A\) be the Laplacian of \(G\) and let \(\mathcal{L} = I - D^{-1/2}AD^{-1/2}\) be the normalized Laplacian of \(G\). Then, the first eigenvalues of \(\mathbb{E}(L)\) and \(\mathbb{E}(\mathcal{L})\) are both \(0\). The next \(k-1\) eigenvalues of \(\mathbb{E}(L)\) and \(\mathbb{E}(\mathcal{L})\) are \(knq\) and \(\frac{knq}{(n-1)p + (k-1)nq}\) respectively, and the \((k+1)\)st eigenvalues are \(np + (k-1)nq\) and \(\frac{np + (k-1)nq}{(n-1)p + (k-1)nq}\) respectively.
\end{lem}

%\sbm*
We are now ready to prove Corollary~\ref*{cor:sbm}.

\begin{proof}[Proof of Corollary~\ref*{cor:sbm}]
    Firstly, let \(\lambda_1(\mathbb{E}(L)) \le \dots \le \lambda_N(\mathbb{E}(L))\) denote the eigenvalues of \(\mathbb{E}(L)\) with corresponding orthonormal eigenvectors \(g_1, \hdots, g_k\). Let \(\gamma_i = \frac{g_i^* L g_i}{g_i^* g_i}\).
     Notice that \(g_1 = f_1 = \frac{1}{\sqrt{N}}\mathbf{1}\), where \(\mathbf{1}\) denotes the all-ones vector. By Corollary~\ref*{cor:RemoveFirstEvec}, we have that
       \[\sum_{i=2}^k \|\hat{f}_i - g_i\|^2 \leq \frac{\sum_{i=2}^k (\gamma_i - \lambda_2)}{ \lambda_{k+1} - \lambda_2}.\]
    We will bound the numerator of the previous expression as follows:
     \begin{align*}
            |\gamma_i - \lambda_2| & = |\gamma_i - \lambda_i(\mathbb{E}(L)) + \lambda_i(\mathbb{E}(L)) - \lambda_2| \\
            & \leq |\gamma_i - \lambda_i(\mathbb{E}(L))| + |\lambda_i(\mathbb{E}(L)) - \lambda_2|.
        \end{align*}
    The first term can be bounded above as follows:
    \[
    |\gamma_i - \lambda_i(\mathbb{E}(L))| = |g_i^*(L - \mathbb{E}(L)) g_i| \leq \| L - \mathbb{E}(L) \|.
    \]
By Lemma~\ref{lem:bound on L for SBMs}, with high probability, \(\|L - \mathbb{E}(L)\| \leq 18\sqrt{(pkn)\log(kn)}\).
By Corollary~\ref{CF Corollary}, for all \(i\), it holds that:
\begin{equation}
\label{eq:lambdai}
 |\lambda_{i} - \lambda_i(\mathbb{E}(L))| \leq \|L - \mathbb{E}(L)\| \leq 18\sqrt{(pkn)\log(kn)}.
\end{equation}
Thus, by Lemma~\ref{eigenvalues expected laps SBMs}, for \(i = 2, \hdots, k\),
\[
\lambda_i \in \left[ knq - 18\sqrt{(pkn)\log(kn)}, knq + 18\sqrt{(pkn)\log(kn)} \right]
\]
which implies
\[
|\lambda_i(\mathbb{E}(L)) - \lambda_2| = |knq - \lambda_2| \leq 18\sqrt{(pkn)\log(kn)}
\]
and
\[
\gamma_i - \lambda_2 \leq 36 \sqrt{(pkn)\log(kn)}.
\]
On the other hand, Lemma~\ref{eigenvalues expected laps SBMs} and \eqref{eq:lambdai} imply that
\[
\lambda_{k+1} \in \left[ np + (k-1)nq - 18\sqrt{(pkn)\log(kn)}, np + (k-1)nq + 18\sqrt{(pkn)\log(kn)} \right]
\]
and, therefore, 
\begin{align*}
\lambda_{k+1} - \lambda_2 &\geq (np + (k-1)nq -18 \sqrt{(pkn)\log(kn)}) - (knq + 18\sqrt{(pkn)\log(kn)}) \\
&=  n(p-q) - 36 \sqrt{(pkn)\log(kn)}.
\end{align*}
Finally, by our assumption that $p-q \ge 40 \sqrt{pk\log(kn)/n}$, it holds that
\begin{align*}
\sum_{i=2}^k \|\hat{f}_i - g_i\|^2 &\leq
\frac{\sum_{i=2}^k (\gamma_i - \lambda_2)}{ \lambda_{k+1} - \lambda_2} \\ &\leq \frac{36(k-1)\sqrt{(pkn)\log(kn)}}{n(p-q) - 36\sqrt{(pkn)\log(kn)}} \\ 
&= \mathcal{O}\left(\frac{k}{p-q} \sqrt{\frac{pk\log(kn)}{n}} \right).
\end{align*}
\end{proof}

%\subsubsection{Proof of Lemma~\ref{lem:kmeans}}

%\kmeans*

