\subsection{Proof of Theorem \ref{thm:3}}
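% NOTE(review): this proof of Theorem~\ref{thm:3} invokes Theorem~\ref{thm:3} itself; confirm the intended reference.
Following the same argument as Lemma~9 of \citet{koloskova2020unified} and applying Theorem~\ref{thm:3}, we have, for all $\alpha > 0$: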

\begin{equation}
\begin{aligned}
\mathbb{E} \| \mathbf{C}^{t}-\overline{\mathbf{C}}^{t} \|_F^2 & = N \mathbf{E}_t \leq \mathbb{E}\left\|\mathbf{C}^{(m \beta)} \prod_{i=t-1}^{m \beta} \mathbf{W}^{(i)}-\bar{\mathbf{C}}^{\left(m \beta \right)}+\sum_{j=m \beta}^{t-1} \eta_j \nabla \mathbf{F}\left(\mathbf{C}^{(j)}\right) \prod_{i=t-1}^j \mathbf{W}^{(i)}\right\|_F^2 \\
& \leq \mathbb{E}\left\|\mathbf{C}^{(m \beta)} \prod_{i=t-1}^{m \beta} \mathbf{W}^{(i)}-\bar{\mathbf{C}}^{\left(m \beta \right)}+\sum_{j=m \beta}^{t-1} \eta_j \left(\nabla \mathbf{F}\left(\mathbf{C}^{(j)}\right) - \nabla \mathbf{F}\left(\mathbf{C}^{\star}\right) + \nabla \mathbf{f}\left(\mathbf{C}^{\star}\right) \right) \prod_{i=t-1}^j \mathbf{W}^{(i)}\right\|_F^2 \\
& + \mathbb{E}\left \| \sum_{j=m \beta}^{t-1} \eta_j \left( \nabla \mathbf{F}\left(\mathbf{C}^{\star}\right) - \nabla \mathbf{f}\left(\mathbf{C}^{\star}\right) \right) \prod_{i=t-1}^j \mathbf{W}^{(i)}\right\|_F^2 \\
& \leq (1+\alpha) \mathbb{E}\left\|\mathbf{C}^{(m \beta)} \prod_{i=t-1}^{m \beta} \mathbf{W}^{(i)}-\bar{\mathbf{C}}^{\left(m \beta \right)}\right \|_F^2 \\
& + (1+\alpha^{-1})\mathbb{E}\left\|\sum_{j=m \beta}^{t-1} \eta_j \left(\nabla \mathbf{F}\left(\mathbf{C}^{(j)}\right) - \nabla \mathbf{F}\left(\mathbf{C}^{\star}\right) + \nabla \mathbf{f}\left(\mathbf{C}^{\star}\right) \right) \prod_{i=t-1}^j \mathbf{W}^{(i)}\right\|_F^2 \\
& + \mathbb{E}\left \| \sum_{j=m \beta}^{t-1} \eta_j \left( \nabla \mathbf{F}\left(\mathbf{C}^{\star}\right) - \nabla \mathbf{f}\left(\mathbf{C}^{\star}\right) \right) \prod_{i=t-1}^j \mathbf{W}^{(i)}\right\|_F^2 .
\end{aligned}
\end{equation}

Using Assumption~\ref{as:7}, the above can be further simplified:

\begin{equation}
\begin{aligned}
\mathbb{E} \| \mathbf{C}^{t}-\overline{\mathbf{C}}^{t} \|_F^2 & \leq (1+\alpha) (1-p) \mathbb{E}\left\|\mathbf{C}^{(m \beta)} -\overline{\mathbf{C}}^{(m \beta)} \right \|_F^2 \\
& + (1+\alpha^{-1}) 2 \beta \sum_{j=m \beta}^{t-1} \eta_j^2 \mathbb{E}\left \|\left(\nabla \mathbf{F}\left(\mathbf{C}^{(j)}\right) - \nabla \mathbf{F}\left(\mathbf{C}^{\star}\right) + \nabla \mathbf{f}\left(\mathbf{C}^{\star}\right) \right) \right \|_F^2 \\
& + \sum_{j=m \beta}^{t-1} \eta_j^2 \mathbb{E}\left \|\left( \nabla \mathbf{F}\left(\mathbf{C}^{\star}\right) - \nabla \mathbf{f}\left(\mathbf{C}^{\star}\right) \right) \right \|_F^2 \\
& \leq (1+\alpha) (1-p) \mathbb{E}\left\|\mathbf{C}^{(m \beta)} -\overline{\mathbf{C}}^{(m \beta)} \right \|_F^2 \\
& + (1+\alpha^{-1}) 2 \beta \sum_{j=m \beta}^{t-1} \eta_j^2 \mathbb{E}\left \|\left(\nabla \mathbf{F}\left(\mathbf{C}^{(j)}\right) - \nabla \mathbf{F}\left(\mathbf{C}^{\star}\right) + \nabla \mathbf{f}\left(\mathbf{C}^{\star}\right) \right) \right \|_F^2 \\
& + \sum_{j=m \beta}^{t-1} \eta_j^2 nv^2 .
\end{aligned}    
\end{equation}

The expectation of the second term on the right-hand side can be bounded as:
\begin{equation}
\begin{aligned}
&\mathbb{E}\left\|\left(\nabla \mathbf{F}\left(\mathbf{C}^{(j)}\right)  - \nabla \mathbf{F}\left(\mathbf{C}^{\star}\right) + \nabla \mathbf{f}\left(\mathbf{C}^{\star}\right) \right) \right\|_F^2 \\
& = \mathbb{E}\left \|\left(\nabla \mathbf{F}\left(\mathbf{C}^{(j)}\right) - \nabla \mathbf{F}\left(\overline{\mathbf{C}}^{(j)}\right) + \nabla \mathbf{F}\left(\overline{\mathbf{C}}^{(j)}\right) - \nabla \mathbf{F}\left(\mathbf{C}^{\star}\right) + \nabla \mathbf{f}\left(\mathbf{C}^{\star}\right) \right) \right \|_F^2 \\
& \leq 3\, \mathbb{E}\left\| \nabla \mathbf{F}\left(\mathbf{C}^{(j)}\right) - \nabla \mathbf{F}\left(\overline{\mathbf{C}}^{(j)}\right) \right\|_F^2 + 3\, \mathbb{E}\left\| \nabla \mathbf{F}\left(\overline{\mathbf{C}}^{(j)}\right) - \nabla \mathbf{F}\left(\mathbf{C}^{\star}\right) \right\|_F^2 + 3\, \mathbb{E}\left\| \nabla \mathbf{f}\left(\mathbf{C}^{\star}\right) \right\|_F^2 \\
& \leq 3 \frac{n}{N} L^2 \|\mathbf{C}^{(j)} - \overline{\mathbf{C}}^{(j)}\|_F^2 + 3n \sigma^2 + 6nL(f(\overline{\mathbf{c}}^{(j)})-f(\mathbf{c}^{\star})) .
\end{aligned}    
\end{equation}

Putting the above bounds together, dividing by $N$, and choosing $\alpha = \frac{p}{2}$ so that $(1+\alpha)(1-p) \leq 1-\frac{p}{2}$ and $1+\alpha^{-1} \leq \frac{3}{p}$, as in \citet{koloskova2020unified}, the stepsize condition $\eta_j \leq \frac{p\sqrt{N}}{12\sqrt{2n}\beta L}$ yields the desired bound:

\begin{equation}
\begin{aligned}
\mathbf{E}_t & \leq \left(1-\frac{p}{2}\right) \mathbf{E}_{m\beta} + \frac{p}{16\beta} \sum_{j=m\beta}^{t-1} \mathbf{E}_j + \frac{36Ln\beta}{pN} \sum_{j=m\beta}^{t-1} \eta_j^2 \left(f(\overline{\mathbf{c}}^{(j)})-f(\mathbf{c}^{\star})\right) \\
& + \left( \frac{18\beta n}{Np} \sigma^2 + \frac{n}{N} v^2 \right) \sum_{j=m\beta}^{t-1} \eta_j^2 .
\end{aligned}    
\end{equation}