\section{End-to-end analysis}\label{sec:end_to_end}


\begin{lem}[TV error after one round of predictor and corrector]\label{l:pc}
    Choose predictor step sizes $h_1,\ldots, h_{\Npred}$ as in Lemma~\ref{lem:pred} with $\Tpred=h_1+\cdots+h_{\Npred}\le 1/L$.
    That is, if $T-t_0-\Tpred \lesssim 1/L$, then we ensure that $h_{n+1} \le \frac{T-t_0 - h_1 - \cdots - h_n}{2}$ for all $n$, and if $T-t_0 \gtrsim 1/L$, then we can take $h_1 = \cdots = h_{\Npred}$.
    Let $\hpred \deq \max_{1\le n\le \Npred} h_n$ and abbreviate $\Pode^{(\Npred)} \deq \Podeth{t_0}{h_1,\dotsc,h_{\Npred}}$ (and similarly for $\Podes$).
    \begin{enumerate}
        \item Consider running the overdamped Langevin corrector for time $\Tcorr\asymp 1/L$, step size $\hcorr$, and stationary distribution $q_{t_0}\Pode^{(\Npred)} = q_{t_0+\Tpred}$; set $\Ncorr = \Tcorr/\hcorr$. Then,
        \begin{align*}
            \TV(p\Podes^{(\Npred)}\Plans^{\Ncorr},\, q_{t_0+\Tpred}) 
            &\le
            \TV(p,q_{t_0})
            + O\Bigl( L\sqrt d \, \hpred + \sqrt{\Lmax d\hcorr}  + \frac{\esc}{\sqrt{\Lmax}} \Bigr)\,.
        \end{align*}
        \item Consider running the underdamped Langevin corrector for time $\Tcorr\asymp 1/\sqrt L$, step size $\hcorr$, and stationary distribution $q_{t_0}\Pode^{(\Npred)} = q_{t_0+\Tpred}$; set $\Ncorr = \Tcorr/\hcorr$. Then,
        \begin{align*}
            \TV(p\Podes^{(\Npred)}\Pulds^{\Ncorr},\, q_{t_0+\Tpred}) 
            &\le
            \TV(p,q_{t_0})
            + O\Bigl( L\sqrt d \, \hpred + \sqrt{\Lmax d}\,\hcorr  + \frac{\esc}{\sqrt{\Lmax}} \Bigr)\,.
        \end{align*}
    \end{enumerate}
\end{lem}
\begin{proof}
    By the triangle inequality and the data-processing inequality,
    \begin{align*}
        &\TV(p\Podes^{(\Npred)} \Plans^{\Ncorr}, \, q_{t_0+\Tpred}) \\
        &\qquad \le \TV(p\Podes^{(\Npred)} \Plans^{\Ncorr}, \, q_{t_0} \Podes^{(\Npred)} \Plans^{\Ncorr}) + \TV(q_{t_0}\Podes^{(\Npred)} \Plans^{\Ncorr}, \,q_{t_0+\Tpred}) \\
        &\qquad \le \TV(p, q_{t_0}) + \TV(q_{t_0}\Podes^{(\Npred)} \Plans^{\Ncorr}, \,q_{t_0+\Tpred})\,.
    \end{align*}
    For overdamped Langevin, applying Theorem~\ref{thm:main_overdamped},
    \begin{align}\label{eq:overdamped_corr}
        \TV(q_{t_0}\Podes^{(\Npred)} \Plans^{\Ncorr}, \,q_{t_0+\Tpred})
        &\lesssim \sqrt L\,W_2(q_{t_0}\Podes^{(\Npred)}, \, q_{t_0+\Tpred}) + \esc/\sqrt L + \sqrt{Ld\hcorr}\,.
    \end{align}
    For the Wasserstein term, Lemma~\ref{lem:pred} yields
    \begin{align*}
        W_2(q_{t_0} \Podes^{(\Npred)}, q_{t_0+\Tpred})
        &= W_2(q_{t_0} \Podes^{(\Npred)}, q_{t_0} \Pode^{(\Npred)})
        \lesssim \sqrt{Ld}\,\hpred + \frac{\esc}{\Lmax}\,.
    \end{align*}
    Combining these bounds yields the result for the overdamped corrector.
    For the underdamped corrector, we modify~\eqref{eq:overdamped_corr} by replacing the use of Theorem~\ref{thm:main_overdamped} with Theorem~\ref{thm:main_underdamped}.
\end{proof}

We also need the following lemma on the convergence of the OU process.

\begin{lem}\label{lem:ou_conv}
    Let ${(\quo_t)}_{t\ge 0}$ denote the marginal law of the OU process started at $\quo_0 = \qdata$. Then, for all $T\gtrsim 1$, it holds that
    \begin{align*}
        \TV(\quo_T, \gamma^d)
        &\lesssim (\sqrt d+\mf m_2)\exp(-T)\,.
    \end{align*}
\end{lem}
\begin{proof}
    This follows from~\cite[Lemma C.4]{CheLeeLiu23ImprovedSGM}. Alternatively, using the short-time regularization result of~\cite[Lemma 4.2]{bobkov2001hypercontractivity} for time $t_0 \asymp 1$ and the Wasserstein contraction of the OU process,
    \begin{align*}
        \TV(\quo_T, \gamma^d)
        &\lesssim \sqrt{\KL(\quo_T \mmid \gamma^d)}
        \lesssim \frac{W_2(\quo_{T-t_0}, \gamma^d)}{\sqrt{t_0}}
        \lesssim \exp(-(T-t_0)) \,W_2(\qdata, \gamma^d)
        \lesssim \exp(-T) \,W_2(\qdata, \gamma^d)\,.
    \end{align*}
    The result follows from $W_2(\qdata,\gamma^d) \le W_2(\qdata, \delta_0) + W_2(\delta_0, \gamma^d) \le \mf m_2 +\sqrt d$.
\end{proof}

We now prove our main theorems.

\begin{proof}[Proof of Theorems~\ref{thm:pc_over} and~\ref{thm:pc_under}]
For $t\in [0,T]$, let $p_t \deq \law(\widehat x_t)$.
From Lemma~\ref{lem:ou_conv},
\begin{align*}
    \TV(p_0, q_0)
    &= \TV(\quo_T, \gamma^d)
    \lesssim (\sqrt d + \mf m_2) \exp(-T)\,.
\end{align*}

We divide our analysis according to the two stages of the algorithm.
In the first stage, after iterating Lemma~\ref{l:pc} for $N_0\asymp LT$ steps,
\begin{align*}
    \TV(p_{T-\hpred}, q_{T-\hpred})
    &\le \TV(p_0, q_0) + O\Bigl(L\sqrt d\,\hpred + \sqrt{Ld}\,\hcorr^\pow + \frac{\esc}{\sqrt L}\Bigr) \times N_0 \\
    &\lesssim (\sqrt d + \mf m_2) \exp(-T) + L^2 T d^{1/2} \hpred + L^{3/2} T d^{1/2} \hcorr^\pow + L^{1/2}T \esc\,,
\end{align*}
where $\pow = \frac{1}{2}$ if we use the overdamped corrector and $\pow = 1$ if we use the underdamped corrector.
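Applying Lemma~\ref{l:pc} once more for the second stage of the algorithm (now in the regime where the predictor step sizes are geometrically decreasing), which adds one more error term of the same form, we then conclude that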
\begin{align*}
    \TV(p_{T-\delta}, q_{T-\delta})
    &\lesssim (\sqrt d + \mf m_2) \exp(-T) + L^2 T d^{1/2} \hpred + L^{3/2} T d^{1/2} \hcorr^\pow + L^{1/2}T \esc\,.
\end{align*}
Finally, we note that if we take $\de \asymp \fc{\ep^2}{L^2\,(d\vee \mf m_2^2)}$, then by~\cite[Lemma 6.4]{leelutan23sgmgeneral}, $\TV(q_{T-\de},q_T)\le \ep$; a triangle inequality thus finishes the proof.
\end{proof}

\begin{rmk}\label{rmk:two_stage}
    Alternatively, instead of taking geometrically decreasing step sizes and employing early stopping, we could split the algorithm into two stages: for time $t < T-\hpred$, we take constant step size $\hpred$, and for time $t > T-\hpred$, we use a smaller constant step size $h'$ as required if working with the original score perturbation lemma (see Remark~\ref{r:sp}).
\end{rmk}