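We fix a time $t$, a horizon $H$, and a noise sequence $\varepsilon_{t:t+H-1}$.
All quantities below are defined along the imagined rollout~\eqref{eq:mpc_rollout_recursion}.
Since the noise is fixed, the rollout is a deterministic function of $\psi$.
Assuming that $f_{\theta}$, $r_{\xi}$, and $Q_{\phi}$ are differentiable in the state and action and that $\pi_{\psi}(s)$ is differentiable in $(\psi,s)$, the gradient
$\nabla_{\psi}L(\psi;\varepsilon_{t:t+H-1})$ is therefore obtained by repeated application of the chain rule.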

\subsection{Sensitivity recursions}

Recall the rollout recursion \eqref{eq:mpc_rollout_recursion}:
\begin{align}
\tilde{s}_0 &= s_t,
\qquad
\tilde{a}_h = \pi_{\psi}(\tilde{s}_h),
\qquad
\tilde{s}_{h+1} = f_{\theta}(\tilde{s}_h,\tilde{a}_h,\varepsilon_{t+h}),
\qquad h=0,\ldots,H-1.
\label{eq:app_rollout_recursion}
\end{align}
Define the state and action sensitivities
\begin{align}
G_h := \nabla_{\psi}\tilde{s}_h,
\qquad
D_h := \nabla_{\psi}\tilde{a}_h.
\label{eq:app_def_G_D}
\end{align}
Since $\tilde{s}_0=s_t$ does not depend on $\psi$, we have
\begin{align}
G_0 = 0.
\label{eq:app_G0}
\end{align}

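For each $h$, the action is $\tilde{a}_h=\pi_{\psi}(\tilde{s}_h)$, which depends on $\psi$ both directly through the policy parameters and indirectly through the state $\tilde{s}_h$. By the chain rule,
\begin{align}
D_h
=
\nabla_{\psi}\pi_{\psi}(s)\big|_{s=\tilde{s}_h}
+
\nabla_s \pi_{\psi}(s)\big|_{s=\tilde{s}_h}\,\nabla_{\psi}\tilde{s}_h.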
\label{eq:app_chain_policy}
\end{align}
Introduce the policy Jacobians (as in the theorem statement)
\begin{align}
\Pi_s(h) := \nabla_s \pi_{\psi}(s)\big|_{s=\tilde{s}_h},
\qquad
\Pi_{\psi}(h) := \nabla_{\psi}\pi_{\psi}(s)\big|_{s=\tilde{s}_h}.
\label{eq:app_def_Pi}
\end{align}
Substituting \eqref{eq:app_def_Pi} into \eqref{eq:app_chain_policy} yields
\begin{align}
D_h = \Pi_s(h)\,G_h + \Pi_{\psi}(h).
\label{eq:app_D_recursion}
\end{align}

For each $h\in\{0,\ldots,H-1\}$, the next state is
$\tilde{s}_{h+1}=f_{\theta}(\tilde{s}_h,\tilde{a}_h,\varepsilon_{t+h})$.
Differentiating with respect to $\psi$ and applying the chain rule gives
\begin{align}
G_{h+1}
=
\nabla_s f_{\theta}(s,a,\varepsilon_{t+h})\big|_{s=\tilde{s}_h,a=\tilde{a}_h}\,G_h
+
\nabla_a f_{\theta}(s,a,\varepsilon_{t+h})\big|_{s=\tilde{s}_h,a=\tilde{a}_h}\,D_h.
\label{eq:app_chain_dynamics}
\end{align}
Introduce the world-model Jacobians (as in the theorem statement)
\begin{align}
F_s(h) := \nabla_s f_{\theta}(s,a,\varepsilon_{t+h})\big|_{s=\tilde{s}_h,a=\tilde{a}_h},
\qquad
F_a(h) := \nabla_a f_{\theta}(s,a,\varepsilon_{t+h})\big|_{s=\tilde{s}_h,a=\tilde{a}_h}.
\label{eq:app_def_F}
\end{align}
Substituting \eqref{eq:app_def_F} into \eqref{eq:app_chain_dynamics} yields
\begin{align}
G_{h+1} = F_s(h)\,G_h + F_a(h)\,D_h.
\label{eq:app_G_recursion}
\end{align}

Equations \eqref{eq:app_G0}, \eqref{eq:app_D_recursion}, and \eqref{eq:app_G_recursion} are exactly the sensitivity recursions
stated in Theorem~\ref{thm:grad_recursion_diffusion_mpc}.

\subsection{Gradient of the per-noise return}

Recall the per-noise return definition \eqref{eq:mpc_per_noise_return}:
\begin{align}
L(\psi;\varepsilon_{t:t+H-1})
=
\sum_{h=0}^{H-1}\gamma^h\, r_{\xi}(\tilde{s}_h,\tilde{a}_h)
+
\gamma^H\, Q_{\phi}\!\Big(\tilde{s}_H,\pi_{\psi}(\tilde{s}_H)\Big).
\label{eq:app_def_L}
\end{align}

Fix $h\in\{0,\ldots,H-1\}$ and define the stage reward
$r_h := r_{\xi}(\tilde{s}_h,\tilde{a}_h)$.
By the chain rule,
\begin{align}
\nabla_{\psi} r_h
=
\nabla_s r_{\xi}(\tilde{s}_h,\tilde{a}_h)\,G_h
+
\nabla_a r_{\xi}(\tilde{s}_h,\tilde{a}_h)\,D_h.
\label{eq:app_grad_stage_reward}
\end{align}
Introduce the shorthand
\begin{align}
r_s(h) := \nabla_s r_{\xi}(\tilde{s}_h,\tilde{a}_h),
\qquad
r_a(h) := \nabla_a r_{\xi}(\tilde{s}_h,\tilde{a}_h).
\label{eq:app_def_rs_ra}
\end{align}
Then \eqref{eq:app_grad_stage_reward} becomes
\begin{align}
\nabla_{\psi} r_{\xi}(\tilde{s}_h,\tilde{a}_h)
=
r_s(h)\,G_h + r_a(h)\,D_h.
\label{eq:app_grad_stage_compact}
\end{align}

Define the terminal action $\tilde{a}_H := \pi_{\psi}(\tilde{s}_H)$ and the terminal value
$V_T := Q_{\phi}(\tilde{s}_H,\tilde{a}_H)$.
By the chain rule,
\begin{align}
\nabla_{\psi} V_T
=
\nabla_s Q_{\phi}(\tilde{s}_H,\tilde{a}_H)\,G_H
+
\nabla_a Q_{\phi}(\tilde{s}_H,\tilde{a}_H)\,D_H.
\label{eq:app_grad_terminal_value}
\end{align}
Introduce the terminal derivatives
\begin{align}
Q_s := \nabla_s Q_{\phi}(\tilde{s}_H,\tilde{a}_H),
\qquad
Q_a := \nabla_a Q_{\phi}(\tilde{s}_H,\tilde{a}_H).
\label{eq:app_def_Qs_Qa}
\end{align}
Then \eqref{eq:app_grad_terminal_value} becomes
\begin{align}
\nabla_{\psi} Q_{\phi}(\tilde{s}_H,\tilde{a}_H)
=
Q_s\,G_H + Q_a\,D_H.
\label{eq:app_grad_terminal_compact}
\end{align}
Here $D_H$ is defined exactly as in \eqref{eq:app_def_G_D}, and can be expanded via the same policy sensitivity identity
\eqref{eq:app_D_recursion} with $h=H$:
\begin{align}
D_H = \Pi_s(H)\,G_H + \Pi_{\psi}(H).
\label{eq:app_DH}
\end{align}

Differentiating \eqref{eq:app_def_L} and applying \eqref{eq:app_grad_stage_compact} and \eqref{eq:app_grad_terminal_compact} gives
\begin{align}
\nabla_{\psi} L(\psi;\varepsilon_{t:t+H-1})
&=
\sum_{h=0}^{H-1}\gamma^h\,
\nabla_{\psi} r_{\xi}(\tilde{s}_h,\tilde{a}_h)
+
\gamma^H\,
\nabla_{\psi} Q_{\phi}(\tilde{s}_H,\tilde{a}_H)
\nonumber\\
&=
\sum_{h=0}^{H-1}\gamma^h\,
\Big(
r_s(h)\,G_h + r_a(h)\,D_h
\Big)
+
\gamma^H\,
\Big(
Q_s\,G_H + Q_a\,D_H
\Big),
\label{eq:app_grad_L_final}
\end{align}
which is the gradient expression stated in Theorem~\ref{thm:grad_recursion_diffusion_mpc}.

Next, we derive the recursions that compute the Jacobians $\nabla_a f_{\theta}(s,a,\varepsilon)$ and $\nabla_s f_{\theta}(s,a,\varepsilon)$ under the reverse diffusion implementation~\eqref{eq:g_h_def}.

Fix $(s,a)$ and a noise pack $\varepsilon=(z_K,z_{K-1},\ldots,z_0)$. Let $\{s^{(k)}\}_{k=0}^{K}$ be generated by
\begin{align}
s^{(K)} &= g_K(z_K), \label{eq:app_depth_init}\\
s^{(k-1)} &= h_k\big(s^{(k)},s,a,z_{k-1}\big),
\qquad k=K,\ldots,1, \label{eq:app_depth_step}\\
f_{\theta}(s,a,\varepsilon) &= s^{(0)}. \label{eq:app_depth_out}
\end{align}
Assume $g_K$ is independent of $(s,a)$ and each $h_k(u,s,a,z)$ is differentiable in $(u,s,a)$.

Define the diffusion-depth sensitivities, where $d$ denotes the state dimension and $m$ the action dimension,
\begin{align}
A_k := \nabla_a s^{(k)} \in \mathbb{R}^{d\times m},
\qquad
B_k := \nabla_s s^{(k)} \in \mathbb{R}^{d\times d}.
\label{eq:app_depth_def_AB}
\end{align}

Since $s^{(K)}=g_K(z_K)$ and $g_K$ does not depend on $(s,a)$,
\begin{align}
A_K = 0,
\qquad
B_K = 0.
\label{eq:app_depth_init_AB}
\end{align}

Fix $k\in\{1,\ldots,K\}$ and write \eqref{eq:app_depth_step} as
\begin{align}
s^{(k-1)} = h_k(u,s,a,z_{k-1})\Big|_{u=s^{(k)}}.
\label{eq:app_depth_step_u}
\end{align}
Differentiate \eqref{eq:app_depth_step_u} with respect to $a$ and apply the chain rule:
\begin{align}
A_{k-1}
&=
\nabla_a s^{(k-1)}
=
\frac{\partial h_k}{\partial u}\,\nabla_a s^{(k)}
+
\frac{\partial h_k}{\partial a}
=
\frac{\partial h_k}{\partial u}\,A_k
+
\frac{\partial h_k}{\partial a},
\label{eq:app_depth_A_rec}
\end{align}
where the partial derivatives are evaluated at
\begin{align}
(u,s,a,z)=\big(s^{(k)},\,s,\,a,\,z_{k-1}\big).
\label{eq:app_depth_eval_point}
\end{align}

Differentiating \eqref{eq:app_depth_step_u} with respect to $s$ gives
\begin{align}
B_{k-1}
&=
\nabla_s s^{(k-1)}
=
\frac{\partial h_k}{\partial u}\,\nabla_s s^{(k)}
+
\frac{\partial h_k}{\partial s}
=
\frac{\partial h_k}{\partial u}\,B_k
+
\frac{\partial h_k}{\partial s},
\label{eq:app_depth_B_rec}
\end{align}
with the same evaluation point \eqref{eq:app_depth_eval_point}.

\paragraph{Iterating to obtain $\nabla_a f_{\theta}$ and $\nabla_s f_{\theta}$.}
Starting from \eqref{eq:app_depth_init_AB} and iterating \eqref{eq:app_depth_A_rec} and \eqref{eq:app_depth_B_rec} for
$k=K,K-1,\ldots,1$ yields $A_0$ and $B_0$. Since $f_{\theta}(s,a,\varepsilon)=s^{(0)}$ by \eqref{eq:app_depth_out},
\begin{align}
\nabla_a f_{\theta}(s,a,\varepsilon) = \nabla_a s^{(0)} = A_0,
\qquad
\nabla_s f_{\theta}(s,a,\varepsilon) = \nabla_s s^{(0)} = B_0.
\label{eq:app_depth_conclusion}
\end{align}