\section{Method}
\label{sec:met}

\begin{algorithm}[t]
\caption{Conditional Predictor-Corrector Sampler} 
\label{alg} 
\begin{algorithmic}[1] 
\State \textbf{Input:} $\mathbf{x}_T$, timestep schedule, frozen prior $f_\theta$, predictor multiplier $k$, measurement $\mathbf{y}$, forward operator $\mathcal{A}$, guidance scale $\eta$ 
\State \textbf{Initialize:} $\mathcal{X}\leftarrow\{\mathbf{x}_T\}$, $\beta_t \leftarrow \frac{\pi}{2}\frac{t}{T}$ \Comment{Angle for spherical transition} 
\For{$t=T,T-1,\dots,1$} 
\State $\mathbf{x}_t \leftarrow \mathrm{last}(\mathcal{X})$, \quad $(\hat{\boldsymbol{\epsilon}},\hat{\mathbf{x}}_0)\leftarrow f_\theta(\mathbf{x}_t,t)$ \Comment{Predict noise and clean signal} 
\State $\mathbf{f}_t \leftarrow \sin(\beta_t)\hat{\mathbf{x}}_0-\cos(\beta_t)\hat{\boldsymbol{\epsilon}}$ \Comment{Prior-induced update direction} 
\State \textcolor{blue}{$\mathcal{L}_{\mathrm{data}}\leftarrow \|\mathcal{A}(\hat{\mathbf{x}}_0)-\mathbf{y}\|_2^2$, \quad $\mathbf{g}_t\leftarrow\nabla_{\hat{\mathbf{x}}_0}\mathcal{L}_{\mathrm{data}}$} \Comment{\textcolor{blue}{Data-consistency guidance}} 
\State $\kappa_t\leftarrow \min(k,t)$, \quad $\Delta\beta^{(\kappa_t)}\leftarrow \beta_t-\beta_{t-\kappa_t}$ \Comment{Avoid $t-k<0$} 
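\State \textcolor{blue}{$\eta_t\leftarrow \eta\,\bigl(1-\cos(\pi t/T)\bigr)/2$} \Comment{\textcolor{blue}{Cosine-decay guidance scale, with $\eta=\eta_{\max}$}} 
\State \textcolor{blue}{$\tilde{\mathbf{x}}\leftarrow \mathbf{x}_t-\Delta\beta^{(\kappa_t)}\mathbf{f}_t-\eta_t\mathbf{g}_t$} \Comment{\textcolor{blue}{Guided predictor step}} 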
\State $\Gamma_t^{(\kappa_t)}\leftarrow \cos(\beta_{t-1})/\cos(\beta_{t-\kappa_t})$ \Comment{Variance-preserving scale} 
\State $\mathbf{x}_{t-1}\leftarrow \Gamma_t^{(\kappa_t)}\tilde{\mathbf{x}} +\sqrt{1-(\Gamma_t^{(\kappa_t)})^2}\boldsymbol{\epsilon}', \quad \boldsymbol{\epsilon}'\sim\mathcal{N}(0,\mathbf{I})$ \Comment{Corrector step} 
\State $\mathcal{X}\leftarrow \mathcal{X}\cup\{\mathbf{x}_{t-1}\}$ 
\EndFor 
\State \textbf{Return} $\mathcal{X}$ 
\end{algorithmic}
\end{algorithm}


TF-PRDiT solves inverse problems of the form
$\mathbf{y}=\mathcal{A}(\mathbf{x})+\boldsymbol{\xi}$,
where $\mathbf{x}$ is the unknown 3D volume, $\mathbf{y}$ is the measurement, $\mathcal{A}$ is a known forward operator, and $\boldsymbol{\xi}$ is measurement noise. The same formulation covers X-ray projection, downsampling, masking, and deblurring by changing $\mathcal{A}$. 

\noindent\textbf{Relation to DPS.}
Our sampler follows the same posterior-sampling perspective as DPS~\cite{chung2022diffusion}: an unconditional diffusion prior supplies the generative score, while a task-specific likelihood term enforces agreement with measurements. We do not introduce a new posterior-guidance principle. Instead, TF-PRDiT specializes this principle for native 3D medical volumes by combining a voxel-level CT diffusion prior with differentiable volumetric forward operators. This is important for sparse X-ray-to-CT because the forward model maps a 3D volume to one or more 2D projections, so each additional X-ray can be incorporated by adding another projection-space residual to the same guidance loss.

\noindent\textbf{Frozen 3D diffusion prior.}
We use a pretrained PRDiT model~\cite{zhang2026pixellevel} as a frozen unconditional prior over chest CT volumes. PRDiT is a diffusion transformer trained directly in voxel space on LIDC-IDRI CT volumes~\cite{armato2011lung}, avoiding the compressed latent representation used by many latent diffusion models. This voxel-level formulation is important for medical reconstruction because fine anatomical boundaries and small structures may be weakened or lost during latent compression. Given a noisy volume $\mathbf{x}_t$ and timestep $t$, the frozen network jointly predicts the noise component and the corresponding clean-volume estimate: $(\hat{\boldsymbol{\epsilon}},\hat{\mathbf{x}}_0)=f_\theta(\mathbf{x}_t,t)$.
During downstream reconstruction, all parameters of $f_\theta$ remain fixed. Task adaptation is performed only through measurement-guided sampling, so the same prior can be reused across different view counts and inverse operators without retraining. 

\noindent\textbf{Predictor-corrector sampling.}
Using the cosine-sine parameterization~\cite{zhang2023improving}, let $\beta_t=\frac{\pi}{2}\frac{t}{T}$. The prior-induced update direction is $\mathbf{f}_t=\sin(\beta_t)\hat{\mathbf{x}}_0-\cos(\beta_t)\hat{\boldsymbol{\epsilon}}$.
We use a $k$-step predictor,
\begin{equation}
\tilde{\mathbf{x}}=\mathbf{x}_t-\Delta\beta^{(k)}\mathbf{f}_t,\quad
\Delta\beta^{(k)}=\beta_t-\beta_{t-k},
\label{eq:k_scaled_predictor}
\end{equation}
which produces $\tilde{\mathbf{x}}\approx\mathbf{x}_{t-k}$ at timestep $t-k$. Near the end of sampling, where $t<k$, the stride is clamped to $\kappa_t=\min(k,t)$ so that the index $t-\kappa_t$ never becomes negative (see Algorithm~\ref{alg}). Because the predictor strides $k$ steps, the noise level of $\tilde{\mathbf{x}}$ is $\beta_{t-k}$, which differs from the target $\beta_{t-1}$ when $k>1$. A variance-preserving corrector rescales $\tilde{\mathbf{x}}$ and injects fresh noise to reach the correct noise level at timestep $t-1$:
\begin{equation}
\mathbf{x}_{t-1} = \Gamma_t^{(k)}\tilde{\mathbf{x}} + \sqrt{1-\bigl(\Gamma_t^{(k)}\bigr)^2}\,\boldsymbol{\epsilon}',\quad
\Gamma_t^{(k)} := \frac{\cos(\beta_{t-1})}{\cos(\beta_{t-k})},\quad \boldsymbol{\epsilon}'\sim\mathcal{N}(0,\mathbf{I}).
\label{eq:corrector}
\end{equation}
This keeps the marginal variance of $\mathbf{x}_{t-1}$ consistent with the forward process, preventing accumulated drift when $k>1$. Because $\Gamma_t^{(k)}$ decreases as $k$ grows, a larger $k$ makes the corrector inject more fresh noise at every step; this enables broader stochastic exploration, which is useful for ill-posed sparse-view reconstruction.

\noindent\textbf{Likelihood guidance on the denoised estimate.}
Directly matching measurements on the noisy state can produce unstable gradients. We instead guide sampling through the denoised estimate $\hat{\mathbf{x}}_0$, using the data-consistency loss
\begin{equation}
\mathcal{L}_{\mathrm{data}}=\|\mathcal{A}(\hat{\mathbf{x}}_0)-\mathbf{y}\|_2^2.
\label{eq:data_loss}
\end{equation}
The gradient $\nabla_{\hat{\mathbf{x}}_0}\mathcal{L}_{\mathrm{data}}$ is computed by backpropagating only through $\mathcal{A}$, treating $\hat{\mathbf{x}}_0$ as the free variable and holding the denoiser $f_\theta$ fixed. DPS~\cite{chung2022diffusion} approximates the likelihood by evaluating it at the denoised estimate, $p(\mathbf{y}\mid\mathbf{x}_t)\approx p(\mathbf{y}\mid\hat{\mathbf{x}}_0)$, but differentiates with respect to $\mathbf{x}_t$ through the denoiser. We adopt the same likelihood approximation and additionally omit the Jacobian $\partial\hat{\mathbf{x}}_0/\partial\mathbf{x}_t$ for tractability; the resulting voxel-space gradient is applied directly as a correction to the $\mathbf{x}_t$ predictor update. For X-ray-to-CT, $\mathcal{A}$ is instantiated as a differentiable digitally reconstructed radiograph (DRR) projector using DiffDRR~\cite{gopalakrishnan2022fast}, allowing gradients to flow from projection space to voxels. With $M$ available X-rays, the loss becomes
\begin{equation}
\mathcal{L}_{\mathrm{xray}}=\sum_{i=1}^{M}\|\mathcal{P}_{g_i}(\hat{\mathbf{x}}_0)-\mathbf{y}_i\|_2^2,
\label{eq:multi_view_loss}
\end{equation}
where $\mathcal{P}_{g_i}$ denotes projection under view geometry $g_i$. Thus changing the number or geometry of views changes only the residual terms in the loss, not the network architecture or weights.

\noindent\textbf{Cosine-decay guidance.}
We use a time-dependent guidance scale $\eta_t=\eta_{\max}\cdot (1-\cos(\pi t/T))/2$, which decreases monotonically from $\eta_{\max}$ at $t=T$ toward zero as $t\to 0$.
At high-noise timesteps, $\eta_t\approx\eta_{\max}$ provides strong measurement guidance to shape global structure. As $t\to1$, $\eta_t$ decays toward zero, reducing over-correction and preserving fine anatomical details. This avoids the trade-off of a fixed guidance scale, which can under-correct at high noise levels or over-correct at low noise levels.

Algorithm~\ref{alg} assembles the above components into the full conditional sampling procedure. At each timestep, the frozen prior predicts $(\hat{\boldsymbol{\epsilon}}, \hat{\mathbf{x}}_0)$, the predictor applies the prior-induced update together with measurement-consistency guidance, and the corrector restores the target noise level. Task adaptation is entirely controlled by $\mathcal{A}$ and $\mathbf{y}$, while $f_\theta$ remains fixed.
