% \vspace{-0.1in}
\section{Reflected Schr\"{o}dinger bridge}
\label{rSB_}
Although reflected diffusion models have demonstrated empirical success in image applications on hypercubes, extensions to general domains with optimal-transport guarantees remain limited \citep{Lavenant_Santambrogio_22}. Notably, the forward process \eqref{SGM-SDE-f} requires a long time $T$ to approach the prior distribution, which inevitably leads to slow inference \citep{DSB}. To address this issue, the dynamic Schr\"{o}dinger bridge problem (SBP) on a bounded domain $\Omega$ seeks to solve
\begin{align}\label{dynamic_SBP}
    &\inf_{\mathbb{P}\in \mathcal{D}(\mu_{\star}, \nu_{\star})}\text{KL}(\mathbb{P}\|\mathbb{Q}), 
\end{align}
where the coupling $\mathbb{P}$ belongs to the path space $\mathcal{D}(\mu_{\star}, \nu_{\star})\subset C([0, T], \Omega)$ with marginal measures $\mu_{\star}$ at time $t=0$ and $\nu_{\star}$ at $t=T$; $\mathbb{Q}$ is the prior path measure, such as the measure induced by the path of the reflected Brownian motion or Ornstein-Uhlenbeck (OU) process. From the perspective of stochastic control, the dynamic SBP aims to minimize the cost along the reflected process


% %%%%%%%%%%%%%%%%%%%%%%%%%% single columns
% \begin{align}
%     &\qquad\quad\quad\inf_{\bu\in \mathcal{U}} \E\bigg\{\int_0^T \frac{1}{2}\|\bu(\bx_t, t)\|^2_2 \dd t \bigg\} \notag\\
%     \text{s.t.} \  &{\dd \bx_t=\left[\bbf(\bx_t, t)+g(t)\bu(\bx_t, t)\right]\dd t+ \sqrt{2\varepsilon}g(t) \dd \bm{\mathrm{w}}_t}+\bn(\bx_t)\dd \mathbf{L}_t, \label{control_diffusion_main}\\
%     & \quad \bx_0\sim \mu_{\star},\ \  \bx_T\sim \nu_{\star}, \ \ \bx_t\in \Omega, \ \ \text{ for any } t\in [0, T] \notag %\\
%     % &\ \ \ \ \bx_0\sim \mu_{\star}(\cdot),\ \  \bx_T\sim \nu_{\star}(\cdot)\notag
%     % ,
% \end{align}

%%%%%%%%%%%%%%%%%%%%%%% double
\begin{align}
    &\qquad\quad\quad\inf_{\bu\in \mathcal{U}} \E\bigg\{\int_0^T \frac{1}{2}\|\bu(\bx_t, t)\|^2_2 \dd t \bigg\} \notag\\
    \text{s.t.} \  &\dd \bx_t=\left[\bbf(\bx_t, t)+g(t)\bu(\bx_t, t)\right]\dd t \label{control_diffusion_main}\\
    & \qquad\qquad\qquad\qquad + \sqrt{2\varepsilon}g(t) \dd \bm{\mathrm{w}}_t+\bn(\bx_t)\dd \mathbf{L}_t, \notag \\
    & \quad \bx_0\sim \mu_{\star},\ \  \bx_T\sim \nu_{\star}, \ \ \bx_t\in \Omega, \ \ \text{ for any } t\in [0, T] \notag %\\
    % &\ \ \ \ \bx_0\sim \mu_{\star}(\cdot),\ \  \bx_T\sim \nu_{\star}(\cdot)\notag
    % ,
\end{align}
where $\mathcal{U}$ is a set of control functions; $\varepsilon$ is the entropic regularizer for EOT; $\bn(\bx)$ is the inward unit normal vector at $\bx\in \partial \Omega$ and $\bm{0}$ for $\bx \in {\Omega}$; the expectation is taken with respect to the density $\rho(\bx, t)$. \textcolor{black}{Simulation demos of the reflected SDEs are shown in Figure~\ref{reflected_Langevin}.}

To derive the reflected FB-SDEs and training scheme, we first present standard assumptions on the regularity properties \citep{oksendal2003stochastic}, as well as the smoothness of measure \citep{Sitan_22_sampling_is_easy, forward_backward_SDE} and boundary \citep{Andrew_Lamperski_21_COLT}:


\begin{assump}[Regularity on drift and diffusion]\label{ass:regularity}
    The drift $\bbf$ and diffusion term $g>0$ satisfy the Lipschitz and linear growth conditions.
\end{assump}

\begin{assump}[Smooth boundary]\label{ass:smooth_boundary}
    The domain $\Omega$ is bounded and has a smooth boundary.
\end{assump}
% Regarding domains with countable corners, the probability of hitting these corners is 0. 
Extensions to general convex domains (with corners) are also studied in \citet{Andrew_Lamperski_21_COLT}. %For non-convex domains with corners, we didn't observe any empirical challenges given properly-defined unit vectors.

\begin{assump}[Smooth measure] \label{ass:smooth_measure}
The probability measures $\mu_{\star}$ and $\nu_{\star}$ are smooth in the sense that the energy functions $U_{\star}=-\log \frac{\dd\mu_{\star}}{\dd \bx}$ and $V_{\star}=-\log \frac{\dd\nu_{\star}}{\dd \bx}$ are differentiable. 
\end{assump}




\subsection{Reflected forward-backward stochastic differential equations}




Following the tradition in mechanics \citep{Pavliotis14}, we rewrite the reflected SBP as follows
\begin{align}
    &\inf_{\bu\in \mathcal{U}} \int_0^T \int_{\Omega} \frac{1}{2} \rho \|\bu\|^2_2\dd \bx \dd t \notag \\
     \text{s.t.} &\ \ \frac{\partial \rho}{\partial t}+\nabla \cdot  \mathbf{J}|_{\bx\in\Omega}=0, \ \ \big\langle \mathbf{J},  \bn \big\rangle|_{\bx \in\partial \Omega}=0,  \label{FKP_eqn_main}
\end{align}
where $\mathbf{J}$ is the probability flux of the continuity equation, $\mathbf{J}\equiv \rho (\bbf+g \bu)-\varepsilon g^2 \nabla \rho$ \citep{Pavliotis14}.

% , \rho \nicole{\rho \in ?} \Wei{not needed}


We next solve the objective with a Lagrange multiplier $\phi(\bx, t)$. Applying Stokes' theorem, with details presented in Appendix~\ref{rFB-SDE_derive}, we have
\begin{align}
\footnotesize
    \mathcal{L}(\rho, \bu, \phi)&=\underbrace{\int_0^T\int_{\Omega} \bigg(\frac{1}{2}\rho\|\bu\|^2_2\ - \rho\frac{\partial \phi}{\partial t}-\langle \nabla \phi, \mathbf{J}\rangle \bigg)\dd \bx \dd t}_{\overline{\mathcal{L}}(\rho, \bu, \phi)} + \underbrace{\int_{\Omega} \phi \rho |_{t=0}^T \dd \bx}_{\text{constant term w.r.t. $\bu$}} + \underbrace{\int_0^T \int_{\partial \Omega} \big\langle \mathbf{J}, \bn\big\rangle     \dd \sigma(\bx) \dd t}_{:=0 \text{ by Eq.} \eqref{FKP_eqn_main}}.\notag
\end{align}

Minimizing $\mathcal{L}$ with respect to $\bu$, we can obtain $\bu^{\star}=g \nabla\phi$.  Further applying the Cole-Hopf transform $\overrightarrow\psi(\bx, t)=\exp\big(\frac{\phi(\bx, t)}{2\varepsilon}\big)$ and setting $\overline{\mathcal{L}}(\rho, \bu^{\star}, \phi)=0$, we derive the \emph{backward Kolmogorov equation} with \emph{Neumann boundary} conditions
\begin{align*}
\begin{cases}
&\frac{\partial \overrightarrow\psi}{\partial t}+\varepsilon g^2\Delta\overrightarrow\psi + \langle \nabla \overrightarrow\psi, \bbf \rangle=0 \qquad \text{   in }\Omega\\
&\langle \nabla\overrightarrow\psi, \bn\rangle=0  \qquad\qquad\qquad\qquad\quad \text{   on }\partial\Omega. 
\end{cases}
\end{align*}
% \nicole{$\nabla \overrightarrow\psi$} \Wei{great catch, thanks!}
Next we define $\overleftarrow\varphi=\rho^{\star}/\overrightarrow\psi$, where $\rho^{\star}$ is the optimal density of Eq.\eqref{control_diffusion_main} given $\bu^{\star}$. We arrive at the \emph{forward Kolmogorov equation} with the \emph{Robin boundary} condition
\begin{align*}
\begin{cases}
&\partial_t \overleftarrow \varphi +\nabla\cdot \big(\overleftarrow \varphi \bbf - \varepsilon g^2 \nabla \overleftarrow \varphi \big)=0 \qquad \text{ in }\Omega\\
&\langle \overleftarrow \varphi \bbf - \varepsilon g^2 \nabla \overleftarrow \varphi, \bn\rangle =0 \ \qquad\qquad\quad\ \text{ on } \partial \Omega.
\end{cases}
\end{align*}

Despite their elegance, solving PDEs in high dimensions often poses significant challenges due to the curse of dimensionality \citep{Han19}. To overcome these challenges, we resort to a set of reflected FB-SDEs:

\begin{theorem}
Consider a \emph{Schr\"{o}dinger (PDE) system} with Neumann and Robin boundary conditions
\begin{align}\label{PDE_optimal}
\footnotesize
\begin{cases}
\frac{\partial \overrightarrow\psi}{\partial t}+\langle \nabla \overrightarrow\psi, \bbf \rangle +\varepsilon g^2\Delta\overrightarrow\psi=0 \\[3pt]
\frac{\partial \overleftarrow\varphi}{\partial t}+\nabla\cdot (\overleftarrow\varphi \bbf)-\varepsilon g^2 \Delta \overleftarrow\varphi=0
\end{cases}
\text{s.t. } \ \big\langle \nabla\overrightarrow \psi,\bn \big\rangle|_{\bx\in\partial \Omega} =0,\big\langle  \bbf \overleftarrow\varphi- \varepsilon g^2 \nabla\overleftarrow\varphi,  \bn \big\rangle|_{\bx \in\partial \Omega}=0.
\end{align}
    
Solving the PDE system gives rise to the reflected FB-SDEs with $\bx_t\in\Omega$
\begin{subequations}
\begin{align}
\dd  \bx_t&=\left[\bbf(\bx_t, t) + 2\varepsilon g(t)^2\nabla\log\overrightarrow\psi(\bx_t, t)\right]\dd t+ \sqrt{2\varepsilon} g(t) \dd  \mathbf{w}_t+\bn(\bx_t)\dd {\mathbf{L}}_t, \ \  \bx_0\sim \mu_{\star}, \label{FB-SDE-f}\\
\dd  \bx_t&=\left[\bbf(\bx_t, t) - 2\varepsilon g(t)^2 \nabla\log\overleftarrow\varphi(\bx_t, t)\right]\dd t+ \sqrt{2\varepsilon} g(t) \dd  \overline{\mathbf{w}}_t+\bn(\bx_t)\dd \overline{\mathbf{L}}_t,\ \ \  \bx_T \sim \nu_{\star}.\label{FB-SDE-b}
\end{align}\label{FB-SDE}
\end{subequations}
The connection to the probability flow ODE is also studied and presented in Section~\ref{prob-flow-ode}.
\end{theorem}
% \vspace{-0.1in}

\begin{figure*}[!ht]
  \centering
  % \vskip -0.2in
    {\includegraphics[scale=0.3]{figures/5_domains_titiled.png}} 
    % \vskip -0.1in
  \caption{Reflected OU processes (\textbf{\textcolor{red}{reflected}} vs.\ \textcolor{teal}{unconstrained}), driven by the same Brownian motion, excluding the reflections. All boundary curves have properly defined unit vectors.
  % \textcolor{blue}{this figure is not referred. }
  }\label{reflected_Langevin}
  % \vskip -0.05in
  % \vspace{-1.5em}
\end{figure*}

% \vspace{-0.1in}


\subsection{Likelihood training}

It is worth mentioning that the reflected FB-SDE \eqref{FB-SDE} is not directly accessible due to the unknown control variables $(\nabla\log\overrightarrow\psi, \nabla\log\overleftarrow\varphi)$. To tackle this issue, a standard tool is the (nonlinear) Feynman-Kac formula \citep{Ma_FB_SDE, Karatzas_Shreve}, which leads to a stochastic representation.

\begin{proposition}[Feynman-Kac representation] Suppose that Assumptions \ref{ass:regularity}--\ref{ass:smooth_boundary} hold, $\overleftarrow\varphi$ satisfies the PDE system \eqref{PDE_optimal}, and $\bx_t$ follows the diffusion \eqref{FB-SDE-f}. Define $\overrightarrow y_t \equiv \overrightarrow y(\bx_t, t)= \log \overrightarrow\psi(\bx_t, t)$ and $\overleftarrow y_t\equiv \overleftarrow y(\bx_t, t)=\log \overleftarrow\varphi(\bx_t, t)$.
% Then the diffusion for $\overleftarrow y_t$ follows that
% \begin{align}\label{fb-sde-solver}
%     \dd \overleftarrow y_t=\bigg(\underbrace{\frac{1}{2} \|  \overleftarrow \bz_t\|_2^2  + \nabla \cdot \big( \overleftarrow g\bz_t - \bbf \big) + \langle  \overleftarrow \bz_t,  \overrightarrow \bz_t\rangle}_{\zeta(\bx_t, t)}\bigg)\dd t + \langle \overleftarrow \bz_t, \dd  \overline{\mathbf{w}}_t \rangle +\frac{1}{g}\big\langle  \overleftarrow \bz_t, \bn_t \rangle \dd \overline{\mathbf{L}}_t,
% \end{align}
% where $\overrightarrow\bz_t \equiv \overrightarrow\bz(\bx_t, t)=g \nabla \overrightarrow y_t$, $\overleftarrow \bz_t \equiv \overleftarrow \bz(\bx_t, t)=g \nabla \overleftarrow y_t$. 
Then $\overleftarrow y_s$ admits a stochastic representation 
\begin{align*}
    \overleftarrow y_s=\E\bigg[\overleftarrow y_T -\int_s^T \underbrace{\bigg(\frac{1}{2} \|  \overleftarrow \bz_t\|_2^2  + \nabla \cdot \big( g\overleftarrow \bz_t - \bbf \big) + \langle  \overleftarrow \bz_t,  \overrightarrow \bz_t\rangle\bigg)\dd t+\dd \overleftarrow{\mathbf{L}}_t}_{\zeta(\bx_t, t)} \bigg|\bx_s=\textbf{x}_s\bigg],
\end{align*}
on $\Omega\times[0, T]$; $\overrightarrow\bz_t \equiv \overrightarrow\bz(\bx_t, t)=g \nabla \overrightarrow y_t$, $\overleftarrow \bz_t \equiv \overleftarrow \bz(\bx_t, t)=g \nabla \overleftarrow y_t$, $\dd \overleftarrow{\mathbf{L}}_t=\frac{1}{g}\langle \overleftarrow\bz_t, \bn_t\rangle \dd {\mathbf{L}}_t.$
\end{proposition}
\begin{rawproof}  The proof primarily relies on Theorem 3 from \citet{forward_backward_SDE} and applies (generalized) It\^o's lemma to $\overleftarrow y_t$ using \eqref{PDE_optimal} and \eqref{FB-SDE-f}. The difference is to incorporate the generalized It\^o's lemma \citep{sebastien_bubeck, Andrew_Lamperski_21_COLT} to address the local time of $\bx_t$ at the boundary $\partial \Omega$. Subsequently, our analysis establishes that $\overleftarrow y_{s}-\int_{s_1}^{s} \zeta(\bx_t, t)$, where $s\in[s_1, T]$, is a martingale within the domain $\Omega$, which concludes our proposition.
    \qed
\end{rawproof}
    



A direct application of the proposition is to obtain the log-likelihood $\overleftarrow y_0$ given data points $\textbf{x}_0$. With parametrized models $(\overrightarrow\bz_t^{\theta}, \overleftarrow\bz_t^{\omega})$ to approximate $(\overrightarrow\bz_t, \overleftarrow\bz_t)$, we can optimize the backward score function $\overleftarrow\bz_t^{\omega}$ through the forward loss function ${\mathcal{L}}(\textbf{x}_0;\omega)$ in Algorithm \ref{primal_dyanmic_IPF}. Regarding the forward-score estimation, similar to Theorem 11 of \citet{forward_backward_SDE}, the symmetric property of the reflected SB also enables us to optimize $\overrightarrow\bz_t^{\theta}$ via the backward loss function ${\mathcal{L}}(\textbf{x}_T;\theta)$.



\begin{algorithm}[ht]
\caption{One iteration of the backward-forward score function solver to optimize $(\overrightarrow\bz_t^{\theta}, \overleftarrow\bz_t^{\omega})$ with the reflection implemented in Algorithm \ref{reflection_alg}. We cache the trajectories following \citet{DSB} to avoid expensive computational graphs. \textcolor{black}{In practice, $\E[\overleftarrow y_T]$ and $\E[\overrightarrow y_0]$ are often omitted to facilitate training \citep{forward_backward_SDE}.}}\label{primal_dyanmic_IPF}
    \begin{align*}
    % \footnotesize
    {\mathcal{L}}(\textbf{x}_0;\omega)&=-\int_0^T \E_{{\bx_t\sim \eqref{FB-SDE-f}}}\bigg[\bigg(\frac{1}{2}\|  \overleftarrow\bz^{\omega}_t \|_2^2 + g\nabla \cdot \overleftarrow \bz^{\omega}_t+ \langle \overrightarrow\bz^{\theta}_t, \overleftarrow\bz^{\omega}_t \rangle  \bigg)\dd t + \dd \overleftarrow{\mathbf{L}}^{\omega}_t \bigg|\bx_0=\textbf{x}_0\bigg]\\
    {\mathcal{L}}(\textbf{x}_T;\theta)&=-\int_0^T \E_{\bx_t\sim \eqref{FB-SDE-b}}\bigg[\bigg(\frac{1}{2}\|  \overrightarrow\bz^{\theta}_t \|_2^2 + g\nabla \cdot \overrightarrow \bz_t^{\theta}+ \langle \overleftarrow\bz_t^{\omega}, \overrightarrow\bz_t^{\theta} \rangle  \bigg)\dd t +\dd \overrightarrow{\mathbf{L}}_t^{\theta} \bigg|\bx_T=\textbf{x}_T\bigg],
\end{align*}
where $\dd \overleftarrow{\mathbf{L}}_t^{\omega}=\frac{1}{g}\langle \overleftarrow\bz_t^{\omega}, \bn_t\rangle \dd {\mathbf{L}}_t$ and $\dd \overrightarrow{\mathbf{L}}_t^{\theta}=\frac{1}{g}\langle \overrightarrow\bz_t^{\theta}, \bn_t\rangle \dd \overline{\mathbf{L}}_t$. \eqref{FB-SDE-f} (respectively, \eqref{FB-SDE-b}) is approximated via $\overrightarrow\bz_t^{\theta}$ (respectively, $\overleftarrow\bz_t^{\omega}$).
\end{algorithm}

By the data processing inequality, our loss function provides a lower bound of the log-likelihood, which resembles the evidence lower bound (ELBO) in variational inference  \citep{song_likelihood_training}. We can expect a smaller variational gap given more accurate parametrized models.




When the domain is taken to be $\Omega=\mathbb{R}^d$, the aforementioned solvers become equivalent to the loss functions (18--19) presented in \citet{forward_backward_SDE}. 

\subsection{Connections to the IPF algorithm}\label{connections_to_IPF}

Similar in spirit to Theorem 3 of \citet{song_likelihood_training}, Algorithm \ref{primal_dyanmic_IPF} results in an elegant half-bridge solver ($\mu_{\star}\rightarrow\nu_{\star}$ vs.\ $\mu_{\star}\leftarrow\nu_{\star}$) to approximate the primal formulation \citep{Nutz22_note} of the dynamic Schr\"odinger bridge \eqref{dynamic_SBP} \citep{DSB, SBP_max_llk}:



\begin{align}\label{dynamic_IPF_projection}
    {\textbf{Dynamic Primal IPF}}\quad &\mathbb{P}_{2k}=\argmin_{\mathbb{P}\in \mathcal{D}(\cdot, \nu_{\star})} \text{KL}(\mathbb{P}\|\mathbb{P}_{2k-1}),\quad \mathbb{P}_{2k+1}=\argmin_{\mathbb{P}\in \mathcal{D}(\mu_{\star}, \cdot)} \text{KL}(\mathbb{P}\|\mathbb{P}_{2k}),
\end{align}



which is known as the dynamic IPF algorithm (also referred to as the Sinkhorn algorithm) \citep{IPF_95, DSB}. Consider the disintegration of the path measure $\mathbb{P}=\pi\otimes \mathbb{P}^{\mu_{\star}, \nu_{\star}}$ %such that
\begin{align}
    \mathbb{P}(\cdot)=\iint_{\Omega^2} \mathbb{P}^{\textbf{x}_0, \textbf{x}_T}(\cdot)\pi(\dd\textbf{x}_0, \dd\textbf{x}_T)\label{bridge_representation},
\end{align}
where $\mathbb{P}^{\textbf{x}_0,\textbf{x}_T}\in \mathbb{P}^{\mu_{\star}, \nu_{\star}}$ is a diffusion bridge from ${\bx_0}=\textbf{x}_0$ to ${\bx_T}=\textbf{x}_T$, $\pi \in \Pi(\mu_{\star}, \nu_{\star})$, and $\Pi(\mu_{\star}, \nu_{\star})$ denotes the space of couplings on the product space $\Omega^2$ whose first and second marginals are $\mu_{\star}$ and $\nu_{\star}$, respectively. Projecting the path space $\mathcal{D}$ onto the space of couplings $\Pi$, we obtain the static IPF algorithm in the primal formulation:
\begin{align}\label{staic_IPF_projections_}
    {\textbf{Static Primal IPF}}\quad \pi_{2k}=\argmin_{\pi\in \Pi(\cdot, \nu_{\star})} \text{KL}(\pi\|\pi_{2k-1}),\quad\pi_{2k+1}=\argmin_{\pi\in \Pi(\mu_{\star}, \cdot)} \text{KL}(\pi\|\pi_{2k}).
\end{align}
