
\section{Empirical Ablation Study} \label{app:empirical_study}
In Section~\ref{sec:app:dataset}, we introduce the three datasets used in our experiments: the five-mode, 3-round spiral, and dot-hyperbola datasets.  
In Section~\ref{sec:app:first_order_loss}, we present results for the first-order loss applied to the datasets, and in Section~\ref{sec:app:second_order_nrflow}, we examine the effect of the second-order loss. Section~\ref{sec:app:third_order_nrflow} extends this analysis by including the third-order loss. 
\subsection{Three Datasets}\label{sec:app:dataset}
We employ three datasets for our experiments: the five-mode dataset, the 3-round spiral dataset, and the dot-hyperbola Gaussian mixture distribution dataset, all with a variance of $0.3$ for each Gaussian component. In the five-mode dataset, five source modes (\textbf{orange}) are positioned at a distance of $D_0 = 6$ from the origin, and five target modes (\textbf{pink}) are positioned at a distance of $D_1 = 13$ from the origin, each mode containing 200 sampled points. For the 3-round spiral dataset, 600 points are drawn from Gaussian distributions, each with a variance of $0.3$, for both the source and target distributions. Similarly, the dot-hyperbola dataset consists of 900 points sampled from Gaussian distributions with a variance of $0.3$ for both the source and target.
\begin{figure}[!ht]
\centering
\includegraphics[width=0.25\textwidth]{5_dataset.pdf}
\includegraphics[width=0.25\textwidth]{3round_spin_dataset.pdf}
\includegraphics[width=0.25\textwidth]{dothyperbola_dataset.pdf}
\caption{Gaussian mixture distributions visualized: five-mode dataset (\textbf{Left}), 3-round spiral dataset (\textbf{Middle}), and dot-hyperbola dataset (\textbf{Right}). The primary objective is for NRFlow to learn the transport trajectory from the source distribution $\pi_0$ ({\textbf{orange}}) to the target distribution $\pi_1$ ({\textbf{pink}}).}
\label{fig:three_dataset}
\end{figure}

\subsection{Only First Order Loss}\label{sec:app:first_order_loss}
The models are optimized by minimizing the sum of squared errors (SSE). Both the source and target distributions are Gaussian. The target transport trajectory is modeled using the VP ODE framework from~\cite{rectified_flow}, expressed as $x_t = \alpha_t x_0 + \beta_t x_1$. The parameters $\alpha_t$ and $\beta_t$ are defined as $\alpha_t = \exp(-\frac{1}{4} a(1-t)^2 - \frac{1}{2} b(1-t))$ and $\beta_t = \sqrt{1 - \alpha_t^2}$, with hyperparameters $a = 19.9$ and $b = 0.1$. In each of the five-mode, 3-round spiral, and dot-hyperbola datasets, 100 points are sampled from both the source and target distributions for each mode. The five-mode dataset training involves an ODE solver and Adam optimizer, using a 2-layer MLP with 100 hidden dimensions, a batch size of 800, a learning rate of 0.005, and 2000 training steps. For the 3-round spiral dataset, the training setup is similar, except with a batch size of 1000 and 1000 training steps. For the dot-hyperbola dataset, the batch size is increased to 1600 while maintaining the same learning rate and optimizer settings and 1000 training steps. 

\begin{figure}[!ht]
\centering
\includegraphics[width=0.25\textwidth]{5_1_output.pdf}
\includegraphics[width=0.25\textwidth]{1_1_3round_spin_output.pdf}
\includegraphics[width=0.25\textwidth]{1_1_dothyperbola.pdf}
\caption{
NRFlow generated distributions optimized by the first-order loss only: five-mode dataset (\textbf{Left}), 3-round spiral dataset (\textbf{Middle}), and dot-hyperbola dataset (\textbf{Right}). The source distribution $\pi_0$ ({\textbf{orange}}), the target distribution $\pi_1$ ({\textbf{pink}}), and the generated distribution ({\textbf{purple}}) are shown.}
\label{fig:1_distribution}
\end{figure}

\subsection{Second Order NRFlow}\label{sec:app:second_order_nrflow}
The models are optimized by minimizing the sum of squared errors (SSE). Both the source and target distributions are Gaussian. The target transport trajectory is modeled using the VP ODE framework from~\cite{rectified_flow}, expressed as $x_t = \alpha_t x_0 + \beta_t x_1$. The parameters $\alpha_t$ and $\beta_t$ are defined as $\alpha_t = \exp(-\frac{1}{4} a(1-t)^2 - \frac{1}{2} b(1-t))$ and $\beta_t = \sqrt{1 - \alpha_t^2}$, with hyperparameters $a = 19.9$ and $b = 0.1$. In each of the five-mode, 3-round spiral, and dot-hyperbola datasets, 100 points are sampled from both the source and target distributions for each mode. The five-mode dataset training involves an ODE solver and Adam optimizer, using a 2-layer MLP with 100 hidden dimensions, a batch size of 800, a learning rate of 0.005, and 2000 training steps. For the 3-round spiral dataset, the training setup is similar, except with a batch size of 1000 and 1000 training steps. For the dot-hyperbola dataset, the batch size is increased to 1600 while maintaining the same learning rate and optimizer settings and 1000 training steps. 
\begin{figure}[!ht]
\centering
\includegraphics[width=0.25\textwidth]{5_12_output.pdf}
\includegraphics[width=0.25\textwidth]{1_12_3round_spin_output.pdf}
\includegraphics[width=0.25\textwidth]{1_12_dothyperbola.pdf}
\caption{
NRFlow generated distributions optimized by the first-order and second-order losses: five-mode dataset (\textbf{Left}), 3-round spiral dataset (\textbf{Middle}), and dot-hyperbola dataset (\textbf{Right}). The source distribution $\pi_0$ ({\textbf{orange}}), the target distribution $\pi_1$ ({\textbf{pink}}), and the generated distribution ({\textbf{purple}}) are shown.}
\label{fig:12_distribution}
\end{figure}

\subsection{Third Order NRFlow}\label{sec:app:third_order_nrflow}
The models are optimized by minimizing the sum of squared errors (SSE). Both the source and target distributions are Gaussian. The target transport trajectory is modeled using the VP ODE framework from~\cite{rectified_flow}, expressed as $x_t = \alpha_t x_0 + \beta_t x_1$. The parameters $\alpha_t$ and $\beta_t$ are defined as $\alpha_t = \exp(-\frac{1}{4} a(1-t)^2 - \frac{1}{2} b(1-t))$ and $\beta_t = \sqrt{1 - \alpha_t^2}$, with hyperparameters $a = 19.9$ and $b = 0.1$. In each of the five-mode, 3-round spiral, and dot-hyperbola datasets, 100 points are sampled from both the source and target distributions for each mode. The five-mode dataset training involves an ODE solver and Adam optimizer, using a 2-layer MLP with 100 hidden dimensions, a batch size of 800, a learning rate of 0.005, and 2000 training steps. For the 3-round spiral dataset, the training setup is similar, except with a batch size of 1000 and 1000 training steps. For the dot-hyperbola dataset, the batch size is increased to 1600 while maintaining the same learning rate and optimizer settings and 1000 training steps. 
\begin{figure}[!ht]
\centering
\includegraphics[width=0.25\textwidth]{5_123_output.pdf}
\includegraphics[width=0.25\textwidth]{1_123_3round_spin_output.pdf}
\includegraphics[width=0.25\textwidth]{1_123_dothyperbola.pdf}
\caption{
NRFlow generated distributions optimized by the first-order, second-order, and third-order losses: five-mode dataset (\textbf{Left}), 3-round spiral dataset (\textbf{Middle}), and dot-hyperbola dataset (\textbf{Right}). The source distribution $\pi_0$ ({\textbf{orange}}), the target distribution $\pi_1$ ({\textbf{pink}}), and the generated distribution ({\textbf{purple}}) are shown.}
\label{fig:123_distribution}
\end{figure}