\documentclass[accepted]{uai2022}  % camera-ready (accepted, non-anonymous) version
\usepackage[american]{babel}


%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams


\usepackage[nottoc]{tocbibind}

\usepackage{times}
\usepackage{soul}
\usepackage{url}
%\usepackage[hidelinks]{hyperref}
\usepackage[utf8]{inputenc}
%\usepackage[small]{caption}
%\usepackage{amsmath}
\usepackage{amsthm}
\usepackage{algorithm}
\usepackage{algorithmic}
\urlstyle{same}
\usepackage{amsmath,amssymb,amsfonts}
\usepackage{xcolor}
\usepackage{booktabs} % For formal tables
% \usepackage[linesnumbered,ruled,vlined]{algorithm2e}

%\usepackage{cite}
\usepackage{graphicx}
\usepackage{physics}  
\usepackage{subfigure}
\usepackage{multirow}
\usepackage{bm}
\usepackage{color}
\usepackage{comment}
\usepackage{algorithm}
\usepackage{algorithmic}
\usepackage{braket}
\usepackage{mathtools}
\usepackage{enumerate}
\usepackage{enumitem}
\usepackage{empheq}
\usepackage{calc}
\usepackage{balance}
\usepackage{dsfont}
\usepackage{textcomp} 
\usepackage{color, colortbl}
\usepackage[first=0,last=9]{lcg}

% \setlength{\textfloatsep}{0.1cm}
% \setlength{\intextsep}{0pt plus 2pt}
% \setlength{\floatsep}{0pt}
% \captionsetup[table]{skip=1pt}





\newcommand{\swap}[3][-]{#3#1#2} % just an example
\begin{document}
\title{Physics Guided Neural Networks for Spatio-temporal Super-resolution of Turbulent Flows}

\author[1]{Tianshu Bao\thanks{These authors contributed equally to this work.}}
\author[2]{Shengyu Chen$^*$}
\author[1]{Taylor T. Johnson}
\author[3]{Peyman Givi}
\author[3]{Shervin Sammak}
\author[2]{Xiaowei Jia}
\affil[1]{
    Department of Computer Science\\
    Vanderbilt University,\
    Nashville, Tennessee, USA
}
\affil[2]{
    Department of Computer Science\\
    University of Pittsburgh,\
    Pittsburgh, Pennsylvania, USA
}
\affil[3]{
    Department of Mechanical Engineering and Materials Science\\
    University of Pittsburgh,\
    Pittsburgh, Pennsylvania, USA
}
\maketitle




\begin{abstract}
Direct numerical simulation (DNS) of turbulent flows is computationally expensive and is not practical for simulating   flows at high Reynolds numbers.  Low-resolution large eddy simulation (LES) is a pragmatic alternative, but its success depends on modeling of the small scale flow dynamics.   Reconstructing DNS from low-resolution LES is critical for  many scientific and engineering disciplines, but it poses many challenges to existing super-resolution methods due to the complexity of turbulent flows and computational cost of generating frequent LES data. 
In this work, we propose a physics-guided neural network for reconstructing frequent DNS from sparse LES data by enhancing its spatial resolution and temporal frequency. Our proposed method consists of a partial differential equation (PDE)-based recurrent unit for capturing underlying temporal processes and a physics-guided super-resolution model that incorporates additional physical constraints. 
We demonstrate the effectiveness of both components in reconstructing the data generated by simulating the Taylor-Green Vortex from sparse LES data. Moreover, we show that the proposed recurrent unit can preserve the physical characteristics of turbulent flows by leveraging the physical relationships in the Navier-Stokes equation.

\end{abstract}
%\vspace{-.1in}
\section{Introduction}

%\textcolor{red}{Revise the first two paragraphs.}
%Computational fluid dynamics (CFD) has proven to be a very effective research tool in a very wide variety of disciplines, including engineering, science, medicine and more \citet{CFD1995}. For its applications in  turbulent flows, however, the range of the temporal and  spatial scales is too broad to be captured by brute force direct numerical simulations (DNS) \citet{Davidson2015}. Large eddy simulation (LES) provides an alternative, by filtering the small-scale scales of transport and concentrating on the larger scale energy containing eddies \citet{Sagaut05}. 
Understanding turbulence is the key to our comprehension of many natural and technological processes in engineering, science,  medicine and many other disciplines. Direct numerical simulation (DNS) of the 
Navier-Stokes equations is  widely regarded as the  methodology  with the highest fidelity in capturing the dynamics of turbulent flows~\citep{Givi94}. DNS is essentially a brute-force computational methodology that provides the solution of the unsteady governing equations of fluid flow at all temporal and spatial scales. Such simulations can be very expensive at high Reynolds numbers.  %Straightforward estimates indicate that simulation of an incompressible flow with  Reynolds number $\text{Re} = {\cal O} (10^5)$ within  a domain of size of ${\cal O}\left[(100 \ell)^3\right]$ would require about {\it a century} of CPU time on a 1 teraflop computer!  
A practical alternative, the large eddy simulation (LES), concentrates on the larger scale eddies and models the effects of the  subgrid-scale transport.  By this filtering, LES can be conducted on coarser grids as compared to those required by DNS. The penalty is that LES-generated data are, generally, of lower fidelity as   compared to DNS~\citep{NNGLP17}. 
%Moreover, the generation of high-quality LES can also be expensive.  %Appraisal of LES predictions and assessments of its fidelity as compared to DNS, have been of interest in the turbulence research community for the past several decades \citet{givi1989model,pope2001turbulent}. %The objective of the present work is to build a new data-driven methodology to reconstruct DNS from LES data, which facilitates a more robust means of LES appraisal. % predictions.  

% promise of machine learning
Machine learning, especially super-resolution (SR) methods~\citep{Cheo2003SR}, has already shown tremendous success in reconstructing high-resolution data in a variety of commercial applications.  %For example, convolutional neural networks (CNNs) and their extensions, e.g., SRCNN~\cite{dong2014learning}, RCAN~\cite{zhang2018image}, and SRGAN~\cite{ledig2017photo}, have proven very effective in directly mapping low-resolution images to high-resolution images. The effectiveness of these methods mainly come from the power of CNNs in automatically extracting representative spatial features through deep layers. An alternative solution is to consider super-resolution as an inverse modeling problem~\cite{geiss2020invertible,mccann2017convolutional} with the constraints that the down-sampled version of the underlying high-resolution data should be consistent to the observed low-resolution data. 
%Deep learning-based super-resolution (SR) methods have been widely and very successfully utilized  in computer vision for upscaling image data to a higher resolution. 
The power of these models comes mainly from the use of convolutional network layers~\citep{albawi2017understanding}, which can extract the spatial texture features and transform them through complex non-linear mappings to recover high-resolution data. 
Since the earliest end-to-end convolution-based SR model~\citep{dong2014learning},  
many investigators  have added skip-connections in SR models~\citep{zhang2018image,Duong2021,Dai2019,zhang2018residual,ahn2018fast,Tai2017} to bypass redundant low-resolution information and promote the stability of optimizing deep networks. 
Moreover,  advances in adversarial learning allow preservation of  high-level features extracted from target high-resolution images through a separate discriminator network~\citep{ledig2017photo,chen2018fsrnet,wang2018recovering, wang2018esrgan,karras2017progressive,gan8759375,cheng2021mfagan,Long2021}. 
Given their success in computer vision, researchers have begun to apply SR methods to reconstruct turbulence data~\citep{fukami2019super,obiols2021surfnet,Deng2019SuperresolutionRO,stengel2020adversarial,venkatesh2021comparative,xie2018tempogan,Fukami_2020,liu2020deep,chen2021reconstructing}. %Fukami et al.~\cite{fukami2019super} propose an improved CNN-based hybrid DSC/MS model by extracting patterns from multiple scales. Other recent works have shown that the reconstruction can be further improved by leveraging both the spatial and temporal contextual information~\cite{Fukami_2020,liu2020deep}. Adversarial learning-based methods have also shown promising results in reconstructing turbulent flows~\cite{Deng2019SuperresolutionRO,stengel2020adversarial,venkatesh2021comparative}.
%Several investigators  have produced promising reconstruction results using multi-scale spatial convolutional models
% ~\cite{fukami2019super,obiols2021surfnet}, adversarial learning-based models~\cite{Deng2019SuperresolutionRO,stengel2020adversarial,venkatesh2021comparative,xie2018tempogan}, and convolutional models that combine both spatial and temporal information~\cite{Fukami_2020,liu2020deep}.  
% However, there are some limitations associated with these  approaches.

% 1. data patterns are complex, e.g., interactions amongst variables over space and time
% 2. paucity of data - both x and y
However, existing SR methods face several challenges when  applied for reconstructing turbulent flows. Such flows involve multiple physical variables and often exhibit complex dynamic patterns, i.e., multiple physical variables evolve and interact at different scales. 
In the absence of underlying physical processes, pure data-driven SR models require a large number of training samples to capture the correct physics. Due to the substantial computational cost in simulating turbulent flows, high-fidelity DNS data are rarely available, and even the generation of high-quality LES at a lower resolution can be expensive. Hence, low-resolution LES data cannot be frequently generated for a large variety of scenarios. 
When trained with limited data at discrete time steps (i.e., when both LES and DNS are available), these models can have degraded performance because they may learn spurious patterns between sparse observations, and such patterns are often not generalizable.


%Second, existing SR methods are trained to minimize image-based error metrics, e.g., pixel-wise loss (such as mean squared error (MSE)) or perceptual loss [31]. Although these metrics are considered gold standard within the computer vision community, they are not designed for recovering desired physical characteristics in the reconstructed flows, degrading the fidelity of the SR model. 
%Third, these methods do not explicitly capture the information gap between LES and DNS. This gap is caused by both the large resolution difference (i.e., upscaling factor) and the discrepancy between different simulation strategies (e.g., filterings in LES). 
%Finally, existing methods have shown improvements only on benchmark turbulence datasets, and have not been assessed in large scale simulations over a wide variety of flows. Given these limitations, there is a need to systematically advance deep learning-based SR methods by explicitly integrating ow-physics and build new assessment pipeline on a diverse set of turbulence datasets in both passive and chemically reactive flows.


% solution: 
% spatio-temporal super-resolution, using a customized recurrent unit PRU, describe PRU at a high level

In this work, we propose a new physics-guided neural network framework for spatial and temporal super-resolution. The idea is to leverage underlying physical relationships to guide the learning of generalizable spatial and temporal patterns in the reconstruction process. In particular, our framework consists of two components: a physics-guided recurrent unit (PRU) and a physics-guided super-resolution model (PGSR). The PRU structure is designed based on the underlying partial differential equation (PDE), and is responsible for capturing the temporal dynamics of turbulent flows from sparse data. The PGSR model incorporates additional physical constraints to improve the reconstruction from the available LES data. Our evaluation on the Taylor-Green Vortex data~\citep{brachet1984taylor} demonstrates the superiority of PRU and PGSR in modeling the turbulent flows. At the same time, we also verify that the proposed method can preserve the physical properties of turbulent flows.   

% experiment 

%\textcolor{red}{Tianshu: }
Our contributions can be summarized as:

\begin{itemize}[noitemsep]
    \item We propose innovative physics-guided PRU and PGSR architectures to capture the temporal and spatial patterns of the turbulent flows, respectively. 
    \item We design a unified neural network framework combining PGSR and PRU to effectively simulate and reconstruct high-resolution frequent turbulent flows. 
    \item We evaluate our model in a series of experiments. The experimental results demonstrate that our approaches significantly outperform existing methods in both DNS simulation from historical data and DNS reconstruction from sparse LES data.
\end{itemize}


%\section{Problem definition and preliminary}

\section{Problem definition}

% \textcolor{red}{make it more general here. }
%For the purpose of demonstration, 

Our objective is to reconstruct frequent high-resolution flow data from low-resolution and sparse LES data. In particular, we consider a general %type of vortex. %variant of the Taylor-Green vortex (TGV) \cite{brachet1984taylor}.  
%The system is a 
three-dimensional vortex flow over space and time $\boldsymbol{Q}(x,y,z,t)$,  where $(x,y,z)$ denotes the spatial coordinates, $t$ represents the time step (in seconds), and  $\boldsymbol{Q}(x,y,z,t)$ consists of multiple variables that describe turbulent transport, such as the velocity along different directions and the thermodynamic pressure. We represent low-resolution LES data as $\boldsymbol{Q}^{LR}(x,y,z,t)$, which are available at sparse time steps, e.g., starting from a time step $t_0$, the LES is generated with a time interval of $d$ at $\{t_0,t_0+d,t_0+2d,\dots\}$.   % and is simulated within a predefined domain. It provides a suitable setting for our demonstration as it exhibits several salient features of turbulent transport. 
%The primary transport variables are the fluid density $\rho(x, y, z, t)$, the velocity vector fluid $u(x, y, z, t), v(x, y, z, t), w(x, y, z, t)$, %$(i = 1, 2, 3)$
%the thermodynamic pressure $p(x, y, z, t)$ and the enthalpy $h(x, y, z, t)$. % and the species' mass fractions Y(x; t) 
The flow variables in $\boldsymbol{Q}(x,y,z,t)$ also follow the Navier-Stokes equation, which  governs the transport of these variables in space ($x, y, z$) and time ($t$). % are the continuity, momentum and enthalpy (energy) % and species' mass fraction
%equations, along with an equation of state. 
Boundary conditions are specified near the boundary of the domain to describe the interaction of the flow with the external environment. More details about the flow dataset will be provided in Section~\ref{sec:dataset}. 
%In this flow, the original vortex collapses into turbulent worm-like structures which become progressively more turbulent until viscosity eventually dissipates the large scale vortical structures. 
%We compare our proposed method against several existing %super-resolution 
%algorithms to predict DNS data.% of TGV. 
%We also demonstrate the effectiveness of each component in our proposed method by showing the improvement both qualitatively and quantitatively. 
%\todo{write this}





\section{Method}

% PRU+Reconstruction

\begin{figure} [!h]
\centering
% \vspace{-.1in}
%\vspace{-.1in}
\includegraphics[width=1.0\columnwidth]{PRU flow.pdf}
%\vspace{-.1in}
\caption{The proposed physics-guided neural network framework combining PRU and PGSR for reconstructing turbulent flows $\boldsymbol{Q}$. } %\textcolor{red}{need to redraw}}
%\vspace{-.08in}
\label{fig:bdc}
\end{figure}

Our proposed framework consists of two structural components, PGSR and PRU, which are illustrated in Fig.~\ref{fig:bdc}. Starting from an initial time step $t_0$, the proposed method will follow a two-step process: (i) the PGSR model  is used to reconstruct high-resolution $\boldsymbol{Q}(x,y,z,t)$  when low-resolution LES data are available. (ii) Then PRU is used to estimate $\boldsymbol{Q}(x,y,z,t+1)$ from $\boldsymbol{Q}(x,y,z,t)$ until the next LES sample is available. In the following, we will describe these two components: PRU and PGSR. 

%\subsection{Runge–Kutta methods}
\subsection{Physics-Guided Recurrent Unit (PRU)}

% 1. Intuition
% \textcolor{red}{1. Intuition: to bridge the gap between discrete model such as RNN and continuous flow dynamics.
% 2. High-level description of PRU, (1) 2-step iterative process (intermediate): (i) Q$_t$ = f(), will be discussed later 
% (ii) text description. For example, we use 4-th order RK, give equations on (ii).  (2) aggregation process: Eq. 6 - > eq. 7.3. two issues to be solved in computing f: (1) spatial derivative estimation, (2) boundary condition. Also, we will give an example error stability.  }

% The classical recurrent neural network (RNN) based methods evaluate the state values at every time step and update these values according to predefined layers. 

% In certain types of scenarios, such as continuous flow dynamic, each observation are taken at a large time step while these classical RNN methods are not able to capture the physical relations precisely within the given large time step. 


Physical variables $\boldsymbol{Q}$ in turbulent flows interact with each other and evolve at different speeds for different locations. Temporal neural network models, e.g., long short-term memory (LSTM)~\citep{LSTM}, have sophisticated structures and thus heavily rely on large representative training data that are sampled at a high temporal frequency to capture the underlying continuous patterns over time. 
%When trained with limited  data at discrete time steps (e.g., when both LES and DNS are available), these models can have degraded performance because they may learn spurious patterns between sparse observations, and such patterns are often not generalizable.
Given sparse and limited LES data, we propose the PRU structure as a more accurate and reliable way to predict the future flow variables by leveraging the continuous physical relationship described by the underlying PDE. This helps bridge the gap between discrete data samples and continuous flow dynamics. The proposed PRU structure is inspired by our previous work on combining machine learning and physical equations~\citep{jia2019sdm2,bao2021partial,jia2021physics,willard2021integrating}. The PRU structure is also generally applicable to many dynamical systems with governing PDEs.  % with these continuous physical models


% our proposed method provides a more accurate and reliable way to capture the underlying physical laws comparing with classical RNN based approaches.


% combining a set of estimated intermediate values and these intermediate values are obtained through numerical computing related methods. 
% In order to bridge the gap between discrete RNN models with these continuous physical models, we propose 

% It is because, instead of using the entire large time step, we break it into small steps, make predictions accordingly and then weight the small predictions to get the final output.  %We break down the whole procedure in the following paragraph. 
%We show the whole procedure of our approach in this section. 


%Let's introduce the problem first. The physical problem considered is usually in the following form:

% \begin{equation}    \label{eq:flow} 
% \begin{aligned}
% %    \frac{du}{dt} = f(u, t)
%      \boldsymbol{Q}_t = \boldsymbol{f}(t, \boldsymbol{Q})
% \end{aligned}
% \end{equation}

%where $\boldsymbol{Q}$ is a vector consisting of unknown physical variables. 
Most PDEs can be represented in the form of $\boldsymbol{Q}_t = {\boldsymbol{f}}(t, \boldsymbol{Q};\theta)$, where $\boldsymbol{Q}_t$ is the temporal derivative of $\boldsymbol{Q}$,  and ${\boldsymbol{f}}(t, \boldsymbol{Q};\theta)$ is a non-linear function (parameterized by coefficient $\theta$) that summarizes the current value of $\boldsymbol{Q}$ and its spatial context. For example, the incompressible Navier-Stokes  equation for the velocity field can be expressed as:
\begin{equation}
{\boldsymbol{f}(\boldsymbol{Q})} = -\frac{1}{\rho} \nabla p + \nu \Delta \boldsymbol{Q} - (\boldsymbol{Q} \cdot \nabla) \boldsymbol{Q},
\label{eq:NS}
\end{equation}%for the velocity field in the incompressible Navier-Stokes  equation, 
where $\rho$, $p$, and $\nu$ denote the fluid density, the
thermodynamic pressure, and the viscosity, respectively. Since the function ${\boldsymbol{f}}(\boldsymbol{Q})$ in the Navier-Stokes equation is independent of time $t$, we omit the independent variable $t$ in the function ${\boldsymbol{f}}(\cdot)$.  Here $p$ is treated as a known variable, and $\theta=\{\rho,\nu\}$. %a set of functions

%is obtained through spatial discretization. The Runge–Kutta method is defined as follows:
%The classical numerical methods towards Eq.\ref{eq:flow} are the Runge–Kutta (RK) methods. 

\begin{figure} [!h]
\centering
\includegraphics[width=1.0\columnwidth]{RK diagram.pdf}
\caption{Diagram for the physics-guided recurrent unit (PRU), which iteratively estimates the temporal derivative and the intermediate state variable. } %\textcolor{red}{need to revise. }} %$Q$ represents the output and can be replaced by $u, v, w$.}
\label{fig:PRU}
\end{figure}

The PRU structure is inspired by the classical numerical Runge–Kutta (RK) methods~\citep{butcher2007runge}, 
which have been used in temporal discretization for the approximate solutions of differential equations. 
As shown in Fig.~\ref{fig:PRU}, the central idea of PRU is to  interpolate virtual intermediate variables and create smaller intervals between two time steps, which facilitates refining the gradient of flow variables over time. 
Starting from a time step $t$, PRU estimates $N-1$ intermediate state variables  $\boldsymbol{Q}(t,1)$, \dots, $\boldsymbol{Q}(t,N-1)$ and $N$ intermediate temporal derivatives $\boldsymbol{Q}_{t,1}$,  \dots, $\boldsymbol{Q}_{t,N}$ before reaching the next step $t+1$. 

%$\boldsymbol{Q}_{t,1}, \boldsymbol{Q}_{t,2}, \boldsymbol{Q}_{t,3}, \boldsymbol{Q}_{t,4}$ are the estimated intermediate temporal derivatives. $\boldsymbol{Q}_{1}, \boldsymbol{Q}_{2}, \boldsymbol{Q}_{3}$ are the estimated intermediate state values. 




In particular,
%$\boldsymbol{Q}(t,0)$ is set as the current flow variables $\boldsymbol{Q}(t)$, and 
PRU interpolates intermediate state variables by iteratively following a two-step process:  for $n$ from 1 to $N$, (i) PRU first estimates the temporal derivative $\boldsymbol{Q}_{t,n}=\boldsymbol{f}(\boldsymbol{Q}(t,n-1))$ at the previous intermediate  flow state $\boldsymbol{Q}(t,{n-1})$, where  $\boldsymbol{Q}(t,0)=\boldsymbol{Q}(t)$. We will discuss more details about how to compute the function $\boldsymbol{f}(\cdot)$ later. (ii) Then, for $n<N$, PRU computes the next intermediate state variable $\boldsymbol{Q}(t,n)$ by moving the flow data $\boldsymbol{Q}(t)$ along the direction of the obtained temporal derivative. 
%uses the obtained derivative $\boldsymbol{Q}_{t,n}$ value
In our tests, we follow the most %widely used Runge–Kutta method is the 
popular 4\textsuperscript{th}-order RK method for computing the three intermediate state variables, as follows: 
\begin{equation}    \label{eq:RK-steps}
\begin{aligned}
\boldsymbol{Q}(t,{1}) &= \boldsymbol{Q}(t) + \Delta t \frac{\boldsymbol{Q}_{t,1}}{2},\\
\boldsymbol{Q}(t,{2}) &= \boldsymbol{Q}(t) + \Delta t\frac{\boldsymbol{Q}_{t,2}}{2},\\
\boldsymbol{Q}(t,{3}) &= \boldsymbol{Q}(t) + \Delta t\boldsymbol{Q}_{t,3}.
\end{aligned}
\end{equation}

The temporal derivative $\boldsymbol{Q}_{t,4}$ is then computed from the last intermediate point, as $\boldsymbol{f}(\boldsymbol{Q}(t,{3}))$. The 4\textsuperscript{th}-order RK method has %known member of the Runge–Kutta family is generally referred to as "RK4" 
%with 
a total accumulated error %on the order 
of $O(\Delta t^4)$, where $\Delta t$ represents the time interval between consecutive time steps.  

% The $4^{th}$ order RK method is given below.



% \begin{comment}

% \begin{equation}    \label{eq:RK-steps}
% \begin{aligned}
% u_{n + 1} &= u_n + \frac{1}{6}(k_1 + 2k_2 + 2k_3 + k_4), \\
% t_{n + 1} &= t_n + \Delta t,\\
% k_1 &= f(t_n, u_n),\\
% k_2 &= f(t_n + \frac{h}{2}, u_n + h\frac{k_1}{2}),\\
% k_3 &= f(t_n + \frac{h}{2}, u_n + h\frac{k_2}{2}),\\
% k_4 &= f(t_n + h, u_n + hk_3).\\
% \end{aligned}
% \end{equation}
    

    
    
% \begin{equation}    \label{eq:RK-steps}
% \begin{aligned}
% \boldsymbol{Q}_{pred} &= \boldsymbol{Q} +  \frac{1}{6}(\boldsymbol{Q}_{t,1} + 2\boldsymbol{Q}_{t,2} + 2\boldsymbol{Q}_{t,3} + \boldsymbol{Q}_{t,4}), \\
% t_{next} &= t + \Delta t,\\
% \boldsymbol{Q}_{t,1} &= \boldsymbol{f}(t, \boldsymbol{Q}),\\
% %\pdv{\boldsymbol{y}}{t}|_1 
% \boldsymbol{Q}_{1} &= \boldsymbol{f}(t, \boldsymbol{Q}),\\
% \boldsymbol{Q}_{t,2} &= \boldsymbol{f}(t + \frac{\Delta t}{2}, \boldsymbol{Q} + \Delta t \frac{\boldsymbol{Q}_{t,1}}{2}),\\
% \boldsymbol{Q}_{t,3} &= \boldsymbol{f}(t + \frac{\Delta t}{2}, \boldsymbol{Q} + \Delta t\frac{\boldsymbol{Q}_{t,2}}{2}),\\
% \boldsymbol{Q}_{t,4} &= \boldsymbol{f}(t + \Delta t, \boldsymbol{Q} + \Delta t\boldsymbol{Q}_{t,3}).\\
% \end{aligned}
% \end{equation}

% \end{comment}

% \begin{equation}    \label{eq:RK-steps}
% \begin{aligned}
% \boldsymbol{Q}_{pred} &= \boldsymbol{Q}_{curr} +  \frac{1}{6}(\boldsymbol{Q}_{t,1} + 2\boldsymbol{Q}_{t,2} + 2\boldsymbol{Q}_{t,3} + \boldsymbol{Q}_{t,4}), \\
% t_{next} &= t + \Delta t,\\
% \boldsymbol{Q}_{t,1} &= \boldsymbol{f}(t, \boldsymbol{Q}_{curr}),\\
% %\pdv{\boldsymbol{y}}{t}|_1 
% \boldsymbol{Q}_{1} &= \boldsymbol{Q}_{curr} + \Delta t \frac{\boldsymbol{Q}_{t,1}}{2},\\
% \boldsymbol{Q}_{t,2} &= \boldsymbol{f}(t + \frac{\Delta t}{2}, \boldsymbol{Q}_1),\\
% \boldsymbol{Q}_{2} &= \boldsymbol{Q}_{curr} + \Delta t\frac{\boldsymbol{Q}_{t,2}}{2},\\
% \boldsymbol{Q}_{t,3} &= \boldsymbol{f}(t + \frac{\Delta t}{2}, \boldsymbol{Q}_2),\\
% \boldsymbol{Q}_{3} &= \boldsymbol{Q}_{curr} + \Delta t\boldsymbol{Q}_{t,2},\\
% \boldsymbol{Q}_{t,4} &= \boldsymbol{f}(t + \Delta t, \boldsymbol{Q}_3).\\
% \end{aligned}
% \end{equation}
% \todo{change k to pdv??}

%where $\boldsymbol{Q}_{curr}$ is the state value at current time step. $\boldsymbol{Q}_{pred}$ is the predicted state value. 
% $\boldsymbol{Q}_{t,1}, \boldsymbol{Q}_{t,2}, \boldsymbol{Q}_{t,3}, \boldsymbol{Q}_{t,4}$ are the estimated intermediate temporal derivatives. $\boldsymbol{Q}_{1}, \boldsymbol{Q}_{2}, \boldsymbol{Q}_{3}$ are the estimated intermediate state values. 
%Briefly speaking, we derive $\boldsymbol{Q}_{t,1}, \boldsymbol{Q}_{t,2}, \boldsymbol{Q}_{t,3}, \boldsymbol{Q}_{t,4}$ from $\boldsymbol{Q}_{curr} ,\boldsymbol{Q}_{1}, \boldsymbol{Q}_{2}, \boldsymbol{Q}_{3}$ step by step. 


Finally, %the $4^{th}$ order RK method 
PRU combines all the %requires to calculate 
%$4$ estimated 
intermediate temporal derivatives as a composite gradient to %generate the final predicted value 
predict the flow variables at the  next time step $\boldsymbol{Q}(t+1)$, as follows: % according to Eq.\ref{eq:RK-training_comb}. $\boldsymbol{w}_1, \boldsymbol{w}_2, \boldsymbol{w}_3, \boldsymbol{w}_4$ are determined during the training process.
\begin{equation}    \label{eq:RK-training_comb}
% \begin{aligned}
\text{PRU}(\hat{\boldsymbol{Q}}(t+1)|\boldsymbol{Q}(t)) = \boldsymbol{Q}(t) + \sum_{n=1}^N w_n \boldsymbol{Q}_{t,n}, 
% \end{aligned}
\end{equation}
where $\{w_n\}_{n=1}^N$ are the trainable model parameters. Given a series of high-fidelity DNS training data of $T$ time steps, the PRU structure can be trained by minimizing the mean squared error (MSE) between the predicted flow variables and true DNS values, as $\sum_t \lVert\text{PRU}(\hat{\boldsymbol{Q}}(t+1)|\boldsymbol{Q}(t))-\boldsymbol{Q}(t+1) \rVert^2/T$.    
% \begin{equation}    \label{eq:RK-training_comb}
% \begin{aligned}
% \boldsymbol{Q}_{pred} &= \boldsymbol{Q}_{curr} +  (\boldsymbol{w}_1\boldsymbol{Q}_{t,1} + \boldsymbol{w}_2\boldsymbol{Q}_{t,2} + \boldsymbol{w}_3\boldsymbol{Q}_{t,3} + \boldsymbol{w}_4\boldsymbol{Q}_{t,4}). \\
% \end{aligned}
% \end{equation}


% Finally, the overall loss function is 
% \begin{equation}
%     \mathcal{L} = \text{MSE}(\textbf{Q}_{pred},\textbf{Q}_{real}).
% \end{equation}


%So far, we have built the architecture of our proposed method and deploy the training process step by step. However, there still exists some detailed issues being ignored. The most important thing would be evaluating $\boldsymbol{f}$ when calculating those intermediate values. In real world physical problems, $\boldsymbol{f}$ can contain complex physical behaviors such as spatial derivatives or various kinds of boundary conditions. Let's break them down in the following sections. 


In the following, we will describe two major issues in computing the function ${\boldsymbol{f}}(\cdot)$: (i) estimating spatial derivatives in the function ${\boldsymbol{f}}(\cdot)$, and (ii) preserving boundary conditions. We will also investigate the stability of this method for long-term prediction with a simple case study.  




%At the beginning, we construct three $5 \times 5$ filters for first order spatial derivatives and three $5 \times 5$ filters for second order spatial derivatives. Then we perform data augmentation for $\boldsymbol{x}$. %$u_n, v_n, w_n$ respectively. 
%After that, we calculate $k_1$ by evaluating $\boldsymbol{f}(\boldsymbol{y}, t)$
%for $u_n, v_n, w_n$, 
%and use $k_1$ to get $k_2$, use $k_2$ to get $k_3$ and use $k_3$ to get $k_4$ step by step. Eventually, we combine these intermediate variables %$k_1, k_2, k_3, k_4$ for $u, v, w$ 
%with trainable weights to get the final prediction. %instead of using the fixed weights defined in Eq. \ref{eq:RK-steps}.

\begin{comment}
\begin{equation}    \label{eq:RK-training_comb}
\begin{aligned}
u_{n + 1} &= u_n + (w_{1,u}k_{1, u} + w_{2,u}k_{2, u} + w_{3,u}k_{3, u} \\
& + w_{4,u}k_{4, u}), \\
v_{n + 1} &= v_n + (w_{1,v}k_{1, v} + w_{2,v}k_{2, v} + w_{3,v}k_{3, v} \\
& + w_{4,v}k_{4, v}), \\
w_{n + 1} &= w_n + (w_{1,w}k_{1, w} + w_{2, w}k_{2, w} + w_{3, w}k_{3, w} \\
& + w_{4, w}k_{4, w}).\\
\end{aligned}
\end{equation}
\end{comment}




\subsubsection{Spatial derivative approximation} 

\begin{comment}
    In Eq. \ref{eq:RK-eq}, we have the right hand side in the following form:
\begin{equation}\label{eq:righthand}
\begin{aligned}
f(u, t) =& -(\textbf{V} \cdot \nabla) u -\frac{1}{\rho} p_x + \nu\Delta u,\\
=& -(u\cdot u_x + v\cdot u_y + w\cdot u_z)-\frac{1}{\rho}  p_x + \\
& \nu(u_{xx} + u_{yy}+ u_{zz}).\\
\end{aligned}
\end{equation}
%If $\boldsymbol{f}(\boldsymbol{y}, t)$ contains spatial derivative, 
\end{comment} 
%In most real world problems, Eq.\ref{eq:RK-steps} requires us to evaluate $\boldsymbol{f}(t, \boldsymbol{Q})$ explicitly four times and 
The proposed PRU evaluates the function ${\boldsymbol{f}}(\cdot)$ explicitly for estimating the temporal derivatives of intermediate state variables. In many general PDEs (e.g., the Navier-Stokes equation),  ${\boldsymbol{f}}(\boldsymbol{Q})$  contains spatial derivatives of $\boldsymbol{Q}$. 
One popular approach for evaluating the spatial derivatives is through the finite difference methods (FDMs),  
%Therefore, it is important to leverage classical approaches, such as the finite difference methods (FDMs), to evaluate these spatial derivatives. % evaluate the first and second spatial derivative. %defined in Eq. 8. Let's look at the finite difference methods before moving to the machine learning methods. 
%The finite difference methods are a group of approaches used for approximating real values or 
which approximate variable derivatives of a function on predefined mesh points by solving algebraic equations containing finite differences and values from nearby points. %The error between the discrete solutions and the exact solutions is measured through Taylor expansions. 
For example, the first and second order spatial derivatives along the $x$ dimension  in Eq.~\ref{eq:NS} (represented as  $\boldsymbol{Q}_{x}$ and $\boldsymbol{Q}_{xx}$) can be estimated by the FDMs as follows: 
%Eqs. \ref{eq:spatial_diff1},\ref{eq:spatial_diff2} summarize how first and second spatial derivatives are approximated. There also exist some other types of schemes \cite{thomas2013numerical} providing high approximation accuracy than Eqs. \ref{eq:spatial_diff1},\ref{eq:spatial_diff2}.
% \begin{equation}\label{eq:spatial_diff1}
% % \small
% \begin{split}
%     &\boldsymbol{Q}_x(x_{i},y_{j},z_{k},t_n) \approx \frac{\boldsymbol{Q}(x_{i + 1},y_{j},z_{k},t_n) - \boldsymbol{Q}(x_{i - 1},y_{j},z_{k},t_n)}{2\Delta x}.\\
% \end{split}
% \end{equation}
% %\todo{use s to represent distance}
% \begin{equation}\label{eq:spatial_diff2}
% % \small
% \begin{aligned}
% \boldsymbol{Q}_{xx}(x_{i},y_{j},z_{k},t_n) \approx &\frac{\boldsymbol{Q}(x_{i + 1},y_{j},z_{k},t_n) -  2\boldsymbol{Q}(x_{i},y_{j},z_{k},t_n)}{2\Delta x}\\
% &+ \frac{\boldsymbol{Q}(x_{i - 1},y_{j},z_{k},t_n)}{2\Delta x}. 
% \end{aligned}
% \end{equation}
\begin{equation}\label{eq:spatial_diff1}
%\small
\begin{aligned}
    &\boldsymbol{Q}_x(x_{i},y_{j},z_{k},t_n)\\
    &\!\approx\! \frac{\boldsymbol{Q}(x_{i + 1},y_{j},z_{k},t_n)\! -\! \boldsymbol{Q}(x_{i - 1},y_{j},z_{k},t_n)}{2\Delta x},\\
%&\boldsymbol{Q}_{xx}(x_{i},y_{j},z_{k},t_n) \\
%&\approx \frac{\boldsymbol{Q}(x_{i + 1},y_{j},z_{k},t_n) -  2\boldsymbol{Q}(x_{i},y_{j},z_{k},t_n)}{2\Delta x}\\
%&+ \frac{\boldsymbol{Q}(x_{i - 1},y_{j},z_{k},t_n)}{2\Delta x}. 
&\boldsymbol{Q}_{xx}(x_{i},y_{j},z_{k},t_n) \\
&\approx [\boldsymbol{Q}(x_{i + 1},y_{j},z_{k},t_n) -  2\boldsymbol{Q}(x_{i},y_{j},z_{k},t_n)\\
&+ \boldsymbol{Q}(x_{i - 1},y_{j},z_{k},t_n)] / (\Delta x)^2. 
\end{aligned}
\end{equation}
%\todo{add 4 points scheme?}

%The classical FDMs require using Eq. \ref{eq:spatial_diff1} %to Eq. \ref{eq:righthand} 
%at every mesh point, and such 
The approximation used in FDMs results in an error compared to the exact solution, which can be estimated through Taylor expansions. 
%The error between the discrete solutions and the exact solutions is measured through Taylor expansions.
Instead of using FDMs for every mesh point, we propose to build a spatial difference (SD) layer using convolutional neural network (CNN) layers. The CNN layers have the expressive power to capture the relationships defined in FDMs (Eq.~\ref{eq:spatial_diff1}) while also being more flexible in learning other non-linear relationships from data. 

% and these operations are equivalent to a convolution neural network layer. Therefore, instead of using FDM directly, we add a CNN layer with a trainable filter with respect to different spatial derivatives separately and %the values in 
% the filters of the convolutional layer weight all the adjacent values of the current node. These filters are determined during the training process. %During the execution, we may need to run up to $6$ different CNN layers, including $3$ first order derivatives and $3$ second order derivatives, for evaluating $\boldsymbol{f}(t, \boldsymbol{Q})$ once. In our proposed method, we evaluate $\boldsymbol{f}(t, \boldsymbol{Q})$ four times for one step prediction. %The values in the filters of the convolutional layer weight all the adjacent values of the current node and are obtained during the training process.



\subsubsection{Boundary Condition and Augmentation}
\label{sec:boundary}
%\todo{add more types of boundary condition?}
% Another type of physical phenomenon could be how the system interacts with the outside environment. These phenomenons are usually described by boundary conditions to the original system. 
Boundary conditions are critical in turbulent flow simulation as they  describe how the turbulent flows interact with the external environment. 
%As we mentioned in Section 3.1, Eq.1 
%One widely used boundary condition is 
Here we consider the periodic boundary condition in our flow data. It is defined in a specified periodic domain indicating that it repeats its own values in all directions. The formal definition of a cubic periodic boundary condition %with respect to $u$ 
is given below:
\begin{equation}
\begin{aligned}
\boldsymbol{Q}(L_x, y, z, t) &= \boldsymbol{Q}(R_x, y, z, t),\\
\boldsymbol{Q}(x, L_y, z, t) &= \boldsymbol{Q}(x, R_y, z, t),\\
\boldsymbol{Q}(x, y, L_z, t) &= \boldsymbol{Q}(x, y, R_z, t),\\
\end{aligned}
\end{equation}
where $L_x, L_y, L_z$ are the three left boundaries with respect to the $x, y, z$ coordinates and $R_x, R_y, R_z$ are the three right boundaries with respect to the $x, y, z$ coordinates. %In python library, the 
Standard padding strategies for CNN (e.g., same padding) %layer provides the option for same values padding but 
do not satisfy the periodic value requirement. %Therefore, before performing the RK4 steps, 
In order to handle this issue, we make a data augmentation for each of the $6$ faces (of the 3D cubic data) with an additional 2 layers of data  during the training stage and adopt a $5 \times 5$ CNN filter size. The augmented locations will be removed from reconstructed data. %As shown in Fig.~\ref{bdc}, we add $2$ layers of data to the original $3$ dimension volume because the filters can be $5 \times 5$ matrices with the size $2$ width. %The initial data is in the shape $65 \times 128 \times 128$ and it becomes $69 \times 132 \times 132$ after the augmentation.
%\todo{add a diagram?}


\begin{figure} [!h]
\centering
\includegraphics[width=1.0\columnwidth]{bd condition.pdf}
\caption{%Three diagrams for boundary augmentation of a 2 dimension domain are shown. %A 2 dimension domain is considered.
Illustration of data augmentation on a 2-D example. 
The left diagram represents the upper and lower boundary augmentation. The middle diagram represents the left and right boundary augmentation. The right diagram represents the corner boundary augmentation. Rectangles carrying identical numbers have the same value. }%Grey rectangle represent the original domain and blue rectangle represent the augmented area.}
\label{bdc}
\end{figure}






\begin{comment}
    \FOR{$epoch$ = 1 : number of training iterations}
    \FOR{$t$ = 1 : number of time steps}
        % \FOR{$i$ = 1 : number of segments}
            \STATE Estimate adjacency matrix $\text{A}^t$ using the current values of $\{D_i\}$ and $\{ca_i\}$ following Algorithm 1
            \STATE Make predictions using the recurrent graph network following Eqs.~\ref{conv}-\ref{prd}
            \STATE Add the accumulated errors to the loss function (Eq.~\ref{loss_PGRGrN})
        \ENDFOR
        \STATE update model parameters (i.e., networks weights) and physical parameters (i.e., $\{D_i\}$ and $ca_i$)
    \ENDFOR
\end{comment}


\subsubsection{Stability}

The classical $4^{th}$ order RK suffers from  the stability issue if the step size is not properly chosen. Consider a simple scalar example $Q_t = \lambda Q$. The $4^{th}$ order RK for this equation can be written as 
\begin{equation}\label{eq:eulerRK}
%\small
\begin{split}
    &Q((n + 1)\Delta t) \\
    &\approx (1 + \lambda\Delta t + \frac{(\lambda \Delta t)^2}{2} + \frac{(\lambda \Delta t)^3}{6} +  \frac{(\lambda \Delta t)^4}{24})Q(n \Delta t) .\\
\end{split}
\end{equation}
%$Q_n$ is the numerical value at $t = n\Delta t$ and $Q_{n+1}$ is the numerical value at $t = (n + 1)\Delta t$. %\textcolor{red}{what are $t_n$ and $t_{n+1}$?}
Let's denote $R(\Delta t)\!\! =\!\! 1 + \lambda\Delta t + \frac{(\lambda\Delta t)^2}{2} + \frac{(\lambda\Delta t)^3}{6} +  \frac{(\lambda\Delta t)^4}{24}$, and  we have $Q((n + 1)\Delta t) = R(\Delta t)Q(n\Delta t)$. The analytical solution is $Q((n + 1)\Delta t) = \exp(\lambda \Delta t)Q(n\Delta t)$, and thus the accumulated error is 
\begin{equation}\label{eq:err_eulerRK}
% \small
\begin{split}
    &err_{n + 1} = (\exp(\lambda \Delta t) - R(\Delta t))err_n .\\
\end{split}
\end{equation}

%Eq. \ref{eq:err_eulerRK} is equivalent to 
This indicates that $err_{n + 1} = O(\Delta t^5)err_n$ according to Taylor expansion. When the interval $d$ of LES data is large, the accumulated error may get amplified at every time step and then lead to an explosion. %In Eq.\ref{eq:flow}, the right hand side 
Additional complexity arises when ${f}$ consists of multiple evaluations of spatial derivatives. % making the analysis of stability very complicated.
This requires the access to LES data at a reasonably frequent time interval to avoid significantly large reconstruction errors. 

%In the experiment section, we show that our approach needs to be reset occasionally to avoid the error explosion. % but it can reach very further steps before crush.




\begin{algorithm}
\caption{The flow of the proposed PRU.}
\label{algo_model}
\begin{algorithmic} 
    \STATE Create and initialize $5 \times 5$ filters for $1^{st}$ and $2^{nd}$ order spatial derivatives
    \FOR{$epoch$ = 1 : number of training iterations}
        \FOR{$t$ = 1 : number of time steps}
            \STATE Make data augmentation for $\boldsymbol{Q}(t)$ (Section~\ref{sec:boundary}).
            \STATE Calculate $\boldsymbol{Q}_{t,1}, \boldsymbol{Q}_{t,2}, \boldsymbol{Q}_{t,3}, \boldsymbol{Q}_{t,4}$ following Eq. \ref{eq:RK-steps} and evaluate $\boldsymbol{f}$ accordingly.
            \STATE Calculate $\hat{\boldsymbol{Q}}(t+1)$ following Eq. \ref{eq:RK-training_comb} and remove augmented data over boundaries.
            % \STATE Remove augmented data for $\hat{\boldsymbol{Q}}(t+1)$.
            % \STATE $\boldsymbol{Q}(t) = \hat{\boldsymbol{Q}}(t+1)$
            \STATE Use the predicted $\hat{\boldsymbol{Q}}(t+1)$ as the input flow data for time $t+1$.
            %  %$w_1, w_2, w_3, w_4$.
        \ENDFOR
        \STATE Update trainable filters and weights.
    \ENDFOR
% \STATE where $cross area_i$, $D_L$ are trained inside the model, $flowspeed_i$ is read from the input data.  
\end{algorithmic}
\end{algorithm}

% In terms of LES data, we used \cite{chen2021reconstructing} to get the reconstructed high resolution image and use these reconstructed images as starting points to predict future values.



\subsection{Physics Guided Super Resolution (PGSR)}
The PGSR model %is based on our previous work \cite{chen2021reconstructing}, and 
aims to incorporate additional physical constraints to regularize the standard super-resolution model. In particular, we consider two important physical constraints, the divergence-free property for the incompressible flow and the zero-mean property for the Taylor-Green Vortex~(\cite{brachet1984taylor}).

%\todo{1. Incorporating divergence-free property in the loss function}

First, the incompressible flow follows the divergence-free property in the velocity field. Thus, we can represent the inherent physical relationship of the velocity field as:
\begin{equation}
\nabla \cdot {\bf V}= \frac{\partial u}{\partial x} + \frac{\partial v}{\partial y} + \frac{\partial w}{\partial z} = 0,
\end{equation}
where we represent the velocity vector ${\bf V}({\bf x},t) $ along the three spatial dimensions (${\bf x}\equiv (x,y,z)$)  by $u,\ v,$ and $w$, respectively. Then we use a second-order central finite difference approximation to estimate the partial derivatives and employ this divergence-free property as a physical loss in the training process, as follows: 
\begin{equation}
\mathcal{L}_\text{Phy} = \sum_{(x,y,z)}\left[\nabla \cdot \hat{\bf V} ({\bf x},t)\right]^2/M,
\end{equation}
where $M$ is the number of spatial locations in the high-resolution data,  and $\hat{\bf V}$ represents the reconstructed velocity field at high resolution. Such physical constraint can help reduce the search space for model parameters such that the reconstructed high-resolution data follow the divergence-free property which is enforced in incompressible flows. 


%\todo{Add an additional layer for preserving the zero-mean property}
Second, to preserve the zero-mean property of the incompressible flow, we also implement an extra network layer by reducing the mean value of reconstructed flows in the generative process $\hat{\boldsymbol{Q}}_{0}=g(\hat{\boldsymbol{Q}})$. We do not include the zero-mean constraint directly in the loss function because the obtained model cannot preserve the zero-mean property for the long-term testing phase. On the other hand, direct MSE minimization using $\hat{\boldsymbol{Q}}_{0}$ as output leads to an unstable training process because the original output $\hat{\boldsymbol{Q}}$ can have arbitrarily large values. Hence, we iteratively train the PGSR model to reduce (i) the gap between $\hat{\boldsymbol{Q}}$ and the true DNS, and (ii) the gap between  $\hat{\boldsymbol{Q}}$ and its resulting $\hat{\boldsymbol{Q}}_0$, and finally use $\hat{\boldsymbol{Q}}_0$ as the output.

Additionally, we also introduce a degradation process to enforce the consistency between the reconstructed data and the input LES data, similar to~\cite{chen2021reconstructing}. We create the PGSR model based on the popular SR model SRGAN~(\cite{ledig2017photo}). The methods we used to include physical constraints can easily be applied to enhance other SR models as well. %The final loss function for PGSR is: 

%\begin{equation}
% \begin{aligned}
% \mathcal{L}_{G} &= \alpha_1\mathcal{L}_\text{recon}(\boldsymbol{Q}_{SR},\boldsymbol{Q}_{HR})+  \alpha_2 \mathcal{L}_\text{Phy}\\
% &+\alpha_3 \mathcal{L}_\text{deg}+\alpha_4 \mathcal{L}_\text{GAN,gen},
% \end{aligned}
% \label{eq:loss_pgsr}
% \end{equation}
% where $\mathcal{L}_\text{GAN,gen}$ is the standard generator loss in SRGAN~\cite{ledig2017photo}.



%\subsection{Degradation Process} 


\section{Experiment}


%\textcolor{red}{Tianshu: }
%We have introduced the concepts and methodologies of our work. 
In this section, we evaluate the performance of our method on a Taylor-Green vortex (TGV)~(\cite{brachet1984taylor}) dataset and compare the results with existing widely used methods. We first introduce the dataset used in our tests, and discuss the experimental design and evaluation targets. Then we provide experimental results and our analysis. 

\begin{comment}
%\subsection{Running Environment}

All experiments are conducted using Tensorflow on a computer with the following configuration: Intel Core i7-8750H CPU @2.20GHz $\times$ 6 Processor, 16 GiB Memory, 64-bit Win10 OS. 

%We evaluate the performance of DNS reconstruction  using two different metrics, root mean squared error (RMSE) and structural similarity index measure (SSIM)~\cite{wang2004image}. We use RMSE to measure the difference (error) between reconstructed data and target DNS data. The lower value of RMSE indicates better reconstruction performance. %result is better. While SSIM  is used to appraise the similarity between reconstructed data and target DNS on three aspects, luminance, contrast and overall structure. We compare model performance to multiple baselines, which are described as follows:

\end{comment}


\subsection{Dataset}
\label{sec:dataset}
We consider a variant of the Taylor-Green vortex (TGV).  This is a three-dimensional incompressible flow and is simulated within a box with periodic boundary conditions. The TGV provides a suitable setting for our demonstration as it exhibits several salient features of turbulent transport. In this flow, the original vortex collapses into turbulent worm-like structures which become progressively more turbulent until viscosity eventually dissipates the large scale vortical structures. 
We compare our proposed method against several existing super-resolution algorithms to reconstruct the DNS data of TGV. 

The TGV is produced by a solution of the constant density Navier-Stokes equation:

\begin{equation}
    \frac{\partial \textbf{V}}{\partial t} + (\textbf{V} \cdot \nabla) \textbf{V} = \frac{-1}{\rho} \nabla p + \nu \Delta \textbf{V}.
\end{equation}

%where $\rho ({\bf x},t)$ and $p ({\bf x},t)$ denote the fluid density and the thermodynamic pressure, respectively. 
The evolution of the TGV includes enhancement of vorticity stretching and the consequent production of small-scale eddies. Initially, large vortices are placed in a cubic periodic domain of $[-\pi,\pi]$ (in all three directions), with initial conditions: %domain with sinusoidal  periodic boundary conditions, from the initial conditions:
\begin{eqnarray}
u(x,y,z,0) &=& \sin(x) \cos(y) \cos(z), \\
v(x,y,z,0) &=& - \cos(x)\sin(y)\cos(z), \\
w(x,y,z,0) &=& 0.
\end{eqnarray}
Then the value of the Reynolds number is set to $Re=1600$.  
We have LES and DNS results of TGV at several time steps.  
For each time step, we consider the three components of the velocity along the $x$, $y$, and $z$ axis, denoted by  $u$, $v$, and $w$, respectively.
Our objective is to reconstruct the DNS results of %such a three-channel 
the velocity field $(u,v,w)$ using LES data.  In particular, $\boldsymbol{Q}^{LR}$ represents the LES   values of the velocity field while the target $\boldsymbol{Q}$ represents the high-fidelity DNS  of the velocity field. 
%Next, we regarded these three channels(u, v and w)' flow data in both dataset (LES and DNS) as three channels in $\textbf{X}_{LR}$ and  $\textbf{X}_{HR}$ (similar to three channels of image data) respectively. 
Here both LES and DNS data are generated along 65 grid points along the $z$ axis under equal intervals. The LES and DNS  are conducted on 32-by-32 and 128-by-128 grid points, respectively, %in the space 
along the $xy$ %and $y$ 
directions. Hence, the DNS data is of 16 times higher resolution compared to LES data.  
%For standard protocol in traditional single image super-resolution (SISR)~\cite{Danil2009SISR}, the scale of the low-resolution flow image data $\textbf{X}_{LR}$ is 4 times smaller than the high resolution data $\textbf{X}_{HR}$, the scale factor is $\times4$.





% \subsection{Baselines}
% \todo{rewrite}
% We compare the performance of PGSRN method against several existing  methods that have been widely used for image super-resolution and turbulent flow downscaling. Specifically, we implement SRCNN~\cite{dong2014learning}, RCAN~\cite{zhang2018image}, SRGAN~\cite{ledig2017photo}, and a popular dynamic fluid downscaling method: DCS/MS~\cite{fukami2019super} as baselines. 

% To better verify the effectiveness of each component in our proposed method, we further compare PGSRN with two of its variants: PGSRN-P and PGSRN-H as described below. %, trained and tested on the same experiment setting.


% \textit{The variant with only physical Loss (PGSRN-P): } To show the effectiveness of the physical loss, we remove the degradation Loss and hierarchical loss (in middle layers) from the Hierarchical Generative Process. We name this method as  PGSRN-P.

% \textit{The variant with physical loss + hierarchical generative process (PGSRN-H):} In this baseline, we  remove only the degradation loss from the PGSRN method, and we name this baseline as PGSRN-H.

% By comparing PGSRN-P and SRGAN, we hope to show the improvement by incorporating the physical loss. We can further verify the effectiveness of the hierarchical loss by comparing PGSRN-P and PGSRN-H. Finally, the comparison between PGSRN-H and the complete version of PGSRN can show the effectiveness of using the degradation loss. 


\begin{figure*} [!t]
\centering
%\raggedleft
\subfigure[RMSE in $u$ Channel.]{ \label{fig:a}{}
\includegraphics[width=0.30\linewidth]{RMSE_4u_g1.png}
}%\hspace{-0.2in}
\subfigure[RMSE in $v$ Channel.]{ \label{fig:b}{}
\includegraphics[width=0.30\linewidth]{RMSE_4v_g1.png}
}
\subfigure[RMSE in $w$ Channel.]{ \label{fig:b}{}
\includegraphics[width=0.30\linewidth]{RMSE_4w_g1.png}
}
%\vspace{-.15in}
\subfigure[SSIM in $u$ Channel.]{ \label{fig:a}{}
\includegraphics[width=0.30\linewidth]{SSIM_4u_g1.png}
}%\hspace{-0.2in}
\subfigure[SSIM in $v$ Channel.]{ \label{fig:b}{}
\includegraphics[width=0.30\linewidth]{SSIM_4v_g1.png}
}
\subfigure[SSIM in $w$ Channel.]{ \label{fig:b}{}
\includegraphics[width=0.30\linewidth]{SSIM_4w_g1.png}
}
% \vspace{-.05in}
\caption{Change of RMSE/SSIM values produced by different DNS prediction models from the 1st to 50th time steps in a testing period with true DNS data available at an interval of 5 time steps. (a)-(c) show the changes of RMSE values, and (d)-(f) show the changes of SSIM values for the three different channels ($u$,$v$,$w$).}
\label{fig:tf_plot3}
%\vspace{-0.1in}
\end{figure*}



\begin{figure*} [!t]
\centering
%\raggedleft
\subfigure[RMSE in $u$ Channel.]{ \label{fig:a}{}
\includegraphics[width=0.30\linewidth]{RMSE_9u_g1.png}
}%\hspace{-0.2in}
\subfigure[RMSE in $v$ Channel.]{ \label{fig:b}{}
\includegraphics[width=0.30\linewidth]{RMSE_9v_g1.png}
}
\subfigure[RMSE in $w$ Channel.]{ \label{fig:b}{}
\includegraphics[width=0.30\linewidth]{RMSE_9w_g1.png}
}
%\vspace{-.15in}
\subfigure[SSIM in $u$ Channel.]{ \label{fig:a}{}
\includegraphics[width=0.30\linewidth]{SSIM_9u_g1.png}
}%\hspace{-0.2in}
\subfigure[SSIM in $v$ Channel.]{ \label{fig:b}{}
\includegraphics[width=0.30\linewidth]{SSIM_9v_g1.png}
}
\subfigure[SSIM in $w$ Channel.]{ \label{fig:b}{}
\includegraphics[width=0.30\linewidth]{SSIM_9w_g1.png}
}
% \vspace{-.05in}
\caption{Change of RMSE/SSIM values produced by different DNS prediction models from the 1st to 50th time steps in a testing period with true DNS data available at an interval of 10 time steps. (a)-(c) show the changes of RMSE values, and (d)-(f) show the changes of SSIM values for the three different channels ($u$,$v$,$w$).}
\label{fig:tf_plot4}
%\vspace{-0.1in}
\end{figure*}


\begin{figure*} [!t]
\centering
%\raggedleft
\subfigure[RMSE in $u$ Channel.]{ \label{fig:a}{}
\includegraphics[width=0.31\linewidth]{RMSE_4u_g2.png}
}%\hspace{-0.2in}
\subfigure[RMSE in $v$ Channel.]{ \label{fig:b}{}
\includegraphics[width=0.31\linewidth]{RMSE_4v_g2.png}
}
\subfigure[RMSE in $w$ Channel.]{ \label{fig:b}{}
\includegraphics[width=0.31\linewidth]{RMSE_4w_g2.png}
}
%\vspace{-.15in}
\subfigure[SSIM in $u$ Channel.]{ \label{fig:a}{}
\includegraphics[width=0.31\linewidth]{SSIM_4u_g2.png}
}%\hspace{-0.2in}
\subfigure[SSIM in $v$ Channel.]{ \label{fig:b}{}
\includegraphics[width=0.31\linewidth]{SSIM_4v_g2.png}
}
\subfigure[SSIM in $w$ Channel.]{ \label{fig:b}{}
\includegraphics[width=0.31\linewidth]{SSIM_4w_g2.png}
}
% \vspace{-.05in}
\caption{Change of RMSE/SSIM for different models over time using sparse LES data with the interval of 5 time steps. (a)-(c) show the changes of RMSE values, and  (d)-(f) show the changes of SSIM values for the three different channels ($u$,$v$,$w$).}
\label{fig:tf_plot5}
%\vspace{-0.1in}
\end{figure*}
\subsection{Experimental Design} 
% \todo{rewrite}
% We simulate two different designed experiments to analyze the performances of our proposed PGSRN method and state-of-the-art methods. 

We train the proposed method using the TGV data from a consecutive 20-second period (with 20 time steps) and then apply the trained model to the next 50 seconds' testing data and measure the performance.\footnote{Code for the experiment is available at \url{https://drive.google.com/drive/folders/11PTaEjsBkgd6PAAYmWH_KDzTg90IvrJn?usp=sharing}.}
We evaluate the performance of DNS prediction using two different evaluation metrics, root mean squared error (RMSE) and structural similarity index measure (SSIM)~(\cite{wang2004image}). We use RMSE to measure the difference (error) between reconstructed data and target DNS data. The lower value of RMSE indicates better reconstruction performance at the pixel level.  
SSIM is used to appraise the structural similarity between reconstructed data and target DNS on three aspects: luminance, contrast, and overall structure.


\begin{table}[!t]
\newcommand{\tabincell}[2]{\begin{tabular}{@{}#1@{}}#2\end{tabular}}
\centering
\caption{The prediction performance of reconstructing DNS data measured in terms of RMSE and SSIM. 
The performance is measured  on $(u,v,w)$ channels with DNS interval $d$ as 5 or 10. The upper half shows the average results using the DNS interval of $d = 5$, and the bottom half shows the average results using the DNS interval of $d = 10$.
}
\begin{tabular}{l|cccc}
\hline
%\textbf{Method} & LES Based & Downsample Based &  \\ \hline
\textbf{Method} & RMSE & SSIM   \\ \hline 
TM&(0.019,0.019,0.019)&(0.972,0.973,0.967)\\ 
rTM& (0.013,0.012,0.015)&(0.983,0.984,0.980)\\ 
PRU &(0.005,0.005,0.005)&(0.996,0.996,0.997)\\

\hline
TM&(0.038,0.038,0.036)&(0.930,0.930,0.917)\\ 
rTM& (0.022,0.022,0.025)&(0.964,0.966,0.954)\\ 
PRU&(0.012,0.012,0.009)&(0.988,0.988,0.991)\\
\hline
\end{tabular}
\label{tab:dns_pred_pru}
%\vspace{-0.1in}
\end{table}

%\textcolor{red}{Tianshu: Rewrite into separate paragraphs. }
%We aim to evaluate the performance of our proposed method 
Our evaluations aim to answer several questions as listed below: 

\textbf{E1}: \textit{Whether PRU alone can effectively predict the next high-resolution DNS using the previous DNS data? } We compare PRU with two pure data-driven baseline models, transition model (TM) and recurrent transition model (rTM). %, in terms of prediction root-mean squared error (RMSE) and structural similarity (SSIM)~\cite{wang2004image}. 
The TM method predicts the flow variables $\boldsymbol{Q}(t+1)$ at the next step using a U-Net-style encoder-decoder convolutional structure from the flow variables $\boldsymbol{Q}(t)$ at the previous time. The rTM method further extends TM with a recurrent layer. 

\textbf{E2}: \textit{Whether the predictions made by PRU can preserve physical properties of DNS?}  Besides RMSE and SSIM, we also measure the turbulent kinetic energy of the predicted flows and compare it with that of the true DNS.  

\textbf{E3}: \textit{How is the reconstruction performance combining PRU and PGSR using sparse low-resolution LES data? } We combine PRU and PGSR (PGSR-PRU) for reconstructing DNS from sparse LES samples. Since we have already compared PRU with other temporal transition models in \textbf{E1}, here we compare to a baseline SRGAN-TM, which uses our base SR model SRGAN for reconstructing DNS from LES and use TM to predict DNS when LES is not available. We also compare to another two baselines PGSR (LES) and its extension rPGSR (LES). The rPGSR(LES) method has another recurrent layer over time.  Different from PGSR-PRU and SRGAN-TM, these  two methods apply the SR model using LES data at all the time steps, thus can be considered as the upper bound for this test. Our goal is to verify that PGSR-PRU can produce comparable performance with PGSR (LES) and rPGSR (LES) while outperforming other baselines. % with   including SRGAN PGSR, rPGSR

\textbf{E4}: \textit{How is the reconstruction performance of PGSR compared to other SR methods?}  We compare PGSR with two well-used SR methods: RCAN~(\cite{zhang2018image}) and SRGAN~(\cite{ledig2017photo}). We also compare it with DCS/MS~(\cite{fukami2019super}), which is a popular SR approach for turbulent flows reconstruction. Additionally, we compare to a variant of PGSR, termed PGSR-D, which only adds the degradation loss to the SRGAN model without using any physical constraints. % in Eq.~\ref{eq:loss_pgsr}. 





%in two different scenarios. 
%First, we consider the case in which part of flow data is missing at a specific time. For example, the high-resolution flow data is available at certain points along the $z$ axis but not available at other points. We can use the model trained using available data to reconstruct high-resolution DNS data for the remaining locations. %remaining data of the same timestamp, applying them into super-resolution methods to supplement them. For simulating this situation, we design the single-snapshot experiment. 
%We refer to this test as a single-snapshot experiment since the training and testing are conducted at the same time step. %We only use flow data from only one timestamp. Then, in this timestamp, 
%both $\textbf{X}_{LR}$ and $\textbf{X}_{HR}$ have 65 sets of flow data respectively. and 
%In this test, we use the 5-fold cross validation method to divide 65 data slices into five parts and each part has 13 slices. Each time we use four folds (i.e., 52 slides) for training and use the remaining one fold (i.e., 13 slides) for testing. % in the experiment. %Hence, totally, we obtain 52 sets of flow data in training, 13 sets in testing.

%Second, we conduct cross-time experiments to study how the proposed method helps simulate flows in a dynamic scenario. 
%In particular, we use data from  20 consecutive time steps  as the training data, and then test the model in the next 20 time steps. %' flow data as the testing data.
%This is a challenging task since dynamic fluid is changing over time following complex non-linear patterns (driven by Navier-Stokes equation~\cite{foias2001navier}). Hence, the model trained from available data may not be able to generalize to future data that look very different with training data.





\subsection{Results}
\subsubsection{DNS generation using PRU}
Here we assume that we have true DNS data with an interval of $d$ time steps ($d$ = 5 or 10) and we implement PRU and other baselines to predict DNS for the missing time steps.
We summarize the performance of PRU and baselines in Table~\ref{tab:dns_pred_pru} and show their performance change on each channel over time in Figs.~\ref{fig:tf_plot3} and~\ref{fig:tf_plot4}. For both cases (with the true DNS interval $d$ set to a larger value 10 or a smaller value 5), PRU produces better performance than baselines over all the time steps.
It confirms the effectiveness of PRU in the long-term prediction of DNS from historical flow data (\textbf{E1}). 

Besides, we compute the kinetic energy of the flow data predicted by PRU and baselines and measure the gap with the kinetic energy of the true DNS data. The proposed PRU reduces the kinetic energy gap with the true DNS by 30\% and 67\% compared to TM and rTM, respectively. It confirms that PRU can better preserve underlying physical characteristics of turbulent flows (\textbf{E2}). 
%\textcolor{red}{Merge to results section}
%Referring to the evaluation method in SR results, We evaluated 
 %\textcolor{red}{Here the RMSE XXX while SSIM XXX. }




\begin{table}[!t]
\newcommand{\tabincell}[2]{\begin{tabular}{@{}#1@{}}#2\end{tabular}}
\centering
\caption{Reconstruction performance on the $(u,v,w)$ channels using LES data, measured by RMSE and SSIM. SRGAN-TM and PGSR-PRU (proposed) are evaluated using sparse LES data with an interval of 5 time steps. The upper half shows the average results over all 50 time steps, and the bottom half shows the average results over the first 15 time steps. 
}
\begin{tabular}{l|p{0.15\textwidth}p{0.145\textwidth}}
\hline
%\textbf{Method} & LES Based & Downsample Based &  \\ \hline 
\textbf{Method} & RMSE & SSIM   \\ \hline 
PGSR(LES)&(0.112,0.114,0.133)&(0.771,0.774,0.667)\\ 
rPGSR(LES)& (0.114,0.114,0.129)&(0.772,0.773,0.669)\\ 
SRGAN-TM&(0.118,0.115,0.147)&(0.769,0.767,0.647)\\
PGSR-PRU&(0.111,0.113,0.128)&(0.782,0.781,0.681)\\
\hline
PGSR(LES)&(0.088,0.088,0.101)&(0.846,0.849,0.801)\\ 
rPGSR(LES)& (0.091,0.086,0.099)&(0.848,0.855,0.811)\\ 
SRGAN-TM&(0.091,0.088,0.105)&(0.848,0.849,0.794)\\
PGSR-PRU&(0.081,0.081,0.091)&(0.864,0.866,0.833)\\
\hline
\end{tabular}
\label{tab:recon}
\end{table}

\begin{comment}
\begin{figure*} [!h]
\centering
%\raggedleft
\subfigure[RMSE in $u$ Channel.]{ \label{fig:a}{}
\includegraphics[width=0.36\linewidth]{RMSE_4u_g2.png}
}%\hspace{-0.2in}
\subfigure[SSIM in $u$ Channel.]{ \label{fig:b}{}
\includegraphics[width=0.36\linewidth]{SSIM_4u_g2.png}
}
% \vspace{.05in}
%\vspace{-.05in}
\caption{Change of RMSE/SSIM for different models over time using sparse LES data with the interval of 5 time steps.}%from the 1st to 50th time steps in LES-based experiment with 5 intervals.}
\label{fig:tf_plot5}
Here we assume that we have true DNS data with an interval of $d$ time steps ($d$=5 or 10) and we implement PRU and other baselines to predict DNS for the missing time steps.
We summarize the performance of PRU and baselines in Table~\ref{tab:dns_pred_pru} and show their performance change on each channel over time in Figs.~\ref{fig:tf_plot3} and~\ref{fig:tf_plot4}. Whatever we set the true DNS interval$d$ to larger value 10 or 5, PRU produces better performance than baselines over all the time steps.
This confirms the effectiveness of PRU in long-term prediction of DNS from historical flow data (\textbf{E1}). 
\end{figure*}
\end{comment}



% \begin{figure*} [!h]
% \centering
% \subfigure[SRGAN-Tm.$\backslash$ 0.671]{ \label{fig:b}
% \includegraphics[width=0.3\columnwidth]{SRGAN_TM_t78_45w.png}
% }\hspace{4mm}
% \subfigure[PGSR.$\backslash$ 0.597]{ \label{fig:b}
% \includegraphics[width=0.3\columnwidth]{PGSR_t78_45w.png}
% }\hspace{4mm}
% \subfigure[rPGSR.$\backslash$ 0.631]{ \label{fig:b}
% \includegraphics[width=0.3\columnwidth]{rPGSR_t78_45w.png}
% }\hspace{4mm}
% \subfigure[PGSR-PRU.$\backslash$ 0.703]{ \label{fig:b}
% \includegraphics[width=0.3\columnwidth]{PGSR_PRU_t78_45w.png}
% }\hspace{4mm}
% \subfigure[Target DNS.]{ \label{fig:b}
% \includegraphics[width=0.3\columnwidth]{DNS_t78_45w.png}
% }


% % \subfigure[SRGAN-Tm.$\backslash$ 0.634]{ \label{fig:b}
% % \includegraphics[width=0.34\columnwidth]{SRGAN_TM_t78_17w.png}
% % }\hspace{4mm}
% % \subfigure[PGSR.$\backslash$ 0.564]{ \label{fig:b}
% % \includegraphics[width=0.34\columnwidth]{PGSR_t78_17w.png}
% % }\hspace{4mm}
% % \subfigure[rPGSR.$\backslash$ 0.637]{ \label{fig:b}
% % \includegraphics[width=0.34\columnwidth]{rPGSR_t78_17w.png}
% % }\hspace{4mm}
% % \subfigure[PGSR-PRU.$\backslash$ 0.712]{ \label{fig:b}
% % \includegraphics[width=0.34\columnwidth]{PGSR_PRU_t78_17w.png}
% % }\hspace{4mm}
% % \subfigure[Target DNS.]{ \label{fig:b}
% % \includegraphics[width=0.34\columnwidth]{DNS_t78_17w.png}
% % }

% \subfigure[SRGAN-TM.$\backslash$ 0.766]{ \label{fig:b}
% \includegraphics[width=0.3\columnwidth]{SRGAN_TM_t78_24w.png}
% }\hspace{4mm}
% \subfigure[PGSR(LES).$\backslash$ 0.770]{ \label{fig:b}
% \includegraphics[width=0.3\columnwidth]{PGSR_t78_24w.png}
% }\hspace{4mm}
% \subfigure[rPGSR(LES).$\backslash$ 0.787]{ \label{fig:b}
% \includegraphics[width=0.3\columnwidth]{rPGSR_t78_24w.png}
% }\hspace{4mm}
% \subfigure[PGSR-PRU.$\backslash$ 0.836]{ \label{fig:b}
% \includegraphics[width=0.3\columnwidth]{PGSR_PRU_t78_24w.png}
% }\hspace{4mm}
% \subfigure[Target DNS.]{ \label{fig:b}
% \includegraphics[width=0.3\columnwidth]{DNS_t78_24w.png}
% }

% \vspace{.08in}
% \caption{LES-based reconstruction of $w$ channel along the $z$ dimension with 4 intervals. We also show the SSIM value for each reconstructed data.}
% \vspace{.1in}
% \label{fig:tf_plot2}
% \end{figure*}



\begin{figure*} [!h]
\centering
\subfigure[SRGAN-TM]{ \label{fig:tf_plot2_r1_srgan_tm}
\includegraphics[width=0.27\columnwidth]{SRGAN_TM_t78_45w.png}
}\hspace{4mm}
\subfigure[PGSR(LES)]{ \label{fig:tf_plot2_r1_pgsr}
\includegraphics[width=0.27\columnwidth]{PGSR_t78_45w.png}
}\hspace{4mm}
\subfigure[rPGSR(LES)]{ \label{fig:tf_plot2_r1_rpgsr}
\includegraphics[width=0.27\columnwidth]{rPGSR_t78_45w.png}
}\hspace{4mm}
\subfigure[PGSR-PRU]{ \label{fig:tf_plot2_r1_pgsr_pru}
\includegraphics[width=0.27\columnwidth]{PGSR_PRU_t78_45w.png}
}\hspace{4mm}
\subfigure[Target DNS]{ \label{fig:tf_plot2_r1_dns}
\includegraphics[width=0.27\columnwidth]{DNS_t78_45w.png}
}


\subfigure[SRGAN-TM]{ \label{fig:tf_plot2_r2_srgan_tm}
\includegraphics[width=0.27\columnwidth]{SRGAN_TM_t78_17w.png}
}\hspace{4mm}
\subfigure[PGSR(LES)]{ \label{fig:tf_plot2_r2_pgsr}
\includegraphics[width=0.27\columnwidth]{PGSR_t78_17w.png}
}\hspace{4mm}
\subfigure[rPGSR(LES)]{ \label{fig:tf_plot2_r2_rpgsr}
\includegraphics[width=0.27\columnwidth]{rPGSR_t78_17w.png}
}\hspace{4mm}
\subfigure[PGSR-PRU]{ \label{fig:tf_plot2_r2_pgsr_pru}
\includegraphics[width=0.27\columnwidth]{PGSR_PRU_t78_17w.png}
}\hspace{4mm}
\subfigure[Target DNS]{ \label{fig:tf_plot2_r2_dns}
\includegraphics[width=0.27\columnwidth]{DNS_t78_17w.png}
}

\subfigure[SRGAN-TM]{ \label{fig:tf_plot2_r3_srgan_tm}
\includegraphics[width=0.27\columnwidth]{SRGAN_TM_t78_24w.png}
}\hspace{4mm}
\subfigure[PGSR(LES)]{ \label{fig:tf_plot2_r3_pgsr}
\includegraphics[width=0.27\columnwidth]{PGSR_t78_24w.png}
}\hspace{4mm}
\subfigure[rPGSR(LES)]{ \label{fig:tf_plot2_r3_rpgsr}
\includegraphics[width=0.27\columnwidth]{rPGSR_t78_24w.png}
}\hspace{4mm}
\subfigure[PGSR-PRU]{ \label{fig:tf_plot2_r3_pgsr_pru}
\includegraphics[width=0.27\columnwidth]{PGSR_PRU_t78_24w.png}
}\hspace{4mm}
\subfigure[Target DNS]{ \label{fig:tf_plot2_r3_dns}
\includegraphics[width=0.27\columnwidth]{DNS_t78_24w.png}
}
%\vspace{-.05in}
\caption{Three example slices of the reconstructed $w$ channel (one per row) along the $z$ dimension, with an LES interval of 5 time steps.} %We also show the SSIM value for each reconstructed data.}
% \vspace{-.1in}
\label{fig:tf_plot2}
%\vspace{-0.2in}
\end{figure*}

\subsubsection{DNS reconstruction using PGSR-PRU}
% NOTE(review): \label{fig:tf_plot5} appears to be defined only inside a comment environment earlier in this section, so the two \ref{fig:tf_plot5} below may be unresolved -- restore that figure or update the references.
We perform DNS reconstruction with PGSR-PRU and SRGAN-TM using LES data available every five time steps (\textbf{E3}). As shown in Table~\ref{tab:recon} and Fig.~\ref{fig:tf_plot5}, PGSR-PRU produces better performance than SRGAN-TM. In particular, over the first 15 time steps, PGSR-PRU clearly obtains lower RMSE and higher SSIM values. Fig.~\ref{fig:tf_plot5} also shows that the reconstruction performance degrades over time because the LES data differ significantly from those in the training period. More interestingly, we notice that PGSR-PRU even outperforms PGSR (LES) and rPGSR (LES). This is because LES data often miss many important physical components compared to the true DNS, which makes it difficult for SR models to recover flow data directly from LES data.
We also show three sets of example slices of the reconstructed flow data in Fig.~\ref{fig:tf_plot2}. We observe that PGSR-PRU better captures the detailed flow patterns than the other methods, as it incorporates the underlying Navier-Stokes equation through PRU.


\begin{table}[!t]
%\vspace{-0.1in}
\newcommand{\tabincell}[2]{\begin{tabular}{@{}#1@{}}#2\end{tabular}}
\centering
\caption{Evaluation of SR models in terms of the reconstruction RMSE and SSIM on $(u,v,w)$ channels using LES data. 
The performance is measured % by mean value of first 5 time steps  
on the testing data of the first 5 time steps. 
}
\begin{tabular}{l|cc}
\hline
%\textbf{Method} & LES Based & Downsample Based &  \\ \hline 
\textbf{Method} & RMSE & SSIM   \\ \hline 
RCAN&(0.061,0.061,0.075)&(0.891,0.891,0.863)\\ 
DCS/MS&(0.085,0.086,0.115)&(0.896,0.897,0.845)\\ 
SRGAN&(0.065,0.062,0.067)&(0.901,0.913,0.875)\\
PGSR-D&(0.057,0.052,0.053)&(0.914,0.923,0.900)\\
PGSR&(0.053,0.050,0.051)&(0.924,0.935,0.911)\\
\hline
\end{tabular}
\label{tab:pgsr_recon}
%\vspace{-0.1in}
\end{table}

%vspace{-0.1in}
\subsubsection{DNS reconstruction using PGSR}
As shown in Table~\ref{tab:pgsr_recon}, PGSR achieves better performance than other baselines in terms of both RMSE and SSIM. In particular, we can observe the  improvement from SRGAN to PGSR-D and from PGSR-D to PGSR. This confirms  the effectiveness of the degradation process and the physical constraints used in PGSR (\textbf{E4}).  



\vspace{-.1in}
\section{Conclusion}
We propose a physics-guided neural network framework for predicting high-resolution flow data at high temporal frequency. The PRU structure leverages the physical knowledge embodied in the Navier-Stokes equation to capture the flow dynamics over time while the PGSR model incorporates additional physical constraints to improve the reconstruction from the LES data.  We have demonstrated the superiority of PRU in predicting future DNS data from historical DNS data. We also show that PGSR-PRU can effectively reconstruct DNS from sparse LES series. 
%Although our method has been developed in the context
%of simulating fluid dynamics, the involved techniques can


More importantly, the proposed method is generally applicable to %be widely used for other 
many  scientific problems with similar properties, e.g., complex temporal dynamics, and the availability of low-resolution simulations with reduced accuracy. The PRU structure can also be used as a building block to enhance existing deep learning models for modeling of complex dynamics with the guidance of known governing PDEs.  %For
%example, simulations of cloud-resolving models (CRM) at subkilometer horizontal resolution are critical for effectively representing boundary-layer eddies and low clouds. However, it is
%not feasible to generate simulations at such fine resolution even
%with the most powerful commuters expected to be available
%in the near future. Hence, the method developed in this paper
%can provide a great potential for reconstructing high-resolution
%simulations.

\section{Acknowledgements}
The material presented in this paper is based upon work supported by the National Science Foundation (NSF) through Grant 2028001 and Grant OAC-2203581, the Defense Advanced Research Projects Agency (DARPA) under contract number FA8750-18-C-0089, and the Air Force Office of Scientific Research (AFOSR) under contract number FA9550-22-1-0019. Any opinions, findings, and conclusions or recommendations expressed in this paper are those of the authors and do not necessarily reflect the views of AFOSR, DARPA, or NSF. Computational resources are provided, in part, by the University of Pittsburgh Center for Research Computing (CRC).
%\bibliographystyle{unsrt}

\bibliography{Bao_47}
\end{document}
