\section{Arbitrarily Tight Error Bound}\label{sec:theory}

% \LL{why do we say tight and arbitrarily tight bounds? Tight generally means the best possible upper bound, while in our case we just have an upper bound, no? Am I missing something? Also, when you say arbitrarily tight, I guess you mean converging to zero or vanishing? }



% To bound the worst-case approximation error as in Problem~\ref{prob:1}, 
Here, we derive upper bounds for the approximation error $e_1$, specifically, for the right-hand side of Eq.~\eqref{eq:error_series}. 
% We first present intermediate results to reason the behavior of the error series in Eq.~\eqref{eq:error_series} under certain conditions. Next we present the error bound theory with arbitrary-tight property using finite number of neural networks. We end this section with feasibility analysis and discussions.
% We derive bounds for the approximation error $e(x,t):=p(x,t)-\hat{p}(x,t)$.  
% We first characterize $e(x,t)$ as a series of approximate solutions to PDEs.  Then, 
We show that, by training just two PINNs under certain sufficient conditions, the series can be bounded with arbitrary precision. 
% resulting in second-order bound on $e_1(x,t)$. 
% We discuss that while these conditions are feasible, they may be challenging to verify in practice.
% We end this section with feasibility analysis and discussions
All proofs for the lemmas and theorems are provided in Appendices~\ref{proof:lemma1}--\ref{proof:arbitrary_tight}.

First, we express how well $\hat{e}_i$ approximates the $i$-th error $e_i$ by defining the \textit{relative approximation} factor $\alpha_i(t)$ as 
% \ck{the below equation includes original remark 2}
\begin{equation}
    \alpha_i(t)  := \frac{\max_{x \in X'} | e_i(x,t) - \hat{e}_i(x,t)|} {\max_{x \in X'} |\hat{e}_i(x,t)|}.
    \label{eq:alpha_def}
\end{equation}
% Note that $\alpha_i(t)\geq 0$ for all $ i\geq 1$ and $t \in T$.
Recall from Definition~\ref{def:1} that $e_i-\hat{e}_i = e_{i+1}$.  Hence, Eq.~\eqref{eq:alpha_def} can be written in a recursive form as
\begin{equation}
    \max_{x \in X'}|e_{i+1}(x,t)| = \alpha_i(t) \max_{x \in X'} |\hat{e}_i(x,t)|,
    \label{eq:assume_alpha_relation}
\end{equation}
which relates the unknown $(i+1)$-th error to the $i$-th error approximation. 
Now, let $e_i^*(t)$ and $\hat{e}_i^*(t)$
denote the maxima of $|e_i(x,t)|$ and $|\hat{e}_i(x,t)|$ over $X'$, respectively, i.e., 
\begin{subequations}
    \label{eq:max_error}
    \begin{align}
        e_i^*(t) &:= \max_{x \in X'} |e_i(x,t)|,\\ 
        \hat{e}_i^*(t) &:= \max_{x \in X'} |\hat{e}_i(x,t)|.
    \end{align}
\end{subequations}
%     e_i^*(t) := \max_{x \in X'} |e_i(x,t)|,\quad \hat{e}_i^*(t) := \max_{x \in X'} |\hat{e}_i(x,t)|.
%     \label{eq:max_error}
% \end{equation} 
Recall that each $\hat{e}_i(x,t)$ can be represented using a PINN. 
Hence, it is safe to assume that its maximum absolute value $\hat{e}_i^*(t)$ is strictly greater than zero.
\begin{assumption}\label{assumption:ehat_bigger_than_zero}
    Assume that, for all 
    % $i \in \mathbb{N}$, (excluding the limit case), 
    $1 \leq i < n$,
    $\hat{e}^*_i(t) > 0$.
\end{assumption}
Then, the following lemma upper-bounds the approximation error $e_1(x,t)$ using $\hat{e}_i^*(t)$.
\begin{lemma}\label{lemma:bouding series of ratios}
    Consider the approximation error $e_1(x,t) = p(x,t)-\hat{p}(x,t)$ in Eq.~\eqref{eq:error_series} with $n \geq 2$, and the upper-bounds  $\hat{e}^*_i(t)$ for $1 \leq i < n$ in Eq.~\eqref{eq:max_error}.  Define the ratio
    \begin{equation}
        \label{eq:approx ratio}
        \gamma_{\frac{i+1}{i}}(t) := \frac{\hat{e}_{i+1}^*(t)}{\hat{e}_{i}^*(t)}.
    \end{equation}
    Then, 
    under Assumption~\ref{assumption:ehat_bigger_than_zero}, 
    it holds that, $\forall x\in X'$,
    \begin{multline}\label{eq:error_bound_by_max_error_series_ratios}
    |e_1(x,t)| \le \hat{e}_1^*(t)\Bigl(1 + \sum_{m=2}^n \prod_{i=1}^{m-1}\gamma_{\tfrac{i+1}{i}}(t) \\ 
    + \frac{e_{n+1}^*(t)}{\hat{e}_{n-1}^*(t)}\prod_{i=1}^{n-2}\gamma_{\tfrac{i+1}{i}}(t)\Bigr).
    \end{multline}
\end{lemma}

Next, we derive an upper- and lower-bound for the ratio $\gamma_{\frac{i+1}{i}}(t)$ in Eq.~\eqref{eq:error_bound_by_max_error_series_ratios} using $\alpha_i(t)$.
\begin{lemma}\label{lemma:bounds_on_ratios}
    If the relative approximation factors $\alpha_i(t)<1$ for all $2 \leq i < n $, then 
    \begin{equation}
        \frac{\alpha_{i-1}(t)}{1+\alpha_{i}(t)} \leq \gamma_{\frac{i}{i-1}}(t) \leq \frac{\alpha_{i-1}(t)}{1-\alpha_{i}(t)}.
    \label{eq:bounds_on_ratios}
    \end{equation}
\end{lemma}

Lemma~\ref{lemma:bounds_on_ratios} establishes the relationship between the ratio $\gamma_{\frac{i}{i-1}}$ and the relative approximation factors $\alpha_i$ under the condition $\alpha_i < 1$. 
Intuitively, this condition holds when $\hat{e}_i$ approximates $e_i$ reasonably well (see Eq.~\eqref{eq:alpha_def}). 
Lastly, we show that under certain conditions on $\alpha_1$ and $\alpha_2$, an ordering over $\gamma_{\frac{2}{1}},\gamma_{\frac{3}{2}},\dots,\gamma_{\frac{i}{i-1}}$ can be achieved.
\begin{lemma}\label{lemma:exist_decrease_ratio}
    If, for all $t \in T'$,
    \begin{subequations}
        \begin{align}
            \label{eq:alpha1_condition}
            & 0 < \alpha_1(t) < 1, \\
            \label{eq:alpha2_condition1}
            & 0 < \alpha_2(t) < 1-\alpha_1(t), \\
            \label{eq:alpha2_condition2}
            & \alpha_2(t)(1+\alpha_2(t)) < \alpha_1(t)^2,
        \end{align}
    \label{eq:alpha_conditions}
    \end{subequations}
    then there exist feasible $0 \leq \alpha_{i}(t) < 1$ for $2 < i < n$ such that
    \begin{equation}
        \gamma_{\frac{i}{i-1}}(t) < \gamma_{\frac{2}{1}}(t) < 1.
        \label{eq:decrease_ratio}
    \end{equation}
\end{lemma}

The intuition behind Lemma~\ref{lemma:exist_decrease_ratio} is that if $\hat{e}_1$ and $\hat{e}_2$ are trained to a certain accuracy (satisfying Conditions~\ref{eq:alpha_conditions}), 
% then the functions $\hat{e}_3,\dots,\hat{e}_{n-1}$ that closely approximate the unknown $e_3,\dots,e_{n-1}$ result in ratios $\gamma_{\frac{3}{2}},\dots,\gamma_{\frac{n-1}{n-2}}$ that are upper bounded by $\gamma_{\frac{2}{1}} < 1$.
% ?it is possible $\hat{e}_3,\dots,\hat{e}_{n-1}$
% \old{
then there exist feasible $\hat{e}_3,\hat{e}_4,\dots,\hat{e}_{n-1}$ such that the ratios $\gamma_{\frac{3}{2}},\gamma_{\frac{4}{3}},\dots,\gamma_{\frac{n-1}{n-2}}$ are upper bounded by $\gamma_{\frac{2}{1}} < 1$. 
% }
Equipped with Lemmas~\ref{lemma:bouding series of ratios}--\ref{lemma:exist_decrease_ratio}, we can state our main result, which is an upper-bound on the approximation error of $\hat{p}$.
Specifically, the following theorem shows that, under Conditions~\ref{eq:alpha_conditions}, the approximation error bound in Lemma~\ref{lemma:bouding series of ratios} is dominated by a convergent geometric series with ratio $\gamma_{\frac{2}{1}}$ as $n\to \infty$, thereby solving Problem~\ref{prob:1}.

\begin{theorem}[Second-order error bound]\label{theorem:temporal_error_bound}
    Consider Problem~\ref{prob:1} and two approximate error functions $\hat{e}_1(x,t), \hat{e}_2(x,t)$ constructed by Definition~\ref{def:1} 
    that satisfy Conditions~\ref{eq:alpha_conditions}. Then, 
    \begin{equation}
        |p(x,t)-\hat{p}(x,t)| \leq B_2(t) = \hat{e}_1^*(t)\Big( \frac{1}{1-\gamma_{\frac{2}{1}}(t)} \Big),
        \label{eq: temporal error bound}
    \end{equation}
    where $\hat{e}_1^*(t)$ is defined in Eq.~\eqref{eq:max_error}, and $\gamma_{\frac{2}{1}}(t) = \hat{e}_2^*(t)/ \hat{e}_1^*(t)$.
\end{theorem}

The above theorem shows that the second-order error bound $B_2(t)$ can be obtained by training only two PINNs that approximate the first two errors $e_1, e_2$ according to Definition~\ref{def:1} and that satisfy Conditions~\ref{eq:alpha_conditions}.
In fact, using these two PINNs, it is possible to construct an arbitrarily tight $B_2$ as stated below.

\begin{theorem}[Arbitrary tightness]\label{theorem:error_bound_tightness}
    Given Problem~\ref{prob:1} and tolerance $\epsilon \in (0,\infty)$ on the error bound, an error bound $B_2(t)$ in Theorem~\ref{theorem:temporal_error_bound} can be obtained by training two approximate error functions $\hat{e}_1(x,t)$ and $\hat{e}_2(x,t)$ through physics-informed learning such that 
    \begin{equation}
        B_2(t) - \max_{x \in X'}|e_1(x,t)| < \epsilon.
        \label{eq:arbitrary_tight_def}
    \end{equation}
\end{theorem}

The proof of Theorem~\ref{theorem:error_bound_tightness} is based on the observation that $\gamma_{\frac{2}{1}} \to 0$ when (i) $\hat{e}_1(x,t) \to e_1(x,t)$ and (ii) $\hat{e}_2(x,t) \to e_2(x,t)$.  Then, according to Eq.~\eqref{eq: temporal error bound}, $B_2(t) \to \hat{e}^*_1(t)$, and $\hat{e}^*_1(t)$ itself converges to $e^*_1(t)$ under (i). 
By the theoretical convergence of PINNs \citep{shin2020convergence,mishra2023estimates}, $\hat{e}_1$ and $\hat{e}_2$ can be made arbitrarily accurate; thus $B_2$ can be made arbitrarily tight. This result is important because it shows that arbitrary tightness can be achieved without the need for training an infinite number of PINNs, i.e., $\hat{e}_i$, $i=1,2,\ldots$.

\begin{remark}
The construction of $B_2(t)$ in Theorem~\ref{theorem:temporal_error_bound} only requires the values of $\hat{e}^*_1(t)$ and $\gamma_{\frac{2}{1}}(t)$ which are obtained from the known functions $\hat{e}_1(x,t),\hat{e}_2(x,t)$.
Checking the conditions on $\alpha_1$ and $\alpha_2$ can be performed \textit{a posteriori}.  
\end{remark}

%%% For ArXiv %%%
% \paragraph{$n$-th Order Space-time Error Bound ($n > 2$)}

% Here, we derive a generalized error bound using approximation error PINNs $\hat{e}_i$, where $i = 1,\ldots, n$ for $n > 2$.
% Note that an alternative way to express the error bound in Theorem~\ref{theorem:temporal_error_bound} is as an interval $e_1(x,t) \in \big[-B_2(t), B_2(t) \big],$ 
% which is uniform over $x$ for any $t\in T$. 
% Below, we show that, for $n > 2$, an error bound that depends on both space and time can be constructed.
% \begin{corollary}[Space-time Error Bound]
%     Consider PINNs $\hat{e}_i(x,t)$, $i = 1,\ldots, n$, for some $n > 2$ trained per Def.\ref{def:1}
%     such that $\alpha_{n-1}$ and $\alpha_{n}$ satisfy Conditions~\ref{eq:alpha_conditions}, and define the $n$-th order temporal error bound to be
%     $$B_n(t) = \hat{e}_{n-1}^*(t)( \frac{1}{1-\gamma_{\frac{n}{n-1}}(t)}),$$ where $\hat{e}_{n-1}^*(t)$ is defined in~\eqref{eq:max_error}, and $\gamma_{\frac{n}{n-1}}(t) = \hat{e}_{n}^*(t)/ \hat{e}_{n-1}^*(t)$.
%     Then, 
%     \begin{align}
%         e_1(x,t) \in \Big[ \sum_{i=1}^{n-2} \hat{e}_i(x,t) - B_n(t), \; \sum_{i=1}^{n-2} \hat{e}_i(x,t) + B_n(t) \Big].
%     \end{align}
%     \label{corollary:spacetime error bound}
% \end{corollary}
% This corollary shows that, even though the $2$-nd order error approximation is sufficient to obtain a time-varying bound (Theorem~\ref{theorem:temporal_error_bound}), higher order approximations lead to more information, i.e., space in addition to time, on the error bound.


\paragraph{Feasibility Analysis} 
% \ck{The original Remark 3 and 4 are removed; could be put in this paragraph if need.}
Now, we analyze the feasibility for $\hat{e}_1$ and $\hat{e}_2$ satisfying Conditions~\ref{eq:alpha_conditions} in Theorem~\ref{theorem:temporal_error_bound}.
Specifically, Condition~\ref{eq:alpha1_condition} on $\alpha_1$ indicates that $\hat{e}_1$ must be learned well enough so that the magnitude of its maximum approximation error is less than its own maximum magnitude (see Eq.~\eqref{eq:alpha_def}).  
For a fixed $\alpha_1$, Conditions~\ref{eq:alpha2_condition1}--\ref{eq:alpha2_condition2} on $\alpha_2$ require $\hat{e}_2$ to approximate $e_2$ more accurately than $\hat{e}_1$ approximates $e_1$.  
These conditions are feasible in principle by the same convergence argument as above. However, there are some practical challenges, as discussed below.

\paragraph{Practical Challenge}
% \ck{this paragraph is added to expand the discussion of practical challenge.}
% \add{
The challenge in constructing $B_2(t)$ stems from the condition that $\hat{e}_2$ needs to approximate $e_2$ far more accurately than $\hat{e}_1$ approximates $e_1$, making training a PINN for $\hat{e}_2$ extremely difficult. Additionally, since the explicit values of $\alpha_1$ and $\alpha_2$ are unknown, there is no clear criterion for determining when to stop training $\hat{e}_1$ and $\hat{e}_2$. 
To address this, we provide a method for verifying the condition on $\alpha_1$ and derive a bound that depends only on this condition below.

% \begin{remark}
%     The feasibility analysis and practical challenge discussed above also apply to the Space-time Error Bound in Corollary~\ref{corollary:spacetime error bound} of order $n$ since the sufficient conditions are imposed on $\alpha_{n-1}$ and $\alpha_{n}$.
% \end{remark}