\section{Proofs}

\subsection{Derivation of Definition~\ref{def:1}}
\label{sec:proof_def1}
Let $e_1(x,t) = p(x,t) - \hat{p}(x,t)$ and initialize $e_0(x,t):= p(x,t)$ and $\hat{e}_0(x,t)=\hat{p}(x,t)$. Then, Eq.~\eqref{eq:error_1_e1_func_pde} becomes Definition~\ref{def:1} for $i=1$: $$ \mathcal{D}[e_1(x,t)]+\mathcal{D}[\hat{e}_0(x,t)]=0, \quad \text{subject to } e_1(x,0)=e_0(x,0)-\hat{e}_0(x,0).$$
For $i=2$, we define $e_2(x,t):=e_1(x,t)-\hat{e}_1(x,t)$ and obtain $\mathcal{D}[e_2(x,t)]=\mathcal{D}[e_1(x,t)]-\mathcal{D}[\hat{e}_1(x,t)]$ (because $\mathcal{D}[\cdot]$ is a linear operator). Since $\hat{e}_1\neq e_1$, a residual will remain such that $$\mathcal{D}[\hat{e}_1]+\mathcal{D}[\hat{e}_0]:=r_1\neq 0.$$ Hence, we have the recursive PDE for $i=2$ (omitting $x$ and $t$ for simplicity of presentation): $$\mathcal{D}[e_2]=\mathcal{D}[e_1]-\mathcal{D}[\hat{e}_1] =(-\mathcal{D}[\hat{e}_0])-(-\mathcal{D}[\hat{e}_0]+r_1)=-r_1\implies \mathcal{D}[e_2]+r_1:=\mathcal{D}[e_2]+\sum_{j=1}^2 \mathcal{D}[\hat{e}_{j-1}]=0.$$ The derivation recursively follows for $i>2$.
In addition, following PINNs training in Eq.~\eqref{eq:pinn_loss}, the training loss of each $\hat{e}_i$ is:
\begin{subequations}
\begin{align} 
& \mathcal{L}^{(i)} = w_{0}\mathcal{L}_{0}^{(i)} + w_{r}\mathcal{L}_{r}^{(i)}, \;w_0,w_r \in \mathbb{R}_{>0}, \\ 
& \mathcal{L}_{0}^{(i)} = \frac{1}{N_0}\sum_{k=1}^{N_0}\Big( e_i(x_k,0) - \hat{e}_i(x_k,0) \Big)^2, \\
& \mathcal{L}_{r}^{(i)} = \frac{1}{N_r}\sum_{k=1}^{N_r}\Big(\mathcal{D}[\hat{e}_i(x_k,t_k)] + \sum_{j=1}^i \mathcal{D}[\hat{e}_{j-1}(x_k,t_k)]\Big)^2.
\end{align}
\label{eq:pinn_loss_general}
\end{subequations}
    
\subsection{Proof of Lemma~\ref{lemma:bouding series of ratios}}\label{proof:lemma1}
\begin{proof}
    From Definition~\ref{def:1}, we have that, for all $x \in X'$,
    \begin{align*}
        |p(x,t)-\hat{p}(x,t)| &= \left| \sum_{i=1}^{n} \hat{e}_i(x,t) + e_{n+1}(x,t) \right| 
        \leq \sum_{i=1}^{n} |\hat{e}_i(x,t)| + |e_{n+1}(x,t)| \notag \\
        & \leq \sum_{i=1}^{n} \max_x|\hat{e}_i(x,t)| + \max_x|e_{n+1}(x,t)| := \sum_{i=1}^{n} \hat{e}_i^*(t) + e_{n+1}^*(t).
        \label{eq:error_bound_by_max_error_series}
    \end{align*}
    From the definition of $\gamma_{\frac{i+1}{i}}$ in Eq.~\eqref{eq:approx ratio}, we obtain (omitting $t$ for simplicity of presentation)
    \begin{align*}
        & |p(x,\cdot)-\hat{p}(x,\cdot)| \leq \hat{e}_1^*\Big( 1 + \frac{\hat{e}_{2}^*}{\hat{e}_{1}^*} + \frac{\hat{e}_{3}^*}{\hat{e}_{1}^*} + \dots + \frac{\hat{e}_{n}^*}{\hat{e}_{1}^*} + \frac{e_{n+1}^*}{\hat{e}_1^*}
        \Big) \notag \\
        & = \hat{e}_1^* \Big[ 1 + \gamma_{\frac{2}{1}} + \gamma_{\frac{2}{1}}\gamma_{\frac{3}{2}} + \dots + 
        (\gamma_{\frac{2}{1}}\gamma_{\frac{3}{2}}\dots \gamma_{\frac{n}{n-1}}) + (\gamma_{\frac{2}{1}}\gamma_{\frac{3}{2}} \dots \gamma_{\frac{n-1}{n-2}} \gamma_{\frac{n}{n-1}} \frac{e_{n+1}^*}{\hat{e}_n^*}) \Big] \notag \\
        & = \hat{e}_1^* \Big[ 1 + \gamma_{\frac{2}{1}} + \gamma_{\frac{2}{1}}\gamma_{\frac{3}{2}} + \dots + 
        (\gamma_{\frac{2}{1}}\gamma_{\frac{3}{2}}\dots \gamma_{\frac{n}{n-1}}) + (\gamma_{\frac{2}{1}}\gamma_{\frac{3}{2}} \dots \gamma_{\frac{n-1}{n-2}}  \frac{\hat{e}_{n}^*}{\hat{e}_{n-1}^*} \frac{e_{n+1}^*}{\hat{e}_n^*}) \Big] \notag \\
        & = \hat{e}_1^* \Big[ 1 + \gamma_{\frac{2}{1}} + \gamma_{\frac{2}{1}}\gamma_{\frac{3}{2}} + \dots + 
        (\gamma_{\frac{2}{1}}\gamma_{\frac{3}{2}}\dots \gamma_{\frac{n}{n-1}}) + (\gamma_{\frac{2}{1}}\gamma_{\frac{3}{2}} \dots \gamma_{\frac{n-1}{n-2}}  \frac{e_{n+1}^*}{\hat{e}_{n-1}^*}) \Big].
    \end{align*}
\end{proof}

\subsection{Proof of Lemma~\ref{lemma:bounds_on_ratios}}
\begin{proof}
From Definition~\ref{def:1}, we have, for $i \geq 0$,
\begin{equation}
    e_{i}(x,t) = \hat{e}_{i}(x,t) + e_{i+1}(x,t).
    \label{eq:error_recursive_relation}
\end{equation}
% By moving the terms in , 
By taking the maximum over $x$ of the absolute value of both sides of Eq.~\eqref{eq:error_recursive_relation} and applying the triangle inequality,
we get
\begin{equation}
    \max_{x} |e_{i}(x,t)| \leq \max_{x} |\hat{e}_{i}(x,t)| + \max_{x} |e_{i+1}(x,t)|. \label{eq:basic_inequalities_1}
\end{equation}
Similarly, from Eq.~\eqref{eq:error_recursive_relation}, we obtain
\begin{align}
    &\hat{e}_{i}(x,t) = e_{i}(x,t) - e_{i+1}(x,t) \implies \notag \\
    & \max_{x} |\hat{e}_{i}(x,t)| \leq \max_{x} |e_{i}(x,t)| + \max_{x} |e_{i+1}(x,t)|.
    \label{eq:basic_inequalities}
\end{align}
Now take $2 \leq i < n$, and suppose the corresponding $\alpha_i(t)<1$. Then, we can write the two inequalities in Eqs.~\eqref{eq:basic_inequalities_1} and \eqref{eq:basic_inequalities} with the definition of $\hat{e}_i^*(t)$ in Eq.~\eqref{eq:max_error} and the expression in Eq.~\eqref{eq:assume_alpha_relation} as
\begin{align}
\left\{
    \begin{aligned}
        & \alpha_{i-1}(t) \hat{e}_{i-1}^*(t) \leq \hat{e}_i^*(t) + \alpha_i(t) \hat{e}_i^*(t) \\
        & \hat{e}_i^*(t) \leq \alpha_{i-1}(t) \hat{e}_{i-1}^*(t) + \alpha_i(t) \hat{e}_i^*(t).
    \end{aligned}
\right.
\label{eq:e1e2_inequality}
\end{align}
By rearranging Eq.~\eqref{eq:e1e2_inequality}, we obtain the lower and upper bounds of $\gamma_{\frac{i}{i-1}}(t)$:
\begin{equation}
    \frac{\alpha_{i-1}(t)}{1+\alpha_i(t)} \leq \frac{\hat{e}_i^*(t)}{\hat{e}_{i-1}^*(t)} = \gamma_{\frac{i}{i-1}}(t) \leq \frac{\alpha_{i-1}(t)}{1-\alpha_i(t)},\; 2 \leq i < n,
    \label{eq:ratio_and_alphas_general}
\end{equation}
which is well defined because the denominator $\hat{e}_{i-1}^* > 0$ by Assumption~\ref{assumption:ehat_bigger_than_zero}, and the (RHS) of Eq.~\eqref{eq:ratio_and_alphas_general} is always $\geq$ the (LHS) of Eq.~\eqref{eq:ratio_and_alphas_general} if $0\leq \alpha_i(t) < 1$ for all $2 \leq i < n$.
\end{proof}

\subsection{Proof of Lemma~\ref{lemma:exist_decrease_ratio}}
\begin{proof}
    For simplicity of presentation, we omit writing the dependent variable $t$. 
    Assume the conditions in Eq.~\eqref{eq:alpha_conditions} are satisfied; then it is true that $0 < \alpha_2 < 1$.
    Since both $\alpha_1,\alpha_2 < 1$, by Lemma~\ref{lemma:bounds_on_ratios} and Condition~\eqref{eq:alpha2_condition1}, we obtain 
    $$\gamma_{\frac{2}{1}} \leq \frac{\alpha_1}{1-\alpha_2} < 1, $$
    proving the RHS of Eq.~\eqref{eq:decrease_ratio}.

    For the LHS of Eq.~\eqref{eq:decrease_ratio}, let $\alpha_{i} \leq \alpha_2$ for all $2 < i < n$.
    Since $\alpha_2 < 1$, then by Lemma~\ref{lemma:bounds_on_ratios}, we have
    \begin{align}
        \gamma_{\frac{i}{i-1}} \leq \frac{\alpha_{i-1}}{1-\alpha_{i}} 
        \leq \frac{\alpha_{i-1}}{1-\alpha_2} 
        \leq \frac{\alpha_2}{1-\alpha_2}. \label{eq: mid step}
    \end{align}
    What remains is to show that RHS of Eq.~\eqref{eq: mid step} is $<\gamma_{\frac{2}{1}}$.  
    From Condition~\eqref{eq:alpha2_condition2}, we have
    \begin{align}
        \alpha_2 (1+\alpha_2) &< \alpha_1^2 \\
        & < \alpha_1 (1-\alpha_2), \label{eq: mid step 2}
    \end{align}
    where Eq.~\eqref{eq: mid step 2} holds by Condition~\eqref{eq:alpha2_condition1}.  From Eq.~\eqref{eq: mid step 2}, we obtain
    \begin{align}
        \frac{\alpha_2}{1-\alpha_2} < \frac{\alpha_1}{1+\alpha_2}. \label{eq: mid step 3}
    \end{align}
    By combining Eqs. \eqref{eq: mid step} and \eqref{eq: mid step 3}, we have
    \begin{equation*}
        \gamma_{\frac{i}{i-1}} < \frac{\alpha_1}{1+\alpha_2} \leq \gamma_{\frac{2}{1}}, \; 2 < i < n.
    \end{equation*}
\end{proof}

\subsection{Proof of Theorem~\ref{theorem:temporal_error_bound}}
\begin{proof} 
    Take $n\rightarrow \infty$ in Lemma~\ref{lemma:bouding series of ratios}, and
    train $\hat{e}_1$ and $\hat{e}_2$ such that the sufficient conditions of Eq.~\eqref{eq:alpha_conditions} are met; then, by Lemma~\ref{lemma:exist_decrease_ratio}, $\gamma_{\frac{3}{2}},\gamma_{\frac{4}{3}},\dots, \gamma_{\frac{n-1}{n-2}} < \gamma_{\frac{2}{1}}<1$. Hence, we have
    % Because $\hat{e}_1^*, \hat{e}_{n-1}^* > 0$, and $e_{n+1}^*$ are bounded by construction, we have
    \begin{align}
        & |p(x,t)-\hat{p}(x,t)| \notag \\
        &\leq \hat{e}_1^* \lim_{n\rightarrow \infty} \Big( 1 + \gamma_{\frac{2}{1}} + \gamma_{\frac{2}{1}}\gamma_{\frac{3}{2}} + \dots + \Big[ \gamma_{\frac{2}{1}}\gamma_{\frac{3}{2}}\dots \gamma_{\frac{n-1}{n-2}} \gamma_{\frac{n}{n-1}} \Big] + \Big[ \gamma_{\frac{2}{1}}\gamma_{\frac{3}{2}}\dots \gamma_{\frac{n-1}{n-2}}\frac{e_{n+1}^*}{\hat{e}_{n-1}^*} \Big] \Big) \notag \\
        &= \hat{e}_1^* \lim_{n\rightarrow \infty} \Big( 1 + \gamma_{\frac{2}{1}} + \gamma_{\frac{2}{1}}\gamma_{\frac{3}{2}} + \dots + \Big[ \gamma_{\frac{2}{1}}\gamma_{\frac{3}{2}}\dots \gamma_{\frac{n-1}{n-2}} \frac{\hat{e}_n^*}{\hat{e}_{n-1}^*} \Big] + \Big[ \gamma_{\frac{2}{1}}\gamma_{\frac{3}{2}}\dots \gamma_{\frac{n-1}{n-2}}\frac{e_{n+1}^*}{\hat{e}_{n-1}^*} \Big] \Big) \notag \\
        &\leq \hat{e}_1^* \lim_{n\rightarrow \infty} \Big( 1 + \gamma_{\frac{2}{1}} + \gamma_{\frac{2}{1}}^2 + \dots + \gamma_{\frac{2}{1}}^{n-2} + \Big[ \gamma_{\frac{2}{1}}^{n-2}\frac{\hat{e}_n^*}{\hat{e}_{n-1}^*} \Big] + \Big[ \gamma_{\frac{2}{1}}^{n-2} \frac{e_{n+1}^*}{\hat{e}_{n-1}^*} \Big] \Big) \notag \\
        &= \Big[ \hat{e}_1^* \lim_{n\rightarrow \infty} \Big( 1 + \gamma_{\frac{2}{1}} + \gamma_{\frac{2}{1}}^2 + \dots + \gamma_{\frac{2}{1}}^{n-2} \Big) \Big] + \Big[ \hat{e}_1^* \lim_{n\rightarrow \infty} \Big( \gamma_{\frac{2}{1}}^{n-2}\frac{(\hat{e}_n^* + e_{n+1}^*)}{\hat{e}_{n-1}^*} \Big) \Big]. 
        \label{eq:gamma_series_bound}
    \end{align}
    The first term in Eq.~\eqref{eq:gamma_series_bound} forms a geometric series, and the second term in Eq.~\eqref{eq:gamma_series_bound} is zero as $n$ goes to infinity, because $\hat{e}_1^*, \hat{e}_{n-1}^*, \hat{e}_n^*, e_{n+1}^*$ are bounded by construction and $\hat{e}_{n-1}^* > 0$ by Assumption~\ref{assumption:ehat_bigger_than_zero}. Hence,
    \begin{align}
        |p(x,t)-\hat{p}(x,t)| 
        % &
        \leq 
        \hat{e}_1^*(t) \Big( \frac{1}{1-\gamma_{\frac{2}{1}}(t)} \Big) := B_2(t).
        \label{eq:geo_series}
    \end{align}
\end{proof}

\subsection{Proof of Theorem~\ref{theorem:error_bound_tightness}}
\label{proof:arbitrary_tight}
\begin{proof}
We omit the time variable $t$ in this proof for readability. By Definition~\ref{def:1}, the maximum approximation error $\max_x|e_1(x,\cdot)| := e_1^*$.
Using the relations of $\hat{e}_1 = e_1 - e_2, \hat{e}_1^* \leq e_1^* + e_2^*$, the error bound in Theorem~\ref{theorem:temporal_error_bound} can be upper-bounded by
\begin{align}
    B_2 = \hat{e}_1^* \Big(\frac{1}{1-\hat{e}_2^*/\hat{e}_1^*} \Big) \leq (e_1^* + e_2^*) \Big(\frac{1}{1-\hat{e}_2^*/\hat{e}_1^*} \Big).
\end{align}
Hence, the gap between $B_2$ and the maximum approximation error $e_1^*$ is upper-bounded by
\begin{align}
    B_2 - e_1^* \leq e_1^* \Big(\frac{1}{1-\hat{e}_2^*/\hat{e}_1^*} - 1 \Big) + e_2^* \Big(\frac{1}{1-\hat{e}_2^*/\hat{e}_1^*} \Big).
    \label{eq:derive_arbitrary_tight_1}
\end{align}
Now suppose $\hat{e}_1$ approximates $e_1$ sufficiently well such that $e_2(x,t)=e_1(x,t)-\hat{e}_1(x,t):=\delta(x,t)$, where $\delta(x,t)$ denotes a sufficiently small function for all $(x,t) \in \Omega$. Furthermore, suppose $\hat{e}_2$ approximates $e_2$ sufficiently well such that $\hat{e}_2(x,t) \rightarrow e_2(x,t) = \delta(x,t)$ for all $(x,t) \in \Omega$. 
Define $\delta^* := \max_x|\delta(x,\cdot)|$, then $\hat{e}_2^* \rightarrow \delta^*$, and $\delta^* \rightarrow 0$ as $\delta(x,t) \rightarrow 0$ for all $(x,t) \in \Omega$. Consequently, the RHS of Eq.~\eqref{eq:derive_arbitrary_tight_1}, at the limit, becomes
\begin{align}
    & \lim_{\hat{e}_2^* \rightarrow \delta^*, \delta^* \rightarrow 0} \Big[ e_1^* \Big(\frac{1}{1-\hat{e}_2^*/\hat{e}_1^*} - 1 \Big) + e_2^* \Big(\frac{1}{1-\hat{e}_2^*/\hat{e}_1^*} \Big) \Big] \notag \\
    & =  \lim_{\delta^* \rightarrow 0} \Big[ e_1^* \Big(\frac{1}{1-\delta^*/\hat{e}_1^*} - 1 \Big) + \delta^* \Big(\frac{1}{1-\delta^*/\hat{e}_1^*} \Big) \Big] = 0.
\end{align}
Lastly, for every $\epsilon \in (0,\infty)$, take $\delta^*$ sufficiently small such that the RHS of Eq.~\eqref{eq:derive_arbitrary_tight_1} is smaller than $\epsilon$; then $B_2 - e_1^* < \epsilon$, and the proof is completed.
\end{proof}

% \subsection{Proof of Corollary~\ref{corollary:spacetime error bound}}
% \begin{proof}
%     The proof is a natural extension to that of theorem \ref{theorem:temporal_error_bound}. Assume $m > 1$ be a finite integer. By Definition \ref{def:1}, we have
%     \begin{align}
%         & p(x,t)-\hat{p}(x,t) = \lim_{n\rightarrow \infty}\sum_{i=1}^{n} \hat{e}_i(x,t) + e_{n+1}(x,t) \notag \\
%         & = \sum_{i=1}^{m-1} \hat{e}_i(x,t) + \lim_{n\rightarrow \infty} \Big( \sum_{i=m}^{n} \hat{e}_i(x,t) + e_{n+1}(x,t) \Big) \notag \\
%         \implies & p(x,t)-\hat{p}(x,t) - \sum_{i=1}^{m-1}\hat{e}_i(x,t) = \lim_{n\rightarrow \infty} \Big( \sum_{i=m}^{n} \hat{e}_i(x,t) + e_{n+1}(x,t) \Big) \notag \\
%         \implies & |p(x,t)-\hat{p}(x,t) - \sum_{i=1}^{m-1}\hat{e}_i(x,t)| = |\lim_{n\rightarrow \infty} \Big( \sum_{i=m}^{n} \hat{e}_i(x,t) + e_{n+1}(x,t) \Big)| \notag \\
%         \implies & |p(x,t)-\hat{p}(x,t) - \sum_{i=1}^{m-1}\hat{e}_i(x,t)| \leq \lim_{n\rightarrow \infty} \sum_{i=m}^{n} \hat{e}_i^*(x,t) + e_{n+1}^*(x,t) \notag \\
%         &\leq \hat{e}_m^*(t) \Big(1 + \frac{\hat{e}_{m+1}^*(t)}{\hat{e}_m^*(t)} + \frac{\hat{e}_{m+2}^*(t)}{\hat{e}_m^*(t)} + \dots + \frac{\hat{e}_{n}^*(t)}{\hat{e}_m^*(t)} + \frac{e_{n+1}^*(t)}{\hat{e}_m^*(t)} \Big) \notag \\
%         &= \lim_{n \rightarrow \infty} \hat{e}_m^* \Big( 1 + \gamma_{\frac{m+1}{m}} + \gamma_{\frac{m+1}{m}}\gamma_{\frac{m+2}{m+1}} + \ldots + \gamma_{\frac{m+1}{m}}\gamma_{\frac{m+2}{m+1}} \dots \gamma_{\frac{n}{n-1}} \frac{e_{n+1}^*}{\hat{e}_n^*} \Big)
%     \label{eq:derive_spacetime_error_bound_1}
%     \end{align}
% Under the same condition in Lemma \ref{lemma:exist_decrease_ratio}, but now impose on $\alpha_m(t)$ and $\alpha_{m+1}(t)$ such that $0<\alpha_m(t)<1$ and $0<\alpha_{m+1}(t)<1-\alpha_m(t),\alpha_{m+1}(t)(1+\alpha_{m+1}(t)) < \alpha_m^2(t)$. Then $\gamma_{\frac{m+1}{m}}(t) < 1$ is greater than all the other $\gamma_{\frac{m+2}{m+1}}(t), \gamma_{\frac{m+3}{m+2}}(t), \dots$. Thus,~\eqref{eq:derive_spacetime_error_bound_1} is bounded by
% \begin{align}
%     & |p(x,t)-\hat{p}(x,t) - \sum_{i=1}^{m-1}\hat{e}_i(x,t)|
%     \leq \lim_{n\rightarrow \infty}\hat{e}_m^*(t) \Big(1 + \gamma_{\frac{m+1}{m}} + \gamma_{\frac{m+1}{m}}^2 + \dots + \gamma_{\frac{m+1}{m}}^{n-1} + \gamma_{\frac{m+1}{m}}^{n-1}\frac{e_{n+1}^*}{\hat{e}_n^*} \Big) \notag \\
%     & = \Big[ \hat{e}_m^*(t) \lim_{n \rightarrow \infty} \Big(1 + \gamma_{\frac{m+1}{m}} + \gamma_{\frac{m+1}{m}}^2 + \dots + \gamma_{\frac{m+1}{m}}^{n-1} \Big) \Big] + \Big[ \hat{e}_m^*(t)\lim_{n \rightarrow \infty} \alpha_{n-1}(t) \gamma_{\frac{m+1}{m}}^{n-1}(t) \Big].
% \label{eq:derive_spacetime_error_bound_3}
% \end{align}
% Since $\hat{e}_m^*(t)$ is bounded, $\gamma_{\frac{m+1}{m}} < 1$, and $\exists \alpha_{n-1} \leq \alpha_{m+1} < 1$, the first term in~\eqref{eq:derive_spacetime_error_bound_3} forms a geometric series, and the second term goes to zero. Hence~.\eqref{eq:derive_spacetime_error_bound_3} becomes
% \begin{align}
%     & |p(x,t)-\hat{p}(x,t) - \sum_{i=1}^{m-1}\hat{e}_i(x,t)|
%     \leq \hat{e}_m^*(t)\Big( \frac{1}{1-\gamma_{\frac{m+1}{m}}(t)} \Big) \notag \implies \\
%     & p(x,t)-\hat{p}(x,t) \in \big[ \sum_{i=1}^{m-1} \hat{e}_i(x,t) - \hat{e}_m^*(t)\Big( \frac{1}{1-\gamma_{\frac{m+1}{m}}(t)} \Big), \sum_{i=1}^{m-1} \hat{e}_i(x,t) + \hat{e}_m^*(t)\Big( \frac{1}{1-\gamma_{\frac{m+1}{m}}(t)} \Big) \big].
% \label{eq:derive_spacetime_error_bound_4}
% \end{align}
% Now take $n = m+1$, then the proof is completed.
% \end{proof}

\subsection{Proof of Corollary~\ref{corollary:special_error_bound}}\label{proof:coro1}
\begin{proof}
    For all $t\in T'$, let $0 < \alpha_1(t) <1$. 
    First, $e_1 = \hat{e}_1 + e_2$ by Def.~\ref{def:1}, which implies
    \begin{equation}\label{eq:40}
        |e_1(x,t)| \leq \max_x|\hat{e}_1(x,t)| + \max_x|e_2(x,t)|
    \end{equation}
    for all $x\in X'$.
    Then by $0< \alpha_1 < 1$ and its definition in Eq.~\eqref{eq:assume_alpha_relation}, we have
    \[
    \alpha_1 \max_x|\hat{e}_1(x,t)| := \max_x|e_1(x,t)-\hat{e}_1(x,t)| = \max_x|e_2(x,t)|.
    \]
    Hence, Eq.~\eqref{eq:40} becomes
    \begin{align*}
        |e_1(x,t)| & \leq \max_x|\hat{e}_1(x,t)| + \alpha_1 (t)\max_x|\hat{e}_1(x,t)| \\
        &= \max_x|\hat{e}_1(x,t)|(1+\alpha_1(t)) \\
        & < 2 \hat{e}_1^*(t) := B_1(t).
    \end{align*}
    % Suppose there exists a "virtual" $\hat{e}_{2}(x,t)$ such that $\hat{e}_{2}(x,t) = e_{2}(x,t)$ for all $(x,t) \in \Omega$
    % ; this implies that the third error $e_3(x,t)$ is zero. Hence, the series in Eq.~\eqref{eq:error_bound_by_max_error_series_ratios} with $n=2$ becomes
    % \begin{align}
    % |p(x,t)-\hat{p}(x,t)|
    % & \leq \hat{e}_1^*(t) + \hat{e}_2^*(t) + 0 \notag \\
    % & = \hat{e}_1^*(t) \Big( 1 + \gamma_{\frac{2}{1}}(t) \Big).
    % \label{eq:loosB_2ound_finitB_1eries}
    % \end{align}
    % By the virtual $\hat{e}_2 = e_2$, and the relation $e_2^* = \alpha_1 \hat{e}_1^*$, we have
    % \begin{align}
    %     & \max_{x} |\hat{e}_2(x,t)| = \max_{x} | e_2(x,t)| \notag \\
    %     \implies & \hat{e}_2^*(t) = e_2^* = \alpha_1(t) \hat{e}_1^*(t) \notag \\
    %     \implies & \gamma_{\frac{2}{1}}(t) = \alpha_1(t). 
    % \end{align}
    % Combined $\gamma_{\frac{2}{1}} = \alpha_1$ with Eq.~\eqref{eq:loosB_2ound_finitB_1eries}, we prove that
    % \begin{align}
    %     |p(x,t)-\hat{p}(x,t)|
    % & \leq \hat{e}_1^*\Big( 1 + \gamma_{\frac{2}{1}}(t) \Big) \notag \\
    % & = \hat{e}_1^*(t)\Big( 1 + \alpha_1(t) \Big) \notag \\
    % & < \hat{e}_1^*(t)(1+1) = 2\hat{e}_1^*(t) := B_1(t).
    % \end{align}
    It is clear that $B_1(t)$ is not arbitrarily tight because of the constant 2.
\end{proof}

\subsection{Proof of Proposition~\ref{prop:checking_alpha1}}\label{proof:prop1}
\begin{proof}
    Let $x \in \mathbb{R}^n$. By \cite[Theorem~2.6]{mishra2023estimates}, we know
    \begin{align}
        & \varepsilon_{G} := \|e_1-\hat{e}_1\|_{W^{1,q}} \leq C_{pde}\mathcal{L}^{(1)} + C_{pde}C_{quad}^{\frac{1}{q}}N^{\frac{-\beta}{q}},
        \label{eq:total_error_bound_norms}
    \end{align}
    where $\mathcal{L}^{(1)}$ is the training loss of $\hat{e}_1$, $C_{pde}>0$ is the stability estimate of the first error PDE associated with the $W^{1,q}$ norm ($q \geq 2$), and $C_{quad},\beta >0$ are the constants according to the quadrature sampling points. By Definition~\ref{def:1}, $e_2 = e_1 - \hat{e}_1$, and since $e_1(x,t),\hat{e}_1(x,t)$ and their first derivatives are bounded over the considered domain of Problem~\ref{prob:1},
    we know there exists a universal embedding constant $C_{embed}$ \citep{mizuguchi2017estimation} such that 
    \begin{equation}
        |e_2(x,t)| \leq C_{embed} \|e_2(x,t)\|_{W^{1,q}}.
    \end{equation}
    Hence, we have
    \begin{equation}
        |e_2(x,t)| \leq C_{embed} \Big( C_{pde}\mathcal{L}^{(1)} + C_{pde}C_{quad}^{\frac{1}{q}}N^{\frac{-\beta}{q}} \Big).
    \end{equation}
    Using the definition of $\alpha_1(t) := \frac{\max_x|e_2(x,t)|}{\hat{e}_1^*(t)}$, we obtain
    \begin{align}
        \alpha_1(t) & \leq \frac{\max_x|e_2(x,t)|}{\min_t \hat{e}_1^*(t)} \notag \\
        & \leq \frac{1}{\min_t \hat{e}_1^*(t)} \Big[  C_{embed} \Big( C_{pde}\mathcal{L}^{(1)} + C_{pde}C_{quad}^{\frac{1}{q}}N^{\frac{-\beta}{q}} \Big) \Big].
    \end{align}
\end{proof}

\subsection{Derivation of Extension to Heat PDE with Dirichlet Boundary Condition}\label{proof:1D_Heat}
Here, we take the heat equation as an example. The governing partial differential equation of the solution $u:\Omega = (\mathbb{R}^n \times [0,t_f]) \rightarrow \mathbb{R}$ is
\begin{align*}
    \frac{\partial u(x,t)}{\partial t} = \Delta [u(x,t)],
\end{align*}
subject to initial and Dirichlet boundary conditions
\begin{align*}
& u(x,0) = u_{ic}(x), \\
& u(x,t) = u_{bc}(x,t),\; (x,t) \in \partial \Omega,
\end{align*}
where $\partial \Omega$ is the boundary, and $\Delta[\cdot]:=\sum_{i=1}^n \frac{\partial ^2}{\partial x_i^2}[\cdot]$. Define the heat differential operator $\mathcal{D}_h[\cdot] := \frac{\partial}{\partial t}[\cdot] - \Delta[\cdot]$.
By adding the boundary constraints into the training loss in Eq.~\eqref{eq:pinn_loss}, which is common in standard PINNs \citep{sirignano2018dgm}, we can train $\hat{u}$ that approximates the solution $u$.
Define the approximation error $e_1 = u - \hat{u}$; then a trained $\hat{u}(x,t)$ yields
\begin{align*}
 & \mathcal{D}_h[\hat{u}] = r_1(x,t), \\
 & \hat{u}(x,0)=u_{ic}(x)-e_{1,ic}(x,0), \\
 & \hat{u}(x,t) = u_{bc}(x,t) - e_{1,bc}(x,t),\; (x,t) \in \partial \Omega.
\end{align*}
Applying the heat differential operator to the first error, we obtain
\begin{align}
& \mathcal{D}_h[e_1] + r_1 = 0, \notag \\
& e_1(x,0) = u_{ic}(x)-\hat{u}(x,0) = e_{1,ic}(x,0), \notag \\
& e_1(x,t) = e_{1,bc}(x,t),\;(x,t)\in \partial \Omega.
\label{eq:heat_derive}
\end{align}
Comparing Eq.~\eqref{eq:heat_derive} with Eq.~\eqref{eq:pinn_loss_e1}, the only difference is the boundary condition on $\partial \Omega$. Thus, if an additional loss term regarding the boundary condition, $\mathcal{L}_{bc}$, is added to Eq.~\eqref{eq:pinn_loss_general} to construct $\hat{e}_1$ (as well as the other $\hat{e}_i$), and $u$ is smooth and bounded, then the derivation of Theorem~\ref{theorem:temporal_error_bound} can be followed.