% \documentclass{uai2022} % for initial submission
\documentclass[accepted]{uai2022} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2022} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2022} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

% nasim
\usepackage[caption=false]{subfig}
\usepackage[ruled,linesnumbered]{algorithm2e}
\usepackage{amsthm}

% Theorem-like environments in amsthm's "plain" style. Each environment is
% declared with its own counter, numbered within (and reset at) each \section.
\theoremstyle{plain}
\newtheorem{claim}{Claim}[section]
\newtheorem{theorem}{Theorem}[section]
\newtheorem{corollary}{Corollary}[section]
\newtheorem{lemma}{Lemma}[section]
\newtheorem{definition}{Definition}[section]
% Shared math macros. NOTE(review): presumably this is where \mY and \mP, used
% in the proofs below, are defined -- confirm.
\input{math_commands.tex}

\usepackage{xr}
% \addFileDependency{<file name with extension>}
% Writes "(<file>)" to the terminal/log via \typeout, appends <file> to
% LaTeX's internal file list via \@addtofilelist, and writes "No file <file>."
% when \IfFileExists cannot find it.
% NOTE(review): presumably this lets log-scanning build tools (e.g. latexmk)
% treat <file> as a dependency of this document -- confirm.
% \makeatletter/\makeatother are required because \@addtofilelist contains `@'.
\makeatletter
\newcommand*{\addFileDependency}[1]{% argument=file name and extension
  \typeout{(#1)}
  \@addtofilelist{#1}
  \IfFileExists{#1}{}{\typeout{No file #1.}}
}
\makeatother

% \myexternaldocument{<basename>}
% Wrapper around xr's \externaldocument{<basename>} that additionally registers
% <basename>.tex and <basename>.aux through \addFileDependency.
\newcommand*{\myexternaldocument}[1]{%
    \externaldocument{#1}%
    \addFileDependency{#1.tex}%
    \addFileDependency{#1.aux}%
}
% Make the labels of nasim_323 available to \ref in this document.
% NOTE(review): presumably nasim_323 is the main paper that defines labels such
% as eq:pde, eq:fpde and th:general referenced below -- confirm.
\myexternaldocument{nasim_323}

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{Efficient Learning of Sparse and Decomposable PDEs using Random Projection}

% The standard author block has changed for UAI 2022 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is authomatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
% \author[1]{\href{mailto:<jj@example.edu>?Subject=Your UAI 2022 paper}{Jane~J.~von~O'L\'opez}{}}
% \author[1]{Md~Nasim}
% \author[2]{Xinghang~Zhang}
% \author[2]{Anter~El-Azab}
% \author[1]{Yexiang~Xue}

% % Add affiliations after the authors
% \affil[1]{%
%     Department of Computer Science\\
%     Purdue University\\
%     West Lafayette, IN, USA
% }
% \affil[2]{%
%     School of Materials Engineering\\
%     Purdue University\\
%     West Lafayette, IN, USA
% }

  
\begin{document}

% \maketitle

% Document-local shorthands.
% \br: bold upright r. NOTE(review): presumably a position vector -- confirm.
\newcommand{\br}{{\mathbf{r}}}
% \dd: upright d. NOTE(review): presumably the differential symbol -- confirm.
\newcommand{\dd}{{\mathrm{d}}}




% \rapid: the name "Rapid-PDE" typeset in small caps.
\newcommand{\rapid}{\textsc{Rapid-PDE}}





\section{Supplementary Information}

\subsection{Computing Infrastructure}
We used two different architectures for our experiments. One is a dedicated machine with an Intel(R) Core(TM) i9-10920X CPU @ 3.50GHz, 64 GB memory, an Nvidia Quadro RTX 4000 GPU, the Ubuntu 20.04.3 LTS operating system, Python 3.8.10, and PyTorch 1.9.0. The other is a shared server with two 64-core AMD Epyc 7662 ``Rome'' processors and 256 GB memory per server node, the CentOS 7 operating system, Python 3.8.10, and PyTorch 1.9.0.

\subsection{Training and Testing}
For both grain growth and void evolution applications, we use data for $T=1000$ timesteps for training, and then use data for another 300 timesteps for testing. We use a learning rate of 0.01, a batch size of 1, and a maximum of 1000 epochs in both cases. To compute the test MSE, we simulated the dynamics for 100 timesteps for both applications, and compared the simulated results of the original ground-truth model with those of the trained model. \textbf{The code is submitted as a separate zip file.}

\subsection{Representation of Phase-field Equations in Canonical Forms}
\begin{figure}[ht]

\subfloat[Feature Extraction]{%
  \includegraphics[clip,width=\columnwidth]{figures/void_features.png}%
  \label{fig:void_features}
}

\subfloat[Relation between system state change and features]{%
  \includegraphics[clip,width=\columnwidth]{figures/void_cv_change_w_features.png}%
  \label{fig:void_change}
}
\caption{Visual representation of change in $c_v$ (one of the 3 phase field variables in the model for nanovoid defect evolution) as a weighted sum of sparse features. $c_v$ is a phase field variable in the phase field model of void evolution, the sparse features are computed from the phase field variables of the model and these features are non-zero only along the boundaries of the void defect as shown here.}
\vskip -0.2cm
\end{figure}
Here, we present the details of how the phase-field equations for nanovoid defect evolution fit into the form in Equation~\ref{eq:pde} and Equation~\ref{eq:fpde}. The same process for grain growth was shown in the main text.

\textbf{Case Study 2: Nanovoid Defect in Crystalline Materials.}
From the three update equations for the three phase field variables ($c_v,c_i,\eta$) in the phase field model of void defect evolution PDEs, we show how the update equation of $c_v$ can be decomposed into parameter functions and features. A similar decomposition can be done for $c_i$ and $\eta$. Writing out the expression for the functional derivative $\frac{\delta F}{\delta c_v}$, we get:
\begin{align}
    \frac{\partial c_v}{\partial t} = M_v \nabla^2 &\biggl[(\eta-1)^2\times \nonumber \\
    &(E_v + kBT[\log c_v - \log (1-c_v-c_i)]) +\nonumber \\
    &\eta^2\times 2(c_v-1) - \kappa_v \nabla^2(c_v)\biggr]
\end{align}
%
Here, $E_v,kBT,\kappa_v$ are model parameters. Separating each component on the right hand side and rewriting, we get:
\begin{align}
    \frac{\partial c_v}{\partial t} = &M_v E_v \nabla^2(\eta-1)^2 +\nonumber \\
    & M_v kBT \nabla^2((\eta-1)^2\times \log c_v) -\nonumber \\
    & M_v kBT \nabla^2((\eta-1)^2\times \log(1-c_v-c_i))+\nonumber \\
    & M_v \nabla^2(\eta^2\times2(c_v-1)) -\nonumber \\
    & M_v \kappa_v \nabla^2(\nabla^2 c_v)
    \label{eq:cv}
\end{align}
%
Comparing this with Equation~\ref{eq:pde}, we can write:
%
\begin{align*}
    M_1 &= M_v E_v,\\
    M_2 & = M_v kBT,\\
    M_3 & = -M_v kBT,\\
    M_4 &= M_v,\\
    M_5 &= -M_v \kappa_v,\\
    F_1 &= (\eta-1)^2,\\
    F_2 &= (\eta-1)^2\times \log c_v,\\
    F_3 &= (\eta-1)^2\times \log(1-c_v-c_i),\\
    F_4 &= \eta^2\times2(c_v-1),\\
    F_5 &= \nabla^2 c_v,\\
    N &= G = D = 0.
\end{align*}
%
Comparing this with Equation~\ref{eq:fpde}, we can write:
%
\begin{align*}
    \phi_i = M_i,\quad W_i = F_i\quad \text{for } i = {1,2,\dots,5}
\end{align*}
%
where the learnable parameter set $\theta=\{M_v,E_v,kBT,\kappa_v\}$.

\subsection{Theorems used in proving error bounds of the random projections}

\begin{theorem}
(Lemma 5.1 in \cite{baraniuk2008simple}) Let $\Phi\in \mathbb{R}^{n\times N}$ be a random matrix, drawn according to a distribution, such that for any ${x}\in \mathbb{R}^N$
\begin{equation*}
    Pr(\left| ||\Phi x||_2^2-||x||_2^2 \right| \geq \epsilon||x||_2^2) \leq 2e^{-n c_0(\epsilon)}, \quad 0<\epsilon<1
    \label{ieq:jl_bound}
\end{equation*}
Here, $c_0$ is a constant only dependent on $\epsilon$, and $c_0(\epsilon)>0$ for $\epsilon \in (0,1)$.
Then, for any set $T$ with $\#(T ) = k < n$ and any $0 < \delta < 1$, we have

\begin{equation*}
    (1-\delta)||x||_2 \leq ||\Phi x||_2 \leq (1+\delta)||x||_2 \quad \text{ for all } x \in X_T
\end{equation*}
with probability
\begin{equation*}
    \geq 1-2(12/\delta)^k e^{-c_0(\delta/2)n}
\end{equation*}
Here, $T$ is a set of indices, and $X_T$ is the set of all vectors in $\mathbb{R}^N$ that are zero outside $T$.

\end{theorem}

\begin{proof}[Proof of Theorem~\ref{th:general}]
Because the $y_{i,j}$ are sampled i.i.d.\ with zero mean and unit variance, it is easy to verify that $E(||y_i^T x||_2^2) = ||x||_2^2$ for any vector $x$. Hence,
\[E(||\mY x||_2^2) = \sum_{i=1}^n E(||y_i^T x||_2^2) = n||x||_2^2.\]
Because $||y_i^T x||_2^2 / ||x||_2^2$ is sub-exponential with parameter $(\sigma^2, b)$, and $||\mY x||_2^2 = \sum_{i=1}^n ||y_i^T x||_2^2$, $||\mY x||_2^2 / ||x||_2^2$ is sub-exponential with parameter $(n\sigma^2, b)$.

Using Chernoff bound of sub-exponential variables, we have for $0 < \delta' < \min\{n,  n\sigma^2 / b\}$:
\begin{equation}
    Pr\left(\bigg|\frac{||\mY x||_2^2}{||x||_2^2} - n\bigg| > \delta' \right) \leq 2\exp\left(-\frac{\delta'^2}{2 n\sigma^2}\right).
\end{equation}
Substitute $n\delta = \delta' $ into the previous equation, we have for $0 < \delta < \min\{1,  \sigma^2 / b\}$:
\begin{equation}
    Pr\left(\bigg|\frac{||\mY x||_2^2}{n||x||_2^2} - 1\bigg| > \delta \right) \leq 2\exp\left(-\frac{n\delta^2}{2 \sigma^2}\right).
\end{equation}
Or, written using matrix $\mP$,
\begin{equation}
    Pr\left(| ||\mP x||_2^2 - ||x||_2^2 | > \delta ||x||_2^2  \right) \leq 2\exp\left(-\frac{n\delta^2}{2 \sigma^2}\right).
\end{equation}
This is Equation (4.3) in \cite{baraniuk2008simple} with $c_0(\delta) = \delta^2 / (2\sigma^2)$.
Because $\Delta u_{t_i}'$ and $\Delta u_{t_i}$ each have at most $k$ non-zero elements, $\Delta u_{t_i}' - \Delta u_{t_i}$ has at most $2k$ non-zero elements. Using Lemma 5.1 in \cite{baraniuk2008simple}, we know that with probability at least
\[1 - 2(12/\delta)^{2k}\exp(-n\delta^2/(8\sigma^2)),\]
we have:
\begin{align}
    (1-\delta) ||\Delta u_{t_i}' - \Delta u_{t_i}||_2 &\leq ||\mP \Delta u_{t_i}' - \mP \Delta u_{t_i}||_2 \nonumber\\
    &\leq (1+\delta) ||\Delta u_{t_i}' - \Delta u_{t_i}||_2.
\end{align}
Squaring the previous inequality and summing over $i=1,\ldots, N$, we get what needs to be proved.
\end{proof}

\begin{proof}
% \xyx{Can you put a proof for corollary 5.1 here?}
(Corollary 5.1) We will prove that for $y_{i,j}\sim \mathcal{N}(0,1)$, $X = ||y_i x||^2/||x||_2^2$ is sub-exponential with parameters $(4,4)$. First, we note that:
\begin{align*}
    y_{i,j} &\sim \mathcal{N}(0,1)\\
    y_{i,j}x_j &\sim \mathcal{N}(0,x_j^2)\\
    \sum_j y_{i,j}x_j &\sim \mathcal{N}(0,{||x||_2^2})\\
    \frac{(y_i x)}{||x||_2} &\sim \mathcal{N}(0,{1})
\end{align*}
Therefore, $X\sim \chi_1^2$, chi-square distribution with 1 degree of freedom. Now,

\begin{align*}
    &E[\exp (\lambda(X-1))]\\
    &= \frac{1}{\sqrt{2\pi}} \int_{-\infty}^{+\infty} \exp(\lambda(z^2-1))\exp(-\frac{z^2}{2}) dz\\
    &= \frac{\exp(-\lambda)}{\sqrt{2\pi}} \int_{-\infty}^{+\infty} \exp(-z^2(\frac{1}{2}-\lambda)) dz\\
    &= \frac{\exp(-\lambda)}{\sqrt{2\pi}} \sqrt{\frac{\pi}{\frac{1}{2}-\lambda}}\\
    &= \frac{\exp(-\lambda)}{\sqrt{1-2\lambda}}
\end{align*}
It is easy to verify that for $|\lambda|<\frac{1}{4}$:
\begin{equation*}
    \frac{\exp(-\lambda)}{\sqrt{1-2\lambda}} \leq \exp(2\lambda^2)
\end{equation*}
Therefore, $X$ is sub-exponential with parameters $(4,4)$. Now we can apply Theorem~\ref{th:general} to get our desired bound on the loss functions.
\end{proof}


\begin{proof}
(Corollary 5.2) We will first prove that for random variable $y_{i,j}\sim \mathcal{U}(-\sqrt{3},\sqrt{3})$, the expression $X = ||y_i x||^2/||x||_2^2$ is sub-exponential. By change of variable and rescaling trick, we can then extend this for any arbitrary uniform distribution.
It is trivial to verify that mean $E[y_{i,j}] = 0$, and variance $\sigma^2(y_{i,j}) = 1$. Let $b_d=3d-1,c_d=\frac{d(5d+4)}{5}-1$. We will prove that $X$ is sub-exponential with parameters ($2c_d,2b_d$), by proving that $|X-E[X]|<b_d$ and $Var(X)\leq c_d$.
%
Now,
\begin{align*}
    E[X] &= E[\frac{||y_i x||^2}{||x||_2^2}] = \frac{1}{||x||_2^2}E[||y_i x||_2^2] = 1
\end{align*}
%
Now we find an upper bound on $|X-E[X]|$. Applying the Cauchy--Schwarz inequality, we get:
\begin{align*}
    |X - E[X]| &\leq \max_{y_i} \frac{||y_i x||^2}{||x||_2^2}-1\\
    &\leq \max_{y_i} \frac{||y_{i}||_2^2 ||x||_2^2}{||x||_2^2}-1\\
    &\leq \max_{y_i} \sum_{j=1}^d y_{i,j}^2-1\\
    &\leq 3d-1
\end{align*}
%
Now we look into computing $Var(X)$:
\begin{align*}
    Var(X) &= E[X^2] - (E[X])^2
\end{align*}
We have already shown that $E[X]=1$. Now, using the Cauchy--Schwarz inequality again, we get:
\begin{align*}
    E[X^2] &= E[\frac{||y_i x||^4}{||x||_2^4}]\\
    % E[||y_i x||^4] &= E[(\sum_j y_{i,j} x_j)^4]
    &= E[\frac{||y_i x||^2 ||y_i x||^2}{||x||_2^4}]\\
    &\leq E[\frac{||y_i||_2^2 ||x||_2^2 ||y_i||_2^2 ||x||_2^2}{||x||_2^4}]\\
    &= E[||y_i||_2^4]\\
    &= E[(\sum_{j}y_{i,j}^2)^2]\\
    &= E[\sum_{j}y_{i,j}^4 + 2\sum_{k< l}y_{i,k}^2 y_{i,l}^2]\\
    &= \sum_j E[y_{i,j}^4] + 2\sum_{k< l}E[y_{i,k}^2 y_{i,l}^2]\\
    &= d\times \frac{9}{5} + 2\times \frac{d(d-1)}{2}\\
    &= \frac{d(5d+4)}{5}
\end{align*}
Now we can put a bound on $Var(X)$ as follows:
\begin{align*}
    Var(X) \leq \frac{d(5d+4)}{5}-1
\end{align*}
Let $c_d = (\frac{d(5d+4)}{5}-1)$, and $b_d=3d-1$. Now,

\begin{align*}
    E[\exp(\lambda(X-E[X]))] &= 1 + \frac{\lambda^2 E[(X-E[X])^2]}{2} +\\
    & \sum_{i=3}^\infty \lambda^i \frac{E[(X-E[X])^i]}{i!}\\
    &\leq 1 + \frac{\lambda^2 Var(X)}{2} +\\
    &\frac{\lambda^2 Var(X)}{2}\sum_{i=3}^\infty(|\lambda|b_d)^{i-2}
\end{align*}
Here we used $|X-E[X]|\leq b_d$, which gives $|E[(X-E[X])^i]| \leq b_d^{i-2}\, Var(X)$ for $i\geq 3$, together with $i!\geq 2$.
For $|\lambda| < \frac{1}{2b_d}$:
\begin{align*}
    E[\exp(\lambda(X-E[X]))] &\leq 1 + \frac{\lambda^2 Var(X)}{2(1-b_d|\lambda|)}\\
    &\leq \exp\left(\frac{\lambda^2 Var(X)}{2(1-b_d|\lambda|)}\right)\\
    &\leq \exp\left(\lambda^2 Var(X)\right)\\
    &\leq \exp\left(\lambda^2 c_d\right)
\end{align*}
Therefore, $\frac{||y_i x||^2}{||x||_2^2}$ is sub-exponential with parameters $(2c_d,2b_d)$.

Let's assume now $\mP$ is a matrix, and each element of $\mP$ is drawn from a uniform distribution of mean $\mu$ and variance $\sigma^2$. Therefore, according to Theorem~\ref{th:general}, for $0<\delta<\min\{1,c_d/b_d\}$ with probability at least $1-2(12/\delta)^{2k}\exp (-n\delta^2/(16c_d))$ we have:
\begin{align*}
    (1-\delta) ||\Delta u_{t_i}' - \Delta u_{t_i}||_2 &\leq ||\frac{(\mP - \mu)}{\sigma} (\Delta u_{t_i}' - \Delta u_{t_i})||_2 \nonumber\\
    &\leq (1+\delta) ||\Delta u_{t_i}' - \Delta u_{t_i}||_2.
\end{align*}
After simplification, we get the following limit:
\begin{align*}
    (\sigma(1-\delta) - \mu) &||\Delta u_{t_i}' - \Delta u_{t_i}||_2\\ &\leq ||\mP \Delta u_{t_i}' - \mP \Delta u_{t_i}||_2\\
    &\leq (\sigma(1+\delta) + \mu) ||\Delta u_{t_i}' - \Delta u_{t_i}||_2.
\end{align*}
Squaring all sides and then summing over $i=1,2,\dots,N$, we get our final error bound on the projected loss function.
\end{proof}

\bibliography{nasim_323}

\end{document}
