%\documentclass{uai2022} % for initial submission
\documentclass[accepted]{uai2022} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2022} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2022} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}



\usepackage{amssymb,latexsym,amsfonts,amsmath,amsthm,mathrsfs}
\usepackage{bbm}
\usepackage{xr}
\usepackage{multicol}
\usepackage{multirow}
%\usepackage{algorithmic}
%\usepackage{cite}
\usepackage{graphicx}
\graphicspath{ {./images/} }
%\usepackage{enumerate}
\usepackage{url}
\usepackage{caption}
\usepackage{textcomp}
\usepackage{xcolor}
\usepackage{dsfont}
\usepackage{algorithm,algorithmic}
\usepackage{nidanfloat}
\usepackage{caption}
\usepackage{subcaption}
\usepackage{comment}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

% Theorem-like environments.
% No declaration below shares a counter with another one, so every
% environment is numbered independently of the others.
% Declarations carrying the trailing [section] argument are numbered within
% the current section; the remaining ones use a single document-wide counter.
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem{definition}{Definition}
\newtheorem{assumption}{Assumption}
\newtheorem{remark}{Remark}
\newtheorem{properties}{Properties}[section]
\newtheorem{example}{Example}[section]
\newtheorem{corollary}{Corollary}[section]
\newtheorem{proposition}{Proposition}
\newtheorem{claim}{Claim}[section]
\newtheorem{observation}{Observation}[section]

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example


% Shorthand for frequently used math symbols.
% The extra pair of braces around each blackboard/calligraphic letter makes
% the symbol a single group, so a following sub- or superscript (as in
% \E_\mu or \Py_{\mu}^{...}) attaches to the whole symbol.
\newcommand{\Py}{{\mathbb P}}
\newcommand{\E}{{\mathbb E}}
\newcommand{\R}{{\mathbb R}}
\newcommand{\U}{{\mathcal U}}
\newcommand{\Y}{{\mathcal Y}}
\newcommand{\X}{{\mathcal X}}
\newcommand{\G}{{\mathcal G}}
% Math-mode only: the trailing _f is a subscript, so use as $\ltlf$.
\newcommand{\ltlf}{\textsc{LTL}_f}
% The subscript is braced explicitly: an unbraced `r_\textsc{max}` only works
% because of how \textsc happens to expand, and breaks easily.
\newcommand{\rmax}{r_{\textsc{max}}}
\newcommand{\supp}{\mathrm{supp}}

% Inline author notes for drafting: each macro prints its argument in
% parentheses, prefixed with a tag and set in a distinguishing color.
% \normalsize is a declaration (it takes no argument), so it is placed
% *inside* the group together with \color; this keeps the size change from
% leaking into the text that follows the note.
\newcommand{\dk}[1]{{\normalsize\color{magenta}(DK:\ #1)}}
\newcommand{\kris}[1]{{\normalsize\color{blue}(KR:\ #1)}}
\newcommand{\dkm}[1]{{\normalsize\color{orange}(DK:\ #1)}}
% NOTE(review): \kri prints the tag "PN", the same tag as \pn below -- confirm
% whether this is intended or a copy-paste leftover.
\newcommand{\kri}[1]{{\normalsize\color{green}(PN:\ #1)}}
\newcommand{\RJ}[1]{{\normalsize\color{red}(RJ:\ #1)}}

\newcommand{\pn}[1]{{\normalsize\color{cyan}(PN:\ #1)}}

\makeatletter
% \addFileDependency{<file>}
%   Writes "(<file>)" to the terminal/log, appends <file> to LaTeX's internal
%   file list via \@addtofilelist, and writes "No file <file>." if the file
%   does not exist.
%   Presumably this lets build tools that scan the log (e.g. latexmk) detect
%   the dependency -- TODO confirm against the build setup.
%   Every line ends with % so that the macro contributes no spurious spaces
%   when it is expanded in horizontal mode.
\newcommand*{\addFileDependency}[1]{% argument=file name and extension
  \typeout{(#1)}%
  \@addtofilelist{#1}%
  \IfFileExists{#1}{}{\typeout{No file #1.}}%
}
\makeatother

% \myexternaldocument{<basename>}
%   Wrapper around \externaldocument (xr package) for cross-referencing
%   labels defined in another document. In addition to importing the labels,
%   it registers both <basename>.tex and <basename>.aux through
%   \addFileDependency.
%   The argument is the file name WITHOUT extension.
\newcommand*{\myexternaldocument}[1]{%
    \externaldocument{#1}%
    \addFileDependency{#1.tex}%
    \addFileDependency{#1.aux}%
}

\myexternaldocument{kalagarla_619}
%\title{Synthesis of Optimal Policies Under  Linear  Temporal  Logic  Specifications and Partial Observability}
\title{Optimal Control of Partially Observable Markov Decision Processes with Finite Linear Temporal Logic Constraints (Supplementary Material)}

% The standard author block has changed for UAI 2022 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:<jj@example.edu>?Subject=Your UAI 2022 paper}{Krishna C. Kalagarla}{}}
\author[1]{Dhruva Kartik}
\author[1]{Dongming Shen}
\author[1]{Rahul Jain}
\author[1]{Ashutosh Nayyar}
\author[1]{Pierluigi Nuzzo}
%\author[1]{Further~Coauthor}
% Add affiliations after the authors
\affil[1]{%
    Department of Electrical and Computer Engineering\\
    University of Southern California\\
    Los Angeles, CA, USA
}

  
\begin{document}
\maketitle


\appendix

% NOTE: necessary when ptmx or no mathfont class option is given
\section{Proof of Theorem \ref{equiv1}}\label{equiv1proof}
For any policy $\mu$, we have
\begin{align}
    \mathcal{R}^{\mathscr{M}^\times}(\mu) &= \E_\mu\left[\sum_{t=0}^T r_t^\times(X_t,A_t)\right]\\
    &= \E_\mu\left[\sum_{t=0}^T r_t^\times((S_t,Q_t),A_t)\right]\\
    & \stackrel{a}{=} \E_\mu\left[\sum_{t=0}^T r_t(S_t,A_t)\right] = \mathcal{R}^{\mathscr{M}}(\mu),
\end{align}
where the equality in $(a)$ follows from the definition of $r_t^\times$ in \eqref{prodrewdef}. Further, using \eqref{finalrewdef}, we have
\begin{align}
    r^f(X_{T+1}) &= r^f((S_{T+1},Q_{T+1}))  = \mathds{1}_F(Q_{T+1}).
\end{align}
Following the acceptance condition of the DFA $\mathscr{A}$, which is equivalent to the $\ltlf$ specification $\varphi$, a run $\xi$ of the POMDP satisfies $\varphi$ if and only if the word generated by the run satisfies the acceptance condition of the DFA $\mathscr{A}$, i.e., its run $\xi_{\mathscr{A}}$ on $\mathscr{A}$ ends in the acceptance set $F$. Hence,
\begin{align}
    \mathcal{R}^{f}(\mu) = \E_\mu\left[ r^f(X_{T+1})\right] = \Py_{\mu}^{\mathscr{M}}(\varphi).
\end{align}



\section{Proof of Lemma \ref{epsopt}}\label{epsoptproof}
We have
\begin{align}
    \mathcal{R}^* &= l^* \\
    &\leq l^*_B \\
    &\leq \inf_{0\leq \lambda \leq B}L(\bar{\mu},\lambda) + \epsilon\\
    &= \mathcal{R}^{\mathscr{M}^\times}(\bar{\mu}) + \inf_{0\leq \lambda \leq B}\lambda(\mathcal{R}^{f}(\bar{\mu})-1+\delta) + \epsilon.
\end{align}
There are two possible cases: (i) $\mathcal{R}^{f}(\bar{\mu})-1+\delta \geq 0$ and (ii) $\mathcal{R}^{f}(\bar{\mu})-1+\delta < 0$. 

If case (i) is true, then \eqref{consat} is trivially satisfied. Further, in this case, we have 
\begin{align}
    \inf_{0\leq \lambda \leq B}\lambda(\mathcal{R}^{f}(\bar{\mu})-1+\delta) = 0.
\end{align}
Therefore, $\mathcal{R}^* \leq \mathcal{R}^{\mathscr{M}^\times}(\bar{\mu}) + \epsilon$, and hence, \eqref{rewsat} is satisfied.

If case (ii) is true, we have
\begin{align}
    \inf_{0\leq \lambda \leq B}\lambda(\mathcal{R}^{f}(\bar{\mu})-1+\delta) &= B(\mathcal{R}^{f}(\bar{\mu})-1+\delta)\\
    &<0.
\end{align}
Therefore, $\mathcal{R}^* \leq \mathcal{R}^{\mathscr{M}^\times}(\bar{\mu}) + \epsilon$, and hence, \eqref{rewsat} is satisfied.
Further, we have
\begin{align}
    B(\mathcal{R}^{f}(\bar{\mu})-1+\delta) &\geq \mathcal{R}^*- \mathcal{R}^{\mathscr{M}^\times}(\bar{\mu})-\epsilon\\
    &\geq \mathcal{R}^*- R_m-\epsilon.
\end{align}
The last inequality holds because $R_m$ is the maximum achievable reward. Hence, \eqref{consat} is satisfied.


\section{Proof of Theorem \ref{lagrangethm}}\label{lagrangethmproof}
Consider the dual of \eqref{bsupinf}. Let
\begin{align}
    u^*_B:=\inf_{0\leq\lambda\leq B}\sup_{\mu} L(\mu,\lambda)\tag{P5}\label{binfsup}.
\end{align}
We have
\begin{align}
    l^*_B &\stackrel{a}{\leq} u^*_B\\
     &= \inf_{0\leq\lambda\leq B}\sup_{\mu} L(\mu,\lambda)\\
    &\stackrel{}{\leq}  \sup_{\mu}L(\mu,\bar{\lambda})\\
    &\stackrel{b}{=} \frac{1}{K}\sum_{k=1}^KL(\mu_{\bar{\lambda}},{\lambda}_k)\\
    &\stackrel{c}{\leq} \frac{1}{K}\sum_{k=1}^KL(\mu_k,{\lambda}_k)\\
    &\stackrel{d}{\leq} \frac{1}{K}\inf_{0\leq\lambda\leq B}\sum_{k=1}^KL(\mu_k,{\lambda})+ 2B\sqrt{2\log2/K}\\
    &\stackrel{e}{=} \inf_{0\leq\lambda\leq B}L(\bar{\mu},{\lambda})+ 2B\sqrt{2\log2/K}.
\end{align}
The inequality in $(a)$ holds because of weak duality \citep{boyd2004convex}. The equality in $(b)$ holds because of the bilinear (affine) structure of $L(\cdot)$. The inequality in $(c)$ holds because $\mu_k$ is the maximizer associated with $\lambda_k$. Inequality $(d)$ follows from Corollary~5.7 in \citet{hazan2016introduction}. The equality in $(e)$ is again a consequence of the bilinearity of $L(\cdot)$.

% \subsection{Tables}\label{sec:tables}
% Tables should go in the \texttt{table} environment and be centered therein.
% The caption should go above the table and be in title caps.
% For an example, see Table~\ref{tab:data}.


\section{Proof of Lemma \ref{disc}}\label{discproof}
The rewards $\mathcal{R}^{\mathscr{M}^\times}(\mu)$ and $\mathcal{R}^{f}(\mu)$ in the corresponding product POMDP are given by
\begin{align}
    \mathcal{R}^{\mathscr{M}^\times}(\mu) &= \E_\mu\left[\sum_{t=0}^T r_t^\times(X_t,A_t)\right]\\
    &= \E_\mu\left[\sum_{t=0}^\infty \gamma^{t}r_t^\times(X_t,A_t)\right]\\
    \mathcal{R}^{f}(\mu) &= \E_\mu\left[ r^f(X_{T+1})\right]\\
    &= (1-\gamma)\E_\mu\left[\sum_{t=0}^\infty \gamma^{t}r^f(X_{t+1})\right]\\
    &= \frac{(1-\gamma)}{\gamma}\E_\mu\left[\sum_{t=1}^\infty \gamma^{t}r^f(X_{t})\right].
\end{align}
Therefore, we have
\begin{align}
    &L(\mu,\lambda) \\
    &= \E_\mu\left[\sum_{t=0}^\infty \gamma^{t}\left(r_t^\times(X_t,A_t)+ \frac{\lambda(1-\gamma)}{\gamma}r^f(X_{t})\right)\right]\nonumber \\
    &\qquad-\frac{\lambda(1-\gamma)}{\gamma}\E[r^f(X_{0})]-{\lambda(1-\delta)}.\nonumber
\end{align}

\section{Additional Details on Experiments}\label{expdetails}

\subsection{Model Description}

In this subsection, we provide further details on the grid world POMDP models used in our experiments. The images corresponding to the various models indicate the state space and the labeling function, e.g., in Fig.~\ref{fig:model1}, we have $L[(1,2)] = \{b\}, L[(3,3)] = \{a\}$, and $L[(i,j)] = \{\}$ for all other grid locations $(i,j)$. In all models, the agent starts from the grid location $(0,0)$. Further, the reward for all actions is $0$ in all grid locations, unless specified otherwise. In the supplementary material, we also provide videos that capture some representative behaviors of the policies generated by Algorithm~\ref{algjr}. We discuss them in greater detail below.

\subsubsection{Location Uncertainty}

\paragraph{Reach-Avoid Tasks.}

In model $\mathscr{M}_1$, reward $r((0,3),a) = 2$ and $r((3,3),a) = 1$ for all actions $a$. We observe that the agent satisfies the reach-avoid constraint with high probability and ends up in the top-right corner where the reward is highest. A representative trajectory for this model can be found in the video \texttt{mu1\_1.mp4}.\\
In model $\mathscr{M}_2$, reward $r((1,6),a) = 3, r((4,3),a) = 3$, and $r((7,7),a) = 1$ for all actions $a$. In this model, we observe two characteristic behaviors. The agent reaches the goal state $a$ and remains there (see video \texttt{mu2\_1.mp4}). This behavior ensures that the specification is met but the reward is relatively lower. The other behavior is that the agent goes towards the location $(4,3)$ and tries to remain there to obtain higher reward (see video \texttt{mu2\_2.mp4}). However, since the obstacle is very close and the transitions are stochastic, it is prone to violating the constraint. Nonetheless, this violation is rare enough that the overall satisfaction probability exceeds the desired threshold.

\begin{figure}[h]
     \centering
     \begin{subfigure}[b]{0.17\textwidth}
         \centering
         \includegraphics[width=\textwidth]{case1.1_54_00000.png}
         \caption{Model $\mathscr{M}_1$}
         \label{fig:model1}
     \end{subfigure}
     \hfill
     \begin{subfigure}[b]{0.17\textwidth}
         \centering
         \includegraphics[width=\textwidth]{case1.2_18_00000.png}
         \caption{Model $\mathscr{M}_2$}
         \label{fig:model2}
     \end{subfigure}
        \caption{Reach-Avoid Tasks}
\end{figure}

\paragraph{Ordered Tasks.}

For models $\mathscr{M}_3, \mathscr{M}_4$, and $\mathscr{M}_5$, reward $r((3,3),a) = 1$ for all actions $a$. In model $\mathscr{M}_3$, the agent visits $a$ and then $b$ in that order most of the time (see video \texttt{mu3\_1.mp4}). Very rarely, the agent narrowly misses one of the goals due to the stochasticity in transitions and partial observability (see video \texttt{mu3\_2.mp4}).
In model $\mathscr{M}_4$, the agent is almost always successful in satisfying the constraint and maximizing the reward (see video \texttt{mu4\_1.mp4}). In model $\mathscr{M}_5$, we see both successes (see video \texttt{mu5\_1.mp4}) and failures (see video \texttt{mu5\_2.mp4}). However, the failure probability is within the threshold, as suggested by Table \ref{tab:add_data}.


\begin{figure}[h]
     \centering
     \begin{subfigure}[b]{0.17\textwidth}
         \centering
         \includegraphics[width=\textwidth]{case2_18_00000.png}
         \caption{Model $\mathscr{M}_3$}
         \label{fig:model3}
     \end{subfigure}
     \hfill
     \begin{subfigure}[b]{0.17\textwidth}
         \centering
         \includegraphics[width=\textwidth]{case3_5_00000.png}
         \caption{Model $\mathscr{M}_4$}
         \label{fig:model4}
     \end{subfigure}
     \hfill
     \begin{subfigure}[b]{0.17\textwidth}
         \centering
         \includegraphics[width=\textwidth]{case4_36_00000.png}
         \caption{Model $\mathscr{M}_5$}
         \label{fig:model5}
     \end{subfigure}
        \caption{Ordered Tasks}
\end{figure}

\paragraph{Reactive Tasks.}

In model $\mathscr{M}_6$, reward $r((3,0),a) = 1$ and $r((3,3),a) = 2$ for all actions $a$. In this case, the agent goes to $a$ and remains there, thus satisfying the constraint (see video \texttt{mu6\_1.mp4}). Occasionally, the agent also goes to state $b$ and remains there to obtain a large reward. However, this violates the constraint since if the agent ever visits $b$, it must eventually go to $c$ (see video \texttt{mu6\_2.mp4}). \\
In model $\mathscr{M}_7$, reward $r((3,0),a) = 5$ and $r((0,3),a) = 2$ for all actions $a$. In this model, the agent goes to $a$ and then to $b$ so that it can go to $c$. If it does not go to $b$ immediately after reaching $a$, then it is compelled to go to $d$. We observe that the agent consistently visits $b$ after $a$ (see video \texttt{mu7\_1.mp4}).

\begin{figure}[h]
     \centering
     \begin{subfigure}[b]{0.17\textwidth}
         \centering
         \includegraphics[width=\textwidth]{case5_78_00000.png}
         \caption{Model $\mathscr{M}_6$}
         \label{fig:model6}
     \end{subfigure}
     \hfill
     \begin{subfigure}[b]{0.18\textwidth}
         \centering
         \includegraphics[width=\textwidth]{case6_22_00000.png}
         \caption{Model $\mathscr{M}_7$}
         \label{fig:model7}
     \end{subfigure}
        \caption{Reactive Tasks}
\end{figure}

\subsubsection{Predicate Uncertainty}

In the experiments of this section, there are two possible locations for object $b$: $(3,0)$ and $(0,3)$. In both cases, whenever the agent is `far' away (Manhattan distance greater than 1) from the object $b$, it gets an observation `F' indicating that it is \emph{far} with probability $1$. When the object is at the bottom left and the agent is adjacent to it, the agent gets an observation `C' with probability $0.9$ indicating that the object is \emph{close}. However, if object $b$ is at the top right and the agent is adjacent to it, the agent gets an observation `C' only with probability $0.1$. Therefore, the detection capability of the agent is stronger when the object is in the bottom-left location as opposed to when it is in the top-right location.

\paragraph{Reach-Avoid Tasks.} In model $\mathscr{M}_8$, reward $r((3,0),a) = 2$ and $r((0,3),a) = 4$ for all actions $a$. In this model, generally, the agent first collects some information from the bottom-left, reaches $a$, and goes to the rewarding location that is not an obstacle (see videos \texttt{mu8\_1.mp4}, \texttt{mu8\_2.mp4}, \texttt{mu8\_3.mp4}). We see rare instances where the agent completely ignores the constraint and maximizes the reward (see video \texttt{mu8\_4.mp4}).

\begin{figure}[h]
     \centering
     \begin{subfigure}[b]{0.17\textwidth}
         \centering
         \includegraphics[width=\textwidth]{fa1_90_00000.png}
         \caption{Model $\mathscr{M}_8$ with obstacle at $(0,3)$}
         \label{fig:model8a}
     \end{subfigure}
     \hfill
     \begin{subfigure}[b]{0.17\textwidth}
         \centering
         \includegraphics[width=\textwidth]{fa1_91_00000.png}
         \caption{Model $\mathscr{M}_8$ with obstacle at $(3,0)$}
         \label{fig:model8b}
     \end{subfigure}
        \caption{Reach-Avoid Tasks}
\end{figure}

\paragraph{Ordered Tasks.} In model $\mathscr{M}_9$, reward $r((0,0),a) = 2$ for all actions $a$. In this model, we observe that the agent mostly succeeds in satisfying the constraint and maximizing the reward (see videos \texttt{mu9\_1.mp4} and \texttt{mu9\_2.mp4}).
\begin{figure}[H]
     \centering
     \begin{subfigure}[b]{0.17\textwidth}
         \centering
         \includegraphics[width=\textwidth]{seq_44_00000.png}
         \caption{Model $\mathscr{M}_9$ with obstacle at $(0,3)$}
         \label{fig:model9a}
     \end{subfigure}
     \hfill
     \begin{subfigure}[b]{0.17\textwidth}
         \centering
         \includegraphics[width=\textwidth]{seq_45_00000.png}
         \caption{Model $\mathscr{M}_9$ with obstacle at $(3,0)$}
         \label{fig:model9b}
     \end{subfigure}
        \caption{Ordered Tasks}
\end{figure}
\subsection{Hyper-parameters and Runtimes}


The parameter $\delta$ in all the experiments is chosen in the following manner: i) We first solve a POMDP problem in which we are only interested in maximizing the probability of satisfaction of the $\ltlf$ constraint. Let this probability be denoted by $p_{max}$. The SARSOP solver gives concrete approximation bounds on its solution, and therefore, on our estimate of $p_{max}$. ii) Since any threshold $1-\delta$ larger than $p_{max}$ is infeasible, we choose a $\delta$ such that $1-\delta$ is around $0.9p_{max}$. The values $\eta$ and $B$ are hyperparameters in our experiments. The $\eta$ suggested by Theorem \ref{lagrangethm} in our paper is guaranteed to result in convergence, but in practice, slightly larger values of $\eta$ can lead to faster convergence.


In Table~\ref{tab:add_data}, we provide additional hyper-parameters that were used in our experiments. The parameter $simu$ denotes the number of Monte-Carlo simulations that were used to estimate the constraint in each iteration. $T_{solve}$ is the total time (over $K$ iterations) spent in solving the unconstrained POMDP using the SARSOP solver \citep{kurniawati2008sarsop}. $T_{simu}$ is the total time spent in simulating policies generated by the SARSOP solver. $T_{total}$ is the overall computation time for that model.

Most of our models have a state space of size $16$ ($4\times 4$). However, the runtimes (see Table~\ref{tab:add_data}) for these models differ drastically. This is because of two factors: (i) the DFA size and (ii) the complexity of the POMDP problem. The size of the DFA can be large for a complex task. This naturally scales up the state space of the product POMDP. SARSOP returns an alpha-vector policy \citep{kurniawati2008sarsop}. When the POMDP is complex, the alpha-vector policy returned by SARSOP may have many alpha vectors. This would imply that whenever the agent has to make a decision, it needs to solve a fairly large maximization problem. This makes the simulations time-consuming.
\begin{table*}[b]
    \centering
    \caption{Performance Value and Hyper-parameters}\label{tab:add_data}
    \begin{tabular}{lccccccccccccc}
      \toprule % from booktabs package
      \bfseries Model & \bfseries Spec & $|S|$ & $|Q|$ & $\mathcal{R}^{\mathscr{M}}(\bar{\mu})$ & $\mathcal{R}^f(\bar{\mu})$ & $1-\delta$ & $B$ & $\eta$ & $K$ & $simu$ & $T_{solve}$ & $T_{simu}$ & $T_{total}$ \\
      \midrule % from booktabs package
      $\mathscr{M}_1$ & $\varphi_1$&16 &3 & $1.72$ & $0.75$ & $0.75$ & $5$ & $2$ & $100$ & $200$ & $142$ & $3518$ & $3661$\\ %1.1
      $\mathscr{M}_2$ & $\varphi_1$&64 &3 &$0.95$ & $0.70$ & $0.70$ & $8$ & $2$ & $50$ & $100$ & $17299$ & $7825$ & $25125$\\ %1.2
      $\mathscr{M}_3$ &$\varphi_2$ &16 &3 & $0.83$ & $0.76$ & $0.75$ & $5$ & $2$ & $100$ & $200$ & $158$ & $3614$ & $3773$\\ %2
      $\mathscr{M}_4$ & $\varphi_3$&16 &4 & $0.80$ & $0.71$ & $0.70$ & $6$ & $2$ & $100$ & $200$ & $1893$ & $14534$ & $16428$\\ %3
      $\mathscr{M}_5$  & $\varphi_4$&16 &4& $0.83$ & $0.71$ & $0.70$ & $6$ & $2$ & $100$ & $200$ & $368$ & $8440$ & $8809$\\ %4
      $\mathscr{M}_6$  &$\varphi_5$ &16 &4 & $1.01$ & $0.79$ & $0.80$ & $10$ & $2$ & $100$ & $200$ & $109$ & $718$ & $828$\\ %5new
      $\mathscr{M}_7$  &$\varphi_6$ &16 &10 & $4.28$ & $0.82$ & $0.80$ & $25$ & $2$ & $50$ & $100$ & $5865$ & $57833$ & $63699$\\ %6new
      $\mathscr{M}_8$  &$\varphi_1$ &32 &3 & $2.73$ & $0.81$ & $0.85$ & $20$ & $0.02$ & $100$ & $200$ & $370$ & $21676$ & $22046$\\ %Fa1
      $\mathscr{M}_9$  &$\varphi_4$ &32 & 4& $1.68$ & $0.81$ & $0.75$ & $10$ & $0.2$ & $100$ & $200$ & $973$ & $25618$ & $26591$\\ %sequence1 (previous)
      \bottomrule % from booktabs package
    \end{tabular}
\end{table*}
\bibliography{kalagarla_619-supp}
\end{document}
