% %% Begin supplemental formatting
% \documentclass{article}
% \usepackage{natbib}
% \usepackage[margin=1in,footskip=0.25in]{geometry}
% \usepackage[utf8]{inputenc}
% \usepackage{graphicx}
% \usepackage{xcolor}
%     \definecolor{darkgreen}{rgb}{0.0, 0.2, 0.13}
%     \definecolor{cadmiumgreen}{rgb}{0.0, 0.42, 0.24}
%     \definecolor{byzantium}{rgb}{0.44, 0.16, 0.39}
% \usepackage{amsmath}
% \usepackage{mathrsfs}
% \usepackage[font=small]{caption}
% \usepackage[font=small]{subcaption}
% \usepackage{booktabs}
% \usepackage{multirow}
% \usepackage{enumitem}
% \usepackage{times}
% \usepackage{hyperref}
%     \hypersetup{
%         colorlinks=true,
%         urlcolor=blue,
%         urlbordercolor=blue,
%         linkcolor=blue,
%         linkbordercolor=blue,
%         filecolor=magenta,
%         pdfborderstyle={/S/U/W 1},
%     }
% \usepackage{mathrsfs}
% \usepackage[titlenumbered,ruled, linesnumbered]{algorithm2e}
%     \newcommand\commentstyle[1]{\textcolor{cadmiumgreen}{#1}}
%     \SetCommentSty{commentstyle}
%     \SetKwInOut{Input}{input}
%     \SetKwInOut{Output}{output}
% \usepackage{enumitem}
%     \setlistdepth{9}
%     \setlist[itemize,1]{label=$\bullet$}
%     \setlist[itemize,2]{label=$\cdot$}
%     \setlist[itemize,3]{label=$\cdot$}
%     \setlist[itemize,4]{label=$\cdot$}
%     \setlist[itemize,5]{label=$\cdot$}
%     \setlist[itemize,6]{label=$\cdot$}
%     \setlist[itemize,7]{label=$\cdot$}
%     \setlist[itemize,8]{label=$\cdot$}
%     \setlist[itemize,9]{label=$\cdot$}
%     \renewlist{itemize}{itemize}{9}
% \usepackage{algorithm2e}


%% end supplemental formatting


% %% Begin AAAI-22 formatting
% \def\year{2022}\relax
% %File: formatting-instructions-latex-2022.tex
% %release 2022.1
% \documentclass[letterpaper]{article} % DO NOT CHANGE THIS
% \usepackage{aaai22}  % DO NOT CHANGE THIS
% \usepackage{times}  % DO NOT CHANGE THIS
% \usepackage{helvet}  % DO NOT CHANGE THIS
% \usepackage{courier}  % DO NOT CHANGE THIS
% \usepackage[hyphens]{url}  % DO NOT CHANGE THIS
% \usepackage{graphicx} % DO NOT CHANGE THIS
% \urlstyle{rm} % DO NOT CHANGE THIS
% \def\UrlFont{\rm}  % DO NOT CHANGE THIS
% \usepackage{natbib}  % DO NOT CHANGE THIS AND DO NOT ADD ANY OPTIONS TO IT
% \usepackage{caption} % DO NOT CHANGE THIS AND DO NOT ADD ANY OPTIONS TO IT
% \DeclareCaptionStyle{ruled}{labelfont=normalfont,labelsep=colon,strut=off} % DO NOT CHANGE THIS
% \frenchspacing  % DO NOT CHANGE THIS
% \setlength{\pdfpagewidth}{8.5in}  % DO NOT CHANGE THIS
% \setlength{\pdfpageheight}{11in}  % DO NOT CHANGE THIS
% %% End AAAI-22 formatting





%%% Begin UAI-22 formatting
\documentclass{uai2023} % for initial submission
% \documentclass[accepted]{uai2023} % after acceptance, for a revised
%                                     % version; also before submission to
%                                     % see how the non-anonymous paper
%                                     % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2022} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2022} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
\bibliographystyle{plainnat}
\renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
\usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams
% Use the postscript times font!
\usepackage{times}
\usepackage{soul}
\usepackage{url}
\usepackage{hyperref}
    % \hypersetup{
    %     colorlinks=true,
    %     urlcolor=blue,
    %     urlbordercolor=blue,
    %     linkcolor=blue,
    %     linkbordercolor=blue,
    %     filecolor=magenta,
    %     pdfborderstyle={/S/U/W 1},
    % }
\usepackage[utf8]{inputenc}
\usepackage{graphicx}
\usepackage{amsfonts}
\usepackage{booktabs}
% \usepackage{subfigure}
\usepackage{amssymb}
\urlstyle{same}
%%% End UAI-22 formatting

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% CUSTOM PACKAGES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{float}
\usepackage{xspace} % package being used for \newcommand to remove extra space
                    %     when a command is invoked without an argument list
\usepackage{textcase}
\usepackage[toc, nopostdot]{glossaries}
% \usepackage{amsmath}
\usepackage{amsthm}
\usepackage{mathtools}
\usepackage{enumitem}
\usepackage{refcount}
\usepackage[leftmargin=6pt, vskip=3pt-\parskip]{quoting}
\usepackage[titlenumbered,ruled, linesnumbered]{algorithm2e}
\usepackage{mathrsfs} %for \mathscr
% \usepackage[font=small,labelfont=bf]{caption}
% \usepackage[font=small,labelfont=bf]{subcaption}
% \usepackage[labelfont=bf]{caption}
\usepackage[labelfont=bf]{subcaption}
\usepackage{xcolor}
    \definecolor{darkgreen}{rgb}{0.0, 0.2, 0.13}
    \definecolor{cadmiumgreen}{rgb}{0.0, 0.42, 0.24}
    \definecolor{byzantium}{rgb}{0.44, 0.16, 0.39}
    \definecolor{darkelectricblue}{rgb}{0.33, 0.41, 0.47}
    \definecolor{battleshipgrey}{rgb}{0.52, 0.52, 0.51}
    \definecolor{warmblack}{rgb}{0.0, 0.26, 0.26}
\usepackage{newfloat}
\usepackage{chngcntr}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% CUSTOM COMMANDS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%create new float environment called plotfigure with it's own counter
\DeclareFloatingEnvironment[name=Plot]{plotfigure} 

%create new float environment called tablefigure with it's own counter
\DeclareFloatingEnvironment[name=Table]{tablefigure} 

%set the floats table and tablefigure to use the same counters
\makeatletter\let\c@tablefigure\c@table\makeatother 

%treat the floats table and tablefigure as the same float type (so they are placed in the document in the order in which they appear)
\makeatletter\let\ftype@tablefigure\ftype@table\makeatother 

\DeclareMathOperator*{\argmin}{argmin}
\DeclareMathOperator*{\argmax}{argmax}
\mathchardef\mhyphen="2D % Define a "math hyphen"

% algorithm2e
% \newcommand\commentstyle[1]{\textcolor{cadmiumgreen}{#1}}
\SetCommentSty{commentstyle}
\SetKwInOut{Input}{input}
\SetKwInOut{Output}{output}

\newtheoremstyle{break}
  {\topsep}{\topsep}%
  {\itshape}{}%
  {\bfseries}{}%
  {\newline}{}%
\theoremstyle{break}
\newtheorem{theorem}{Theorem}[subsubsection]
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{definition}{Definition}[subsubsection]

\input{cmds}
\renewcommand*{\glstextformat}{\textbf}

\renewcommand{\quote}{\list{}{\rightmargin=\leftmargin\topsep=0pt}\item\relax}







%%% for supplemental

\usepackage{enumitem}
    \setlistdepth{9}
    \setlist[itemize,1]{label=$\bullet$}
    \setlist[itemize,2]{label=$\cdot$}
    \setlist[itemize,3]{label=$\cdot$}
    \setlist[itemize,4]{label=$\cdot$}
    \setlist[itemize,5]{label=$\cdot$}
    \setlist[itemize,6]{label=$\cdot$}
    \setlist[itemize,7]{label=$\cdot$}
    \setlist[itemize,8]{label=$\cdot$}
    \setlist[itemize,9]{label=$\cdot$}
    \renewlist{itemize}{itemize}{9}






%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\setcounter{secnumdepth}{3} %May be changed to 1 or 2 if section numbers are desired.
\setcounter{tocdepth}{3}

\title{Abstraction Sampling Meeting Updates}


% The standard author block has changed for UAI 2022 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is authomatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:<pezeshkb@uci.edu>?Subject=[Abstraction Sampling]}{Bobak Pezeshki}{}}
% \author[2]{\href{mailto:<radu.marinescu@ie.ibm.com>?Subject=Abstraction Sampling}{Radu Marinescu}{}}
\author[1]{\href{mailto:<kkask@uci.edu>?Subject=[Abstraction Sampling]}{Kalev Kask}{}}
\author[1]{\href{mailto:<ihler@ics.uci.edu>?Subject=[Abstraction Sampling]}{Alexander Ihler}{}}
\author[1]{\href{mailto:<dechter@ics.uci.edu>?Subject=[Abstraction Sampling]}{Rina Dechter}{}}
% Add affiliations after the authors
\affil[1]{%
    University of California, Irvine
}
% \affil[2]{%
%     IBM Research
% }


\input{gls}

\begin{document}
    % \onecolumn
    \setlength{\abovedisplayskip}{3pt}
    \setlength{\belowdisplayskip}{3pt}

    \maketitle
    
    \begin{abstract}
        Monte Carlo methods have proven to be powerful tools for solving a wide range of computational problems, including those involving complex probability distributions. Despite their versatility, these methods often suffer from computational inefficiencies, especially when dealing with rare events. As such, importance sampling emerged as a prominent technique for alleviating these challenges. Recently, a new scheme called Abstraction Sampling was developed that incorporates stratification into importance sampling over graphical models, helping to improve estimates further. Nevertheless, work on Abstraction Sampling to date has explored only a handful of abstraction functions that guide the stratification.\\
        
        This work expands the set of general abstraction functions for AND/OR Abstraction Sampling by introducing three new classes of abstraction functions combined with seven distinct partitioning schemes, resulting in a total of twenty-one new abstraction functions. These functions are motivated by theory and intuition, which we expand upon, and are evaluated through an extensive empirical analysis on over 400 benchmarks.
    \end{abstract}

    % \vfill\eject
    % \tableofcontents
    
    % \clearpage
    \section{Introduction} \label{sec:introduction}


        
    \section{General Background} \label{sec:background}


%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%

        \subsection{Graphical Models}
            
            % \begin{figure}[]
            %     \centering
            % 	\includegraphics[scale=0.25]{images/AncestorBranchingMass.pdf}
            % 	\caption{Ancestor branching mass of an AND node.}
            % 	\label{fig-ancestor-branching-mass}
            % \end{figure}
    
            % % \begin{comment}
            % \begin{figure}[]
            %     \centering
            % 	\includegraphics[scale=0.25]{images/ProperAbstractionGroups.pdf}
            % 	\caption{Scope of proper abstractions.}
            % 	\label{fig-proper-abstraction-groups}
            % \end{figure}
            % % \end{comment}
            
            A {\bf graphical model}, such as a Bayesian or Markov network \citep{pearl88,darwiche-book,DBLP:series/synthesis/2013Dechter}, can be defined by  a 3-tuple
            $\mathcal{M} \! = \! (\mathbf{X,D,F})$, where
            $\mathbf{X} \! = \! \{X_i \! : i \! \in \! \I\}$
            is a set of variables indexed by a set $\I$,
            and $\mathbf{D} \! = \! \{D_i \! : i \! \in \! \I\}$
            is the set of finite domains of values for each $X_i$. Each function $f_{\alpha} \in   \mathbf{F}$ is defined over a subset of the variables
            called its scope, $X_{\alpha}$, %\subseteq X$, also  denoted $scope(f_{\alpha})$
            where  $\alpha \subseteq \I$ are  the indices of  variables in its scope  and $D_{\alpha}$ denotes  the Cartesian product of their domains, so that %
            % Namely,
            $f_{\alpha} \! : D_{\alpha} \! \rightarrow \mathbb{R}^{\geq 0}$. The {\bf primal graph} $\mathcal{G} \! = \! (\mathbf{V,E})$ of $\mathcal{M}$ associates each variable with a node ($\mathbf{V} \! = \! \mathbf{X}$), while arcs $e \! \in \! \mathbf{E}$ connect nodes whose variables appear in the scope of the same function.
            %We define $scope(F) = \{\alpha | f_{\alpha} \in F \}$.
            %  $\mathbf{F} = \{f_{\alpha} : \alpha \in scopes(F)\}$ is a set of discrete functions, where $\alpha \subseteq V $ and
            %$X_\alpha \subseteq X$ is the scope of $f_\alpha$.
            %Graphical models can be used to represent a global function, often a probability distribution on $\mathbf{X}$,
            $\mathcal{M}$ defines a global function, often a factorized probability distribution on $\mathbf{X}$,
            $P(\mathbf{X}) = \frac{1}{Z} \prod_{\alpha}f_\alpha(X_\alpha)$, where 
            $
            Z = \sum_X \prod_{\alpha}
            f_\alpha(X_\alpha)
            $, known as the partition function, is a normalization factor.

            
        \subsubsection{Search Spaces of Graphical Models} 
            A graphical model can be transformed into a weighted state space graph.
            In an OR search space, which is constructed layer-by-layer relative to a variable ordering, paths from the root to the leaves represent full configurations - or assignments to all variables - where each successive level corresponds to an assignment of the next variable in the ordering.
            A graphical model can also be transformed  into a more compact AND/OR search space  by capturing its conditional independencies, % in the  model,
            thus facilitating more effective algorithms \citep{DBLP:journals/ai/DechterM07}.
            
            An AND/OR search space is defined relative to a \emph{pseudo tree} of a primal graph.	A {\bf pseudo tree} $\mathcal{T} \! = \! (\mathbf{V,E'})$ of a  primal graph $\mathcal{G} \! = \! (\mathbf{V,E})$ is a directed rooted tree that spans $\mathcal{G}$ such that every arc of $\mathcal{G}$ not in $\mathbf{E'}$ is a back-arc in ${\cal T}$ connecting a node to one of its ancestors (Figure \ref{fig-simple}(a),(b)).  A variable is a {\bf branching variable} if it has multiple children in $\mathcal{T}$.
            %The arcs in $E'$ may not all be included in $E$ .  
            
            
            
            
            \begin{figure}[!htb]
            	\centering
            	\begin{subfigure}{0.9\linewidth}
            	\centering
            	       \includegraphics[width=0.8\linewidth]{./_attachments/images/pseudotree.png}
            	\end{subfigure}
                    \begin{subfigure}{0.9\linewidth}
            	\centering
                        \includegraphics[width=0.8\linewidth]{./_attachments/images/AncestorBranchingMass.png}
                    \end{subfigure}
            	\caption{A full AND/OR tree representing 16 possible solutions guided by the pseudo tree shown above. Boxed in green is the ancestor branching subtree for the path $\rightarrow \!\! (A \!\! = \!\! 0) \!\! \rightarrow  \!\! (C \!\! = \!\! 1)$.}
                        \label{fig-ancestor-branching-mass}
            \end{figure}

            
            Given a
            pseudo tree $\mc{T}$ of a primal graph $\mathcal{G}$, the \emph{AND/OR search tree}
            $T_{\mc{T}}$ guided by $\mc{T}$ has alternating levels of OR nodes
            corresponding to variables, and AND nodes corresponding to
            an assignment from  its domain  with edge costs extracted from
            the original functions \cite{DBLP:journals/ai/DechterM07}. %(By this logic, we can think of the nodes of an OR tree as AND nodes).  
            Let $n$ be an AND node in $T_{\mc{T}}$, also denoted $n_X$ if $X$ is the last variable of its partial configuration.
            Each arc into an AND node $n$ %(or the arc from its OR parent to the AND node)
            has a cost $c(n)$ defined to be the product of all factors $f_{\alpha}$ in $\mathcal{M}$ that are instantiated at $n$ but not before.
            % \textcolor{red}{Moved to section "Value of A Node": (see Figure \ref{fig-simple}(c)).}                                            
            \subsection{Notation}
                When not otherwise stated, capital letters ($X$) represent variables and small letters ($x$) represent their values.  (An exception is when using $n$, which we use to represent search nodes).  Boldfaced letters represent a collection. For example,
                boldfaced capital letters ({\bf X}) denote a collection of variables,
                $|{\bf X}|$ its cardinality, 
                $D_{\X}$ their joint domains (i.e., all possible configurations of \X), 
                and bolded $\xx$ a particular realization in that joint domain (i.e., a particular configuration of \X).

                In the context of search, $path(n)$ is the partial configuration corresponding to assignments to variables according to the path from the root of $T_{\mc{T}}$ to $n$, and $g(n)$ is the cost of $path(n)$.
            

                \subsubsection{$Z(n)$} \label{sec:partition-function-of-a-node}
                    We define $Z(n)$ recursively as: 
                    \begin{equation} \label{eq:and-or-z-prod}
                        Z(n_X) = \prod_{Y \in ch_{\cal T}(X)} Z(Y_{n_X})
                    \end{equation}
                    where
                    \begin{equation}
                        Z(Y_{n_X}) = \sum_{n_Y \in ch_Y(n_X)}  c(n_Y) \cdot Z(n_Y)
                    \label{eq2}
                    \end{equation}
                    and where $ch()$ denotes child variables either in the pseudo-tree or the search tree itself (depending on the context). Here, $ch_Y(n_X)$ are specifically the child AND nodes of $Y$ descended from AND node $n_X$.  Thus, $Z(r)$ equals the partition function $Z$ of the underlying full model (see Figure \ref{fig-simple}c). We denote sampling estimations of $Z(n)$ as $\hat{Z}(n)$.  Heuristic estimates of functions $Z(n)$ are denoted as $h(n)$.
    
                \subsubsection{$R(n)$} \label{sec:ancestor-branching-mass}
                     On the path from the root of $T_{\mc{T}}$ to some $n_{X}$, there may be an intermediate node $n_{B}$ such that its associated variable $B$ is a branching variable in \PT.  Whenever this happens, the remaining variables of the model are split between the branches, and thus no single branch captures all the remaining variables. $R(n)$, or the \textbf{ancestor branching mass}, captures the $Z(n_{Y})$ for all $Y$ that branch off of the path to $X$. In Figure \ref{fig-ancestor-branching-mass}, the dotted green box shows the portion of the search space corresponding to the $R(n)$ for the node highlighted in red.

                     More formally, let $branchings(n_{X})$ be the set of nodes $n_{B}$ on the path to $n_{X}$ such that $B$ is a branching variable in \PT. Let $W$ be the child of $B$ that is on the path to $X$.  We define $R(n_{X})$ as:
                     \begin{align}
                         \label{eq4}
                         R(n_{X}) =   \prod_{n_{B} \in branchings(n_{X})} \frac{Z(n_{B})}{ Z(W_{n_{B}})}
                     \end{align}


                     \textbf{Example.} In Figure \ref{fig-ancestor-branching-mass}, consider the path from the root to the node $n \! = \! (A \!\! = \!\! 0,C \!\! = \!\! 1)$ marked in red. Following under $(A=0)$ to our node, we see there is a node of variable $B$ that branches off of the path.
                     Thus, $Q(n_{A=0,C=1})$
                     \begin{align}
                        &= g(n_{A=0,C=1}) \! \cdot \! R(n_{A=0,C=1}) \! \cdot \! Z(n_{A=0,C=1}) \\
                        &= g(n_{A=0,C=1}) \mul \;\; Z(n_{A=0,B})\;\; \! \cdot \! Z(n_{A=0,C=1}) 
                     \end{align}

                     We denote approximations to $R(n)$ as $r(n)$.
                
                \subsubsection{$Q(n)$} \label{sec:q-of-a-node}
    
                    Putting it all together, we can now concisely define a quantity $Q(n)$ as the contribution to $Z$ from all full configurations consistent with $path(n)$. In other words, $Q(n)$ is the unnormalized probability of the configuration $path(n)$ based on the distribution defined by \M, with $P(path(n)) = \frac{Q(n)}{Z}$.  $Q(n)$ can be computed simply as:
                    \begin{align}
                        Q(n) = g(n)  \! \cdot \!  R(n)  \! \cdot \!  Z(n)
                    \end{align}
                    We denote approximations to $Q(n)$ as $q(n)$.
                
             
             
        \subsection{Stratified Importance Sampling} 
            Abstraction Sampling builds on Importance Sampling and Stratified Sampling. {\em Importance Sampling} (IS) is  a Monte Carlo scheme for approximating likelihood queries over graphical models.
            %\cite{Rubinstein_2007,DBLP:journals/ai/GogateD11,liu2015probabilistic}.
            {\em Stratified Sampling} is a variance reduction technique for sampling a search space by first dividing it into disjoint strata. This can be used with importance sampling to further reduce variance.
            In {\em Stratified Importance Sampling}, we first divide the sample space into $k$ strata of equal area under the distribution $q$, then choose re-weighted representatives from each stratum. %, and uses these representatives to form an estimator over the entire model. 
            In order to maximize reduction in variance, the variance between strata should be maximized \citep[see][]{rizzo_2007}.
            
            
            \newcommand{\soltree}{\hat{x}_M}
            \newcommand{\parttree}{\bar x}
            
            
    

%%%%%%%%%%%%%%%%%%%%%%%%%5
%%%%%%%%%%%%%%%%%%%%%%%%%5
%%%%%%%%%%%%%%%%%%%%%%%%%5
%%%%%%%%%%%%%%%%%%%%%%%%%5
%%%%%%%%%%%%%%%%%%%%%%%%%5
%%%%%%%%%%%%%%%%%%%%%%%%%5
%%%%%%%%%%%%%%%%%%%%%%%%%5
%%%%%%%%%%%%%%%%%%%%%%%%%5


    \section{Abstraction Sampling}\label{sec:abstraction-sampling}

        {\em Abstraction Sampling} (AS) algorithms \citep{DBLP:conf/uai/BrokaDIK18} apply concepts of Stratified Importance Sampling to sampling over probabilistic graphical models.  An abstraction event in Abstraction Sampling is analogous to sampling representatives from strata in stratified importance sampling and reweighing to account for the rest of the members that were not chosen.  Guided by an abstraction
        function $a(.)$ that dictates how nodes of a variable should be partitioned into \textbf{abstract states}, Abstraction Sampling iteratively expands a search tree variable by variable, at each expansion also performing abstractions.  This results in the generation of a subtree of the full search tree $T_{\mc{T}}$ called a {\bf probe}, which corresponds to a single sample.

\begin{figure}[!htb]
          \begin{subfigure}{0.24\textwidth}
            \includegraphics[width=0.98\linewidth]{./_attachments/images/AlgorithmTraces/AOAS-step1.pdf}
            \caption{}
            \label{fig:AOAS-step1}
          \end{subfigure}%
          \begin{subfigure}{0.24\textwidth}
            \includegraphics[width=0.98\linewidth]{./_attachments/images/AlgorithmTraces/AOAS-step2.pdf}
            \caption{}
            \label{fig:AOAS-step2}
          \end{subfigure}
          \begin{subfigure}{0.24\textwidth}
            \includegraphics[width=0.98\linewidth]{./_attachments/images/AlgorithmTraces/AOAS-step3.pdf}
            \caption{}
            \label{fig:AOAS-step3}
          \end{subfigure}%
          \begin{subfigure}{0.24\textwidth}
            \includegraphics[width=0.98\linewidth]{./_attachments/images/AlgorithmTraces/AOAS-step4.pdf}
            \caption{}
            \label{fig:AOAS-step4}
          \end{subfigure}
          \begin{subfigure}{0.24\textwidth}
            \includegraphics[width=0.98\linewidth]{./_attachments/images/AlgorithmTraces/AOAS-step5.pdf}
            \caption{}
            \label{fig:AOAS-step5}
          \end{subfigure}%
          \begin{subfigure}{0.24\textwidth}
            \includegraphics[width=0.98\linewidth]{./_attachments/images/AlgorithmTraces/AOAS-step6.pdf}
            \caption{}
            \label{fig:AOAS-step6}
          \end{subfigure}
          \caption{From \cite{kask20-scaling-up-as}, a sample trace of AOAS following DFS ordering $B\rightarrow A\rightarrow C\rightarrow D$. Nodes with the same domain values are abstracted into the same abstract state (also indicated by node color) with only one node of each color selected as a representative from its respective abstract state. Step (c) shows an optional pruning step. Transparent nodes indicate portions of the tree not pruned and yet to be explored.}
          \label{fig:main}
        \end{figure}

            
        \subsection{AOAS}
            Taking Abstraction Sampling further, \cite{kask20-scaling-up-as} introduced algorithm AOAS that applied abstraction sampling to AND/OR search spaces and significantly improved performance over the previous version. AOAS used a proposal distribution $p(n) \propto q(n) = w(n) \! \cdot \! g(n) \! \cdot \! h(n) \! \cdot \! r(n)$ (see Figure \ref{fig:proposal}), where $g(n)$ is computed exactly, $Z(n)$ is approximated by $h(n)$, $R(n)$ is estimated by $r(n)$, and a weight $w(n)$ is applied to account for the nodes abstracted into the path to $n$. An overview of the algorithm can be seen in Algorithm \ref{alg:aoas-overview}, and a more detailed version taken from \cite{kask20-scaling-up-as} can be found in the supplemental materials.

        
        \begin{algorithm}[t!]
              \caption{AOAS Overview}
                \label{alg:aoas-overview}
        
            \begin{enumerate}
                \item \textbf{Initialization:}
                    Begin with a dummy root node $r$.
                \item \textbf{Probe Generation:}
                    Proceeding in a DFS manner according to a pseudo tree $\PT$...
                    \begin{enumerate}
                        \item \textbf{Expansion:} \label{alg:aoas-overview:expansion}
                            Generate children nodes $n$ corresponding to the next variable in the DFS ordering of $\PT$. Inherit $w(n)$ from parents and assign appropriate $g(n), h(n), \tn{and } r(n)$ values.
                        \item \textbf{Abstraction:} \label{alg:aoas-overview:abstraction}
                            \begin{enumerate}
                                \item \textbf{Form Abstract States:}
                                    Using $a(.)$, partition newly expanded nodes into abstract states.
                                \item \textbf{Select Representative:}
                                    Using the proposal $p(.)$ defined, select a representative from each abstract state and reweigh it such that $w(n) \leftarrow \frac{w(n)}{p(n)}$
                            \end{enumerate}
                        \item \textbf{Backtrack:} \label{alg:aoas-overview:backtracking}
                            After reaching a leaf in $\PT$, recursively backtrack until reaching nodes of the next unexplored branch of $\PT$. While backtracking, update parent node $\hat{Z}(n')$ estimates based on children's $w(n), g(n),$ and $\hat{Z}(n)$ values.
                        \item \textbf{Repeat:}
                            Repeat steps \ref{alg:aoas-overview:expansion}-\ref{alg:aoas-overview:backtracking} until having backtracked all the way to the root node.
                    \end{enumerate}
                \item \textbf{Return:}
                    $\hat{Z} = w(r)\,\hat{Z}(r)$ for the root node $r$.
            \end{enumerate}
        \end{algorithm}



        \begin{figure}[!htb]
            \centering
            \includegraphics[width=0.8\linewidth]{./_attachments/images/proposal.png}
            \caption{The $q(n)$ visualized to show it estimating the mass of nodes previously abstracted (via $w(n)$), the ancestor branching mass (via $r(n)$), the current path cost (via $g(n)$), and the subtree mass (via $h(n)$).}
                    \label{fig:proposal}
        \end{figure}

        
        \subsection{Existing Abstraction Functions} \label{sec:abstraction-sampling:existing-abstraction-functions}

            \cite{DBLP:conf/uai/BrokaDIK18} designed abstractions based on assignments to a variable's context $C(X)$, where $C(X)$ is a subset of its ancestors in a pseudo-tree $\cal T$ whose assignment uniquely determines the AND/OR subtree below it, i.e., its $Z(n)$ \citep{DBLP:journals/ai/DechterM07}.  Thus, abstractions 
            based on a subset of the context aim to group nodes based on having similar $Z(n)$ values. However, the number of possible assignments to the context, $|\D_{C(X)}|$, is exponential in the size of the context, making the full context infeasible to use in its entirety. Thus, \cite{DBLP:conf/uai/BrokaDIK18, kask20-scaling-up-as} utilize relaxed context-based (\textbf{RelCB}) and randomized context-based (\textbf{RandCB}) abstractions, as introduced in \cite{DBLP:conf/uai/BrokaDIK18}.  
            
            RelCB is parameterized by a level $j$, selecting the closest $j \! - \! 1$ variables from a variable's context (i.e., its {\em relaxed context}) plus itself. It abstracts nodes of the same domain value that also share the same assignment to the relaxed context. Assuming domain size of $k$, this yields at most $k^j$ abstract states at each level. 
            
            The randomized scheme, RandCB, considers the entire context but is parameterized by $nAbs$, which bounds the number of abstract states per level into which nodes can be placed.  Each of the $nAbs$ abstract states is randomly associated with a set of possible full context assignments, defining the nodes that will belong to that state.



    \section{Paradigms Intuiting Abstraction Strategies} \label{sec:paradigms}

        Next we review concepts from search and sampling that offer paradigms from which we draw ideas for abstraction functions.

        \subsection{Search Paradigms} \label{sec:paradigms:search}

            In [tree] search, one can merge nodes that have the same value to produce a more efficient graph search \citep{DBLP:journals/jair/MateescuDM08}. Abstraction functions by \cite{DBLP:conf/uai/BrokaDIK18} focused on this paradigm and approached it by using the concept of a node's context - the assignments to the smallest subset of a node's ancestor variables that dictates its value.  Due to the potentially large context size for variables, and consequently the exponentially high number of combinations of assignments to the context, the full context of variables could not be used in most cases.  \cite{DBLP:conf/uai/BrokaDIK18} resolved this by creating two context-based abstraction functions that were relaxed to allow nodes with different contexts to be grouped in the same abstract state.  A key observation we make is that, in the general setting, sharing the same partial context does not necessarily imply the same, nor even similar, node values.

            Our new heuristic-based abstraction functions hope to address this as will be presented in Section \ref{sec:value-based-abstraction-classes}.


        \subsection{Sampling Paradigms} \label{sec:paradigms:sampling}

            % From \cite{DBLP:conf/uai/BrokaDIK18}, we learn that when abstraction sampling over a classical OR search space, we can say:
            % \begin{theorem}[Exact Proposal] \label{thm:old-exact-proposal}
            %     If the proposal function $p$ in AS uses an exact heuristic $h(n)=Z(n)$, then $\hat{Z}$ has zero variance (single probe is exact), for any abstraction.
            % \end{theorem}

            Given a distribution $p^{*}(x)$ and a proposal $p(x)$ over a variable $X$, and given a non-zero value function $f(x)$, it is well known that importance sampling achieves zero variance when 1) $p(x)=0 \implies p^{*}(x)=0$, and 2) otherwise $p(x)$ is proportional to $p^{*}(x)f(x)$ \citep{KahnAndMarshall1953-variance-reduction, mcbook}.  
            
            \begin{lemma}[Importance Sampling Exact Proposal Based on Proportionality with Target Distribution]
                Given a distribution $p^{*}(.)$ and a value function $f(.)$, importance sampling achieves zero variance when using a proposal function $p(.)$ such that 1) $p(n)=0 \implies p^{*}(n)f(n)=0$, and 2) $p(n) \propto p^{*}(n)f(n)$, otherwise.
            \end{lemma}
            
            Note that in Abstraction Sampling each abstract state involves a node selection procedure analogous to importance sampling.  When using AOAS to estimate the partition function, and assuming $h(n)=0 \implies Z(n)=0$, it naturally follows that by designing each abstract state $\bs{A_{i}}$ such that $\forall n \in \bs{A_{i}}, p(n) = \alpha\, p^{*}(n)$, for some constant $\alpha$ (we omit $f(x)$ as $f(x)=1$ for the task of computing the partition function), we similarly achieve zero variance.


            \begin{definition}[Abstraction Function $h(n)r(n)$ vs. $Z(n)R(n)$ Proportionality]
                An abstraction function $a(n)$ maintains $h(n)r(n)$ vs. $Z(n)R(n)$ proportionality if, for every abstract state $A_i$ formed by $a(n)$, $\forall n \in A_i, h(n)r(n) = \alpha \, Z(n)R(n)$, for some constant $\alpha$ specific to $A_i$.
            \end{definition}

            \begin{definition}[Exact Abstraction Function]
                 An abstraction function $a(.)$ is exact for an abstraction sampling algorithm, AS, if use of $a(.)$ with AS always leads to AS estimates having zero variance and $\hat{Z} = Z$ for every AS probe.
            \end{definition}
            
            Thus, we can say:
            \begin{theorem}[AOAS Exact Abstractions from $h(n)r(n)$ vs. $Z(n)R(n)$ Proportionality] \label{thm:aoas-proportionality-exact-proposal}
                  If an abstraction function $a(.)$ maintains $h(n)r(n)$ vs. $Z(n)R(n)$ Proportionality, then it is an exact abstraction function for AOAS. 
            \end{theorem}


            Of course being able to determine 
            %$\frac{h(n)r(n)}{Z(n)R(n)}$,
            $\alpha$,
            or even whether nodes have the same 
            %$\frac{h(n)r(n)}{Z(n)R(n)}$,
            $\alpha$,
            is hard.  However one idea is to use the magnitude of $h(n)r(n)$ itself as a heuristic for similarities in $\alpha$.  This drives the intuition for the new hR-based abstractions that will be described in Section \ref{sec:value-based-abstraction-classes}.

            Also from the sampling perspective, \cite{rizzo_2007} showed the following about stratified importance sampling when sampling from equal area strata under the proposal:
    
            \begin{proposition}[Stratified Importance Sampling Variance Reduction] \label{prop:rizzo-variance-reduction}
                Suppose that $M = mk$ is the number of replicates for an importance sampling estimator $\hat{\theta^{I}}$, and $\hat{\theta^{SI}}$ is a stratified importance sampling estimator, with estimates $\hat{\theta_{j}}$ for $\theta_{j}$ on the individual strata, each with $m$ replicates.  If $Var(\hat{\theta^{I}}) = \sigma^{2} / M$ and $Var(\hat{\theta_{j}}) = \sigma^{2}_{j} / m$, $j = 1, ..., k$, then
                \begin{align}
                    \sigma^{2} - k \sum^{k}_{j=1} \sigma^{2}_{j} \geq 0,
                \end{align}
                with equality if and only if $\theta_{1}=...=\theta_{k}$.  Hence stratification never increases variance, and there exists a stratification that reduces the variance except when [the proposal function] $g(x)$ is constant.
            \end{proposition}
    
            Two takeaways from this proposition are that 1) we can achieve variance reduction with respect to importance sampling (analogous to Abstraction Sampling with all nodes placed into a single abstract state) by stratifying into equal area strata under the proposal, and 2) reducing the variance $\sigma^{2}_{j}$ within each stratum leads to greater variance reduction.  These help drive the intuition for the new Q-Based (proposal-based) abstraction class presented in Section \ref{sec:value-based-abstraction-classes}, as well as motivate several of the abstraction schemes presented in Section \ref{sec:ordered-partitioning-schemes}.


        \subsection{Combined Paradigms}\label{sec:paradigms:combined}

            With a helpful heuristic, in both the search and sampling domains there are notions of potential benefit by spending more time in optimistic areas of the search/sampling space.  In heuristic search, this corresponds to proceeding in an order that prioritizes expansion of nodes believed to have higher values first.  And similarly in sampling, among many schemes it is beneficial to spend the most effort sampling high impact events. We use these perspectives to motivate the new abstraction schemes that will be described in Section \ref{sec:ordered-partitioning-schemes}.


    \section{Value-Based Abstraction Classes} \label{sec:value-based-abstraction-classes}

        We introduce three new classes of abstraction functions that each define a unique notion of similarity between nodes based on value measurements on a positive scale.  These value measurements, which we refer to as \textbf{\textit{abstraction values}} denoted $\mu(.)$, are used as a measure of similarity to abstract nodes together.  The three classes we present are the Heuristic-Based (HB), HR-Based (HRB), and Q-Based (QB) abstraction classes.  Each is motivated by theory in search or sampling (as alluded to above; Section \ref{sec:paradigms}), and each can be used with the various abstraction schemes that will be presented in the subsequent section (Section \ref{sec:ordered-partitioning-schemes}).

    
        \subsection{Heuristic-Based Abstractions} \label{sec:value-based-abstraction-classes:HB}
        
            \begin{quote}
                $\mu(n) = h(n)$
            \end{quote}
            
            Using the motivation of abstracting nodes with similar subtree $Z(n)$ intuited from the search domain, we propose associating an abstraction value $\mu(.)$ to each node based on the heuristic estimate $h(n)$ of a node's $Z(n)$.  Unlike the partial (or hashed) contexts used by \cite{DBLP:conf/uai/BrokaDIK18}, heuristic estimates of $Z(n)$ can often provide \textit{quantitative} insight into potential similarities in $Z(n)$ values, and this is particularly true when using wMBE heuristics, which provide bounds.

            In conjunction with the schemes that will be presented in Section \ref{sec:ordered-partitioning-schemes}, HB algorithms aim to form abstractions such that nodes with similar $Z(n)$ are grouped together.


        \subsection{HR-Based Abstractions} \label{sec:value-based-abstraction-classes:HRB}

            \begin{quote}
                $\mu(n) = h(n)  \! \cdot \!  r(n)$
            \end{quote}

            As demonstrated in Theorem \ref{thm:aoas-proportionality-exact-proposal}, similarity of $\alpha = \frac{h(n)r(n)}{Z(n)R(n)}$ among nodes in the same abstract state can lead to reduction in variance.  Although this ratio is infeasible to compute, we can use $\mu(n) = h(n)r(n)$ as a surrogate for $\alpha$ and group nodes accordingly.
            
            In conjunction with the schemes that will be presented in Section \ref{sec:ordered-partitioning-schemes}, HRB algorithms aim to form abstractions such that nodes with similar $\alpha = \frac{h(n)r(n)}{Z(n)R(n)}$ are grouped together in hopes of driving down variance within the abstract states.


        \subsection{Q-Based Abstractions} \label{sec:value-based-abstraction-classes:QB}

            \begin{quote}
                $\mu(n) = w(n) \! \cdot \! g(n) \! \cdot \! h(n) \! \cdot \! r(n)$
            \end{quote}

            On the other hand, \cite{rizzo_2007} showed the potential for variance reduction by minimizing variance within strata when stratifying based on the proposal distribution.  Thus, in Q-Based Abstractions we use the quantity $q(n) = w(n)g(n)h(n)r(n) \propto p(n)$ \citep{kask20-scaling-up-as} as the value function.

            In addition to serving as an unnormalized proposal function for a node $n$, $q(n)$ also estimates $n$'s
            contribution to the overall $Z$. Therefore, $q(n)$ estimates the impact of $n$ (and all of the nodes $n$ represents as the selected representative from previous abstractions) on $Z$, which could be a valuable quantity on which to base our choice of nodes, as discussed in Section \ref{sec:paradigms:combined}.
            

                








            
    
    
    
    
    
        % \subsection{Proposal Based Abstractions} \label{sec:q-based-abstractions}
        %     As a main algorithmic contribution of this work, we introduce a new class of abstractions based on proposal estimates.  We will motivate this new class of abstractions with theory, and then describe three schemes that were developed as a result.
            
        %     \subsubsection{Motivation} \label{sec:q-based-abstractions:motivation}
    
                
    
                
                 
        %     \subsubsection{\NoCaseChange{simpleQB}} \label{sec:q-based-abstractions:SimpleQB}
    
    
                
    
        %     \subsubsection{\NoCaseChange{simpleQB}} \label{sec:q-based-abstractions:SimpleQB}
    
    
    
    
        %         \begin{itemize}
        %             \item
        %                 \textbf{randQB}: nodes are ordered by $q$ and then partitioned into $nAbs$ abstract states of random sizes
    
        %             \item
        %                 \textbf{minVarQB}: nodes are partitioned into $nAbs$ abstract states using Ward's Minimum Variance hierarchical clustering
        %             \item
        %                 \textbf{equalDistQB}: nodes are ordered by $q$ (from \emph{low $q$ to high $q$}) and placed into abstract states based on $nAbs$ quantiles (with respect to the total $q$ of nodes already assigned abstract states).  If a quantile has not been reached or surpassed, the next node in the ordering is added into the current abstract state.  If the current quantile is surpassed, the next abstract state and quantile are instead considered.
        %             \item
        %                 \textbf{equalDistQB2}: nodes are ordered by $q$ (from \emph{high $q$ to low $q$}) and placed into abstract states based on $nAbs$ quantiles (with respect to the total $q$ of nodes already assigned abstract states).  If a quantile has not been reached or surpassed, the next node in the ordering is added into the current abstract state.  If the current quantile is surpassed, the next abstract state and quantile are instead considered.
        %             \item
        %                 \textbf{equalDistQB3}: nodes are ordered by $q$ (from \emph{high $q$ to low $q$}) and placed into abstract states based on $nAbs$ quantiles (with respect to the total $q$ of nodes already assigned abstract states) with the caveat that at least one node is added to each abstract state.  If a quantile has not been reached or surpassed, the next node in the ordering is added into the current abstract state.  If the current quantile is surpassed, the next abstract state and quantile are instead considered and the next node in the ordering added to that abstract state by default.
        %             \item
        %                 \textbf{equalDistQB4}: nodes are ordered by $q$ (from \emph{high $q$ to low $q$}) and placed into the current abstract state until the accumulation on $q$ of the added nodes is equal to or exceeds the $1/nAbs_{remaining}$ quantile considering the total $q$ of nodes in the current abstract state and nodes yet to be assigned.
        %         \end{itemize}
            
        %     \subsubsection{MinVarQB} \label{sec:q-based-abstractions:MinVarQB}
        %     \subsubsection{EqualDistQB} \label{sec:q-based-abstractions:EqualDistQB}
        %         From the performance of the previous two abstraction functions an their analysis as seen in Section \ref{sec:empirical-evaluation}, it was observed that the more computationally intensive MinVarQB abstraction function was producing probes with better estimates where as SimpleQB was producing better overall estimates (presumably due to its speed enabling many more probes).  Thus, a third heuristic based abstraction, EqualDistQB, was created inspired by the simplicity and speed of SimpleQB and, in a greedy way, also attempting to minimize variance of the heuristic values of the probes in the resulting abstract states.
    
        %         EqualDistQB works by 
                
        %         The variance reduction proven for stratified importance sampling by \cite{rizzo_2007} assumed that each stratified layer had an equal mass under the proposal distribution \todo{double check that it is the proposal and not true}.  Thus, to approach emulation of such a scenario at each level of abstraction, we can sort nodes
    
    
    
    
    \section{Ordered Value-Based Abstraction Functions} \label{sec:ordered-value-based-abstraction-functions}

        We first define a new class of abstraction functions, Ordered Value-Based Abstraction Functions:

        \begin{definition}[Ordered Value-Based Abstraction Function]
            An \emph{ordered value-based abstraction function} is an abstraction function $a(.)$ that partitions a set of nodes $\bs{n}$ into at most $nAbs$ abstract states such that nodes remain sorted across all abstract states according to a provided value function $\mu(.)$ and sort-ordering $o$.
            % \vspace{-6pt}
            % \begin{itemize}
            %     \item 
            %         takes as input: A set of nodes $\bs{n}$ to be partitioned into abstract states; an abstraction value function $\mu(.)$; a sorting algorithm $SORT(.)$ that sorts $\bs{n}$ according to $\mu(.)$ and sort order $o$; a parameter $nAbs$ bounding the number of abstract states; a partitioning function $\Psi(.)$ that partitions the sorted nodes into abstract states maintaining their order
            %     \item 
            %         outputs: Nodes $\bs{n}$ partitioned into abstract states $\bs{A} = \setst{\bs{A_{i}}}{i<=nAbs}$ such that sort order $o$ of $\mu(n)$ is maintained across all $\bs{A_{i}}$
            % \end{itemize}
        \end{definition}

        \begin{algorithm}[t]
            \caption{$a_{\tn{\textit{ordered value}}}$}
            \label{alg:general-ordered-value-based-abstraction-function}
            \begin{footnotesize}
                \SetInd{0.25em}{0.55em}
                \DontPrintSemicolon 
            \Input{A set of nodes $\bs{n}$ to be partitioned into abstract states; an abstraction value function $\mu(.)$; a sorting algorithm $SORT(.)$ that sorts $\bs{n}$ according to $\mu(.)$ and sort order $o$; a parameter $nAbs$ bounding the number of abstract states; a partitioning function $\Psi(.)$ that partitions the sorted nodes into abstract states maintaining their order}
            \Output{Nodes $\bs{n}$ partitioned into abstract states $\bs{A} = \setst{\bs{A_{i}}}{i \leq nAbs}$ such that sort order $o$ of $\mu(n)$ is maintained across all $\bs{A_{i}}$.}
            
            \Begin{
                \uIf{$|\bs{n}| \leq nAbs$}{
                    $\bs{A} = \setst{\set{n}}{n \in \bs{n}}$\\
                }
                % \uIf{$|\bs{n}| <= m$}{
                %     \tcp{Each node is its own abstract state}
                %     $\bs{A} = \set{}$\\
                %     $nAbs' \leftarrow |\bs{n}|$\\
                %     \ForEach{$i \in \set{1,...,nAbs'}$}{
                %         $\bs{A_{i}} = \set{n_{i}}$\\
                %         $\bs{A} \leftarrow \bs{A} \cup \set{\bs{A_{i}}}$
                %     }
                % }
                \uElse{
                    $\bs{n^{*}} \leftarrow SORT(\bs{n},\mu,o)$\\
                    $\bs{A} = \Psi(\bs{n^{*}}, \mu)$
                }
                \Return $\bs{A}$       
            }
            \end{footnotesize}
        \end{algorithm}

        We provide a generic example of an ordered value-based abstraction function in Algorithm \ref{alg:general-ordered-value-based-abstraction-function}.
        
        % \semph{Complexity of $a_{\tn{\textit{ordered value}}}$.}\hfill\\
        %     $\mathcal{O}(\; \mathcal{O}(SORT(\bs{n},v,o)) + \mathcal{O}(\Psi(\bs{n^{*}}, v) \;)$
            
        Assuming the value function $\mu(.)$ is not dominating, the complexity is either determined by the sorting method's complexity or the partitioning complexity.

        Next we present seven ordered value partitioning schemes that, in conjunction with a provided $\mu(.)$, can be used with Algorithm \ref{alg:general-ordered-value-based-abstraction-function} to define a unique ordered value-based abstraction function.


    \section{Ordered Partitioning Schemes} \label{sec:ordered-partitioning-schemes}

        We now present seven schemes, each defined by a unique sort order $o$ and partition strategy $\Psi$ combination.  Each scheme uses a different method to partition nodes into abstract states keeping the nodes in sort order according to $o$. With a provided value function $\mu(.)$, each scheme can be used to form an ordered value abstraction function.  In addition to defining each scheme, we also describe the motivation behind its creation.

        \paragraph{Running Example} \label{sec:ordered-partitioning-schemes:running-example}  As we motivate and describe the schemes, we will also provide an example of abstract states that would result from partitioning the following nodes:
        \begin{align} \label{eq:running-partitioning-example}
            \set{1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 10, 100}
        \end{align}
        into $nAbs=4$ abstract states.
            
        \subsubsection{\NoCaseChange{simpleVB}} \label{sec:ordered-partitioning-schemes:simpleVB}
            \begin{quote}
                $o = \tn{low to high}$\\
                $\Part{simpleVB}$ (Algorithm \ref{alg:psi-simpleVB})
            \end{quote}
            
            \begin{algorithm}[t!]
                \caption{$\Part{simpleVB}$}
                \label{alg:psi-simpleVB}
                \begin{footnotesize}
                    \SetInd{0.25em}{0.55em}
                    \DontPrintSemicolon 
                \Input{A set of ordered nodes $\bs{n^{*}}$ to be partitioned into $nAbs$ abstract states; a value function $\mu(.)$}
                \Output{$\bs{n^{*}}$ partitioned into abstract states\footnotemark{} $\bs{A} = \setst{\bs{A_{i}}}{i \in \set{1,...,nAbs}}$ such that $\forall \bs{A_{i}},\bs{A_{j}} \in \bs{A}, |\bs{A_{i}}|-|\bs{A_{j}}| \leq 1$}
                
                \Begin{
                    $baseCardinality \leftarrow \floor{\frac{|\bs{n^{*}}|}{nAbs}}$\\
                    $extras \leftarrow |\bs{n^{*}}| \bmod nAbs$\\
                    $j_{begin}=1$\\
                    \ForEach{$i \leftarrow 1,...,nAbs$}{
                        \uIf{$extras > 0$}{
                            $j_{end} \leftarrow j_{begin} + baseCardinality$\\
                            $extras \leftarrow extras - 1$
                        }
                        \uElse{
                            $j_{end} \leftarrow j_{begin} + baseCardinality - 1$
                        }
                        $\bs{A_{i}} = \set{n^{*}_{{j_{begin}}}, ..., n^{*}_{{j_{end}}}}$\\
                        $j_{begin} \leftarrow j_{end}+1$
                    }
                    $\bs{A} = \bigcup_{i = 1}^{nAbs} \set{\bs{A_{i}}}$\\
                    \Return $\bs{A}$       
                }
                \end{footnotesize}
            \end{algorithm} \footnotetext{\label{ftn:ordered-schemes-maintain-sort-order}Such that nodes maintain sort order $o$ across all abstract states.}

            The simpleVB (simple value-based) scheme follows the motivation of grouping nodes of similar value in the same abstract state by a simple 2-step method: 1) first, nodes are ordered by their abstraction value $\mu(n)$ (low to high), and 2) next, the ordered nodes are partitioned into [approximately] equal cardinality abstract states.

            \semph{Time Complexity.}\hfill\\
                Partitioning is achieved via one pass through $|\bs{n^{*}}|$ leading to $\mathcal{O}(|\bs{n^{*}}|)$ time complexity.

            \semph{Space Complexity.}\hfill\\
                No more than linear space is required.  $\mathcal{O}(|\bs{n^{*}}|)$

            \semph{Result on \hyperref[sec:ordered-partitioning-schemes:running-example]{Running Example}.}\hfill\\
                $\set{1.0, 1.1}, \set{1.2, 1.3}, \set{1.4, 1.5}, \set{10, 100}$
                
            Through its simplicity, this method aims to leverage speed, allowing abstractions to be formed much more quickly and leading to a greater number of samples.



        \subsubsection{\NoCaseChange{minVarVB}} \label{sec:ordered-partitioning-schemes:minVarVB}

            \begin{quote}
                $o = \tn{low to high}$\\
                $\Psi = \Part{minVarVB}$ (Algorithm \ref{alg:psi-minVarVB})
            \end{quote}
            
            \begin{algorithm}[t!]
                \caption{$\Part{minVarVB}$}
                \label{alg:psi-minVarVB}
                \begin{footnotesize}
                    \SetInd{0.25em}{0.55em}
                    \DontPrintSemicolon 
                \Input{A set of ordered nodes $\bs{n^{*}}$ to be partitioned into $nAbs$ abstract states; a value function $\mu(.)$}
                \Output{$\bs{n^{*}}$ partitioned into abstract states\super{\ref{ftn:ordered-schemes-maintain-sort-order}} $\bs{A} = \setst{\bs{A_{i}}}{i \in \set{1,...,nAbs}}$ satisfying $\min \sum_{\bs{A_{i}} \in \bs{A}} Var(\bs{A_{i}}, \mu)$}
                
                \Begin{
                    $\bs{A} = WardsMethod(\bs{n^{*}}, \mu, nAbs, \tn{Euclidean distance})$\\
                    \Return $\bs{A}$       
                }
                \end{footnotesize}
            \end{algorithm}

            As mentioned in Section \ref{sec:paradigms:sampling}, Proposition \ref{prop:rizzo-variance-reduction}, \cite{rizzo_2007} showed that in stratified importance sampling minimizing variance of the estimates within individual strata can lead to a reduction in overall variance.

            The minVarVB scheme was designed based on this intuition.  The scheme uses Ward's Minimum Variance Hierarchical Clustering (or Ward's Method, for short) \citep{ward1963} to group nodes into $nAbs$ abstract states so as to minimize variance within each abstract state with respect to the provided value function $\mu(.)$.

            Ward's Minimum Variance Hierarchical Clustering is an agglomerative hierarchical clustering algorithm designed to create a dendrogram by iteratively merging clusters. The primary objective is to minimize the total within-cluster variance. Ward's method works as outlined in Algorithm \ref{alg:wards-method}.
                        
            \begin{algorithm}[t!]
              \caption{Ward's Method}
                \label{alg:wards-method}
              \begin{enumerate}
                \item \textbf{Initialization:} Treat each data point as an individual cluster. Assign each cluster a label or identifier.
                
                \item \textbf{Compute Pairwise Distances:} Calculate the pairwise distances between all clusters. Various distance metrics can be used, such as Euclidean distance.
                
                \item \textbf{Cluster Merging Iteration:} 
                  \begin{enumerate}
                    \item Identify the pair of clusters $\bs{C_{i}}$ and $\bs{C_{j}}$ that, when merged into a new cluster $\bs{C_{ij}}$, results in the smallest increase in the overall within-cluster variance. This is determined using the formula:
                      \[ \Delta Var = Var(\bs{C_{ij}}) - (Var(\bs{C_{i}}) + Var(\bs{C_{j}})) \]
                      where \(Var(\bs{C_{ij}})\) is the variance of the merged cluster, and \(Var(\bs{C_{i}})\) and \(Var(\bs{C_{j}})\) are the variances of clusters $\bs{C_{i}}$ and $\bs{C_{j}}$, respectively.
                    \item Update distance measures between the newly merged cluster and all other clusters.
                  \end{enumerate}
                
                \item \textbf{Repeat:} Repeat steps 2-3 until the desired number of clusters is achieved.
              \end{enumerate}
            \end{algorithm}
                        
            Ward's Method can be combined with Lance-Williams linear distance updates \todo{cite} to increase efficiency. Lance-Williams linear distance updates, in the context of agglomerative clustering, refer to the formula used to calculate the distance between clusters as they are merged during the hierarchical clustering process. The general form of Lance-Williams distance updates can be expressed as follows:
            \begin{align}
                d_{(ij)k} = \alpha_{i} d_{ik} + \alpha_{j} d_{jk} + \alpha d_{ij} + \gamma |d_{ik} - d_{jk}|
            \end{align}
            where:
            \begin{itemize}
                \vspace{-6pt}
                \item [\tiny$\bullet$]
                    $d_{ij}$, $d_{ik}$, and $d_{jk}$ are the pair-wise distances between clusters $\bs{C_{i}}$, $\bs{C_{j}}$, and $\bs{C_{k}}$
                \item [\tiny$\bullet$]
                    $d_{(ij)k}$ is the distance between the newly merged cluster $\bs{C_{i}} \cup \bs{C_{j}}$ and cluster $\bs{C_{k}}$
                \item [\tiny$\bullet$]
                    $\alpha_i, \alpha_j, \alpha, \text{ and } \gamma$ are coefficients that depend on the linkage criterion used
            \end{itemize}
            
            In the case of Ward's method, the coefficients are specific to the minimization of within-cluster variance and are calculated as follows:
            \begin{align}
            \begin{split}
                \alpha_i &= \frac{|\bs{C_{i}}| + |\bs{C_{k}}|}{|\bs{C_{i}}| + |\bs{C_{j}}| + |\bs{C_{k}}|} \\
                \alpha_j &= \frac{|\bs{C_{j}}| + |\bs{C_{k}}|}{|\bs{C_{i}}| + |\bs{C_{j}}| + |\bs{C_{k}}|} \\
                \alpha &= -\frac{|\bs{C_{k}}|}{|\bs{C_{i}}| + |\bs{C_{j}}| + |\bs{C_{k}}|} \\
                \gamma &= 0
            \end{split}
            \end{align}
            (The inclusion of \(\gamma\) provides additional flexibility in the more general case, adjusting the distance updates based on the specific clustering criterion being used).

            \semph{Time Complexity.\footnote{\label{ftn:time-complexity-assumes-constant-time-v}Assuming $\mu(n)$ is $\mathcal{O}(1)$ in both time and space.}}\hfill\\
                The choice of clusters to merge generally leads to an $\mathcal{O}(|\bs{n^{*}}|^{3})$ time complexity due to the need to compare pair-wise distances between all clusters at each iteration.  However, in the case where nodes are distributed linearly in one dimension, only distances between neighboring clusters need to be considered at each iteration, and selecting the closest pair can be made efficient by use of a priority queue.  Nevertheless, since the Lance-Williams distance updates themselves take linear time once per iteration, the reduced time complexity is still $\mathcal{O}(|\bs{n^{*}}|^{2})$.
                
            \semph{Space Complexity.\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}\hfill\\
                The space complexity is implementation dependent, with most time-efficient variants making use of a distance matrix leading to $\mathcal{O}(|\bs{n^{*}}|^{2})$ space complexity.

            \semph{Result on \hyperref[sec:ordered-partitioning-schemes:running-example]{Running Example}.}\hfill\\
                $\set{1.0, 1.1, 1.2}, \set{1.3, 1.4, 1.5}, \set{10}, \set{100}$

            In contrast to simpleVB, minVarVB places considerable resources into computing abstractions, leading to fewer samples, but with potentially better estimates with an appropriate value function $\mu(.)$. 



        \subsubsection{\NoCaseChange{equalDistVB}} \label{sec:ordered-partitioning-schemes:equalDistVB}

            \begin{quote}
                $o = \tn{low to high}$\\
                $\Part{equalDistVB}$ (Algorithm \ref{alg:psi-equalDistVB})
            \end{quote}
            
            \begin{algorithm}[t!]
                \caption{$\Part{equalDistVB}$}
                \label{alg:psi-equalDistVB}
                \begin{footnotesize}
                    \SetInd{0.25em}{0.55em}
                    \DontPrintSemicolon 
                \Input{A set of ordered nodes $\bs{n^{*}}$ to be partitioned into $nAbs$ abstract states; a value function $\mu(.)$}
                \Output{
				Let
				%
				$Z(\bs{A_{1,...,i}}) = \sum_{j=1}^{i} \sum_{n' \in \bs{A_{j}}} \mu(n')$,
				%
				let $n_{\bs{A_{i}}}^{\tn{last}}$ be the last node in $\bs{A_{i}}$,
				%
				and let
				%
				$P_{i} = \frac{ i \cdot \sum_{n \in \bs{n^{*}}}\mu(n)}{nAbs}$.
				%
                Then $\bs{n^{*}}$ is partitioned into abstract states\super{\ref{ftn:ordered-schemes-maintain-sort-order}} $\bs{A} = \setst{\bs{A_{i}}}{i \in \set{1,...,nAbs}}$ such that for $i=1,...,nAbs$ in order,
                $(\; Z(\bs{A_{1,...,i}}) \geq P_{i} \;)$
                $\land$ \\ $(\; (\, \bs{A_{i}}=\set{} \,) \lor (\, Z(\bs{A_{1,...,i}}) - \mu(n_{\bs{A_{i}}}^{\tn{last}}) < P_{i} \,) \;)$ }
                
                \Begin{
                    $j \leftarrow 1$\\
                    \ForEach{$i \leftarrow 1,...,nAbs$}{
                        $\bs{A_{i}} = \set{}$\\
                        \While{$Z(\bs{A_{1,...,i}}) < P_{i}$}{
                            $\bs{A_{i}} \leftarrow \bs{A_{i}} \cup \set{n^{*}_{{j}}}$\\
                            $j \leftarrow j + 1$
                        }
                    }
                    $\bs{A} = \bigcup_{i = 1}^{nAbs} \set{\bs{A_{i}}}$\\
                    \Return $\bs{A}$       
                }
                \end{footnotesize}
            \end{algorithm}

            As discussed in Section \ref{sec:paradigms:combined}, there is intuition in wanting to focus on high impact regions of the search/sampling space.  Allowing the provided value function $\mu(.)$ to serve as a heuristic for identifying nodes that are part of these high impact regions, equalDistVB attempts to balance this intuition with the notion of variance reduction from minVarVB by grouping fewer predicted high impact nodes together in each abstract state while allowing the predicted lower impact nodes to be part of larger abstract states.  Also inspired by the simplicity of simpleVB, the scheme works by greedily adding nodes in value order (low to high) into abstract state $\bs{A_{i}}$ until the total sum of node values from $\bs{A_{1}},...,\bs{A_{i}}$ reaches or exceeds the $\frac{i}{nAbs}$ quantile.
            
            When paired with the QB abstraction class (see Section \ref{sec:value-based-abstraction-classes:QB}), the equalDistVB scheme also attempts to partition nodes into abstract states of equal mass under the proposal.  This corresponds to the condition of Proposition \ref{prop:rizzo-variance-reduction} for stratified importance sampling variance reduction.

            \semph{Time Complexity.\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}\hfill\\
                The total $\sum_{n \in \bs{n^{*}}}\mu(n)$ is computed once in linear time, after which each $P_{i}$ is available in constant time, and $Z(\bs{A_{1,...,i}})$ can be updated incrementally in constant time per added node. Partitioning is then achieved via one pass through $\bs{n^{*}}$, leading to $\mathcal{O}(|\bs{n^{*}}|)$ time complexity.

            \semph{Space Complexity.\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}\hfill\\
                No more than linear space is required.  $\mathcal{O}(|\bs{n^{*}}|)$

            \semph{Result on \hyperref[sec:ordered-partitioning-schemes:running-example]{Running Example}.}\hfill\\
                $\set{1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 10, 100}, \set{}, \set{}, \set{}$
                
            Although this method hopes to find a balance between the intuitions previously explored without compromising the speed and efficiency of abstract state generation, from the running example we can see how this method can yield undesirable results in the presence of certain distributions of node values.  In this example, the first quantile is only reached after all the nodes have been added to the first abstract state, leaving no nodes remaining to be partitioned into the subsequent abstract states.



        \subsubsection{\NoCaseChange{equalDistVB2}} \label{sec:ordered-partitioning-schemes:equalDistVB2}

            \begin{quote}
                $o = \tn{high to low}$\\
                $\Part{equalDistVB}$ (Algorithm \ref{alg:psi-equalDistVB})
            \end{quote}

            By simply reversing the sort order, equalDistVB2 is able to use the same partitioning strategy $\Part{equalDistVB}$ associated with equalDistVB while mitigating some of the overfilling of abstract states.
            
            \semph{Time Complexity.\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}\hfill\\
                $Z(\bs{A_{1,...,i}})$ can be updated progressively in constant time, and thus computation of $P_{i}$ at each iteration can also be done in constant time. Partitioning is achieved via one pass through $\bs{n^{*}}$, leading to $\mathcal{O}(|\bs{n^{*}}|)$ time complexity.

            \semph{Space Complexity.\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}\hfill\\
                No more than linear space is required.  $\mathcal{O}(|\bs{n^{*}}|)$

            \semph{Result on \hyperref[sec:ordered-partitioning-schemes:running-example]{Running Example}.}\hfill\\
                $\set{100}, \set{}, \set{}, \set{10, 1.5, 1.4, 1.3, 1.2, 1.1, 1.0}$

            We see that equalDistVB2 can still be subject to over packing of abstract states.  Next we present two more equalDistVB variants that continue to mitigate this artifact.



        \subsubsection{\NoCaseChange{equalDistVB3}} \label{sec:ordered-partitioning-schemes:equalDistVB3}

            \begin{quote}
                $o = \tn{high to low}$\\
                $\Part{equalDistVB3}$ (Algorithm \ref{alg:psi-equalDistVB3})
            \end{quote}

            \begin{algorithm}[t!]
                \caption{$\Part{equalDistVB3}$}
                \label{alg:psi-equalDistVB3}
                \begin{footnotesize}
                    \SetInd{0.25em}{0.55em}
                    \DontPrintSemicolon 
                \Input{A set of ordered nodes $\bs{n^{*}}$ to be partitioned into $nAbs$ abstract states; a value function $\mu(.)$}
                \Output{
				With 
				%
				$Z(\bs{A_{1,...,i}}) = \sum_{j=1}^{i} \sum_{n' \in \bs{A_{j}}} Z(n')$,
				%
				$n_{\bs{A_{i}}}^{\tn{last}}$ be the last node in $\bs{A_{i}}$, 
				%
				and 
				%
				$P_{i} = \frac{ i \cdot \sum_{n \in \bs{n^{*}}}\mu(n)}{nAbs}$,
				%
                $\bs{n^{*}}$ partitioned into abstract states\super{\ref{ftn:ordered-schemes-maintain-sort-order}} $\bs{A} = \setst{\bs{A_{i}}}{i \in \set{1,...,nAbs}}$ such that for $i=1,...,nAbs$ in order,
                $(\; Z(\bs{A_{1,...,i}}) \geq P_{i} \;)$
                $\land$ \\ $(\; (\, |\bs{A_{i}}|=1 \,) \lor (\, Z(\bs{A_{1,...,i}}) - Z(n_{\bs{A_{i}}}^{\tn{last}}) < P_{i} \,) \;)$ }
                
                \Begin{
                    $j \leftarrow 1$\\
                    \ForEach{$i \leftarrow 1,...,nAbs$}{
                        $\bs{A_{i}} = \set{n^{*}_{{j}}}$\\
                        $j \leftarrow j+1$\\
                        \While{$Z(\bs{A_{1,...,i}}) < P_{i}$}{
                            $\bs{A_{i}} \leftarrow \bs{A_{i}} \cup \set{n^{*}_{{j}}}$\\
                            $j \leftarrow j+1$
                        }
                    }
                    $\bs{A} = \cup_{i = 1}^{nAbs} \bs{A_{i}}$\\
                    \Return $\bs{A}$       
                }
                \end{footnotesize}
            \end{algorithm}

            In order to lessen over packing and ensure abstract states are not left empty, equalDistVB3 modifies equalDistVB2 so that each abstract state is first assigned a single node by default before any further nodes are added to it.
            
            \semph{Time Complexity.\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}\hfill\\
                $Z(\bs{A_{1,...,i}})$ can be updated progressively in constant time, and thus computation of $P_{i}$ at each iteration can also be done in constant time. Partitioning is achieved via one pass through $\bs{n^{*}}$, leading to $\mathcal{O}(|\bs{n^{*}}|)$ time complexity.

            \semph{Space Complexity.\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}\hfill\\
                No more than linear space is required.  $\mathcal{O}(|\bs{n^{*}}|)$

            \semph{Result on \hyperref[sec:ordered-partitioning-schemes:running-example]{Running Example}.}\hfill\\
                $\set{100}, \set{10}, \set{1.5}, \set{1.4, 1.3, 1.2, 1.1, 1.0}$

            Still highly efficient, equalDistVB3 manages to ensure that the provided $nAbs$ granularity is honored, allowing users better control of the search vs. sampling interpolation possible with Abstraction Sampling.



        \subsubsection{\NoCaseChange{equalDistVB4}} \label{sec:ordered-partitioning-schemes:equalDistVB4}

            \begin{quote}
                $o = \tn{high to low}$\\
                $\Part{equalDistVB4}$ (Algorithm \ref{alg:psi-equalDistVB4})
            \end{quote}

            \begin{algorithm}[t!]
                \caption{$\Part{equalDistVB4}$}
                \label{alg:psi-equalDistVB4}
                \begin{footnotesize}
                    \SetInd{0.25em}{0.55em}
                    \DontPrintSemicolon 
                \Input{A set of ordered nodes $\bs{n^{*}}$ to be partitioned into $nAbs$ abstract states; a value function $\mu(.)$}
                \Output{
				With 
				%
				$Z(\bs{A_{1,...,i}}) = \sum_{j=1}^{i} \sum_{n' \in \bs{A_{j}}} Z(n')$,
				%
				$n_{\bs{A_{i}}}^{\tn{last}}$ be the last node in $\bs{A_{i}}$, 
				%
				and 
				%
				$L_{i} = \frac{Z(\bs{n^{*}})-Z(\bs{A_{1,...,i-1}})}{nAbs-i+1}$,
				%
                $\bs{n^{*}}$ partitioned into abstract states\super{\ref{ftn:ordered-schemes-maintain-sort-order}} $\bs{A} = \setst{\bs{A_{i}}}{i \in \set{1,...,nAbs}}$ such that for $i=1,...,nAbs$ in order,
                $(\; Z(\bs{A_{i}}) \geq L_{i} \;)$
                $\land$ \\ $(\; (\, |\bs{A_{i}}|=1 \,) \lor (\, Z(\bs{A_{i}}) - Z(n_{\bs{A_{i}}}^{\tn{last}}) < L_{i} \,) \;)$ }
                
                \Begin{
                    $j \leftarrow 1$\\
                    \ForEach{$i \leftarrow 1,...,nAbs$}{
                        $\bs{A_{i}} = \set{}$\\
                        \While{$Z(\bs{A_{i}}) < L_{i}$}{
                            $\bs{A_{i}} \leftarrow \bs{A_{i}} \cup \set{n^{*}_{{j}}}$\\
                            $j \leftarrow j+1$
                        }
                    }
                    $\bs{A} = \cup_{i = 1}^{nAbs} \bs{A_{i}}$\\
                    \Return $\bs{A}$       
                }
                \end{footnotesize}
            \end{algorithm}

            The final variant of the equalDist schemes, equalDistVB4, attempts to perform a more even partitioning than the previous variants by recomputing quantiles. Each time the algorithm progresses to processing a new abstract state, the remaining nodes and abstract states are used to compute new quantiles, which are then used to guide filling of the current abstract state in the same way as before.
            
            \semph{Time Complexity.\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}\hfill\\
                $Z(\bs{A_{1,...,i}})$ can be updated progressively in constant time, and thus computation of $L_{i}$ at each iteration can also be done in constant time.  Partitioning is achieved via one pass through $\bs{n^{*}}$, leading to $\mathcal{O}(|\bs{n^{*}}|)$ time complexity.

            \semph{Space Complexity.\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}\hfill\\
                No more than linear space is required.  $\mathcal{O}(|\bs{n^{*}}|)$

            \semph{Result on \hyperref[sec:ordered-partitioning-schemes:running-example]{Running Example}.}\hfill\\
                $\set{100}, \set{10}, \set{1.5, 1.4, 1.3}, \set{1.2, 1.1, 1.0}$

            Still highly efficient, equalDistVB4 manages to ensure that the provided $nAbs$ granularity is honored, allowing users better control of the search vs. sampling interpolation possible with Abstraction Sampling.


        \subsubsection{\NoCaseChange{randVB}} \label{sec:ordered-partitioning-schemes:randVB}

            \begin{quote}
                $o = \tn{high to low}$\\
                $\Part{randVB}$ (Algorithm \ref{alg:psi-randVB})
            \end{quote}

            \begin{algorithm}[t!]
                \caption{$\Part{randVB}$}
                \label{alg:psi-randVB}
                \begin{footnotesize}
                    \SetInd{0.25em}{0.55em}
                    \DontPrintSemicolon 
                \Input{A set of ordered nodes $\bs{n^{*}}$ to be partitioned into $nAbs$ abstract states; a value function $\mu(.)$}
                \Output{$\bs{n^{*}}$ partitioned into abstract states\super{\ref{ftn:ordered-schemes-maintain-sort-order}} $\bs{A} = \setst{\bs{A_{i}}}{i \in \set{1,...,nAbs}}$ }
                
                \Begin{
                    $\bs{s} \sim Unif(\setst{\bs{M} \subseteq \set{1,...,|\bs{n^{*}}|-1}}{|\bs{M}|=nAbs-1})$\\
                    $\bs{s^{*}_{}} \leftarrow SORT(\bs{s})$\\
                    $j \leftarrow 1$\\
                    \ForEach{$i \leftarrow 1,...,nAbs\!-\!1$}{
                        $\bs{A_{i}} = \set{n^{*}_{j},...,n^{*}_{s^{*}_{i}}}$\\
                        $j \leftarrow s^{*}_{i}+1$
                    }
                    $\bs{A_{nAbs}} = \set{n^{*}_{j},...,n^{*}_{|\bs{n^{*}}|}}$\\
                    $\bs{A} = \cup_{i = 1}^{nAbs} \bs{A_{i}}$\\
                    \Return $\bs{A}$       
                }
                \end{footnotesize}
            \end{algorithm}

            If the quality of $\mu(.)$ as a measure of similarity is unknown or poor, it could instead be beneficial to rely on randomness to ensure a diverse sampling of abstractions.  randVB does this by sampling $nAbs\!-\!1$ partition points between the sorted nodes $\bs{n^{*}}$ uniformly at random and without replacement, and then partitions the nodes accordingly. As a result, abstract states are formed such that nodes are still grouped according to $\mu(.)$, but the size of those groups varies.
            
            \semph{Time Complexity.\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}\hfill\\
                $\mathcal{O}(|\bs{n^{*}}|)$ time complexity.

            \semph{Space Complexity.\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}\hfill\\
                No more than linear space is required.  $\mathcal{O}(|\bs{n^{*}}|)$

            \semph{Result on \hyperref[sec:ordered-partitioning-schemes:running-example]{Running Example}.}\hfill\\
                $\set{100, 10}, \set{1.5}, \set{1.4, 1.3, 1.2}, \set{1.1, 1.0}$;\\
                $\set{100}, \set{10, 1.5, 1.4, 1.3}, \set{1.2, 1.1}, \set{1.0}$;\\
                ...etc.







    \section{Empirical Evaluation} \label{sec:empirical-evaluation}

        \subsection{Results} \label{sec:empirical-evaluation:results}

            \subsubsection{Aggregated Result}

                \subsubsection{Comparing Value Classes and Partitioning Schemes}

                    \begin{tablefigure*}[t]
                        \centering     %%% not \center
                        \begin{subfigure}[DBN]{0.49\linewidth}
                            \label{fig:DBN_aggregation}
                            \includegraphics[width=0.99\linewidth]{./_attachments/Results/DBN-algs-all-i-10.pdf}
                        \end{subfigure}
                        \begin{subfigure}[Grids]{0.49\linewidth}
                            \label{fig:Grids_aggregation}
                            \includegraphics[width=0.97\linewidth]{./_attachments/Results/Grids-algs-all-i-10.pdf}
                        \end{subfigure}
                        \begin{subfigure}[Linkage-Type4]{0.49\linewidth}
                            \label{fig:Linkage-Type4_aggregation}
                            \includegraphics[width=0.99\linewidth]{./_attachments/Results/Linkage-Type4-algs-all-i-10.pdf}
                        \end{subfigure}
                        \begin{subfigure}[Promedas]{0.49\linewidth}
                            \label{fig:Promedas_aggregation}
                            \includegraphics[width=0.99\linewidth]{./_attachments/Results/Promedas-algs-all-i-10.pdf}
                        \end{subfigure}
                        \caption{Aggregated statistics. Displayed are the Abstraction Class (\textit{Class}), Partitioning Scheme (\textit{Scheme}), bound on number of abstract states (\textit{nAbs}), number of problems for which only a zero solution could be found (\textit{Fail}), average $\log_{10} Z$ error (\textit{\textbar Error\textbar}), average number of samples (\textit{Samples}), and average size of probes (\textit{Size}). Color bars visualize the magnitude of the values. Red \textit{Fail} cells indicate an algorithm's inability to solve relatively many problems. Bold indicates best performances. Lines in bold indicate the best performing algorithms. Each benchmark also displays the total number of problems within it (N), average number of variables ($|X|$), max domain size (k), tree width ranges (w*), and AND/OR search tree depth ranges (d).}
                        \label{fig:AggregationTables}
                    \end{tablefigure*}

                \subsubsection{Comparing Value Classes and Partitioning Schemes}

            \subsubsection{Sample Efficiency}

                \begin{plotfigure}[H]
                    \centering
                    \caption{For the given abstraction granularity and benchmark, the number of samples (in log10) relative to the probe size (in log10) using iB-10.}
                    \includegraphics[width=1.0\linewidth]{./_attachments/Results/Promedas-nAbs-2048_samples-vs-probe-size.pdf}
                    \label{plot:results:Promedas-nAbs-2048_samples-vs-probe-size}
                \end{plotfigure}

            \subsubsection{Probe Size}
            
                \begin{plotfigure}[H]
                    \centering
                    \caption{For the given abstraction granularity and benchmark, the size of the probe (in log10) relative to the number of problem variables (in log10) using iB-10.}
                    \includegraphics[width=1.00\linewidth]{./_attachments/Results/Promedas-nAbs-2048_probe-size-vs-nVars}
                    \label{plot:results:Promedas-nAbs-2048_probe-size-vs-nVars}
                \end{plotfigure}

        \subsection{Analysis} \label{sec:empirical-evaluation:analysis}


    % \section{Increasing Understanding of Abstraction Sampling Probes}\label{sec:understanding-abstraction-sampling-probes}

    %     As the main theoretical contribution of this work, we provide theory and understanding of probe structure and how different probe generation can lead to better or worse estimates.
        
    %     \subsection{Probe Size} \label{sec:understanding-abstraction-sampling-probes:probe-size}
    %         As will be highlighted in Section \ref{sec:empirical-evaluation}, even when abstractions are set to have the same granularity they can result in different probe structures, which can also cause probes that vary greatly in the number of nodes generated during their construction, and ultimately their final size.  This has implications on the configurations captured by the probes as well as the computation (ie. time) necessary to generate them.

    %         In the corresponding section of the supplemental, you can find detailed examples of how, even for the same granularity, probe generation can vary leading to different probe sizes.  Below, we provide a theoretical analysis of the potential variation and then we follow with the practical implications of this understanding.

    %         \begin{theorem}[Minimum size of an AOAS probe based on a chain psuedo tree]
    %             \hfill \\
    %             Consider a chain pseudo tree \PT based on the ordered variable set $\X = \set{X_{1}, ..., X_{N}}$ and an abstraction function with granularity of $nAbs$ per variable.
                
    %             If $|D_{\X}| > nAbs$, then let the ordered set
    %             \begin{align*}
    %                 \bs{Y} = \set{X_{y}, ..., X_{N}}
    %             \end{align*}
    %             s.t. $|D_{\bs{Y}}| \geq nAbs$, yet $|D_{\bs{Y} \setminus \set{X_{y}}}| < nAbs$.  Otherwise, let
    %             $\bs{Y} = \X$.
                
    %             Then the smallest probe that can be generated will have size
    %             \begin{align}
    %                 \begin{split}
    %                     min(\{|\widetilde{T}^{(m)}|\}_{m=1}^{\infty}) =& 
    %                           \;
    %                            |\X| - |\bs{Y}| \\
    %                         &+ |T_{\bs{Y}}| \\
    %                         &- \sum_{X_{N}, ..., X_{y}} prune(X_{i})
    %                 \end{split},
    %             \end{align}
    %             \begin{align}
    %                 prune(X_{i}) =
    %                 \begin{cases}
    %                     max(0,|D_{\bs{Y}}| - nAbs), &X_{i}=X_{N} \\
    %                     \floor{prune(X_{i+1})/D_{X_{i+1}}}, &otherwise
    %                 \end{cases}
    %             \end{align}
                
                
    %             \begin{proof}[Proof by construction]
    %                 \hfill \\
    %                 \begin{quoting}
    %                     \textbf{Case 1: ($|D_{\X}| \leq nAbs$})
    %                     \begin{quoting}
    %                         Since even the fully expanded search tree does not produce levels that have $nAbs$ number of nodes, the full search tree will be expanded for every probe and
    %                         \begin{align}
    %                             |\widetilde{T}^{(m)}| &= T_{\X}
    %                         \end{align}
    %                         According to the case criterion and definition of the ordered variable set $\bs{Y}$, $\bs{Y} = \X$ and thus $(|\X| - |\bs{Y}|) = 0$ and $|D_{\bs{Y}}| \leq nAbs$. The latter further implies that $max(0,|D_{\bs{Y}}| - nAbs)=0$ which in turn implies that $\sum_{X_{N}, ..., X_{y}} prune(X_{i}) = 0$. Thus,
    %                         \begin{align}
    %                             \begin{split}
    %                                 |\widetilde{T}^{(m)}| &= T_{\X}\\
    %                                                       &= |\X| - |\bs{Y}| \\
    %                                                             &\;\;\;\;+ |T_{\bs{Y}}| \\
    %                                                             &\;\;\;\;+ \sum_{X_{N}, ..., X_{y}} prune(X_{i})
    %                             \end{split}
    %                         \end{align}
    %                     \end{quoting}

    %                     \textbf{Case 2: ($|D_{\X}| > nAbs$})
    %                     \begin{quoting}
    %                         Then $\bs{Y} = \set{X_{y}, ..., X_{N}}$ s.t. $|D_{\bs{Y}}| \geq nAbs$ and $|D_{\bs{Y} \setminus \set{X_{y}}}| < nAbs$.\\

    %                         We start with the observation that the final level of the probe must have $nAbs$ leaves since, at some level the probe will expand to at least $nAbs$ leaves since $|D_{\X}| > nAbs$ and so, at that level and subsequent levels, abstractions will group newly expanded nodes to $nAbs$ abstractions.  We also make the observation that the smallest such sub tree contains $nAbs$ nodes \textit{only} at the final level (with all other levels having fewer than $nAbs$ nodes).  Assuming such a probe can be constructed, these observations imply that there exists some level $y$ where expansions from level $y$ to $N$ result in $nAbs$ or more leaves, but where expansions from level $y$ to level $N-1$ still produce fewer than $nAbs$ leaves.  With these observations, we can see that the smallest probe would then be one that consists of a single path for levels $1$ through $y-1$ and then results in $nAbs$ leaves at level $N$.

    %                         However, assuming expansions from level $y$ to level $N$ can produce more than $nAbs$ leaves, there can be many probes that consist of a path from levels $1$ through $y-1$ and $nAbs$ leaves at level $N$.  However, following similar logic as before, the smallest such probe will be one with the smallest number of nodes at higher levels, the smallest being achieved when the final $nAbs$ nodes of level $N$ extend from the fewest nodes of level $N-1$, and those nodes of level $N-1$ extend from the fewest nodes of level $N-2$, and so on.  
                            
    %                         Formally, this means that every level until level $y$ contributes a single node to the probe; in other words $|\X|-|\bs{Y}|$ nodes.  To express the size of the remaining sub tree consisting of levels $y$ through $N$, we can first consider the fully expanded sub tree $T_{\bs{Y}}$ and then recursively remove nodes from bottom up so that there are $nAbs$ nodes in the last level and the nodes are grouped so that they extend from the fewest nodes of the previous level, and recursively repeat the procedures with the nodes of the previous level all the way up to level $y$.  This results in $|T_{\bs{Y}}| - \sum_{X_{N}, ..., X_{y}} prune(X_{i})$ nodes for levels $y$ to $N$.

    %                         Thus, the final size of such a minimum size probe $\widetilde{T}^{(m)}_{min}$ will be
    %                         \begin{align}
    %                             \begin{split}
    %                                 |\widetilde{T}^{(m)}_{min}| =& 
    %                                       \;
    %                                        |\X| - |\bs{Y}| \\
    %                                     &+ |T_{\bs{Y}}| \\
    %                                     &- \sum_{X_{N}, ..., X_{y}} prune(X_{i})
    %                             \end{split}
    %                         \end{align}

    %                         Finally, we are left with the task of proving that such a probe can be constructed by AOAS.  Indeed this is so.  Such a probe could occur if, at every level that abstractions occur, the abstraction functions group nodes such that it is possible for the chosen $nAbs$ representatives to extended from as few parent nodes as possible for $nAbs$ number of nodes.  Not every abstraction function can satisfy this criterion, however, there can always be some abstraction function that can (ex. an abstraction function that places $nAbs-1$ of the nodes mentioned above into a singleton abstract state, and the rest of the nodes into the last).
    %                     \end{quoting}

    %                     And given that \textbf{Case 1} and \textbf{Case 2} explore all the possible cases, we prove our claim.
    %                 \end{quoting}
    %             \end{proof}
    %         \end{theorem}

    %         \todo{corollary with simple complexity for min probe size}

    %         \todo{theorem max probe size}
    %         \todo{corollary with simple complexity for max probe size}
    %         \todo{corollary at any point, probe size will be bounded by the max probe size}


    \section{Conclusion} \label{sec:analysis}


        
\clearpage

    \bibliography{ref}




\end{document}