\documentclass{uai2024} % for initial submission
%\documentclass[accepted]{uai2024} % after acceptance, for a revised version; 
% also before submission to see how the non-anonymous paper would look like 
                        
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2024} % ptmx math instead of Computer
                                         % Modern (has noticeable issues)
% \documentclass[mathfont=newtx]{uai2024} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{Instructions for Authors: Title in Title Case}

% The standard author block has changed for UAI 2024 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
% \author[1]{\href{mailto:<jj@example.edu>?Subject=Your UAI 2024 paper}{Jane~J.~von~O'L\'opez}{}}
% \author[1]{Harry~Q.~Bovik}
% \author[1,2]{Further~Coauthor}
% \author[3]{Further~Coauthor}
% \author[1]{Further~Coauthor}
% \author[3]{Further~Coauthor}
% \author[3,1]{Further~Coauthor}
% % Add affiliations after the authors
% \affil[1]{%
%     Computer Science Dept.\\
%     Cranberry University\\
%     Pittsburgh, Pennsylvania, USA
% }
% \affil[2]{%
%     Second Affiliation\\
%     Address\\
%     …
% }
% \affil[3]{%
%     Another Affiliation\\
%     Address\\
%     …
%   }
\author{\href{mailto:<pezeshkb@uci.edu>?Subject=Abstraction Sampling - UAI 2024}{Bobak Pezeshki}{}}
\author{\href{mailto:<kkask@uci.edu>?Subject=Abstraction Sampling - UAI 2024}{Kalev Kask}{}}
\author{\href{mailto:<ihler@ics.uci.edu>?Subject=Abstraction Sampling - UAI 2024}{Alexander Ihler}{}}
\author{\href{mailto:<dechter@ics.uci.edu>?Subject=Abstraction Sampling - UAI 2024}{Rina Dechter}{}}
% Add affiliations after the authors
\affil[1]{%
    University of California, Irvine
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% CUSTOM PACKAGES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{caption}
\usepackage{subcaption}
\usepackage{float}
\usepackage{xspace} % package being used for \newcommand to remove extra space
                    %     when a command is invoked without an argument list
\usepackage{textcase}
\usepackage[toc, nopostdot]{glossaries}
% \usepackage{amsmath}
\usepackage{amsthm, amssymb}
\usepackage{mathtools}
\usepackage{enumitem}
\usepackage{refcount}
\usepackage[leftmargin=6pt, vskip=3pt-\parskip]{quoting}
\usepackage[titlenumbered,ruled, linesnumbered]{algorithm2e}
\usepackage{mathrsfs} %for \mathscr
\usepackage[font=smaller,labelfont=bf]{caption}
% \usepackage[font=small,labelfont=bf]{subcaption}
% \usepackage[labelfont=bf]{caption}
% \usepackage[labelfont=bf]{subcaption}
\usepackage{xcolor}
    \definecolor{darkgreen}{rgb}{0.0, 0.2, 0.13}
    \definecolor{cadmiumgreen}{rgb}{0.0, 0.42, 0.24}
    \definecolor{byzantium}{rgb}{0.44, 0.16, 0.39}
    \definecolor{darkelectricblue}{rgb}{0.33, 0.41, 0.47}
    \definecolor{battleshipgrey}{rgb}{0.52, 0.52, 0.51}
    \definecolor{warmblack}{rgb}{0.0, 0.26, 0.26}
\usepackage{newfloat}
\usepackage{chngcntr}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% CUSTOM COMMANDS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Custom float types (\DeclareFloatingEnvironment comes from the newfloat
% package loaded in the CUSTOM PACKAGES section).
%create new float environment called plotfigure with its own counter
\DeclareFloatingEnvironment[name=Plot]{plotfigure} 

%create new float environment called tablefigure with its own counter
\DeclareFloatingEnvironment[name=Table]{tablefigure} 

%set the floats table and tablefigure to use the same counters
% (\makeatletter/\makeatother are required because the internal names contain @)
\makeatletter\let\c@tablefigure\c@table\makeatother 

%consider the floats table and tablefigure as the same set of floats (so location in document will be in order in which they appear)
\makeatletter\let\ftype@tablefigure\ftype@table\makeatother 

\DeclareMathOperator*{\argmin}{argmin}
\DeclareMathOperator*{\argmax}{argmax}
\mathchardef\mhyphen="2D % Define a "math hyphen"

% algorithm2e
% \newcommand\commentstyle[1]{\textcolor{cadmiumgreen}{#1}}
\SetCommentSty{commentstyle}
\SetKwInOut{Input}{input}
\SetKwInOut{Output}{output}

% Theorem style "break": bold head followed by a line break, italic body.
% The positional arguments of \newtheoremstyle (amsthm) are annotated below.
\newtheoremstyle{break}
  {\topsep}{\topsep}% space above / space below the theorem
  {\itshape}{}% body font / indent amount (empty = no indent)
  {\bfseries}{}% head font / punctuation after the head (none)
  {\newline}{}% space after the head (\newline = line break) / head spec (empty)
\theoremstyle{break}
% \newtheorem{theorem}{Theorem}[subsubsection]
% Theorems are numbered within sections; corollary, lemma and proposition
% share the theorem counter.
\newtheorem{theorem}{Theorem}[section]
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}
% \newtheorem{definition}{Definition}[subsubsection]
% Definitions use their own counter, also numbered within sections.
\newtheorem{definition}{Definition}[section]

\input{cmds}
\renewcommand*{\glstextformat}{\textbf}

\renewcommand{\quote}{\list{}{\rightmargin=\leftmargin\topsep=0pt}\item\relax}







%%% for supplemental

% Allow itemize lists nested up to nine levels deep (used by the supplemental).
% NOTE(review): enumitem is already loaded in the CUSTOM PACKAGES section, so
% this second \usepackage is redundant; confirm before removing.
\usepackage{enumitem}
    % raise the maximum list nesting depth to 9
    \setlistdepth{9}
    % level 1 uses a bullet; every deeper level uses a centered dot
    \setlist[itemize,1]{label=$\bullet$}
    \setlist[itemize,2]{label=$\cdot$}
    \setlist[itemize,3]{label=$\cdot$}
    \setlist[itemize,4]{label=$\cdot$}
    \setlist[itemize,5]{label=$\cdot$}
    \setlist[itemize,6]{label=$\cdot$}
    \setlist[itemize,7]{label=$\cdot$}
    \setlist[itemize,8]{label=$\cdot$}
    \setlist[itemize,9]{label=$\cdot$}
    % redefine itemize itself with a maximum depth of 9
    \renewlist{itemize}{itemize}{9}






%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\setcounter{secnumdepth}{3} %May be changed to 1 or 2 if section numbers are desired.
\setcounter{tocdepth}{3}

\title{Abstraction Sampling with Heuristic-Based, HR-Based,\\ and Proposal-Based Abstraction Functions}


\input{gls}


\begin{document}
    % \onecolumn
    \setlength{\abovedisplayskip}{3pt}
    \setlength{\belowdisplayskip}{3pt}

    \maketitle
    
    \begin{abstract}
        \vspace{-12pt}
        Monte Carlo methods are powerful tools for solving problems involving complex probability distributions. Despite their versatility, these methods often suffer from computational inefficiencies, especially when dealing with rare events. As such, importance sampling emerged as a prominent technique for alleviating these challenges. Recently, a new scheme called Abstraction Sampling was developed that incorporated stratification to importance sampling over graphical models helping to improve estimates further. However, existing work on Abstraction Sampling has explored only a limited set of abstraction functions guiding the stratification. This study expands this set by introducing three new classes of abstraction functions combined with seven distinct partitioning schemes, resulting in twenty-one new abstraction functions. These are motivated by theory and intuition from both the search and sampling domains. An extensive empirical analysis on over 400 benchmarks compares these new abstraction functions, highlighting several well-performing candidates. 
    \end{abstract}

    % \vfill\eject
    % \tableofcontents
    
    % \clearpage
    \section{Introduction} \label{sec:introduction}

        The partition function ($Z$) is an important quantity in probabilistic graphical model inference, and is often estimated using Monte Carlo methods such as Importance Sampling (IS) \citep{Rubinstein_2007,liu2015probabilistic,DBLP:journals/ai/GogateD11}. Recently a framework called Abstraction Sampling (AS) \citep{DBLP:conf/uai/BrokaDIK18} was introduced, inspired by the prior works of \citet{knuth75,Chen92}, that extends IS by enabling samples to represent multiple configurations.  AS uses concepts from Stratified Sampling \citep{Rubinstein_2007,rizzo_2007} and compact search  \citep{DBLP:journals/ai/DechterM07} to build a sampled subtree called a probe.  The construction progresses level-by-level according to a variable ordering where, at each level, an \textit{abstraction function} groups nodes into \textit{abstract states} and then a representative node from each group is picked and reweighted 
        %(according to a proposal distribution) 
        to extend the paths in the probe.
        
        Using what are referred to as context-based abstraction functions, \citet{DBLP:conf/uai/BrokaDIK18} showed competitive performance by AS against IS, Weighted Mini-Bucket IS (wMBIS) \citep{liu2015probabilistic}, and IJGP-SampleSearch (IJGP-ss) \citep{DBLP:journals/ai/GogateD11}. \citet{kask20-scaling-up-as} further extended AS scalability introducing AS algorithm AOAS that more efficiently applied AS to AND/OR search spaces, and showed its superior performance using the same context-based abstraction functions against previous versions of Abstraction Sampling (and thus implicitly also against IS, wMBIS, and IJGP-ss) and also against state-of-the-art scheme Dynamic Importance Sampling \citep{lou2019interleave}.
        
        However, a shortcoming in the development of Abstraction Sampling has been the lack of more versatile and effective abstraction functions.  \citet{hsiao23-gnn-dynamic-as} approached this challenge by using graph neural networks for learning abstraction functions.  However, such a methodology has the drawback of requiring learning on problems before use.  In this work we present:
                
        \begin{itemize}
        
            \item
                A new Value-Based abstraction framework for grouping nodes according to values on a positive scale
                
            \item
                Twenty-one value-based abstraction functions constructed by combining:
                
                \begin{itemize}
                
                    \item
                        Three distinct abstraction value functions, each inspired by paradigms from search and sampling.
                        
                    \item
                        Seven schemes for partitioning nodes according to their assigned values.
                        
                \end{itemize}
                        
            \item
                An extensive empirical evaluation on over 400 problems comparing the above-mentioned schemes against: each other, the previously vetted RelCB and RandCB abstraction functions \citep{DBLP:conf/uai/BrokaDIK18,kask20-scaling-up-as} (and thus implicitly against IS, wMBIS, and IJGP-ss), and a purely randomized abstraction scheme.
        \end{itemize}

    From the findings, we identify three new abstraction functions that show significantly better performance than any previous scheme, and also explore trends in their hyper-parameterization.


    \section{General Background} \label{sec:background}


%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%

        \paragraph{Graphical Models.}
            
            % \begin{figure}[]
            %     \centering
            % 	\includegraphics[scale=0.25]{images/AncestorBranchingMass.pdf}
            % 	\vspace{-6pt}\caption{Ancestor branching mass of an AND node.}
            % 	\label{fig-ancestor-branching-mass}
            % \end{figure}
    
            % % \begin{comment}
            % \begin{figure}[]
            %     \centering
            % 	\includegraphics[scale=0.25]{images/ProperAbstractionGroups.pdf}
            % 	\vspace{-6pt}\caption{Scope of proper abstractions.}
            % 	\label{fig-proper-abstraction-groups}
            % \end{figure}
            % % \end{comment}
            
            A \textit{graphical model}, such as a Bayesian or Markov network \citep{pearl88,darwiche-book,DBLP:series/synthesis/2013Dechter}, can be defined by  a 3-tuple
            $\mathcal{M} \! = \! (\mathbf{X,D,F})$, where
            $\mathbf{X}$
            is a set of variables,
            and $\mathbf{D}$
            is the set of variable domains, and $\mathbf{F}$ is a set of functions such that each function $f_{\alpha} \in \mathbf{F}$ is defined over $\alpha \subseteq \bs{X}$
            called its scope. A \textit{Primal graph} $\mathcal{G} \! = \! (\mathbf{V,E})$ of $\mathcal{M}$ associates each variable with a node ($\mathbf{V} \! = \! \mathbf{X}$), while arcs $e \! \in \! \mathbf{E}$ connect nodes whose variables appear in the scope of the same function.
            $\mathcal{M}$ defines a global function, often a factorized probability distribution on $\mathbf{X}$,
            $P(\mathbf{X}) = \frac{1}{Z} \prod_{\alpha}f_\alpha(X_\alpha)$, where 
            $
            Z = \sum_{\mathbf{X}} \prod_{\alpha}
            f_\alpha(X_\alpha)
            $, known as the partition function, is a normalization factor.

            
        \paragraph{Search Spaces of Graphical Models.} 
            % A graphical model can be transformed into a weighted state space graph.
            % In an OR search space, which is constructed layer-by-layer relative to a variable ordering, paths from the root to the leaves represent \textbf{full configurations} - or assignments to all variables - where each successive level corresponds to an assignment of the next variable in the ordering.
            
            A graphical model can
            % also 
            be transformed  into a compact AND/OR search space that leverages conditional independencies in the  model and facilitates use of efficient search algorithms \citep{DBLP:journals/ai/DechterM07}. Given a primal graph $\mathcal{G}$ of \M, an AND/OR search space is defined relative to a \textit{pseudo tree} $\mathcal{T} \! = \! (\mathbf{V,E'})$ that is a directed rooted tree that spans $\mathcal{G}$ according to a variable ordering and captures conditional independences encoded in the model.  $\mathcal{T}$ is constructed such that every arc of $\mathcal{G}$ not in $\mathbf{E'}$ is a back-arc in ${\cal T}$ connecting a node to one of its ancestors (Figure \ref{fig:primal-graph-and-pseudo-tree}).  A variable is a \textit{branching variable} if it has multiple children in $\mathcal{T}$.
            %The arcs in $E'$ may not all be included in $E$ .  
            
            
            
            
            \begin{figure}[!htb]
            	\centering
            	\begin{subfigure}{0.9\linewidth}
            	\centering
            	       \includegraphics[width=0.75\linewidth]{UAI-24/_attachments/images/pseudotree.png}
                        \vspace{-6pt}\caption{}
                        \label{fig:primal-graph-and-pseudo-tree}
            	\end{subfigure}
                    \begin{subfigure}{0.9\linewidth}
            	\centering
                        \includegraphics[width=0.75\linewidth]{UAI-24/_attachments/images/AncestorBranchingMass.pdf}
                        \vspace{-6pt}\caption{}
                        \label{fig:ancestor-branching-mass}
                    \end{subfigure}
            	\vspace{-6pt}\caption{A full AND/OR tree representing 16 possible solutions guided by the pseudo tree shown above. Boxed in green is the ancestor branching subtree for the path $\rightarrow \!\! (A \!\! = \!\! 0) \!\! \rightarrow  \!\! (C \!\! = \!\! 1)$.}
                        \label{fig:psuedo-tree-with-ancestor-branching-mass}
            \end{figure}

            
            Given a
            pseudo tree $\mc{T}$, an \textit{AND/OR search tree}
            $T_{\mc{T}}$ guided by $\mc{T}$ has alternating levels of OR nodes
            corresponding to variables and AND nodes corresponding to
            possible assignments, 
            % to the variables, 
            %with edge costs extracted from
            %the original functions \citep{DBLP:journals/ai/DechterM07} such that %(By this logic, we can think of the nodes of an OR tree as AND nodes).  
            % Let $n$ be an AND node in $T_{\tau}$, also denoted $n_X$ if $X$ is the last variable of its partial configuration.
            and the arc into an AND node $n_{X}$ associated with variable $X$ %(or the arc from its OR parent to the AND node)
            has a cost $c(n_{X})$ equal to the product of functions $f_{\alpha} \in \F$ such that the path to $n_{X}$ fully instantiates all $X' \in \alpha$ and such that $X \in \alpha$ \citep{DBLP:journals/ai/DechterM07}.
            % \textcolor{red}{Moved to section "Value of A Node": (see Figure \ref{fig-simple}(c)).}  
            
        \paragraph{Notation.}
            When not otherwise stated, capital letters ($X$) represent variables and small letters ($x$) represent their values.  Boldfaced letters represent a collection. For example,
            boldfaced capital letters ({\bf X}) denote a collection of variables,
            $|{\bf X}|$ its cardinality, 
            $D_{\X}$ their joint domains (i.e., all possible configurations of \X), 
            and bolded $\xx$ a particular realization in that joint domain (i.e., a particular configuration of \X).

            In the context of search, $n$ is used to represent search nodes.  In the context of AND/OR search, $n_{X}$ specifically refers to an AND node in $T_{\mc{T}}$ associated with variable $X$, and $Y_{n_X}\!$ the OR node associated with variable $Y$ that is the child of $n_{X}$. $path(n)$ is the configuration of the variables along the path from the root of $T_{\mc{T}}$ to $n$ according to assignments corresponding to that path. $g(n)$ is the cost of $path(n)$. $ch(n)$ denotes the children of node $n$.
            % $ch_{\PT}(X)$ denotes the children of $X$ in pseudo tree \PT.
            

        \paragraph{$\bs{Z(n)}$.} \label{sec:partition-function-of-a-node}  
            % In the context of AND/OR search, let $ch_Y(n_X)$ refer to the children AND nodes of variable $Y$ that are descended from the AND node $n_X$.
            We define $Z(n)$ recursively as: 
            \begin{equation} \label{eq:and-or-z-prod}
                Z(n_X) = \prod_{Y_{n_{X}} \in ch(n_{X})} Z(Y_{n_X})
            \end{equation}
            where
            \begin{equation}
                Z(Y_{n_X}) = \sum_{n_Y \in ch(Y_{n_X})}  c(n_Y) \cdot Z(n_Y)
            \label{eq2}
            \end{equation}
            With $n_{\varnothing}$ as a dummy root node of $T_{\mc{T}}$, $Z(n_{\varnothing})$ equals the partition function $Z$ of the underlying full model \M. We denote estimation of $Z(n)$ as $\hat{Z}(n)$.  Heuristic estimates of $Z(n)$ are denoted as $h(n)$.

        \paragraph{$\bs{R(n)}$.} \label{sec:ancestor-branching-mass}
             On the path from the root of $T_{\mc{T}}$ to some $n_{X}$, there may be an intermediate node $n_{Y}$ such that its associated variable $Y$ is a branching variable in \PT (for example, $A$ is a branching variable in Figure \ref{fig:psuedo-tree-with-ancestor-branching-mass}).  Whenever this happens, the remaining variables of the model are split between branches. $R(n)$, or the \textit{ancestor branching mass}, captures the $Z(Y_{n_{X}})$ for all $Y$ that branch off of the path to $n_{X}$. In Figure \ref{fig:ancestor-branching-mass}, the dotted green box shows the portion of the search space corresponding to the $R(n)$ for the node highlighted in red.
             %(That same boxed portion would also be the ancestor branching mass for the sibling node of the red node, and also for any of their children).

             More formally, let $branchings(n_{X})$ be the set of nodes $n_{Y}$ on the path to $n_{X}$ such that $Y$ is a branching variable in \PT. Let $W_{n_{Y}}$ be the child OR node of $n_{Y}$ that is also on the path to $n_{X}$.  We define $R(n_{X})$ as:
             \begin{align}
                 \label{eq4}
                 R(n_{X}) =   \prod_{n_{Y} \in branchings(n_{X})} \frac{Z(n_{Y})}{ Z(W_{n_{Y}})}
             \end{align}
             We denote approximations to $R(n)$ as $r(n)$.
        
        \paragraph{$\bs{Q(n)}$.} \label{sec:q-of-a-node}

            Putting it all together, we can now concisely define a quantity $Q(n)$ as the contribution to $Z$ from all full configurations consistent with $path(n)$. In other words, $Q(n)$ is the unnormalized probability of the configuration $path(n)$ based on the distribution defined by \M, with $P(path(n)) = \frac{Q(n)}{Z}$.  $Q(n)$ can be computed simply as:
            \begin{align}
                Q(n) = g(n)  \! \cdot \!  R(n)  \! \cdot \!  Z(n)
            \end{align}
                
             \textbf{Example.} In Figure \ref{fig:ancestor-branching-mass}, consider the path from the root to the node $n_{A= 0,C=1}$ marked in red. Following under $n_{A=0}$ to our node, we see there is an OR node $B_{n_{A=0}}$ that branches off of the path to our red node.
             Thus, $Q(n_{A=0,C=1})$
             \begin{align}
                &= g(n_{A=0,C=1}) \! \cdot \! R(n_{A=0,C=1}) \! \cdot \! Z(n_{A=0,C=1}) \\
                &= g(n_{A=0,C=1}) \mul \;\; Z(B_{n_{A=0}})\;\; \! \cdot \! Z(n_{A=0,C=1}) 
             \end{align}
             
             
        \paragraph{Stratified Importance Sampling.} 
            Abstraction Sampling builds on Importance Sampling and Stratified Sampling. {\em Importance Sampling} (IS) is  a Monte Carlo scheme for approximating likelihood queries over graphical models.
            %\citep{Rubinstein_2007,DBLP:journals/ai/GogateD11,liu2015probabilistic}.
            {\em Stratified Sampling} is a variance reduction technique for sampling a search space by first dividing it into disjoint strata. This can be used with importance sampling to further reduce variance.
            In {\em Stratified Importance Sampling}, we first divide the sample space into $k$ strata of equal area under the distribution $q$, then choose re-weighted representatives from each stratum. %, and uses these representatives to form an estimator over the entire model. 
            In order to maximize reduction in variance, the variance between strata should be maximized (see \citet{rizzo_2007}).
            
            
            \newcommand{\soltree}{\hat{x}_M}
            \newcommand{\parttree}{\bar x}
            
            
    

%%%%%%%%%%%%%%%%%%%%%%%%%5
%%%%%%%%%%%%%%%%%%%%%%%%%5
%%%%%%%%%%%%%%%%%%%%%%%%%5
%%%%%%%%%%%%%%%%%%%%%%%%%5
%%%%%%%%%%%%%%%%%%%%%%%%%5
%%%%%%%%%%%%%%%%%%%%%%%%%5
%%%%%%%%%%%%%%%%%%%%%%%%%5
%%%%%%%%%%%%%%%%%%%%%%%%%5


    \section{Abstraction Sampling}\label{sec:abstraction-sampling}

        {\em Abstraction Sampling} (AS) algorithms \citep{DBLP:conf/uai/BrokaDIK18} apply concepts of Stratified Importance Sampling to sampling over probabilistic graphical models.  
        %An abstraction event in Abstraction Sampling is analogous to sampling representatives from strata in stratified importance sampling and reweighing to account for the rest of the members that were not chosen.  
        Guided by an abstraction
        function $a(\cdot)$ that dictates how nodes of a variable should be partitioned into \textbf{abstract states} (analogous to strata in stratified sampling), Abstraction Sampling iteratively expands a search tree along a variable ordering, %variable by variable, 
       uses $a(\cdot)$ to group nodes into abstract states, and uses an importance-sampling-like process to select an individual representative from each abstract state and reweight it to account for the other members of its group that were not selected.  The chosen nodes are then expanded leading to the generation of a subtree of the full search tree $T_{\mc{T}}$ (called a {\bf probe}) as a sample.
            
        \paragraph{AOAS.}
            Taking Abstraction Sampling further, \citet{kask20-scaling-up-as} introduced algorithm AOAS (\textbf{A}ND/\textbf{O}R \textbf{A}bstraction \textbf{S}ampling) that applied Abstraction Sampling to AND/OR search spaces and significantly improved the performance of AS. AOAS uses a proposal distribution $p(n) \propto q(n) = w(n) \! \cdot \! g(n) \! \cdot \! h(n) \! \cdot \! r(n)$ (see Figure \ref{fig:proposal}), where $g(n)$ is the cost of the path to $n$, $h(n)$ is the heuristic estimate of $Z(n)$, $r(n)$ is the estimate of $R(n)$, and a weight $w(n)$ is applied to account for the nodes abstracted into the path to $n$. A high-level description of the algorithm can be seen in Algorithm \ref{alg:aoas-overview}; a more detailed version and an example probe construction trace (from \citet{kask20-scaling-up-as}) can be found in the Supplemental Materials.

        
        \begin{algorithm}[t!]
                \caption{AOAS Overview}
                \label{alg:aoas-overview}
        
            \begin{enumerate}
                \vspace{2pt}
                \item \textbf{Initialization:}
                    Begin with a dummy root node $r$.
                \item \textbf{Probe Generation:}
                    Proceeding in a DFS manner according to a pseudo tree $\PT$...
                    \begin{enumerate}
                        \item \textbf{Expansion:} \label{alg:aoas-overview:expansion}
                            Generate children nodes $n$ corresponding to the next variable in the DFS ordering of $\PT$. Inherit $w(n)$ from parents and assign appropriate $g(n), h(n), \tn{and } r(n)$ values.
                        \item \textbf{Abstraction:} \label{alg:aoas-overview:abstraction}
                            \begin{enumerate}
                                \item \textbf{Form Abstract States:}
                                    Using $a(\cdot)$, partition newly expanded nodes into abstract states.
                                \item \textbf{Select Representative:}
                                    Using proposal $p(n) \propto q(n)$, stochastically select a representative from each abstract state and reweigh it such that $w(n) \leftarrow \frac{w(n)}{p(n)}$
                            \end{enumerate}
                        \item \textbf{Backtrack:} \label{alg:aoas-overview:backtracking}
                            After reaching a leaf in $\PT$, recursively backtrack until reaching nodes of the next unexplored branch of $\PT$. While backtracking, update parent node $\hat{Z}(n')$ estimates based on its children's $w(n), g(n),$ and $\hat{Z}(n)$ values.
                        \item \textbf{Repeat:}
                            Repeat steps \ref{alg:aoas-overview:expansion}-\ref{alg:aoas-overview:backtracking} until having backtracked all the way to the root node.
                    \end{enumerate}
                \item \textbf{Return:}
                    $\hat{Z} = w(r)\,\hat{Z}(r)$ for the root node $r$.
            \end{enumerate}
        \end{algorithm}



        \begin{figure}[!htb]
            \centering
            \includegraphics[width=0.8\linewidth]{UAI-24/_attachments/images/proposal.png}
            \vspace{-6pt}\caption{The $q(n)$ visualized to show it estimating the mass of nodes previously abstracted (via $w(n)$), the ancestor branching mass (via $r(n)$), the current path cost (via $g(n)$), and the subtree mass (via $h(n)$).}
                    \label{fig:proposal}
        \end{figure}

        
        \paragraph{Existing Abstraction Functions.} \label{sec:abstraction-sampling:existing-abstraction-functions}

            \citet{DBLP:conf/uai/BrokaDIK18} designed abstractions based on assignments to a variable's context $C(X)$, where $C(X)$ is a subset of its ancestors in pseudo-tree $\cal T$ whose assignments uniquely determine the AND/OR subtree below it \citep{DBLP:journals/ai/DechterM07}.  Abstracting nodes together that have the same context configuration ensures that abstracted nodes have the same $Z(n)$. However, the number of all possible assignments to the context, $|\D_{C(X)}|$, is exponential in the context's size and is infeasible to use if the induced-width of the graph is high. Thus, \citet{DBLP:conf/uai/BrokaDIK18,kask20-scaling-up-as} utilize \textit{relaxed} context-based (\textbf{RelCB}) and \textit{randomized} context-based (\textbf{RandCB}) abstractions.  
            
            RelCB is parametrized by a level $j$, selecting the closest $j \! - \! 1$ variables from a variable's context (i.e., its {\em relaxed context}) plus itself. It puts nodes having the same assignment to these $j$ variables in the same abstract state. Assuming domain size of $k$, this results in at most $k^j$ abstract states at each level. 
            
            The randomized scheme, RandCB, considers the entire context, but bounds the number of abstract states per level according to an $nAbs$ parameter.  Using a randomized hashing scheme, each of the resulting $nAbs$ abstract states is randomly associated with a set of possible full context assignments.






    \section{Value-Based Abstractions} \label{sec:ordered-value-based-abstraction-functions}

        We introduce a new framework for abstractions which we call Value-Based Abstractions.  Value-based abstraction functions consist of two parts: (1) a value function $\mu: n \rightarrow \mathbb{R}$ that assigns a real value on a positive scale to nodes $n$ that are to be abstracted, and (2) a partitioning scheme that then abstracts nodes based on $\mu(n)$.  

        % Generic value-ordered abstraction function: partitions nodes into at most
        % nAbs abstract states via a pluggable partitioning function \Psi_o.
        \begin{algorithm}[t]
            \caption{$a_{\tn{\textit{value-ordered}}}$}
            \label{alg:general-ordered-value-based-abstraction-function}
            \begin{footnotesize}
                \SetInd{0.25em}{0.55em}
                \DontPrintSemicolon 
            \Input{A set of nodes $\bs{n}$ to be partitioned into abstract states; an abstraction value function $\mu(\cdot)$;
            %a sorting algorithm $SORT(\cdot)$ that sorts $\bs{n}$ according to $\mu(\cdot)$ and sort order $o$; 
            a parameter $nAbs$ bounding the number of abstract states; a partitioning function $\Psi_{o}(\cdot)$ that partitions $\bs{n}$ into abstract states such that nodes are ordered by $\mu(n)$ according to sort-order $o$}
            \Output{Nodes $\bs{n}$ partitioned into abstract states $\bs{A} = \setst{\bs{A_{i}}}{i \leq nAbs}$ such that sort order $o$ of $\mu(n)$ is maintained across all $\bs{A_{i}}$.
            %as defined in Definition \ref{def:value-ordered-abstraction-function}.
            }
            
            \Begin{
                % $\bs{n^{*}} \leftarrow SORT(\bs{n},\mu,o)$\\
                % No more nodes than the bound: every node becomes its own singleton abstract state.
                \uIf{$|\bs{n}| \leq nAbs$}{
                    $\bs{A} = \setst{\set{n}}{n \in \bs{n}}$\\
                }
                % Otherwise delegate to the partitioning function.
                \uElse{
                    $\bs{A} = \Psi_{o}(\bs{n}, \mu, nAbs)$
                }
                \Return $\bs{A}$       
            }
            \end{footnotesize}
        \end{algorithm}
        
        Algorithm \ref{alg:general-ordered-value-based-abstraction-function} provides a general value-based abstraction scheme that maintains an ordering of nodes according to $\mu(n)$. Assuming the cost of evaluating the value function $\mu(\cdot)$ is not dominating, the complexity is determined by the complexity of the partitioning function used.

        Next we present three value-based abstraction classes, each defining a unique $\mu$.  After that, we will present seven ordered partitioning schemes that, in conjunction with a provided $\mu(\cdot)$, can be used with Algorithm \ref{alg:general-ordered-value-based-abstraction-function} to define a unique value-ordered abstraction function.
        

        \subsection{Value-Based Abstraction Classes} \label{sec:value-based-abstraction-classes}
    
            We introduce three Value-Based Abstraction Classes, each characterized by a unique value function $\mu(\cdot)$ that signifies a notion of similarity between nodes.  
            
            % In this work we present three value-based abstraction classes: Heuristic-Based (HB), HR-Based (HRB), and Q-Based (QB) abstraction value-classes.  Each is motivated by theory in search or sampling discussed in Section \ref{sec:paradigms}, and each can be used with node partitioning schemes (Section \ref{sec:ordered-partitioning-schemes}), which together form a value-ordered abstraction function.
    
        
            \paragraph{Heuristic-Based Abstractions.} \label{sec:value-based-abstraction-classes:HB}
            
                % \begin{quote}
                %     $\mu(n) = h(n)$
                % \end{quote}
                
                Using the motivation of abstracting nodes with similar subtree $Z(n)$ intuited from previous work and concepts of graph search, Heuristic-Based (HB) abstractions use $\mu(n) = h(n)$, where $h(n)$ is a heuristic estimate of $Z(n)$.  Unlike the use of partial or hashed contexts as was used by \citet{DBLP:conf/uai/BrokaDIK18}, heuristic estimates of $Z(n)$ can often provide \textit{quantitative} insight into potential similarities of $Z(n)$ values, and this is particularly true when using wMBE heuristics which provide bounds.
    
                % In conjunction with the node partitioning schemes that will be presented in Section \ref{sec:ordered-partitioning-schemes}, the presented HB abstraction functions aim to form abstractions such that nodes with similar $Z(n)$ are grouped together.
    
    
            \paragraph{HR-Based Abstractions.} \label{sec:value-based-abstraction-classes:HRB}
    
                % \begin{quote}
                %     $\mu(n) = h(n)  \! \cdot \!  r(n)$
                % \end{quote}

                Consider the following definition of ``exact'' abstraction functions:
                \begin{definition}[Exact Abstraction Function]
                     An abstraction function $a(\cdot)$ is exact for an Abstraction Sampling algorithm, AS, if use of $a(\cdot)$ with AS always leads to AS estimates having zero variance and $\hat{Z} = Z$ for every AS probe.
                \end{definition}

                Recall that $h(n)$ is a heuristic estimate of \hyperref[sec:partition-function-of-a-node]{$Z(n)$} and $r(n)$ is an estimate of $n$'s \hyperref[sec:partition-function-of-a-node]{ancestor branching mass $R(n)$}. With AOAS we can say:
                \begin{theorem}[AOAS Exact Abstractions from $h(n)r(n)$ vs. $Z(n)R(n)$ Proportionality] \label{thm:aoas-proportionality-exact-proposal}
                      If an abstraction function $a(\cdot)$ forms abstract states $\bs{A_{i}}$ such that $\forall n \in \bs{A_{i}}, \frac{h(n)r(n)}{Z(n)R(n)} = \; \propto_{i}$ for some $\propto_{i} \in \!\!  \mathbb{R}_{>0}$ when $Z(n)R(n) \in \!\!  \mathbb{R}_{>0}$, or $h(n)r(n) = 0$ otherwise, then it is an exact abstraction function for AOAS. (Proof in Supplemental Materials).
                \end{theorem}
    
                We see that similarity of $\frac{h(n)r(n)}{Z(n)R(n)}$ among nodes in the same abstract state can lead to reduction in variance.  Although this ratio is infeasible to compute, HR-Based (HRB) abstractions use $\mu(n) = h(n)r(n)$ as a surrogate for similarity of this ratio and group nodes accordingly.
    
    
            \paragraph{Q-Based Abstractions.} \label{sec:value-based-abstraction-classes:QB}
    
                % \begin{quote}
                %     $\mu(n) = w(n) \! \cdot \! g(n) \! \cdot \! h(n) \! \cdot \! r(n)$
                % \end{quote}
    
                On the other hand, \citet{rizzo_2007} showed the potential for variance reduction by minimizing variance within strata when the strata are formed to have equal mass under the proposal distribution.  Thus, in Q-Based Abstractions we use $\mu(n) = q(n) \! = \! w(n)g(n)h(n)r(n) \propto p(n)$, where $p(n)$ is the proposal function.
    
                In addition to serving as an un-normalized proposal function, $q(n)$ also estimates $n$'s
                contribution to the overall $Z$. Therefore, $q(n)$ estimates the impact of $n$ (and all previously abstracted nodes that $n$ represents) on the overall $Z$.
                %which could be a valuable quantity to base our choice of nodes on as discussed in Section \ref{sec:paradigms:combined}.
     


        \subsection{Ordered Partitioning Schemes} \label{sec:ordered-partitioning-schemes}

            We now present seven distinct schemes for partitioning nodes into abstract states such that nodes are sorted according to a provided abstraction value function $\mu(\cdot)$. In addition to defining each scheme, we also describe the motivation behind its creation and show its result on the running example presented below.
    
            \paragraph{Running Example} \label{sec:ordered-partitioning-schemes:running-example}  
                As we motivate and describe the various partitioning schemes, we will also provide examples of the abstract states that would result from partitioning nodes with the following $\mu(n)$:
                \begin{align} \label{eq:running-partitioning-example}
                    % \set{
                        1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 10, 100
                    % }
                \end{align}
                into $nAbs=4$ abstract states using each partitioning scheme. 

            For brevity, we have omitted the algorithmic representation of each scheme; however, they are included in the Supplemental Materials for your reference.
                
            \paragraph{\NoCaseChange{simpleVB}.} \label{sec:ordered-partitioning-schemes:simpleVB}
    
                The simpleVB (simple value-based) scheme follows the motivation of grouping nodes of similar value in the same abstract state by a simple 2-step process: 1) first, nodes are ordered by $\mu(n)$ (low to high), and 2) the ordered nodes are partitioned into [approximately] equal cardinality abstract states.
    
                % \textit{Time Complexity:}
                %     Partitioning is achieved via one pass through $|\bs{n}|$ leading to $\mathcal{O}(|\bs{n}|)$ time complexity.
                % \textit{Space Complexity:}
                %     No more than linear space is required.  $\mathcal{O}(|\bs{n}|)$.
                \textit{\hyperref[sec:ordered-partitioning-schemes:running-example]{Running Example}:}
                    \smallset{1.0, 1.1}, \smallset{1.2, 1.3}, \smallset{1.4, 1.5}, \smallset{10, 100}.
                Nodes are partitioned evenly, and through its simplicity this method aims to leverage speed allowing for abstractions to be formed quickly so that many samples can be drawn.
                %\footnotetext{\label{ftn:ordered-schemes-maintain-sort-order}Such that nodes maintain sort order $o$ across all abstract states.}
    
    
    
            \paragraph{\NoCaseChange{minVarVB.}} \label{sec:ordered-partitioning-schemes:minVarVB}
    
                The minVarVB scheme uses Ward's Minimum Variance Hierarchical Clustering, also known as Ward's Method \citep{ward1963}, to cluster nodes into $nAbs$ abstract states. The objective is to minimize the total within variance of $\mu(\cdot)$ across all abstract states.  Ward's Method is an agglomerative hierarchical clustering algorithm that creates a dendrogram by iteratively merging clusters. Ward's Method can be combined with Lance-Williams linear distance updates \citep{LanceWillaims1967-distanceUpdates} to increase efficiency.
                % We include more details on Ward's Method and Lance-Williams linear distance updates in the Supplemental Materials.

                % \textit{Time Complexity:\footnote{\label{ftn:time-complexity-assumes-constant-time-v}Assuming $\mu(n)$ is $\mathcal{O}(1)$ in both time and space.}}
                %     The choice of clusters to merge generally leads to having a $\mathcal{O}(|\bs{n^{*}}|^{3})$ time complexity due to the need to compare pair-wise distances between all clusters at each iteration.  However, in the case where nodes are distributed linearly in one dimension, only neighboring distances need to be considered at each iteration and can be made efficient by use of a priority queue, however since the Lance-Williams distance updates themselves take linear time, once per iteration, the reduced time complexity is still $\mathcal{O}(|\bs{n}|^{2})$.
                % \textit{Space Complexity:\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}
                %     The space complexity is implementation dependent, with most time-efficient variants making use of a distance matrix leading to $\mathcal{O}(|\bs{n}|^{2})$ space complexity.
                \textit{\hyperref[sec:ordered-partitioning-schemes:running-example]{Running Example}:}
                    \smallset{1.0, 1.1, 1.2}, \smallset{1.3, 1.4, 1.5}, \smallset{10}, \smallset{100}.
                In contrast to simpleVB, minVarVB places considerable resources into computing abstractions, potentially leading to fewer samples, but provably forms abstractions that minimize the total within variance of $\mu(n)$ among the abstract states.
    
    
    
            \paragraph{\NoCaseChange{equalDistVB}.} \label{sec:ordered-partitioning-schemes:equalDistVB}
    
                equalDistVB is inspired by the goal of minVarVB and the simplicity of simpleVB.  The scheme works by greedily adding nodes in value order (low to high) into abstract state $\bs{A_{i}}$ until $\sum_{j=1}^{i} \sum_{n \in \bs{A_{j}}} \mu(n) \geq \frac{i \cdot \sum_{n' \in \bs{n}} \mu(n')}{nAbs}$,
                namely until the total sum of node values from $\bs{A_{1}},\dots,\bs{A_{i}}$ reaches or exceeds the $\frac{i}{nAbs}$ quantile.
                
                When paired with the QB abstraction class (see Section \ref{sec:value-based-abstraction-classes:QB}), the equalDistVB scheme also attempts to partition nodes into abstract states of equal mass under the proposal.  This corresponds to the condition in \citet{rizzo_2007}'s Proposition \ref{prop:rizzo-variance-reduction} for stratified importance sampling variance reduction.
    
                % \textit{Time Complexity:\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}
                %     $Z(A_{1...i})$ can be updated progressively in constant time, and thus computation of $P_{i}$ at each iteration can also be done in constant time. Partitioning is achieved via one pass through $|\bs{n}|$ leading to $\mathcal{O}(|\bs{n}|)$ time complexity.
                % \textit{Space Complexity:\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}
                %     No more than linear space is required.  $\mathcal{O}(|\bs{n}|)$.
                \textit{\hyperref[sec:ordered-partitioning-schemes:running-example]{Running Example}:}
                    \smallset{1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 10, 100}, \smallset{}, \smallset{}, \smallset{}.
                Although this method aims to balance the intuitions previously explored without compromising the speed and efficiency of abstract state generation, the running example shows how it can yield potentially undesirable partitionings in the presence of certain distributions of node values.  In this example, the first quantile is only reached after all the nodes have been added to the first abstract state, leaving no nodes remaining to be partitioned into the subsequent abstract states.
    
    
    
            \paragraph{\NoCaseChange{equalDistVB2}.} \label{sec:ordered-partitioning-schemes:equalDistVB2}

                By simply reversing the sort order, equalDistVB2 is able to use the same partitioning strategy as equalDistVB while mitigating some of the overfilling of abstract states seen in the example shown above for equalDistVB.
                
                % \textit{Time Complexity:\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}
                %     $Z(A_{1...i})$ can be updated progressively in constant time, and thus computation of $P_{i}$ at each iteration can also be done in constant time. Partitioning is achieved via one pass through $|\bs{n}|$ leading to $\mathcal{O}(|\bs{n}|)$ time complexity.
                % \textit{Space Complexity:\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}
                %     No more than linear space is required.  $\mathcal{O}(|\bs{n}|)$.
                \textit{\hyperref[sec:ordered-partitioning-schemes:running-example]{Running Example}:}
                    \smallset{100}, \smallset{}, \smallset{}, \smallset{10, 1.5, 1.4, 1.3, 1.2, 1.1, 1.0}.
                Nevertheless, we see that equalDistVB2 can still be subject to over packing of abstract states.  Next we present two more equalDistVB variants that continue to mitigate this artifact.
    
    
    
            \paragraph{\NoCaseChange{equalDistVB3}.} \label{sec:ordered-partitioning-schemes:equalDistVB3}
    
                % \begin{quote}
                %     $o = \tn{high to low}$\\
                %     $\Part{equalDistVB3}$ (Algorithm \ref{alg:psi-equalDistVB3})
                % \end{quote}
    
        %         \begin{algorithm}[t!]
        %             \vspace{-6pt}\caption{$\Part{equalDistVB3}$}
        %             \label{alg:psi-equalDistVB3}
        %             \begin{footnotesize}
        %                 \SetInd{0.25em}{0.55em}
        %                 \DontPrintSemicolon 
        %             \Input{A set of ordered nodes $\bs{n^{*}}$ to be partitioned into $nAbs$ abstract states; a value function $\mu(\cdot)$}
        %             \Output{
    				% With 
    				% %
    				% $Z(\bs{A_{1,...,i}}) = (\sum_{j=1}^{i} \sum_{n' \in \bs{A_{j}}} Z(n')$,
    				% %
    				% $n_{\bs{A_{i}}}^{\tn{last}}$ be the last node in $\bs{A_{i}}$, 
    				% %
    				% and 
    				% %
    				% $P_{i} = \frac{ i \cdot \sum_{n \in \bs{n^{*}}}\mu(n)}{nAbs}$,
    				% %
        %             $\bs{n^{*}}$ partitioned into abstract states\super{\ref{ftn:ordered-schemes-maintain-sort-order}} $\bs{A} = \setst{\bs{A_{i}}}{i \in \set{1,...,nAbs}}$ such that for $i=1,...,nAbs$ in order,
        %             $(\; Z(\bs{A_{1,...,i}}) \geq P_{i} \;)$
        %             $\land$ \\ $(\; (\, \bs{|A_{i}}|=1 \,) \lor (\, Z(\bs{A_{1,...,i}}) - Z(n_{\bs{A_{i}}}^{\tn{last}}) < P_{i} \,) \;)$ }
                    
        %             \Begin{
        %                 $j \leftarrow 1$\\
        %                 \ForEach{$i \leftarrow 1,...,nAbs$}{
        %                     $\bs{A_{i}} = \set{n^{*}_{{j}}}$\\
        %                     $j \leftarrow j+1$\\
        %                     \While{$Z(\bs{A_{1,...,i}}) < P_{i}$}{
        %                         $\bs{A_{i}} \leftarrow A_{i} \cup \set{n^{*}_{{j}}}$\\
        %                         $j \leftarrow j+1$
        %                     }
        %                 }
        %                 $\bs{A} = \cup_{i = 1}^{nAbs} \bs{A_{i}}$\\
        %                 \Return $\bs{A}$       
        %             }
        %             \end{footnotesize}
        %         \end{algorithm}
    
                In order to lessen over packing and ensure abstract states are not left empty, equalDistVB3 modifies equalDistVB2 so that each abstract state is first assigned at least one node by default, after which filling proceeds as in the previous equalDistVB schemes.
                
                % \textit{Time Complexity:\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}
                %     $Z(A_{1...i})$ can be updated progressively in constant time, and thus computation of $P_{i}$ at each iteration can also be done in constant time. Partitioning is achieved via one pass through $|\bs{n}|$ leading to $\mathcal{O}(|\bs{n}|)$ time complexity.
                % \textit{Space Complexity:\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}
                %     No more than linear space is required.  $\mathcal{O}(|\bs{n}|)$.
                \textit{\hyperref[sec:ordered-partitioning-schemes:running-example]{Running Example}:}
                    \smallset{100}, \smallset{10}, \smallset{1.5}, \smallset{1.4, 1.3, 1.2, 1.1, 1.0}.
                Still highly efficient, equalDistVB3 manages to ensure that the provided $nAbs$ granularity is honored, allowing users better control of the search vs. sampling interpolation possible with Abstraction Sampling.
    
    
    
            \paragraph{\NoCaseChange{equalDistVB4}.} \label{sec:ordered-partitioning-schemes:equalDistVB4}
    
        %         \begin{quote}
        %             $o = \tn{high to low}$\\
        %             $\Part{equalDistVB4}$ (Algorithm \ref{alg:psi-equalDistVB4})
        %         \end{quote}
    
        %         \begin{algorithm}[t!]
        %             \vspace{-6pt}\caption{$\Part{equalDistVB4}$}
        %             \label{alg:psi-equalDistVB4}
        %             \begin{footnotesize}
        %                 \SetInd{0.25em}{0.55em}
        %                 \DontPrintSemicolon 
        %             \Input{A set of ordered nodes $\bs{n^{*}}$ to be partitioned into $nAbs$ abstract states; a value function $\mu(\cdot)$}
        %             \Output{
    				% With 
    				% %
    				% $Z(\bs{A_{1,...,i}}) = (\sum_{j=1}^{i} \sum_{n' \in \bs{A_{j}}} Z(n')$,
    				% %
    				% $n_{\bs{A_{i}}}^{\tn{last}}$ be the last node in $\bs{A_{i}}$, 
    				% %
    				% and 
    				% %
    				% $L_{i} = \frac{Z(\bs{n^{*}})-Z(\bs{A_{1,...,i-1}})}{nAbs-i+1}$,
    				% %
        %             $\bs{n^{*}}$ partitioned into abstract states\super{\ref{ftn:ordered-schemes-maintain-sort-order}} $\bs{A} = \setst{\bs{A_{i}}}{i \in \set{1,...,nAbs}}$ such that for $i=1,...,nAbs$ in order,
        %             $(\; Z(\bs{A_{i}}) \geq L_{i} \;)$
        %             $\land$ \\ $(\; (\, \bs{|A_{i}}|=1 \,) \lor (\, Z(\bs{A_{i}}) - Z(n_{\bs{A_{i}}}^{\tn{last}}) < L_{i} \,) \;)$ }
                    
        %             \Begin{
        %                 $j \leftarrow 1$\\
        %                 \ForEach{$i \leftarrow 1,...,nAbs$}{
        %                     $\bs{A_{i}} = \set{}$\\
        %                     \While{$Z(\bs{A_{i}}) < L_{i}$}{
        %                         $\bs{A_{i}} \leftarrow A_{i} \cup \set{n^{*}_{{j}}}$\\
        %                         $j \leftarrow j+1$
        %                     }
        %                 }
        %                 $\bs{A} = \cup_{i = 1}^{nAbs} \bs{A_{i}}$\\
        %                 \Return $\bs{A}$       
        %             }
        %             \end{footnotesize}
        %         \end{algorithm}
    
                The final variant of the equalDist schemes, equalDistVB4, attempts to perform a more even partitioning than the previous variants by recomputing quantiles. Each time the algorithm progresses to processing a new abstract state, the remaining nodes and abstract states are used to compute new quantiles, which are then used to guide filling of the current abstract state.
                
                % \textit{Time Complexity:\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}
                %     $Z(A_{1...i})$ can be updated progressively in constant time, and thus computation of $L_{i}$ at each iteration can also be done in constant time.  Partitioning is achieved via one pass through $|\bs{n}|$ leading to $\mathcal{O}(|\bs{n}|)$ time complexity.
                % \textit{Space Complexity:\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}
                %     No more than linear space is required.  $\mathcal{O}(|\bs{n}|)$.
                \textit{\hyperref[sec:ordered-partitioning-schemes:running-example]{Running Example}:}
                    \smallset{100}, \smallset{10}, \smallset{1.5, 1.4, 1.3}, \smallset{1.2, 1.1, 1.0}.
                Still highly efficient, equalDistVB4 manages to spread nodes with smaller values across abstract states more evenly than the previous versions.
    
    
            \paragraph{\NoCaseChange{randVB}.} \label{sec:ordered-partitioning-schemes:randVB}
    
                It can be beneficial to rely on randomness to ensure a diverse sampling of abstractions.  randVB does this by sampling $nAbs\!-\!1$ partition points uniformly at random and without replacement from between the sorted nodes $\bs{n^{*}}$, and then partitions the nodes accordingly. As a result, abstract states are formed such that nodes are still grouped according to $\mu(\cdot)$, but the size of those groups varies.
                
                % \textit{Time Complexity:\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}
                %     $\mathcal{O}(|\bs{n}|)$ time complexity.
                % \textit{Space Complexity:\super{\ref{ftn:time-complexity-assumes-constant-time-v}}}
                %     No more than linear space is required.  $\mathcal{O}(|\bs{n}|)$.
                \textit{\hyperref[sec:ordered-partitioning-schemes:running-example]{Running Example}:}
                    ex1: \smallset{100, 10}, \smallset{1.5}, \smallset{1.4, 1.3, 1.2}, \smallset{1.1, 1.0};
                    ex2: \smallset{100}, \smallset{10, 1.5, 1.4, 1.3}, \smallset{1.2, 1.1}, \smallset{1.0};
                    ...etc.

            \paragraph{Complexity.} Assuming $\mu(\cdot)$ is $\mathcal{O}(1)$, all above-mentioned partitioning schemes have time complexity $\mathcal{O}(|\bs{n}| \log |\bs{n}|)$ and space complexity of $\mathcal{O}(|\bs{n}|)$, with the exception of minVarVB, which requires $\mathcal{O}(|\bs{n}|^{2})$ for both.





    \section{Empirical Evaluation} \label{sec:empirical-evaluation}

        %%%%%%%%%%%%%%%%%%% AS Algorithms Tested

       
        \paragraph{Setup Overview.}
            All combinations of the three new Value-Based Abstraction Classes: Heuristic-Based \textbf{HB}, HR-Based \textbf{HRB}, and Q-Based \textbf{QB}; with each of the seven Ordered Partitioning Schemes: \textbf{simple}, \textbf{minVar}, \textbf{equalDist1-4}, and \textbf{rand}, were tested, resulting in a total of twenty-one abstraction functions.  For comparison, the formerly evaluated context-based abstraction functions of randCB and relCB were also used. (We abbreviate the context-based class of functions as \textbf{CTX}).  In addition, a pure randomized abstraction function, simpleRand, was also included. (We abbreviate the purely randomized class as \textbf{RAND}).  With the exception of RelCB, each abstraction function accepts a hyper parameter called $nAbs$ which bounds the number of abstract states at any level. RelCB instead uses an $nContext$ parameter that limits the number of context variables used in assigning abstract states.  To facilitate comparison, we report RelCB's $nContext$ parameter instead as an equivalent $nAbs$ parameter assuming a domain size of $2$.  (For example, if RelCB was run using $nContext = 6$, in results we report it as RelCB with $nAbs = 2^{6}$). All of the abstraction functions were tested using the AOAS algorithm \citep{kask20-scaling-up-as}.  All algorithms were implemented in C++. All experiments were run on a 2.66 GHz processor and allotted 8 GB of memory.
        
        
        
        %%%%%%%%%%%%%%%%%%% Heuristic Description
        \paragraph{Heuristics.}
            To inform the sampling proposal, Weighted Mini-Bucket Elimination (wMBE) \citep{DBLP:journals/jacm/DechterR03,DBLP:conf/icml/LiuI11} is used as a heuristic.  The i-bound (\textbf{iB}) parameter controls the strength of wMBE, where higher i-bounds generally lead to stronger heuristics and, thus, better proposals at the expense of higher computation and memory. We standardize our experiments by using the same i-bound when comparing across algorithms. 
        
        
        
        
        %%%%%%%%%%%%%%%%%%% Benchmark Description
        \paragraph{Benchmarks.}
            
            In line with previous work on Abstraction Sampling, we perform experiments on the same set of over 480 problems from five well-known benchmarks (DBN, Grids, Linkage-Type4, Pedigree, and Promedas) used by \citet{kask20-scaling-up-as}. 
            
            We refer to problem instances with known $Z$ values as ``Exact''.  Larger problems for which exact solutions are not known are called ``LARGE''.  For LARGE problems, estimates from 100 hr of context-based Abstraction Sampling (obtained from \citet{kask20-scaling-up-as}) are used as the true $Z$.  When experimenting on Exact problems, algorithms use a small i-bound of 5 (thus weakening heuristic estimates) and are given a short time limit of 300 sec in order to increase difficulty.  For LARGE problems, an i-bound of 10 and a time limit of 1200 sec are used.

            For both brevity and preciseness, we focus on results from the Exact problem instances. 
            % thus here excluding the Linkage-Type4 benchmark whose problems do not have known solutions.  
            Results for LARGE problems can be found in the Supplemental Materials and their trends generally agree with those from the Exact problems.

            
            
            \begin{centering}
            \begin{tabular}{lrrrrr}
              \toprule
              Benchmark &   N &   |\textbf{X}| &     k &          w* &        d \\ 
              \midrule
                    DBN &  66 &      67 &          2 &      29 &      30 \\ 
                  Grids &   8 &     250 &          2 &      22 &      49 \\ 
               Pedigree &  25 &     690 &          5 &      25 &      89 \\ 
               Promedas &  65 &     612 &          2 &      21 &      62 \\ 
              \bottomrule
            \end{tabular}
            \vspace{-6pt}\captionof{table}{
                \textbf{Exact Benchmark Statistics}. Average benchmark statistics for Exact problems (with known $Z$ values). \textbf{N}: number of instances, \textbf{\tabs{X}}: average number of variables per instance, \textbf{k}: average of problems' largest domain sizes, \textbf{w\super{*}}: average induced tree-width, \textbf{d}: average pseudo-tree depth. 
                \label{tbl:small-benchmark-statistics}
            }
            \end{centering}

           \begin{centering}
            \begin{tabular}{lrrrrr}
              \toprule
              Benchmark &   N &   |\textbf{X}| &        k &          w* &        d \\ 
              \midrule
                        DBN &   48 &     216 &        2 &     78 &    78\\
                      Grids &   19 &    3432 &        2 &    117 &   220\\
              Linkage-Type4 &   82 &    6550 &        5 &     45 &   761\\
                   Promedas &  173 &    1194 &        2 &     72 &   114\\
              \bottomrule
            \end{tabular}
            \vspace{-6pt}\captionof{table}{
                \textbf{LARGE Benchmark Statistics}. Average benchmark statistics for LARGE problems (with estimated $Z$ values). \textbf{N}: number of instances, \textbf{\tabs{X}}: average number of variables per instance, \textbf{k}: average of problems' largest domain sizes, \textbf{w\super{*}}: average induced tree-width, \textbf{d}: average pseudo-tree depth. 
                \label{tbl:large-benchmark-statistics}
            }
            \end{centering}
        
        
        %%%%%%%%%%%%%%%%%%% Performance Measure
        
        \paragraph{Performance Measure.}
            To evaluate the performance of the various algorithms, we define error as:    
            $\mathit{Error} = |\log_{10} \hat{Z} - \log_{10} Z^{*}|$,
            where $\hat{Z}$ is the estimate obtained and $Z^{*}$ is the true $Z$ value.



        \subsection{Results} \label{sec:empirical-evaluation:results}
        
            % \subsubsection{Aggregated Results Tables}
            
                \paragraph{Summary Comparison.}

                    To examine the potential of the different methods, we tested each algorithm with a range of $nAbs \in \set{1, 4, 16, 64, 256, 512, 1024, 2048}$. For each $nAbs$ and each benchmark, we calculated the average error across problems of the benchmark and identified the $nAbs$ that resulted in the lowest average error. In Table \ref{tbl:small-aggregations} we focus only on Exact problems and show this lowest average error and the corresponding $nAbs$ for each algorithm and benchmark, highlighting schemes that performed well across all benchmarks.  Table \ref{tbl:large-qb-aggregations} shows the corresponding results for LARGE problems on the better performing QB abstraction class as well as the CTX and RAND classes for comparison.  If an algorithm was unable to produce a positive Monte Carlo $Z$ estimate for a problem (denoted as ``Fail''), the wMBE heuristic bound was used as its $Z$ estimate and error computed accordingly.
    
                    % Tables \ref{tbl:DBN_aggregation}-\ref{tbl:Promedas_aggregation} show aggregated performance of the various Value-Based Abstraction Classes with the various Partitioning Schemes on problems of DBN, Grids, Linkage-Type4, and Promedas benchmarks.

                    \begin{tablefigure*}[!htb]
                        \centering     %%% not \center
                        \begin{subtablefigure}{0.99\linewidth}
                            \includegraphics[width=0.98\linewidth]{UAI-24/_attachments/Results/ALL-SMALL-aggregations-i-5-t-300.pdf}
                            \caption{}
                            \label{tbl:small-aggregations}
                        \end{subtablefigure}
                        \begin{subtablefigure}{0.99\linewidth}
                            \includegraphics[width=0.98\linewidth]{UAI-24/_attachments/Results/QB-CTX-RAND-LARGE-aggregations-i-10-t-1200.pdf}
                            \caption{}
                            \label{tbl:large-qb-aggregations}
                        \end{subtablefigure}
                        \vspace{-6pt}\caption{\textbf{Summary Comparison}. For each table, displayed are the Abstraction Class (\textit{Class}), Partitioning Scheme (\textit{Scheme}), bound on the number of abstract states per level (\textit{nAbs}), number of problems for which a positive solution could not be estimated (\textit{Fail}), and average $\log_{10} Z$ error (\textit{Avg. Error}) across problems of the given benchmark.  Color bars visualize the magnitude of the values. Overall best performing algorithms are highlighted.  (a) shows results on Exact problems. (b) shows results on LARGE problems.}
                        \label{tbl:summary-aggregations}
                    \end{tablefigure*}
    

                \paragraph{Comparison using 100 Samples.} \label{sec:empirical-evaluation:results:aggregation-tables:set-number-of-samples}

                    \begin{tablefigure}[!htb]
                        \centering
                        \includegraphics[width=0.99\linewidth]{UAI-24/_attachments/Results/ALL-SMALL-iB-5-nAbs-256-nR-100-QB-CB-RAND.pdf}
                        \vspace{-6pt}\caption{\textbf{100-Sample Comparison}. For abstraction granularity of $nAbs=256$, aggregated statistics (as described in Table \ref{tbl:summary-aggregations}) for Exact problems of each benchmark with each algorithm allotted 100 samples.}
                        \label{tbl:results:ALL-SMALL-iB-5-nAbs-256-nR-100-QB-CB-RAND}
                    \end{tablefigure}
        
                    To assess the quality of abstraction functions in an implementation-agnostic manner and irrespective of resulting probe-sizes or speed of processing abstractions,
                    %However, as detailed in Section \ref{sec:ordered-partitioning-schemes}, some schemes may exhibit variations in execution time, and implementation differences can contribute to this variability. 
                    % And as discussed in Section \ref{sec:empirical-evaluation:results:abstraction-speed-plot}, probe sizes can also vary. 
                    %Probe sizes can also vary between use of different abstraction functions.
                    %To circumvent these artifacts, 
                    we also conducted experiments using a one-hundred sample termination condition (denoted \textbf{m-100}) rather than a time constraint. Table~\ref{tbl:results:ALL-SMALL-iB-5-nAbs-256-nR-100-QB-CB-RAND} shows these results on Exact problems of each benchmark for the better performing QB algorithms using an abstraction granularity of $nAbs=256$.  We use $nAbs=256$ because (1) it is an intermediate granularity and (2) all schemes were able to produce 100 samples in a reasonable amount of time.  We again highlight the overall best performing schemes.


            \paragraph{Varying \NoCaseChange{nAbs}.}

                \begin{tablefigure}[!htb]
                    \centering
                    \includegraphics[width=0.99\linewidth]{UAI-24/_attachments/Results/varying-nAbs-SMALL-i-5-t-300-best-QB.pdf}
                    \vspace{-6pt}\caption{\textbf{Varying nAbs}. Average error when using $nAbs \in \set{4, 64, 1024}$ for minVarQB, equalDistQB3, equalDistQB4, the CTX based algorithms, and RAND, each with iB-5 and time limit of 300 sec.}
                    \label{tbl:varying-nAbs-SMALL-i-5-t-300-best-QB}
                \end{tablefigure}

                \begin{plotfigure}[!htb]
                    \centering
                    \includegraphics[width=0.99\linewidth]{UAI-24/_attachments/Results/error-vs-nAbs-plot-equalDistQB4-iB-5}
                    \vspace{-6pt}\caption{Varying $nAbs$ for equalDistQB4. Plotted is the average error on Exact problems using iB-5 and time limit of 300 sec for each benchmark and for various abstraction granularities (in $\log_2$).}
                    \label{plt:results:error-vs-nAbs-plot-equalDistQB4-iB-5}
                \end{plotfigure}

                \begin{plotfigure}[!htb]
                    \centering
                    \includegraphics[width=0.99\linewidth]{UAI-24/_attachments/Results/error-vs-nAbs-plot-minVarQB-iB-5}
                    \vspace{-6pt}\caption{Varying $nAbs$ for minVarQB. Plotted is the average error on Exact problems using iB-5 and time limit of 300 sec for each benchmark and for various abstraction granularities (in $\log_2$).}
                    \label{plt:results:error-vs-nAbs-plot-minVarQB-iB-5}
                \end{plotfigure}
                
                In order to observe the effect of changing $nAbs$, Table \ref{tbl:varying-nAbs-SMALL-i-5-t-300-best-QB} shows average error for different $nAbs \in \set{4, 64, 1024}$ for Exact problems of each benchmark.  Here we focus on only the better performing abstraction functions of QB: minVarQB, equalDistQB3, equalDistQB4; the well performing purely randomized scheme: RAND; and also show the context-based schemes (CTX) for comparison. In Plots \ref{plt:results:error-vs-nAbs-plot-minVarQB-iB-5} and \ref{plt:results:error-vs-nAbs-plot-equalDistQB4-iB-5}, we also show average error across a wider array of $nAbs$ for minVarQB and equalDistQB4, respectively, the latter also acting as a representative for the profile of the plots of equalDistQB3 and RAND.



            \paragraph{Time Series Plot.}

                Plots~\ref{plt:results:grid20x20.f15-time-series}--\ref{plt:results:or_chain_209.fg-time-series} show time-series $Z$ estimates for the better performing QB algorithms, the purely randomized scheme, and context-based schemes (CTX) on a representative Grids problem and a representative Promedas problem.  Each algorithm was plotted with the $nAbs$ that resulted in the lowest average error for the problem's respective benchmark.  Each plot line is labeled with the scheme, $nAbs$ used, and the final $Error$ of its estimate.

                \begin{plotfigure}[!htb]
                    \centering
                    \includegraphics[width=0.99\linewidth]{UAI-24/_attachments/Results/grid20x20.f15-time-series.png}
                    \vspace{-16pt}\caption{$Z$ estimates from various algorithms versus time on Grids problem grid20x20.f15 using $iB=5$. The dashed black line shows the true $Z$ value.}
                    \label{plt:results:grid20x20.f15-time-series}
                \end{plotfigure}

                \begin{plotfigure}[!htb]
                    \centering
                    \includegraphics[width=0.99\linewidth]{UAI-24/_attachments/Results/or_chain_209.fg-time-series.png}
                    \vspace{-16pt}\caption{$Z$ estimates from various algorithms versus time on Promedas problem or\us chain\us 209.fg using $iB=5$. The dashed black line shows the true $Z$ value.}
                    \label{plt:results:or_chain_209.fg-time-series}
                \end{plotfigure}

                 



        \subsection{Analysis} \label{sec:empirical-evaluation:analysis}

            \paragraph{Performance Comparison with Context-Based Schemes.}

                Comparing errors of the HB and HRB classes to the CTX class for Exact problems in Table~\ref{tbl:small-aggregations}, we see that there always exists a partitioning scheme that can outperform the best context-based scheme.  For the HB class, the \textit{simple} and \textit{rand} partitioning schemes tend to perform best, whereas for the HRB class it seems to be more benchmark dependent.  The QB scheme with \textit{minVar}, \textit{equalDist3}, and \textit{equalDist4} outperforms the context-based schemes across all benchmark averages.  The purely randomized scheme (RAND) also consistently outperforms the context-based schemes.  Results from Table~\ref{tbl:large-qb-aggregations} on LARGE problems agree, with the exception of the \textit{minVar} QB and RAND schemes, which fall slightly shy of randCB (CTX \textit{rand}) average performance on Promedas.


            \paragraph{Performance Comparison with Purely Randomized Abstractions.}
                Table~\ref{tbl:summary-aggregations} shows RAND as a particularly well performing scheme across all benchmarks.  However, the QB class using \textit{equalDist3} and \textit{equalDist4} partitioning strategies consistently did as well or better than the purely randomized scheme, with no other scheme doing as well or better than these schemes consistently.


            \paragraph{Performance Comparison with Non Abstraction Sampling Schemes.}
                In prior work by \citet{DBLP:conf/uai/BrokaDIK18} and \citet{kask20-scaling-up-as}, Abstraction Sampling using CTX based abstraction functions was shown to be competitive against several commonly used or state-of-the-art schemes such as traditional Importance Sampling (IS), Weighted Mini-Bucket Importance Sampling (wMBIS) \citep{liu2015probabilistic}, IJGP-SampleSearch (IJGP-ss) \citep{DBLP:journals/ai/GogateD11}, and Dynamic Importance Sampling \citep{lou2019interleave}.  Thus, showing superior performance relative to the CTX based schemes implicitly indicates competitive performance to the above-mentioned non Abstraction Sampling schemes as well.


            \paragraph{Abstraction Quality of the QB Schemes.}
                When drawing an equal number of samples with the same abstraction granularity of $nAbs=256$, QB with \textit{equalDist3} and \textit{equalDist4} and RAND are well performing across all benchmarks, as was seen when using a time limit.  However, a key difference we see is that QB with \textit{minVar}, which had shown only slightly worse performance using a time limit, is now best performing when normalizing the number of samples drawn.  This can, in part, explain the success of the QB \textit{equalDist3} and \textit{equalDist4} schemes, which attempt to emulate the QB \textit{minVar} scheme while using faster greedy strategies.


            \paragraph{Anytime Behavior.}
                From Plots \ref{plt:results:grid20x20.f15-time-series} and \ref{plt:results:or_chain_209.fg-time-series} we see that the Abstraction Sampling algorithms continue to improve their estimates as time progresses.  We also notice that estimates are often underestimates that increase over time.
                


            % \subsubsection{The Effect of \NoCaseChange{iB}


            \paragraph{Choice of Abstraction Granularity.}
                From Table~\ref{tbl:varying-nAbs-SMALL-i-5-t-300-best-QB} we see that, for the well performing QB \textit{equalDist3} and \textit{equalDist4} schemes and for the RAND scheme, there is a trend that greater $nAbs$ (corresponding to a greater allotment of abstract states) improves performance to a point and then has little effect.  Plot~\ref{plt:results:error-vs-nAbs-plot-equalDistQB4-iB-5} further supports this for QB with \textit{equalDist4}, for which plots of QB \textit{equalDist3} and RAND have similar profiles (omitted for brevity).  However, in Plot~\ref{plt:results:error-vs-nAbs-plot-minVarQB-iB-5} and Table~\ref{tbl:varying-nAbs-SMALL-i-5-t-300-best-QB} we see that for \textit{minVar} error begins to increase when $nAbs$ becomes too high.  This makes sense due to the higher computational cost of forming \textit{minVar} abstractions (which uses Ward's minimum variance hierarchical clustering), leaving little time for drawing samples.


            \subsubsection{Summary of Results.}
                In summary, we observed that the QB scheme with \textit{equalDist3} or \textit{equalDist4} and the RAND scheme were best performing all around, and showed significantly improved performance with respect to the previous CTX based abstractions.  We also note that these best performing schemes all tend to improve performance as $nAbs$ increases to a point, past which we see little difference in performance.  Thus, given an i-bound, we suggest use of one of these three algorithms with as high an $nAbs$ as possible that still allows time for drawing of samples.




    \section{Conclusion} \label{sec:conlcusion}

        \paragraph{Summary.}
            Through this work, we advance Abstraction Sampling by presenting a new abstraction framework, Value-Based Abstractions, which abstracts nodes based on values along a positive scale.  We introduce three Value-Based Abstraction Classes---HB, QB, and HRB---each defined by a unique value function motivated by paradigms in search and sampling. We also introduce seven Ordered Partitioning Schemes---\textit{simple}, \textit{minVar}, \textit{equalDist}, \textit{equalDist2}, \textit{equalDist3}, \textit{equalDist4}, and \textit{rand}---that partition nodes into abstract states by their values, maintaining value order across abstract states.  We also present a purely randomized abstraction function---RAND---that places nodes into abstract states randomly.  We evaluated the schemes with the AOAS Abstraction Sampling algorithm on over 400 problems from standard benchmarks, characterizing their performance.  Three schemes---QB with \textit{equalDist3}, QB with \textit{equalDist4}, and RAND---were identified as exceptionally strong performers, demonstrating significantly superior performance compared to any abstraction function previously introduced.  Our evaluation also indicates that allotting these abstraction functions a high number of abstract states tends to help them perform best.

        \paragraph{Suggested Extensions.}
            A potentially extensive, but fruitful, followup to this work would be to identify properties of problem instances that play key roles in the quality of Abstraction Sampling estimates, and how identification of these properties can be used to inform choice of abstraction function and hyper-parameters.  Additionally, a deep dive into how different abstraction functions affect probe construction, and how probe structure and characteristics may influence or inform the quality of estimates, would also be a valuable future addition.

        
\clearpage
    % \bibliographystyle{named}
    \bibliography{ref}




\end{document}