\documentclass[accepted]{uai2024} % for initial submission
%\documentclass[accepted]{uai2024} % after acceptance, for a revised version; 
                        
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2024} % ptmx math instead of Computer
                                         % Modern (has noticeable issues)
% \documentclass[mathfont=newtx]{uai2024} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}

%% Some suggested packages, as needed.
%% Each package is listed exactly once, in the order of its first load:
%% LaTeX skips a repeated \usepackage of an already-loaded package, so
%% listing a package again only adds noise to the preamble.
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams
\usepackage{amsfonts}
\usepackage{amsthm}
\usepackage{amsmath, amssymb}
% Use the postscript times font!
\usepackage{times}  % DO NOT CHANGE THIS
\usepackage{helvet}  % DO NOT CHANGE THIS
\usepackage{courier}  % DO NOT CHANGE THIS
\usepackage{caption} % DO NOT CHANGE THIS AND DO NOT ADD ANY OPTIONS TO IT
\usepackage{xcolor}
\usepackage{marvosym}
\usepackage{adjustbox}
\usepackage{subcaption}

\usepackage[utf8]{inputenc}
\usepackage{graphicx}
% TikZ libraries, merged into a single list.
\usetikzlibrary{automata, positioning, fit}
\usepackage{multirow}
\usepackage{enumitem}
\usepackage{algorithm}
\usepackage{algorithmic}

% the following package is optional:
%\usepackage{latexsym}

% See https://www.overleaf.com/learn/latex/theorems_and_proofs
% for a nice explanation of how to define new theorems, but keep
% in mind that the amsthm package is already included in this
% template and that you must *not* alter the styling.
% Theorem-like environments. No shared-counter argument is given, so each
% environment is numbered by its own independent counter.
\newtheorem{example}{Example}
\newtheorem{remark}{Remark}
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem{definition}{Definition}
\newtheorem{proposition}{Proposition}
\newtheorem{assumption}{Assumption}
\newtheorem{problem}{Problem}
\newtheorem{claim}{Claim}
\newtheorem{corollary}{Corollary}

% Limit-style operators: the starred form sets sub/superscripts as limits
% (underneath the operator in display math). A thin space (\,) separates
% "arg" from the operator name so the two words do not run together.
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argsup}{arg\,sup}

% Inline co-author review notes, typeset as a colored "[INITIALS: text]" tag.
% \coauthornote{<color>}{<initials>}{<text>} is the shared implementation;
% the per-author commands below only fix the color and the initials.
\newcommand{\coauthornote}[3]{\textcolor{#1}{[#2: #3]}}
\newcommand{\qh}[1]{\coauthornote{purple}{QH}{#1}}
\newcommand{\ml}[1]{\coauthornote{blue}{ML}{#1}}
\newcommand{\zs}[1]{\coauthornote{teal}{ZS}{#1}}
%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
% \newcommand{\swap}[3][-]{#3#1#2} % just an example

% \title{Sound and Efficient Algorithms for POMDPs with Reachability Objectives via Trial-Based Heuristic Search \qh{any ideas for title?}}

% Title of the main paper (a second \title is issued later for the
% supplementary material).
\title{Sound Heuristic Search Value Iteration for Undiscounted POMDPs with Reachability Objectives}

% Authors. The bracketed number pairs each author with the \affil entry
% carrying the same number. The first author's name is a mailto link.
\author[1]{\href{mailto:<qi.ho@colorado.edu>?Subject=Your UAI 2024 paper}{Qi Heng Ho}{}}
\author[2]{Martin S. Feather}
\author[2]{Federico Rossi}
\author[1]{Zachary Sunberg}
\author[1]{Morteza Lahijanian}
% Affiliation 1.
\affil[1]{%
    Department of Aerospace Engineering Sciences\\
    University of Colorado Boulder\\
    Boulder, Colorado, USA
}
% Affiliation 2.
\affil[2]{%
    Jet Propulsion Laboratory\\
    California Institute of Technology\\
    Pasadena, California, USA
}

\begin{document}
\maketitle

\begin{abstract}
  Partially Observable Markov Decision Processes (POMDPs) are powerful models for sequential decision making under transition and observation uncertainties. This paper studies the challenging yet important problem in POMDPs known as the (indefinite-horizon) Maximal Reachability Probability Problem (MRPP), where the goal is to maximize the probability of reaching some target states.  This is also a core problem in model checking with logical specifications and is naturally undiscounted (discount factor is one). Inspired by the success of point-based methods developed for discounted problems, we study their extensions to MRPP. Specifically, we focus on trial-based heuristic search value iteration techniques and present a novel algorithm that leverages the strengths of these techniques for efficient exploration of the belief space (informed search via value bounds)
  while addressing their drawbacks in handling loops for indefinite-horizon problems. The algorithm produces policies with two-sided bounds on optimal reachability probabilities. We prove convergence to an optimal policy from below under certain conditions. Experimental evaluations on a suite of benchmarks show that our algorithm outperforms existing methods in almost all cases in both probability guarantees and computation time.
\end{abstract}

% Main body of the paper. Each section is kept in its own file under
% sections/ and is included here in reading order.
\input{sections/Introduction}
\input{sections/Related}
\input{sections/Preliminaries}
\input{sections/Problem}
\input{sections/Difficulty}
\input{sections/Methodology}
\input{sections/Analysis}
\input{sections/Evaluations}
\input{sections/Conclusion}

% \begin{contributions} % will be removed in pdf for initial submission 
% 					  % (without ‘accepted’ option in \documentclass)
%                       % so you can already fill it to test with the
%                       % ‘accepted’ class option
%     Briefly list author contributions. 
%     This is a nice way of making clear who did what and to give proper credit.
%     This section is optional.

%     H.~Q.~Bovik conceived the idea and wrote the paper.
%     Coauthor One created the code.
%     Coauthor Two created the figures.
% \end{contributions}

\begin{acknowledgements}
    This work was supported by Strategic University Research Partnership (SURP) grants from the NASA Jet Propulsion Laboratory (JPL) (RSA 1688009 and 1704147).
    Part of this research was carried out at JPL, California Institute of Technology, under a contract with the National Aeronautics and Space Administration (80NM0018D0004).
\end{acknowledgements}

% References (the bibliography database is named "bib")
\bibliography{bib}

% Supplementary material: start on a fresh page ...
\newpage

% ... switch to a single-column layout ...
\onecolumn

% ... and print a separate title block marked "(Supplementary Material)".
\title{Sound Heuristic Search Value Iteration for Undiscounted POMDPs with Reachability Objectives \\(Supplementary Material)}
\maketitle

% Everything from here on is appendix content.
\appendix
\input{sections/Appendix}

\end{document}
