%\documentclass{uai2025}% for initial submission
\documentclass[accepted]{uai2025} % after acceptance, for a revised version; 
% also before submission to see how the non-anonymous paper would look like 
                        
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2025} % ptmx math instead of Computer
                                         % Modern (has noticeable issues)
% \documentclass[mathfont=newtx]{uai2025} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
%\usepackage[american]{babel}
\usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

\usepackage{amsmath}
\usepackage{colonequals}
\usepackage{amssymb}
\usepackage{xcolor}
\usepackage{amsthm}
\usepackage{etoolbox}
\usetikzlibrary{arrows,shapes,calc,automata,positioning}
\usepackage{pgfplots}
\pgfplotsset{compat=newest}
\input{plots.tex}

\usepackage{nicefrac}
\usepackage{multirow}
\usepackage{ifthen} % provides \ifthenelse and \equal (used by \act and \config)
\usepackage{xspace} % provides \xspace (used by \tool)

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
% \swap[sep]{a}{b}: typesets b, then the separator (default `-'), then a.
\newcommand{\swap}[3][-]{#3#1#2} % just an example

% MACROS
% Generic notation helpers. Macros wrapped in \ensuremath can be used in
% text and in math mode; the remaining ones are math-mode only.
% \tool{name}: name in small caps; \xspace (package xspace) decides whether
% a space follows.
\newcommand{\tool}[1]{\textsc{#1}\xspace}
\newcommand{\dist}{\ensuremath{\mu}}
% \dists{X}: set of distributions over X. Only the operator name is set in
% \mathit, so parentheses and argument are typeset as in \supp below.
\newcommand{\dists}[1]{\ensuremath{\mathit{Dist}(#1)}}
\newcommand{\supp}[1]{\ensuremath{\mathit{supp}(#1)}}
% \set{...}: the whole braced set is passed to \ensuremath; without the
% extra group only the opening brace would be, breaking use in text mode.
\newcommand{\set}[1]{\ensuremath{\{#1\}}}

% Tuples in angle brackets; the big variants use \bigl/\bigr so that the
% delimiters get opening/closing spacing.
\newcommand{\tuple}[1]{\ensuremath{\langle #1 \rangle}}
\newcommand{\bigtuple}[1]{\bigl\langle#1\bigr\rangle}
\newcommand{\Bigtuple}[1]{\Bigl\langle#1\Bigr\rangle}
% \tupleaccess{x}{i}: x[i], the i-th entry of tuple x.
\newcommand{\tupleaccess}[2]{{#1}[#2]}
% \myfrac{a}{b}: inline fraction a/b.
\newcommand{\myfrac}[2]{#1/#2}
\newcommand{\divides}{\mathbin{\big|}}
\newcommand{\positiveNats}{\ensuremath{(\mathbb{N}\setminus\{0\})}}

% MDP
% Notation for Markov decision processes. Macros wrapped in \ensuremath can
% be used in text and in math mode.
\newcommand{\mdp}{\ensuremath{M}}
\newcommand{\examplemdp}{\ensuremath{M^{ex}}}
\newcommand{\exampletransitions}{\ensuremath{\mathbf{P}^{ex}}}
\newcommand{\states}{\ensuremath{S}}
% \numstates and \numbeliefswithobs have no \ensuremath: math mode only.
\newcommand{\numstates}{n}
\newcommand{\numbeliefswithobs}[1]{n_{#1}}
\newcommand{\actions}{\ensuremath{\mathit{Act}}}
\newcommand{\transitions}{\ensuremath{\mathbf{P}}}
% \sinit[x]: initial state; the optional argument is inserted directly after
% the base symbol s, before the `init' subscript.
\newcommand{\sinit}[1][]{\ensuremath{{s#1_{\mathit{init}}}}}
% \mdptuple: the tuple <S, Act, P, s_init>.
\newcommand{\mdptuple}{\ensuremath{ \left\langle\states, \actions, \transitions, \sinit\right\rangle }}
\newcommand{\state}{\ensuremath{s}}
\newcommand{\action}{\ensuremath{a}}
% \act{x}: Act(x); with an empty argument only Act is printed.
% Relies on \ifthenelse and \equal from package ifthen.
\newcommand{\act}[1]{\ensuremath{\mathit{Act}\ifthenelse{\equal{#1}{}}{}{(#1)}}}
% \post[model]{x}{y}: post^{model}(x,y); the model defaults to \mdp.
\newcommand{\post}[3][\mdp]{\ensuremath{\mathit{post}^{#1}(#2,#3)}}
\newcommand{\predecessors}[1]{\ensuremath{\mathit{pre}(#1)}}
\newcommand{\mc}{\ensuremath{MC}}
% \mctuple: the tuple <S, P, s_init> (no action set).
\newcommand{\mctuple}{\ensuremath{ \tuple{\states, \transitions, \sinit} }}

% POMDP
% Notation for partially observable MDPs.
\newcommand{\pomdp}{\ensuremath{\mathcal{M}}}
\newcommand{\examplepomdp}{\ensuremath{\mathcal{M}^{ex}}}
\newcommand{\observations}{\ensuremath{Z}}
\newcommand{\observation}{\ensuremath{z}}
\newcommand{\obsfunction}{\ensuremath{\mathbf{O}}}
% \obsof{x}: O(x), the observation function applied to x.
\newcommand{\obsof}[1]{\obsfunction(#1)}
% \pomdptuple: the tuple <M, Z, O>.
\newcommand{\pomdptuple}{\ensuremath{\left\langle\mdp,\observations,\obsfunction\right\rangle}}

% \transition{s}{a}{s'}: s --a--> s', with the label set above the arrow.
\newcommand{\transition}[3]{\ensuremath{{{#1}\xrightarrow{#2}{#3}}}}

% Belief MDP
% Notation for beliefs and the belief MDP.
\newcommand{\beliefs}{\ensuremath{\mathcal{B}}}
\newcommand{\beliefswithobs}[1]{\ensuremath{\mathcal{B}_{#1}}}
\newcommand{\beliefstates}{\ensuremath{B}}
% \belieftransitions: the transition symbol with superscript B.
\newcommand{\belieftransitions}{\ensuremath{\transitions^\beliefstates}}
\newcommand{\belief}{\ensuremath{b}}
% \beliefstate is an alias for \belief.
\newcommand{\beliefstate}{\belief}
\newcommand{\binit}{\ensuremath{\beliefstate_{\mathit{init}}}}
% \beliefmdp{M}: bel(M).
\newcommand{\beliefmdp}[1]{\ensuremath{\mathit{bel}(#1)}}
% NOTE(review): \rewards is not defined in the visible part of this file;
% using \beliefrewards fails unless it is defined elsewhere -- confirm.
\newcommand{\beliefrewards}{\ensuremath{\rewards^\beliefstates}}

% \successorbelief{b}{a}{z}: succ(b,a,z).
\newcommand{\successorbelief}[3]{\ensuremath{\mathsf{succ}(#1,#2,#3)}}


% Paths
% Notation for paths and observation traces.
\newcommand{\genpath}{\ensuremath{\pi}}
% Infinite paths carry a tilde, finite paths a hat.
\newcommand{\infpath}{\ensuremath{\tilde{\pi}}}
\newcommand{\finpath}{\ensuremath{\hat{\pi}}}
% \inducedpath{x}{y}: pi with subscript x and superscript y.
\newcommand{\inducedpath}[2]{\ensuremath{\pi_{#1}^{#2}}}
\newcommand{\first}[1]{\ensuremath{\mathit{first}(#1)}}
\newcommand{\last}[1]{\ensuremath{\mathit{last}(#1)}}
% Path sets; the argument becomes the superscript.
\newcommand{\infpaths}[1]{\ensuremath{\mathit{Paths}_\mathrm{inf}^{#1}}}
\newcommand{\finpaths}[1]{\ensuremath{\mathit{Paths}_\mathrm{fin}^{#1}}}
\newcommand{\paths}[1]{\ensuremath{\mathit{Paths}^{#1}}}
% \stateofpath{p}{i}: p[i];  \lengthofpath{p}: |p|.
\newcommand{\stateofpath}[2]{\ensuremath{#1[#2]}}
\newcommand{\lengthofpath}[1]{\ensuremath{|#1|}}

\newcommand{\obstrace}{\ensuremath{\tau}}
\newcommand{\obstraces}[1]{\ensuremath{\mathit{ObsTraces}^{#1}}}

% Policies
% \pol is a single policy; \pols{M} and \obspols{M} are Sigma with the
% argument as superscript (the latter with an additional `obs' subscript).
\newcommand{\pol}{\ensuremath{\sigma}}
\newcommand{\pols}[1]{\ensuremath{\Sigma^{#1}}}
\newcommand{\obspols}[1]{\ensuremath{\Sigma^{#1}_\textnormal{obs}}}

% Rewards (New!)
% Notation for cost structures and cost bounds. Macros without \ensuremath
% are math-mode only.
\newcommand{\costdim}{k}
% \costdom: N^k, with k taken from \costdim.
\newcommand{\costdom}{\mathbb{N}^\costdim}
\newcommand{\coststruct}{\ensuremath{\mathbf{C}}}
\newcommand{\costthresh}{\mathsf{t}}
\newcommand{\costval}{\mathsf{c}}
\newcommand{\costrel}{\bowtie}
% Cost bound "C rel t", without and with surrounding parentheses.
\newcommand{\costtriplenobracket}{\coststruct\costrel\costthresh}
\newcommand{\costtriple}{(\costtriplenobracket)}
\newcommand{\cumurew}{\ensuremath{\mathsf{cost}}}
\newcommand{\costvectorset}{\ensuremath{\Gamma}}

% Levels
% Notation for cost levels; built from the cost macros \costthresh and
% \costrel. Macros without \ensuremath are math-mode only.
\newcommand{\leveldegree}{\mathsf{d}}
\newcommand{\levelfunc}{\mathit{lvl}}
\newcommand{\levelcosts}{\mathbf{L}}
% \levelthresh: the threshold symbol with the level degree as subscript.
\newcommand{\levelthresh}{\costthresh_{\leveldegree}}
% Level bound "L rel t_d", without and with surrounding parentheses.
\newcommand{\leveltriplenobracket}{\levelcosts \costrel \levelthresh}
\newcommand{\leveltriple}{(\leveltriplenobracket)}
\newcommand{\leveltarget}{{T_\leveldegree}}
% \costEventually{bound}: lozenge with the mandatory argument as subscript;
% pass {} for an empty subscript.
\newcommand{\costEventually}[1]{\ensuremath{\lozenge_{#1}}}

\newcommand{\upperbound}{\ensuremath{V^U}}


% Cost epochs
\newcommand{\epochs}{\ensuremath{\mathsf{E}}}
\newcommand{\epoch}{\ensuremath{\mathsf{e}}}
\newcommand{\bottomepoch}{\ensuremath{\bot}}
% \einit expands to the threshold symbol \costthresh.
\newcommand{\einit}{\costthresh}
\newcommand{\actBound}{\ensuremath{\mathsf{actv}}}


% Unfolding constructions and cost-aware policies
% \unfoldingmdp{bound}{model}: un_{bound}(model). The argument group is
% closed before the closing parenthesis, as in \costawaretransform.
\newcommand{\unfoldingmdp}[2]{\ensuremath{\mathsf{un}_{#1}({#2})}}
% \costawareunfoldingmdp{bound}{model}: cau_{bound}(model).
\newcommand{\costawareunfoldingmdp}[2]{\ensuremath{\mathsf{cau}_{#1}({#2})}}
\newcommand{\costawarepol}{\ensuremath{\rho}}
\newcommand{\costawarepols}{\ensuremath{\Sigma_{\mathit{ca}}}}
\newcommand{\poltransform}{\ensuremath{\Theta}}
% \costawaretransform{bound}{model}: ca_{bound}(model) with auto-sized
% parentheses.
\newcommand{\costawaretransform}[2]{\ensuremath{\mathsf{ca}_{#1}\left({#2}\right)}}

% Text-mode helpers
\newcommand{\storm}{\textsc{Storm}}
% \quest{n}: "(Qn)" with a bold Q.
\newcommand{\quest}[1]{(\textbf{Q}#1)}
% \model{name}: name in sans serif.
\newcommand{\model}[1]{\textsf{#1}}

% \config[sub]{name}: name in small caps, followed by a math subscript if
% the optional argument is non-empty. Relies on \ifthenelse and \equal
% from package ifthen.
\newcommand{\config}[2][]{\ifthenelse{\equal{#1}{}}{\textsc{#2}}{\textsc{#2}$_{#1}$}}

% Theorem-like environments (amsthm). Each numbered environment is declared
% without a shared counter, so each is numbered independently.
\newtheorem{problem}{Problem}
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem{corollary}{Corollary}
\newtheorem{proposition}{Proposition}
% Starred form: remarks are unnumbered.
\newtheorem*{remark}{Remark}


\newtheorem{definition}{Definition}

\title{Multi-Cost-Bounded Reachability Analysis of POMDPs}

% The standard author block has changed for UAI 2025 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
% The mailto target is the bare address: angle brackets are not part of a
% mailto URI.
\author[1]{\href{mailto:alexander.bork@cs.rwth-aachen.de?Subject=Your UAI 2025 paper}{Alexander Bork}{}}
\author[1]{Joost-Pieter Katoen}
\author[1]{Tim Quatmann}
\author[1]{Svenja Stein}
% Add affiliations after the authors
\affil[1]{%
    RWTH Aachen University\\
    Aachen, Germany
}

\begin{document}
\maketitle
\begin{abstract}
We consider multi-dimensional cost-bounded reachability probability objectives for partially observable Markov decision processes (POMDPs).
The goal is to compute the maximal probability to reach a set of target states while simultaneously satisfying specified bounds on incurred costs.
Such objectives generalise well-studied POMDP objectives by allowing multiple upper and lower bounds on different cost or reward measures, e.g.~to naturally model scenarios where an agent acts under limited resources.
We present a reduction of the multi-cost-bounded problem to unbounded reachability probabilities on an unfolding of the original POMDP.
We employ a refined approach in case the agent is cost-aware---i.e.,~collected costs are fully observed---and also consider a setting where only partial information about the collected costs is known.
% 
Our approaches elegantly lift existing results from the fully observable MDP case to POMDPs.
An empirical evaluation shows the potential of analysing POMDPs under multi-cost-bounded reachability objectives in practical settings.
\end{abstract}

\section{Introduction}
\label{sec:intro}
\emph{Partially observable Markov decision processes (POMDPs)} are a powerful modelling formalism for sequential decision making in uncertain domains where non-determinism is present. They extend \emph{Markov decision processes (MDPs)} \citep{Put94} for agents that in addition to uncertain transitions also only have incomplete information about the state of the environment \citep{smallwood1973,russell2020}.
POMDPs have applications in a plethora of domains, including robotics \citep{spaan2004}, ecology \citep{chades2012}, and cyber security \citep{miehling2018}.

The classical planning problem in POMDPs is to compute a \emph{policy}---a plan for resolving non-determinism in the system---that optimises a given objective. 
This problem is notoriously difficult for many kinds of objectives. 
Various approaches consider \emph{finite horizons}, where the objective has to be satisfied in a finite number of steps \citep{smallwood1973}, or \emph{discounting}, where events in later stages become less relevant \citep{smith2004, kurniawati2008, shani2013}.

In recent years, work focusing on objectives \emph{without discounting} over an \emph{infinite} time horizon has emerged \citep{norman2017,horak2018,bork2022,andriushchenko2022,andriushchenko2023,ho2024}.
One commonly considered objective is to compute the maximal probability to reach a set of target states.
In the field of probabilistic model checking, this \emph{maximal reachability probability} objective is the basis for the analysis of models with respect to more involved logical specifications such as linear time temporal logic \citep{baier2008}.

\paragraph{Expected Costs vs.\ Cost-Bounded Reachability}
In many practical scenarios, the objective to reach a target is subject to hard constraints on resources. For example, an autonomous vehicle navigating towards a goal position has to consider its fuel level and emission levels of pollutants.
A policy minimising the \emph{expected} fuel and emission costs 
does not take the resource limits into account, which may lead to the vehicle running out of fuel with unnecessarily high probability.
In such scenarios, a policy that maximises the \emph{probability} of a successful run---where the vehicle reaches the goal without running out of fuel and within pollution limits---is preferable.
Thus, objectives that constrain the \emph{expected} costs fail to capture scenarios where satisfying hard constraints on actually incurred costs is key for a run to be successful.

\emph{(Multi-)cost-bounded reachability probability} objectives characterise such scenarios where a strict adherence to certain resource constraints is crucial.
Numerical costs (or rewards) are assigned to transitions in the POMDP.
The goal of the agent is to maximise its probability to reach the target states while satisfying all bounds on the \emph{actual} accumulated costs.
In the most general setting, both upper and lower bounds on multiple cost measures can be considered simultaneously.
Cost-bounded reachability objectives have mainly been studied for (fully observable) MDPs \citep{hahn2016, klein2018, hartmanns2020}.

\paragraph{Motivating Example}
\begin{figure}[t]
\centering\includegraphics{motivating_example_new.pdf}
\caption{Hallway Cleaning Task}
\label{fig:motivation}
\end{figure}
Consider the scenario depicted in Figure~\ref{fig:motivation}.
A robot is tasked with cleaning a hallway consisting of 6 tiles, all initially dirty.
The robot starts in the left-most tile with an energy level of $60$ units.
In every step, the robot can either attempt to clean the current tile or move to the next tile.
A cleaning attempt can fail with a probability of $0.2$, leaving the tile dirty.
The robot, however, is not able to observe if it has successfully cleaned a tile.
Moving to the next tile always consumes one unit of energy.
A cleaning attempt consumes either $2$ or $4$ units of energy, each with probability $0.5$.
The robot is successful if it cleans all $6$ tiles and reaches the target position by moving in the right-most tile of the hallway without running out of energy.

Our goal is to find a policy for the robot that schedules in each step the best action based on the available information in order to maximise the probability that the task succeeds. 
We can model the scenario as a POMDP with two cost measures $\coststruct_\mathrm{energy}$ and $\coststruct_\mathrm{clean}$ (the latter assigning a cost of $1$ when a tile is successfully cleaned and $0$ otherwise), and the cost-bounded reachability query ``\emph{maximise the probability to reach the target while accumulating at most $60$ cost units for $\coststruct_\mathrm{energy}$ and at least $6$ cost units for $\coststruct_\mathrm{clean}$}.''

Usual POMDP objectives fail to capture the objective we are interested in. 
In particular, minimising the expected energy use or maximising the expected number of cleaned tiles fails to consider the respective other constraint and does not accurately reflect the hard requirements on incurred costs.
A multi-cost-bounded objective with cost bounds on both measures, however, adequately reflects our objective.

\paragraph{Contributions}
We formalise the problem of multi-cost-bounded reachability probability objectives on POMDPs and observe undecidability of its decision variant.
We then consider three variations of this problem that all differ in the degree of observability of the accumulated cost so far. 
(As usual, we assume that the observations in the POMDP are visible to the decision-making agent.)
For the extreme case in which the accumulated costs are completely invisible, we provide a transformation to an equivalent unbounded reachability problem on an often larger POMDP. 
The key concept is to encode cost collection in the state space. The resulting \emph{cost-unfolding POMDP} can then be analysed using existing approximation methods for unbounded infinite-horizon reachability problems.

We then consider the other extreme case in which the accumulated cost is fully observable by the agent and can be used in its decision making. 
We show that the sequential approach of \citet{hartmanns2020} for multi-cost-bounded reachability in MDPs, i.e.,~fully observable POMDPs, can be readily lifted to this setting of \emph{cost-aware} POMDPs.
Furthermore, we consider the novel setting where the agent cannot observe the exact costs gathered so far, but only certain \emph{cost levels}
(e.g.~high, mid and low).
We show that this setting can often be reduced to the analysis of a cost-aware POMDP.

Our algorithmic solutions are designed such that they leverage strengths of existing and (time- and space-)efficient techniques, and are thus able to directly profit from future advancements in unbounded reachability in POMDPs and cost-bounded reachability in MDPs.

We provide detailed proofs for the main theoretical results and additional technical information in the appendix.

\subsection{Related Work}
Multi-cost-bounded reachability objectives for (fully observable) MDPs are well-studied \citep{ohtsubo2004, baier2014, randour2017,hahn2016,klein2018,hartmanns2020}.
We focus on related research dedicated to partially observable models.

Most closely related to our paper is the work on \emph{risk-sensitive POMDPs} \citep{hou2016} where a special case of a cost-bounded reachability objective is considered. In particular, the authors present an unfolding of cost-bounds on the level of beliefs, similar to our unfolding on the level of the POMDP. They also consider the case of observable costs. 
However, our framework is more general by allowing for \emph{multiple} bounds over different cost measures as well as mixtures of upper and lower bounds. In addition, our unfolding allows the use of non-belief-based solution methods for the problem.

\citet{wu2019} consider an unfolding that encodes cost and step bounds in the belief space to solve a classification problem for \emph{hidden model MDPs}, which are a special class of POMDPs. 
In that setup, the goal of an agent is to find out in which specific instance of structurally similar MDP environments it is located while not exceeding certain costs.

Another related formalism is that of constrained POMDPs (CPOMDPs) \citep{isom2008,poupart2015,santana16}.
In a CPOMDP, the objective is to maximise the expected sum of rewards subject to a bound on the \emph{expected} cumulative costs.
While classically, the setting considers discounted values with upper bounds on expected costs, there are extensions to an undiscounted setting \citep{kalagarla2025}.
Furthermore, \citet{undurti2010} consider a setting where violations of bounds for expected and actual costs coincide.
In contrast, our work considers mixtures of different bound types on the actual incurred costs and does not make assumptions on the POMDP or the cost bounds.

\citet{chatterjee2016} consider the setting of minimising expected costs over all policies in a POMDP that reach a target \emph{almost surely}, i.e.,~with probability $1$. This can be considered a related problem where a strict bound is placed on the reachability probability rather than the incurred costs.

\section{Preliminaries}
We briefly outline the theoretical background for POMDPs. Further details can be found in \citet{russell2020}. \citet[Chapter 10]{baier2008} gives an introduction to MDPs from a formal methods perspective.

Let $X \neq \emptyset$ be a countable set.
A \emph{(probability) distribution} over $X$ is a function $\dist: X \to [0,1]$ with $\sum_{x \in X} \dist(x) = 1 $.
$\dists{X}$ is the set of distributions over $X$.
We write $x \in \dist$ if $\dist(x) > 0$.
The \emph{support} of $\dist$ is $\supp{\dist} \colonequals \{x \mid x \in \dist \}$.
For $k \in \mathbb{N}$ and \emph{vector} $\mathsf{x} = \tuple {x_1, \dots, x_k} \in X^k$, we write $\tupleaccess{\mathsf{x}}{i} = x_i$ for the $i$-th element ($1 \le i \le k$).

\paragraph{MDP}
A \emph{Markov decision process (MDP)} is a tuple $\mdp = \mdptuple$ with a (finite or countably infinite) set $\states$ of states, a finite set $\actions$ of actions, a transition function $\transitions \colon \states \times \actions \to \dists{\states}$, and an initial state $\sinit \in \states$.
In every state $s$, an agent making decisions in the MDP chooses an action $\action \in \actions$ and the state is updated to state $s'$ with probability $\transitions(s,\action)(s')$. If $s' \in \transitions(s,\action)$, we call $(s,\action,s')$ a transition and write $\transition{s}{\action}{s'}$.

\paragraph{POMDP}
A \emph{partially observable MDP (POMDP)} is a tuple $\pomdp = \pomdptuple$, where $\mdp$ is the underlying MDP with $|\states| \in \mathbb{N}$, i.e.,~$\states$ is finite, $\observations$ is a finite set of observations, and $\obsfunction \colon \states \times \actions \times \states \to \dists{\observations}$ is an observation function.

In a POMDP, the agent does not have complete access to the current state of the system to base its decisions on.
Instead, upon taking a transition $\transition{s}{\action}{s'}$ the agent receives an \emph{observation} $\observation$ with probability $\obsfunction(s,\action,s')(\observation)$.
% 
A \emph{(finite, initial) path} in an MDP or POMDP is a sequence $\finpath = \state_0 \action_1\state_1 \ldots \action_n \state_n$ with $s_0 = \sinit$ such that for all $0 < i \leq n$ we have $\state_{i} \in \transitions(\state_{i-1},\action_i)$.
We denote by $|\finpath| \colonequals n$ the length, by $\finpath[i] \colonequals s_i$ the $i$-th state, and by $\last{\finpath} \colonequals s_n$ the last state of $\finpath$.
An \emph{observation trace} of a POMDP is a sequence of observations $\obstrace= \observation_1\ldots\observation_n \in \observations^*$.
Given a path $\finpath = \state_0 \action_1\state_1 \ldots \action_n \state_n$, the probability of observing trace $\obstrace = \observation_1\ldots\observation_n$ in $\pomdp$ is 
$P^{\pomdp}(\obstrace | \finpath) = \prod_{i=0}^{n-1} \obsfunction(s_i,\action_{i+1}, s_{i+1})(\observation_{i+1}).$

\emph{Policies} resolve the non-determinism of POMDPs by determining the next action to play after observing an observation trace $\obstrace$.
Formally, a \emph{policy} for $\pomdp$ is a function $\pol \colon \observations^* \to \actions$. 
We denote the set of policies for POMDP $\pomdp$ by $\pols{\pomdp}$.

\paragraph{Belief MDP} 
A \emph{belief} is a tuple $\belief = \tuple{\observation, \dist_\belief}$ of an observation $\observation \in \observations \uplus \{\observation_\mathit{init}\}$, where $\observation_\mathit{init} \notin \observations$ is a dedicated initial observation, and a probability distribution $\dist_\belief \in \dists{\states}$ over POMDP states.%
\footnote{In the literature, beliefs are typically considered to only be the distribution $\dist_\belief$. We extend this definition by the explicit inclusion of the observation that yields the belief to simplify later definitions.}
The distribution captures the evolution of an agent's information about its current state given histories of actions and observations, while the observation represents the last observation made in such a history.

An agent starts with the initial belief $\binit = \tuple{\observation_\mathit{init},\dist_{\binit}}$, with $\dist_{\binit}(\sinit) = 1$.
Beliefs are updated when an action is played and a new observation is received.
The probability to observe $z \in \observations$ after playing action $a$ in belief $b = \tuple{\hat{z},\dist_b}$ is 
\begin{equation*}
    P(z | b,a) = \sum_{s \in \dist_b} \dist_b(s) \cdot \sum_{s' \in S} \transitions(s,a)(s') \cdot \obsfunction(s,a,s')(z).
\end{equation*}
The successor belief of $b$ after playing $a$ and observing $z$ is $\successorbelief{b}{a}{z} = \tuple{z, \dist^\mathsf{succ}_{b,a,z}}$ where $\dist^\mathsf{succ}_{b,a,z}$ is given by 
\begin{align*}
   \dist^\mathsf{succ}_{b,a,z}(s') \colonequals & \ P(s' | b,a,z) \\
    = & \ \frac{\sum_{s \in \dist_b} \dist_b(s) \cdot \transitions(s,a)(s') \cdot \obsfunction(s,a,s')(z)}{P(z | b,a)}
\end{align*}
if $P(z | b,a) > 0$ and \emph{undefined} otherwise.
Successive computation of successor beliefs yields an infinite-state fully observable MDP capturing the POMDP dynamics. 
This \emph{belief MDP} \citep{astrom1965} is the basis for many solution methods for analysis problems on POMDPs.
Let $\beliefs_\pomdp^{n}$ be the set of beliefs \emph{reachable in $n$ steps}, given by $\beliefs_\pomdp^0 \colonequals \{ \binit \}$ and $\beliefs_\pomdp^{n+1} \colonequals \beliefs_\pomdp^n \cup \{ \successorbelief{b}{a}{z} \mid b \in \beliefs_\pomdp^n,\allowbreak a \in \actions,\allowbreak z\in \observations \}$.
$\beliefs_\pomdp \colonequals \lim_{n\to \infty} \beliefs_\pomdp^n$ is the set of reachable beliefs.

The belief MDP of POMDP $\pomdp$ is $\beliefmdp{\pomdp} = \langle \beliefs_\pomdp, \actions, \belieftransitions, \binit \rangle$ where $\belieftransitions(b,a)(b')\colonequals P(z | b,a)$ if $b'=\successorbelief{b}{a}{z}$ and $\belieftransitions(b,a)(b') \colonequals 0$ otherwise.

\paragraph{Costs} 
We annotate POMDPs with \emph{costs} (also referred to as \emph{rewards}).
Let $\costdim \in \mathbb{N}$.
A \emph{($\costdim$-dimensional) cost structure} for $\pomdp$ is a function $\coststruct \colon \states \times \actions \times \states \to \costdom$.
When taking the transition $\transition{s}{\action}{s'}$, the cost values $\coststruct(s,\action,s') = \tuple{c_1, \dots, c_k}$ are collected.
A cost structure allows the encoding of different, independent cost measures in its dimensions. For example, one dimension can model the expired time, while another models energy consumption.
The \emph{cumulative cost of a finite path \finpath} in $\pomdp$ with respect to $\coststruct$ is $\cumurew_\coststruct(\finpath) \colonequals \sum_{i=1}^{\lengthofpath{\finpath}} \coststruct(s_{i-1},a_i,s_i)$.
The set of all distinct cost vectors occurring in $\coststruct$ is
$\costvectorset_{\coststruct} \colonequals \{\coststruct(s,a,s') \mid s,s' \in \states, a \in \actions\}$.

\section{Problem Statement}
The cost-bounded reachability (CBR) problem asks for the maximal probability to reach a set of states in the POMDP via paths that respect (multi-dimensional) bounds on the cumulative costs.
% 
Formally, for a $\costdim$-dimensional cost structure $\coststruct$, relations ${\costrel} \in \{\leq, >\}^\costdim$ and threshold $\costthresh \in \costdom$, we call $\costtriple$ a \emph{($\costdim$-dimensional) cost bound} over $\coststruct$.
Cost bounds represent constraints on the cumulative cost of paths with respect to the corresponding cost structure.
For a finite path $\finpath$, bound $\costtriple$ is \emph{active in dimension $i$} iff $\tupleaccess{\cumurew_{\coststruct}(\finpath)}{i}~\mathbin{\tupleaccess{\costrel}{i}}~\tupleaccess{\costthresh}{i}$. Moreover, $\costtriple$ is \emph{active} for $\finpath$ iff it is active in all dimensions $1 \le i \le k$, i.e.,~$\cumurew_\coststruct(\finpath) \costrel \costthresh$, where the relations in $\costrel$ are applied element-wise.
As we consider only natural values for costs, relations $\geq$ and $<$ are supported by adapting the thresholds in $\costthresh$.
Non-negative rational costs are supported by suitable scaling of $\coststruct$ and $\costthresh$.

We fix a POMDP $\pomdp$ and a $\costdim$-dimensional cost bound $\costtriple$. % \in \costbounds(\coststruct)$.
Given a policy $\pol$, the \emph{cost-bounded reachability probability} for a state set $T \subseteq S$ is
\begin{align*}
\mathsf{Pr}_{\pol}^{\pomdp}\left(\costEventually{\costtriplenobracket} \ T\right)
    ~\colonequals~&
    \mathsf{Pr}^{\pomdp}_{\pol}\big\{\pi \in \mathit{Cyl}(\finpath) ~\big|~
     \last{\finpath} \in T \text{ and }\\
      & \costtriple \text{ is active for } \finpath
    \big\},
\end{align*}%
where $\mathit{Cyl}(\finpath)$ is the set of infinite extensions of finite path $\finpath$ and $\mathsf{Pr}_{\pol}^{\pomdp}$ denotes the standard probability measure for $\pomdp$ under policy $\pol$~\citep{Put94}.
We call $\mathsf{Pr}_{\pol}^{\pomdp}\left(\costEventually{\costtriplenobracket} \ T\right)$ the \emph{value} of policy $\pol$. % for the cost-bounded reachability query $\costEventually{\costtriplenobracket} \ T$.
The \emph{maximal} cost-bounded reachability probability is
    $\mathsf{Pr}_{\max}^{\pomdp}\left(\costEventually{\costtriplenobracket} \ T\right)\colonequals
    \sup_{\pol \in \pols{\pomdp}} \mathsf{Pr}_{\pol}^{\pomdp}\left(\costEventually{\costtriplenobracket} \ T\right).
    $
% 
Similar to related problems in POMDPs, a policy realising the maximal cost-bounded reachability probability is not guaranteed to exist.
We formulate our problem in terms of a two-sided $\varepsilon$-approximation.

\begin{problem}[(Multi-)Cost-Bounded Reachability (CBR)]
\label{prob:cbrp}
For POMDP $\pomdp$, $T \subseteq S$, cost bound $\costtriple$, and $\varepsilon \in [0,1]$, compute $\upperbound \in [0,1]$ and a policy $\tilde{\pol} \in \pols{\pomdp}$, such that
$\upperbound - \varepsilon ~\le~ \mathsf{Pr}_{\tilde{\pol}}^{\pomdp}\left(\costEventually{\costtriplenobracket} \ T\right) ~\le~ \mathsf{Pr}_{\max}^{\pomdp}\left(\costEventually{\costtriplenobracket}\ T \right) ~\le~ \upperbound$.
\end{problem}

\begin{theorem}
The decision variant of CBR is undecidable.
\end{theorem}
\begin{proof}
By considering 0-dimensional cost structures, CBR subsumes unbounded, undiscounted indefinite-horizon reachability, which is undecidable~\citep{madani2003}.
\end{proof}

We write $\mathsf{Pr}_{\max}^{\pomdp}\left(\lozenge\ T\right)$ for the unbounded problem as a special case of CBR with 0-dimensional costs.
Methods to tackle unbounded reachability using two-sided approximations have been described in the literature (see Section~\ref{sec:intro}).

Decidability of several subclasses of CBR can be established.
For example, \emph{finite-horizon} reachability probabilities---which can be computed exactly \citep{smallwood1973}---are a special instance of cost-bounded reachability probabilities, where \emph{each} transition induces exactly a cost of $1$ and the costs are bounded upwards by the horizon.
Lifting this to arbitrary, but strictly positive costs yields the \emph{risk-sensitive} setup from \citet{hou2016}, which---as mentioned by the authors---is still decidable.
Our setting is more general since we allow transitions with $0$ costs in any dimension as well as queries with only lower bounds.

\section{From Cost-Bounded To Unbounded Reachability}
\label{sec:unboundedreach}
We present an extension of the unfolding approach \citep{andova2003,ohtsubo2004} for the cost-bounded reachability problem in MDPs to the partially observable domain.

\begin{definition}[Cost Epoch]
A \emph{(cost) epoch} of dimension $k$ is a tuple $\epoch = \langle e_1, \ldots, e_k \rangle \in (\mathbb{N} \cup \{ \bottomepoch \})^k$. We denote the domain of all $k$-dimensional epochs by $\epochs_k \colonequals (\mathbb{N} \cup \{ \bottomepoch \})^k$.
\end{definition}

Each entry $\tupleaccess{\epoch}{i}$ of an epoch keeps track of the costs that can be accumulated until the bound $\costtriple$ changes its status in dimension $i$ (active to inactive or vice versa).

The initial epoch is defined by the threshold vector $\costthresh$.
To evolve cost epochs, we subtract the costs collected in each dimension. For values below $0$, we use the dedicated symbol $\bottomepoch$ to indicate that the bound changed its status from the initial one. Formally, this is captured by the \emph{monus} operation.
\begin{definition}[Monus for Epochs]
The \emph{monus} operator for cost epochs $\ominus\colon \epochs_k \times \mathbb{N}^k \to \epochs_k$ is given component-wise as
\begin{equation*}
\tupleaccess{(\epoch \ominus \mathsf{c})}{i} \colonequals 
\begin{cases} 
\tupleaccess{\epoch}{i} - \tupleaccess{\mathsf{c}}{i} & \text{ if } \bottomepoch \neq \tupleaccess{\epoch}{i} \land \tupleaccess{\epoch}{i} \geq \tupleaccess{\mathsf{c}}{i}, \\
\bottomepoch & \text{ otherwise.}
\end{cases}
\end{equation*}
\end{definition}

We lift the notion of being active to epochs. The indicator function $\actBound_{\costtriplenobracket}\colon \epochs_k \to \{0,1\}$ is $1$ iff bound $\costtriple$ is active in a given epoch, i.e.,~$\actBound_{\costtriplenobracket} (\epoch) \colonequals 1$ if for all $1 \leq i \leq k$, $\tupleaccess{\costrel}{i} = \ \leq$ implies $\tupleaccess{\epoch}{i} \neq \bottomepoch$ and $\tupleaccess{\costrel}{i} = \ >$ implies $\tupleaccess{\epoch}{i} = \bottomepoch$, and $\actBound_{\costtriplenobracket} (\epoch) \colonequals 0$ otherwise.

Using the idea of epochs, we construct a POMDP that enables us to reason about the activation of a bound on the level of states instead of the path level.
We first recap the construction for an MDP as it is described in the literature, e.g.~in~\citet{hartmanns2020}. Let the (finite) set of reachable epochs from an epoch $\epoch$ be given by
\begin{equation*}\epochs_k(\epoch) \colonequals \{ \epoch' \in \epochs_k \mid \exists \mathsf{c} \in \mathbb{N}^k\colon\, \epoch' = \epoch \ominus \mathsf{c} \}.\end{equation*}

\begin{definition}[Bound Unfolding MDP]
For MDP $\mdp = \mdptuple$ and cost bound $\costtriple$, 
the \emph{bound unfolding MDP} is $\unfoldingmdp{\costtriplenobracket}{\mdp} \colonequals \left\langle \states \times \epochs_k(\costthresh), \actions, \transitions_\mathsf{un}, \langle \sinit, \costthresh\rangle\right\rangle$, and
for $s_\epoch \colonequals \langle s,\epoch \rangle$, $s'_{\epoch'} \colonequals \langle s',\epoch' \rangle$, and $a \in \actions$:
\begin{equation*}\transitions_\mathsf{un}(s_\epoch, a)(s'_{\epoch'}) \colonequals 
\begin{cases} 
    \transitions(s, a) (s') & \text{if } \epoch' = \epoch \ominus \coststruct(s,a,s'), \\
    0 & \text{otherwise.}
\end{cases}\end{equation*}
\end{definition}

\begin{definition}[Bound Unfolding POMDP]\label{def:bndunfpomdp}
Given a POMDP $\pomdp = \pomdptuple$ with underlying MDP $\mdp = \mdptuple$ and cost bound $\costtriple$, 
the \emph{bound unfolding POMDP} is $\unfoldingmdp{\costtriplenobracket}{\pomdp} \colonequals \left\langle \unfoldingmdp{\costtriplenobracket}{\mdp}, \observations, \obsfunction_\mathsf{un}\right\rangle$ where for $s_\epoch \colonequals \langle s,\epoch \rangle$, $s'_{\epoch'} \colonequals \langle s',\epoch' \rangle$, and $a \in \actions$:
$\obsfunction_\mathsf{un}(s_\epoch, a, s'_{\epoch'}) \colonequals \obsfunction(s, a, s').$
\end{definition}

Finally, we lift the notion of an active bound to states of the unfolding POMDP which enables us to reason about conformance to the bound on states instead of paths.

\begin{definition}[Active States]
A state of the unfolding $\langle s, \epoch \rangle \in \states \times \epochs_k(\costthresh)$ is \emph{active} iff $\actBound_{\costtriplenobracket}(\epoch) = 1$. Given $T \subseteq S$, the set of \emph{active $T$-states} is $\actBound_{\costtriplenobracket}(T) = \{\langle s_T, \epoch \rangle \in \states \times \epochs_k(\costthresh) \mid s_T \in T \ \land \ \actBound_{\costtriplenobracket}(\epoch) = 1 \}$.
\end{definition}

As $\pomdp$ and $\unfoldingmdp{\costtriplenobracket}{\pomdp}$ share the set $\observations$ of observations, their possible policies coincide, i.e.,~$\pols{\pomdp} = \pols{\unfoldingmdp{\costtriplenobracket}{\pomdp}}$.
We formalise the relationship between the original POMDP and its unfolding POMDP regarding cost-bounded reachability. 

\begin{theorem}
\label{thm:unfolding}
    Given a POMDP $\pomdp$, set $T \subseteq S$ and cost bound $\costtriple$, it holds that for all policies $\pol \in \pols{\pomdp}$:
    $ \mathsf{Pr}_{\pol}^{\pomdp}\left(\costEventually{\costtriplenobracket} \ T\right) = \mathsf{Pr}_{\pol}^{\unfoldingmdp{\costtriplenobracket}{\pomdp}}\left(\costEventually{} \ \actBound_{\costtriplenobracket}(T) \right).$
\end{theorem}
\begin{proof}[Proof Sketch]
Let $f$ be the mapping from paths of $\unfoldingmdp{\costtriplenobracket}{\pomdp}$ to paths of $\pomdp$ obtained by dropping the epochs from the states. %in $\unfoldingmdp{\costtriplenobracket}{\pomdp}$.
$f$ is bijective and $\mathsf{Pr}_{\pol}^{\pomdp}\left(\{f(\pi) \mid \pi \in \Pi\}\right) = \mathsf{Pr}_{\pol}^{\unfoldingmdp{\costtriplenobracket}{\pomdp}}\left(\Pi\right)$ for any policy $\pol$ and set $\Pi$ of paths in $\unfoldingmdp{\costtriplenobracket}{\pomdp}$.
The claim follows by taking $\Pi$ as the set of paths that reach $\actBound_{\costtriplenobracket}(T)$. The corresponding paths in $\pomdp$ reach $T$ while the bound is active.
\end{proof}

We get the following result about maximal probabilities.
\begin{corollary}
    For POMDP $\pomdp$, $T \subseteq S$ and $\costtriple$: 
    $\mathsf{Pr}_{\max}^{\pomdp}\left(\costEventually{\costtriplenobracket} \ T\right) = \mathsf{Pr}_{\max}^{\unfoldingmdp{\costtriplenobracket}{\pomdp}}\left(\costEventually{} \ \actBound_{\costtriplenobracket}(T) \right).$
\end{corollary}

Thus, to tackle CBR as in Problem~\ref{prob:cbrp}, we can consider the unbounded indefinite-horizon reachability problem on the unfolding POMDP. Solution methods for this problem include smart exploration of the belief space \citep{norman2017, bork2022, ho2024} or the policy space \citep{andriushchenko2022}.

\section{Cost-(Level-)Aware POMDPs}
The general cost bound framework assumes that an agent's decisions are solely based on environmental observations the agent receives.
However, costs might reflect quantities---such as the level of a battery---that the agent observes.
We refine our cost-bounded analysis for the special case where the observation model captures the additional information provided by costs. This \emph{cost-awareness} notion is related to the reward-based belief updates in \citet{izadi2005}.

\begin{definition}[Cost-Aware POMDP]\label{def:capomdp}
A POMDP $\pomdp = \pomdptuple$ with $\mdp = \mdptuple$ is \emph{cost-aware} with respect to a $k$-dimensional cost structure $\coststruct$ if 
% 
for all $\observation \in \observations$ there is $\mathsf{c}_\observation \in \mathbb{N}^k$ such that
for any transition $\transition{s}{\action}{s'}$ with $\observation \in \supp{\obsfunction(s,a,s')}$ we have $\coststruct(s,a,s') = \mathsf{c}_\observation$.
\end{definition}

In a cost-aware POMDP, all observations $\observation$ can be assigned a cost vector $\mathsf{c}_\observation$. 
\emph{An observation $\observation$ only occurs at transitions that yield costs equal to $\mathsf{c}_\observation$, effectively guaranteeing that the collected costs are observable.}
Cost-awareness implies that, given an observation trace $\observation_1\ldots\observation_n \in \observations^*$, an agent can derive the costs $\sum_{i=1}^n \mathsf{c}_{\observation_i}$ that have been accumulated so far.
Thus, in the bound unfolding POMDP, an agent can always be certain about the cost epoch it is currently in as it has access to the history.
This is reflected in the belief space: if the POMDP $\pomdp$ is cost-aware, all reachable beliefs in the belief MDP of its unfolding $\beliefmdp{\unfoldingmdp{\costtriplenobracket}{\pomdp}}$ only contain states that belong to the same epoch.
Formally, if $\pomdp$ is cost-aware, then for every reachable belief $\belief = \tuple{\observation, \dist_\belief}$ of $\beliefmdp{\unfoldingmdp{\costtriplenobracket}{\pomdp}}$ there is an epoch $\epoch \in \epochs_k$ such that $\supp{\dist_\belief} \subseteq \states \times \{\epoch\}$.
This enables an optimised analysis of $\beliefmdp{\unfoldingmdp{\costtriplenobracket}{\pomdp}}$ as discussed in the sequel.

\subsection{Sequential Epoch Analysis}
\label{sec:sequential}
Our approach so far is to analyse unbounded reachability for the unfolding POMDP $\unfoldingmdp{\costtriplenobracket}{\pomdp}$, e.g.~via (abstractions of) its belief MDP $\beliefmdp{\unfoldingmdp{\costtriplenobracket}{\pomdp}}$. 
However, such an analysis operates on the potentially large unfolding POMDP.

Recent works on fully observable MDPs avoid the construction of a large unfolding MDP by considering epochs one after another in a dynamic programming fashion \citep{hahn2016, klein2018, hartmanns2020}.
This \emph{sequential epoch analysis} is based on the epoch dependency graph 
$\big\langle \epochs_k(\costthresh), \big\{ \tuple{\epoch,\epoch \ominus \mathsf{c}} ~\big|~ \epoch \in \epochs_k(\costthresh),\, \mathsf{c} \in \costvectorset_{\coststruct} \big\} \big\rangle$
which has an edge from epoch $\epoch$ to epoch $\epoch'$ iff a transition of the form $\transition{\tuple{s,\epoch}}{\action}{\tuple{s',\epoch'}}$ exists in the unfolding POMDP.
Since costs are non-negative, the epoch graph is acyclic (except for self-loops).
The idea is to process epochs in a reversed topological order $\epoch_0, \epoch_1, \dots, \epoch_n$ with $\epoch_0 = \bottomepoch^k$ and $\epoch_n = \costthresh$.
For each considered epoch $\epoch_i$, an \emph{epoch MDP}---which essentially is the restriction of the bound unfolding MDP to states with epoch $\epoch_i$---is constructed and analysed while propagating results from previous epochs $\epoch_0, \dots, \epoch_{i-1}$.
Implementations can exploit similarities between different epoch MDPs.
This way, the approach efficiently analyses properties of the large unfolding MDP without an explicit construction.

We lift sequential epoch analysis to POMDPs.
In the general case, the POMDP dynamics do not allow a clear separation of epochs. In particular, when considering belief-based solution methods, beliefs may have states representing several different epochs in their support. 
We therefore focus on cost-aware POMDPs.
% 
Our approach is to perform sequential epoch analysis on the belief MDP---or a finite abstraction thereof. To this end, we lift cost bounds to the belief space.
% 
We fix a POMDP $\pomdp$ and cost bound $\costtriple$ such that $\pomdp$ is cost-aware with respect to $\coststruct$.

\begin{definition}[Cost-Aware Belief Cost Bound]
The \emph{belief cost structure} for $\beliefmdp{\pomdp}$ is $\coststruct^B$ where 
for $s \in \supp{\dist_b}$, ${s' \in \supp{\dist_{b'}}}$, $\coststruct^B ((z,\dist_b), a , (z',\dist_{b'})) \colonequals \coststruct(s,a ,s')$.
The belief cost bound is $\mathit{bel}(\costtriplenobracket) = (\coststruct^B \costrel \costthresh)$.
\end{definition}
$\coststruct^B$ is well-defined as cost-awareness guarantees that $\coststruct(s,a ,s') = \coststruct(q,a ,q')$ for all $s,q \in \supp{\dist_b}$ and $s',q' \in \supp{\dist_{b'}}$.
% 
For cost-aware POMDPs, applying the cost unfolding and then constructing the belief MDP is equivalent to \emph{first} constructing the belief MDP and \emph{then} applying cost unfolding.
For MDPs $\mdp_1$ and $\mdp_2$ we write $\mdp_1 \cong \mdp_2$ iff the reachable fragments are isomorphic, i.e.,~equal up to renaming.

\begin{theorem}
\label{thm:belunfisomorph}
% For cost bound $\costtriple$ and cost-aware POMDP $\pomdp$ we have
$\beliefmdp{\unfoldingmdp{\costtriplenobracket}{\pomdp}} \cong \unfoldingmdp{\beliefmdp{\costtriplenobracket}}{\beliefmdp{\pomdp}}$.
\end{theorem}
\begin{proof}[Proof Sketch]
Let $\tuple{\observation, \dist_\belief}$ be a state of $\beliefmdp{\unfoldingmdp{\costtriplenobracket}{\pomdp}}$. Since $\pomdp$ is cost-aware, there is an epoch $\epoch$ with $\supp{\dist_\belief} \subseteq \states \times \{\epoch\}$.
$\tuple{\observation, \dist_\belief}$ is isomorphic to state $\tuple{\tuple{z,\dist'},\epoch}$ of $\unfoldingmdp{\beliefmdp{\costtriplenobracket}}{\beliefmdp{\pomdp}}$, where 
% $\dist'\in\dists{S}$ is given by 
$\dist'(s) = \dist_\belief(\tuple{s,\epoch})$.
\end{proof}

The sequential epoch analysis for MDPs outlined above can readily be applied to $\beliefmdp{\pomdp}$ to show properties for its unfolding $\unfoldingmdp{\beliefmdp{\costtriplenobracket}}{\beliefmdp{\pomdp}}$.
Due to Theorem~\ref{thm:belunfisomorph}, analysis results immediately carry over to $\beliefmdp{\unfoldingmdp{\costtriplenobracket}{\pomdp}}$.
Approaches like the ones described in \citet{norman2017,bork2020,bork2022} handle large or even infinite belief MDPs through abstraction, yielding a finite MDP $\mathit{abstr}(\beliefmdp{\pomdp})$ which over- or under-approximates the behaviour of $\beliefmdp{\pomdp}$.
If bound unfolding retains the abstraction---i.e.,~$\mathit{abstr}(\beliefmdp{\unfoldingmdp{\costtriplenobracket}{\pomdp}}) \cong \unfoldingmdp{\beliefmdp{\costtriplenobracket}}{\mathit{abstr}(\beliefmdp{\pomdp})}$---the sequential epoch analysis is compatible with such techniques.
In our experiments, we use this observation for the cut-off abstraction of \citet{bork2022} and the discretisation of \citet{bork2020}.

\subsection{Cost Level Awareness}
\label{sec:costlevelawareness}
In many problem instances, neither full cost-awareness nor absolute unawareness is realistic.
For example, a robot can have a rough estimate of its energy level (high, medium, low, empty) while not being aware of exactly how much energy it has spent yet.
We capture this notion of \emph{cost levels} using functions that assign in each dimension a \emph{level} according to the collected cost.
We assume uniform levels, i.e.,~we change the level in dimension $i$ whenever an additional cost of some fixed $\tupleaccess{\leveldegree}{i} >0$ is collected.
Moreover, $0$ cost is its own level.

\begin{definition}[Level Function]
For $\leveldegree \in \positiveNats^k$, the \emph{level function} $\levelfunc_{\leveldegree}: \costdom \to \costdom$ is given by $\tupleaccess{\levelfunc_{\leveldegree}(\mathsf{c})}{i} \colonequals
        \left\lceil \frac{\tupleaccess{\mathsf{c}}{i}}{\tupleaccess{\leveldegree}{i}}\right\rceil.$
\end{definition}

We fix a POMDP $\pomdp$, cost bound $\costtriple$, target states $T$ and a level function $\levelfunc_{\leveldegree}$ and define a \emph{level-aware} instance of CBR where in addition to the observations in $\pomdp$, the agent can use information about the current cost level to make decisions.
This level-aware instance is in general not equivalent to the CBR instance $\pomdp$ with cost bounds $\costtriple$. It introduces new observations that provide additional information which is not observable in the original model.
However, the level-aware instance may more accurately capture the scenario we are interested in as an agent may have access to such information.
Our definition of a level-aware instance decouples the modelling of observations arising from the environment and those arising from possible cost observation.
This simplifies the modelling of such instances and allows, for example, the comparison of different degrees of cost levels.
By choosing $\leveldegree= \tuple{1,\dots,1}$, we can define a variant of the original POMDP with \emph{full} cost-awareness which we call the \emph{cost-aware variant}.
$\leveldegree = \costthresh$ means that only activeness of bounds and the first collection of a non-zero cost can be observed for each dimension.

To incorporate cost level awareness into our framework, we present a transformation of $\pomdp$, $\coststruct$ and $\levelfunc_{\leveldegree}$ into a new cost-aware POMDP (cf.~Def.~\ref{def:capomdp}).
The transformation encodes the cost that can still be collected until a new level is reached in the state space of the POMDP and introduces fresh observations to mark transitions in which one or more \emph{level changes} occur.
This way, an observation trace suffices to deduce the current level.
% 
Appendix~\ref{appdx:lvlunfold} provides further details.

\begin{definition}[Level Unfolding POMDP]
\label{def:lvlunf}
    The \emph{level unfolding} with respect to $\levelfunc_{\leveldegree}$ is the POMDP
    $\levelfunc_{\leveldegree}(\pomdp) = \tuple{\mdp_{\levelfunc_{\leveldegree}}, \observations_{\levelfunc_{\leveldegree}}, \obsfunction_{\levelfunc_{\leveldegree}}}$ 
    and the cost structure $\coststruct_{\levelfunc_{\leveldegree}}$
    with 
\begin{itemize}
    \item  $\mdp_{\levelfunc_{\leveldegree}} = \tuple{\states_{\levelfunc_{\leveldegree}}, \actions, \transitions_{\levelfunc_{\leveldegree}}, \tuple{\sinit, \tuple{0,\dots,0}}}$,
    \item $\states_{\levelfunc_\leveldegree} = S \times \set{  \ell \in \costdom \mid \forall i : \tupleaccess{\ell}{i} < \tupleaccess{\leveldegree}{i}} $,
    \item $\observations_{\levelfunc_{\leveldegree}} = \observations \times \left\{\costval \in \costdom \mid \forall i: \tupleaccess{\costval}{i} \le \big\lceil \myfrac{c^\mathrm{max}_i}{\tupleaccess{\leveldegree}{i}} \big\rceil \right\}$, where
    $c^\mathrm{max}_i = \max_{s,s' \in \states, a \in \actions} \tupleaccess{\coststruct(s,a,s')}{i}$,
    \item $\transitions_{\levelfunc_{\leveldegree}}(\tuple{s,\ell},a,\tuple{s',\ell'}) = \transitions(s,a,s')$ if for all $i$: $\tupleaccess{\ell'}{i} = \big(\tupleaccess{\ell}{i} - \tupleaccess{\coststruct(s,a,s')}{i}\big) \bmod \tupleaccess{\leveldegree}{i}$,
    \item $\obsfunction_{\levelfunc_{\leveldegree}}(\tuple{s,\ell},a,\tuple{s',\ell'})(\tuple{z,\costval}) = \obsfunction(s,a,s')(z)$ if for all $i$: $\tupleaccess{\costval}{i} = \left\lceil \myfrac{\big(\tupleaccess{\coststruct(s,a,s')}{i}-\tupleaccess{\ell}{i}\big)}{\tupleaccess{\leveldegree}{i}} \right\rceil$,
    \item $\coststruct_{\levelfunc_{\leveldegree}}(\tuple{s,\ell},a,\tuple{s',\ell'}) = \coststruct(s,a,s')$,
\end{itemize}
and $\transitions_{\levelfunc_{\leveldegree}}$ and $\obsfunction_{\levelfunc_{\leveldegree}}$ are zero in all other cases.
\end{definition}

The CBR instance with POMDP $\levelfunc_{\leveldegree}(\pomdp)$, bounds $(\coststruct_{\levelfunc_{\leveldegree}} \costrel \costthresh)$ and target states $\leveltarget = (T \times \costdom) \cap  \states_{\levelfunc_{\leveldegree}}$ is the \emph{level-aware variant} (w.r.t.~$\levelfunc_{\leveldegree}$) for $\pomdp$, $\costtriple$, and $T \subseteq \states$. 

When assuming that a level function captures when a bound becomes (in-)active---e.g.~if an energy limit is exceeded---the level-aware instance can be reduced to an equivalent CBR instance on a \emph{fully cost-aware} POMDP which can then be solved using the sequential approach.
Mathematically, this is the case if for all $i$, $\tupleaccess{\leveldegree}{i}$ divides $\tupleaccess{\costthresh}{i}$ (written $\tupleaccess{\leveldegree}{i}  \divides \tupleaccess{\costthresh}{i}$).

\begin{theorem}
\label{thm:lvl}
    Let $\levelfunc_{\leveldegree}(\pomdp)$
    % cost bound $(\coststruct_{\levelfunc_{\leveldegree}} \costrel \costthresh)$ and $\leveltarget$
    such that $\forall i: \tupleaccess{\leveldegree}{i} \divides \tupleaccess{\costthresh}{i}$ and let
    $(\levelcosts \costrel \costthresh_{\leveldegree})$ be a cost bound for $\levelfunc_{\leveldegree}(\pomdp)$ with
    $\forall i{:}$ $\tupleaccess{\levelcosts(\tuple{s,\ell},a,\tuple{s',\ell'})}{i} \colonequals \left\lceil \myfrac{\big(\tupleaccess{\coststruct(s,a,s')}{i}-\tupleaccess{\ell}{i}\big)}{\tupleaccess{\leveldegree}{i}} \right\rceil$ and $\tupleaccess{\costthresh_{\leveldegree}}{i} \colonequals \myfrac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}}$.
    Then, $\levelfunc_{\leveldegree}(\pomdp)$ is cost-aware w.r.t.~$\levelcosts$ and
    \begin{equation*}\mathsf{Pr}_{\max}^{\levelfunc_{\leveldegree}(\pomdp)}\Big(\costEventually{\coststruct_{\levelfunc_{\leveldegree}} \costrel \costthresh} \ \leveltarget\Big) = 
    \mathsf{Pr}_{\max}^{\levelfunc_{\leveldegree}(\pomdp)}\Big(\costEventually{\levelcosts \costrel \costthresh_{\leveldegree}} \ \leveltarget \Big).\end{equation*}
\end{theorem}
\begin{proof}[Proof Sketch]
    $\levelcosts$ captures the number of level jumps that occur when a transition in $\levelfunc_{\leveldegree}(\pomdp)$ is taken. Since $\tupleaccess{\leveldegree}{i} \divides \tupleaccess{\costthresh}{i}$, the bound changes its activeness in dimension $i$ after exactly $\myfrac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}} + 1$ level jumps occurred.
    We can then show that for a path $\hat{\pi}$, $\cumurew_{\coststruct_{\levelfunc_{\leveldegree}}}(\hat{\pi}) \costrel \costthresh$ if and only if $\cumurew_{\levelcosts}(\hat{\pi}) \costrel \costthresh_{\leveldegree}$.
\end{proof}

\section{Empirical Evaluation}
We evaluate the practicality of cost-bounded reachability analysis in POMDPs to answer the following questions:
\begin{itemize}
\item[\quest{1}] Are the presented approaches suitable for solving cost-bounded reachability problems in practice?
\item[\quest{2}] How is this influenced by cost (level) awareness?
\item[\quest{3}] Does applying the sequential approach on cost-aware belief MDPs improve performance compared to reducing the problem to unbounded reachability?
\end{itemize}

\paragraph{Implementation}
We extended the probabilistic model checking tool \storm{} \citep{storm} to support CBR queries for POMDPs.
\storm{} can analyse unbounded reachability in POMDPs via finite abstractions of the belief MDP~\citep{bork2020,bork2022} as well as cost-bounded reachability in (fully observable) MDPs via the sequential approach from~\citet{hartmanns2020}.
On top of that, we implemented the construction of bound unfolding and level unfolding POMDPs, along with the exploration of cost-aware belief MDPs with belief cost bounds.
Both constructions are integrated into the existing POMDP verification framework of \storm{}, enabling us to tackle CBR problems in three different configurations:
\begin{itemize}
    \item \config{unfold}: Transform to an unbounded reachability problem on the unfolding POMDP (see Section~\ref{sec:unboundedreach}), then verify (finite abstractions of) its belief MDP.
    \item \config{ca-unfold}: Construct a cost-aware variant of the POMDP (see Section~\ref{sec:costlevelawareness}), then analyse it as in \config{unfold}.
    \item \config{ca-bel-seq}: Construct a cost-aware variant of the POMDP, then construct (a finite abstraction of) its cost-aware belief MDP and analyse CBR on this fully observable MDP using the sequential epoch approach (see Section~\ref{sec:sequential}).
\end{itemize}

\config{ca-unfold} and \config{ca-bel-seq} both first construct a cost-aware variant based on the input POMDP, extending it with additional observations which in general changes the optimal achievable value.
We opt for this method of defining cost-aware instances to decouple the modelling of environmental observations and those observations stemming from costs.
\config{ca-unfold} and \config{ca-bel-seq} differ in the underlying solution approach. Instead of full cost-awareness, they can also be used with cost level awareness by applying a level unfolding first.
Moreover, cost-awareness can also be induced for only a subset of the dimensions $I \subseteq \{1,\dots,k\}$.
\storm{} uses a state-based observation model $\obsfunction: \states \to \observations$. 
We transform our observation model into such a state-based one by encoding observations in the state space \citep{chatterjee2016}. For \config{ca-unfold} and \config{ca-bel-seq}, this results in larger state spaces compared to the original POMDP due to the additional observations.

For each of the three configurations, we can either use the \emph{cut-off} approach of~\citet{bork2022} or the \emph{discretisation} approach of~\citet{bork2020} to obtain a finite abstraction of the belief MDP yielding sound lower or upper bounds for the optimal cost-bounded reachability probabilities.
Both abstractions---noted below as \config{cut} and \config{discr}---have a hyper-parameter that controls the size of the obtained belief MDP abstractions.

Our implementation is publicly available as part of the supplementary material of this paper \citep{artifact}.\footnote{\href{https://zenodo.org/records/15642233}{https://zenodo.org/records/15642233}}

\begin{table}[t]
\centering
\caption{Information on Benchmark Instances}
\label{tab:benchmark_info}
\input{experiment_data/tablebenchmarks.tex}
\end{table}

\begin{table*}[t]\centering
\caption{Overview of Obtained Value Bounds and Runtimes}\label{tab:results}
\input{experiment_data/tablet1800.tex}
\end{table*}

\paragraph{Benchmarks}
Since there is no established benchmark set for CBR problems, we use partially observable variants of some cost-bounded reachability problems from \citet{hartmanns2020} (\model{resrc}, \model{rover}, \model{serv}).
In addition, we consider four variants of grid world examples where reaching a goal is made difficult by an incline (\model{incline}), obstacles (\model{obstcl}), water levels (\model{water}), or uncertain movements (\model{walk}40, \model{walk}120).
Finally, we consider our motivating example (\model{clean6}) and a version with 12 tiles (\model{clean12}).
The benchmarks are given in the guarded command language of \textsc{Prism} \citep{prism}.
Appendix~\ref{app:benchmarkproblems} provides further details.

Table~\ref{tab:benchmark_info} outlines the benchmark instances we consider, including the number of POMDP states $|S|$, the number of distinct observations $|\observations|$, the relation of the cost bounds $\costrel$ (also indicating the dimensionality $k$), and the number of reachable epochs $|\epochs|$ indicating the magnitude of the involved cost thresholds $\costthresh = \tuple{t_1, \ldots, t_k}$. We consider two different thresholds for \model{resrc}, \model{rover}, \model{serv}, and \model{water}.
The symbol $\dagger$ for the \model{clean} instances denotes that the second cost dimension remains unobservable, even for the cost-aware configurations.
While some of the considered POMDPs have small state spaces, a high number of relevant epochs leads to intricate cost-bounded reachability queries.
We emphasise that the complexity of the considered instances stems from the inclusion of cost bounds---the unbounded problem variants with the same target states we consider result in a maximal probability of $1$ for all instances.


\paragraph{Setup}
We conducted experiments on Intel Xeon 8468 Sapphire systems (2.1~GHz) with memory limited to 64~GB.
\storm{} runs on a single core.
For each combination of benchmark instance, configuration, and abstraction type, we considered 25 different hyper-parameter assignments to capture different trade-offs between approximation accuracy and computational tractability.
A time limit of 1800 seconds (walltime) was applied for the individual runs.
A detailed setup description is provided in Appendix~\ref{app:setup}.

\paragraph{Results}
Table~\ref{tab:results} lists the best value bounds obtained within the time and memory limit as well as the time it took to obtain these bounds. \config{cut} yields a \emph{lower bound} on the optimal value, and \config{discr} yields an \emph{upper bound} $V^U$ (see Problem~\ref{prob:cbrp}).
Table entries are bold-faced when they depict the tightest lower or upper bound obtained within the fastest runtime.
This does not include \config{unfold} as it computes a different measure.
A dash (-) indicates that no non-trivial bound was obtained within the time limit.
Column $|S_\mathsf{un}|$ denotes the number of reachable states in the unfolding POMDP (if known).
For \config{cut}, an asterisk ($^*$) indicates that the belief MDP was fully explored.

The plots at the top of Figure~\ref{fig:plot} show the obtained value bounds over analysis time for two selected instances.
A data point $\tuple{x,y}$ for configuration \config{z} means that (lower or upper) value bound $y$ was established within $x$ seconds using configuration \config{z} and appropriate hyper-parameters.
Similarly, the plots at the bottom show obtained value bounds for our motivating example \model{clean6}, on the left when increasing the number of observable energy levels (intuitively providing more information for a policy) and on the right when increasing the energy budget. 
Tables containing additional details on the results and further plots are given in Appendix~\ref{app:results}.

\begin{figure}[t]\centering
\defaulttimeresplotleft{clean_rbrmax2_N12-B1120-B211-__lvl11-__lvl20}{1}{1800}%
\defaulttimeresplotright{serv_rbrmax1_B11000}{1}{1800}\\[12pt]
\input{experiment_data/lvlsbndsplot}
\caption{Value bounds obtained over time (top left: \model{clean}12, $|\epochs|{=}1508$; top right: \model{serv}, $|\epochs|{=}40$), for different observation levels (bottom left: \model{clean}6, $|\epochs|{=}413$), and for increasing cost thresholds (bottom right: \model{clean}6, $168\le|\epochs|\le658$).}%
\label{fig:plot}
\end{figure}


\paragraph{Discussion}
Concerning \quest{1}, we see that all considered approaches produce non-trivial bounds on the optimal value for the majority of benchmark instances.
Towards \quest{2}, we observe that the true optimal values in cost-(level-)aware variants of POMDPs are always at least as good as in the original POMDP.
Our results show that cost-awareness does not affect the obtained upper value bounds for many of our benchmarks. However, the obtained lower bounds are often larger with cost-awareness enabled, resulting in tighter gaps between lower and upper bounds.
As we expect the maximal value to increase under cost-awareness, this indicates that the discretisation method works better for obtaining tight approximations when having more information about incurred costs.
This is also indicated by the data shown in the bottom left of Figure~\ref{fig:plot}. 
With an increasing number of observable cost levels within the cost threshold $\tupleaccess{\costthresh}{1}$, the obtained upper bounds decrease, while we expect the actual (unknown) maximal probability to increase when more information is available for the policy, pointing towards tighter upper bounds.
In addition, we see that with increasingly finer levels, the obtained values indeed increase, hinting at an increasingly higher true optimal value.
An exception to our general observation is \model{clean12} where \config{ca-unfold} yields a smaller value than \config{unfold} for \config{cut}. 
This is a result of the state space increase for cost-aware variants caused by \storm{}'s state-based observation model. The exploration of the cost-aware variant's belief space appears to be not as thorough as for the original POMDP within our given time limit. This results in a worse approximation (smaller lower bound) of a larger optimal value.
%
Regarding \quest{3}, \config{ca-bel-seq} often yields better approximations in less time compared to \config{ca-unfold}.
A notable exception is the \model{serv} case study.
Here, the unfolding POMDP has similar size compared to the original POMDP ($|S_\mathsf{un}| \approx |S|$) as we have comparably few reachable epochs.


\section{Conclusion}
We proposed a general framework for the analysis of reachability probability objectives under multiple cost constraints on POMDPs.
These objectives can be tackled by considering an unbounded objective on an unfolding of the POMDP.
Observation of incurred costs enables an advanced technique based on a sequential analysis of cost epochs on the belief MDP.
Awareness of cost levels provides a more realistic way to model certain scenarios and can often be reduced to the cost-aware setting.
Our experiments using a prototype implementation in \storm{} indicate the suitability of cost-bounded reachability analysis for practical applications.

As future research, we propose the extension of our framework towards cost-bounded expected reward objectives and a more flexible notion of cost levels. Additionally, developing approximation methods tailored towards the cost-bounded setting is useful for increasing scalability.

\paragraph{Data Availability} The supplementary material contains detailed proofs for our theoretical results and additional data from our evaluation.
The artifact accompanying this paper \citep{artifact} contains the source code of our implementation, model files used for the experimental evaluation and logfiles created during our experiments.

\begin{acknowledgements}
This work has been partially funded by the DFG RTG 2236/2 (UnRAVeL), the KI-Starter Project `Verifying AI Systems under Partial Observability' of the Ministry of Culture and Science of the German State of North Rhine-Westphalia and the European Union’s Horizon 2020 research and innovation programme under the Marie Skłodowska-Curie grant agreement \href{https://cordis.europa.eu/project/id/101008233}{No. 101008233} (MISSION).
Experiments were performed with
computing resources granted by RWTH Aachen University under project rwth1632.
\end{acknowledgements}


% References
\bibliography{main}

\newpage

\onecolumn

\title{Multi-Cost-Bounded Reachability Analysis of POMDPs\\(Supplementary Material)}
\maketitle

\appendix

\section{Example}
\begin{figure}[t]
    \centering
    \includegraphics[width=0.8\linewidth]{pomdp_example.pdf}
    \caption{Example POMDP $\pomdp$ and corresponding cost structure $\coststruct$}
    \label{fig:example}
\end{figure}
\begin{figure}[t]
    \centering
    \includegraphics[width=0.8\linewidth]{unfolding_example.pdf}
    \caption{Reachable fragment of the cost-bounded unfolding $\unfoldingmdp{\mathsf{bnd}}{\pomdp}$ for example POMDP $\pomdp$ with respect to cost bound $\mathsf{bnd} = (\coststruct\ \tuple{\leq,>} \ \tuple{1,0})$}
    \label{fig:unfolding_example}
\end{figure}

We illustrate the unfolding of cost bounds for POMDPs to treat cost-bounded reachability probability problems by an example.
Consider the POMDP $\pomdp$ and 2-dimensional cost structure $\coststruct$ depicted in Figure~\ref{fig:example}.

$\pomdp$ contains $4$ states and 2 actions $\alpha$ and $\beta$. The left-hand side of Figure~\ref{fig:example} depicts the state-transition diagram of $\pomdp$.
For example, we have $\transitions(s_0,\alpha)(s_0) = \nicefrac{1}{2}$. Observations after a transition are deterministic, and the single possible observation after a transition is given next to the transition probability, so for example we have $\obsfunction(s_0,\alpha,s_1)(z_0) = 1$.
The way the observations are chosen means that we always observe $z_0$ if we enter $s_0$ or $s_1$, $z_1$ if we enter $s_2$ and $z_2$ if we enter $t$.

The cost vector of each transition is given on the right-hand side of Figure~\ref{fig:example}.
In particular, we only have two transitions that do not have a cost of $\tuple{0,0}$, namely $\coststruct(s_1,\alpha,s_1) = \tuple{1,0}$ and $\coststruct(s_0,\alpha,s_0) = \tuple{0,1}$.

Consider the cost bound $\mathsf{bnd} = (\coststruct\ \tuple{\leq,>} \ \tuple{1,0})$, i.e.,~in dimension 1, we want to collect at most 1 unit of cost, while in dimension 2, we want to collect more than $0$ units.
We are interested in the cost-bounded reachability probability $\mathsf{Pr}_{\max}^{\pomdp}(\costEventually{\mathsf{bnd}} \{t\})$.
Satisfying the cost bounds requires taking transition $\transition{s_0}{\alpha}{s_0}$ \emph{at least} once while taking the self-loop $\transition{s_1}{\alpha}{s_1}$ \emph{at most} once.

The state-transition diagram of the fragment of the unfolding POMDP $\unfoldingmdp{\mathsf{bnd}}{\pomdp}$ reachable from the initial state is given in Figure~\ref{fig:unfolding_example}.
For illustration of the transitions in $\unfoldingmdp{\mathsf{bnd}}{\pomdp}$, we consider an example. 
Due to the bound thresholds, we get that the initial epoch $\einit$ of the unfolding is $\langle 1,0 \rangle$. Thus, the initial unfolding state is $\langle s_0, \langle 1,0 \rangle \rangle$. 
Consider transition $\transition{s_0}{\alpha}{s_0}$ in $\pomdp$.
We have $\coststruct(s_0,\alpha,s_0) = \tuple{0,1}$, thus the corresponding transition starting in $\langle s_0, \langle 1,0 \rangle \rangle$ changes the epoch to $\langle 1,0 \rangle \ominus \langle 0,1 \rangle = \langle 1,\bot \rangle$, resulting in the transition $\transition{\langle s_0, \langle 1,0 \rangle \rangle}{\alpha}{\langle s_0, \langle 1,\bot \rangle \rangle}$ with probability \begin{equation*}\transitions_{\mathsf{un}}(\langle s_0, \langle 1,0 \rangle \rangle, \alpha)(\langle s_0, \langle 1,\bot \rangle \rangle) = \transitions(s_0, \alpha)(s_0) = \nicefrac{1}{2}.\end{equation*}

The observations resulting from $\transition{\langle s_0, \langle 1,0 \rangle \rangle}{\alpha}{\langle s_0, \langle 1,\bot \rangle \rangle}$ have the same probability as for $\transition{s_0}{\alpha}{s_0}$ in the original POMDP, i.e.,~$z_0$ is observed with probability $1$.

To analyse $\Pr_{\max}^{\pomdp}(\costEventually{\mathsf{bnd}} \{t\})$, we need to identify the active $\{t\}$-states.
The active epochs in $\unfoldingmdp{\mathsf{bnd}}{\pomdp}$ are $\langle 1, \bot \rangle$ and $\langle 0, \bot \rangle$. 
Therefore, we have \begin{equation*}\actBound_{\mathsf{bnd}}(\{t\}) = \{ \langle t, \langle 1, \bot \rangle \rangle,  \langle t, \langle 0, \bot \rangle \rangle \}.\end{equation*}

We can now consider the \emph{unbounded} reachability probability 
\begin{equation*}\mathsf{Pr}_{\max}^{\unfoldingmdp{\mathsf{bnd}}{\pomdp}}( \costEventually{} \actBound_{\mathsf{bnd}}(\{t\})) = \mathsf{Pr}_{\max}^{\unfoldingmdp{\mathsf{bnd}}{\pomdp}}( \costEventually{} \{ \langle t, \langle 1, \bot \rangle \rangle,  \langle t, \langle 0, \bot \rangle \rangle \})\end{equation*}
and solve the problem using methods known from the literature.

This results in a value of $\mathsf{Pr}_{\max}^{\unfoldingmdp{\mathsf{bnd}}{\pomdp}}( \costEventually{} \actBound_{\mathsf{bnd}}(\{t\})) = \nicefrac{3}{8}$, achieved by a policy that chooses $\alpha$ in the first $3$ steps (in which only $z_0$ is observed) and then $\beta$.

By Theorem~\ref{thm:unfolding} we therefore get that
\begin{equation*}\mathsf{Pr}_{\max}^{\pomdp}(\costEventually{\mathsf{bnd}} \{t\}) = \mathsf{Pr}_{\max}^{\unfoldingmdp{\mathsf{bnd}}{\pomdp}}( \costEventually{} \actBound_{\mathsf{bnd}}(\{t\})) = \nicefrac{3}{8}.\end{equation*}

This example also showcases why a na\"ive unfolding approach that directly encodes the collected costs in the state space (and not the costs that remain until the bound changes its status) is inappropriate.
In such a na\"ive unfolding, we have infinitely many copies of every state in the POMDP, and in particular there are infinitely many reachable target states for the unbounded reachability problem.
In contrast, our unfolding results in a finite POMDP which can be treated with standard methods.

\section{Detailed Explanation of Level Unfolding POMDP}
\label{appdx:lvlunfold}
We explain the construction of the level unfolding POMDP (Def.~\ref{def:lvlunf}).
For the remainder of this section, fix a POMDP $\pomdp=\pomdptuple$ with $\mdp = \mdptuple$, $k$-dimensional cost bound $\costtriple$ and level function $\levelfunc_{\leveldegree} : \mathbb{N}^k \to \mathbb{N}^k$ with $\leveldegree \in (\mathbb{N}\setminus\{0\})^k$.

The core idea of the level unfolding is that we keep track of jumps in the level using the observations of the unfolding POMDP. 
An observation also stores by how many levels we jump up when taking a transition.
To keep track of when an incurred cost causes a level jump, we need to do bookkeeping in between jumps.
We do this by storing the remaining cost until the next jump in the state space.

The state space $\states_{\levelfunc_{\leveldegree}}$ is defined such that there is a copy of each state for each possible combination of costs with which we stay in the current level in any dimension. 
We keep track of this using a set of vectors $\ell \in \mathbb{N}^k$ where each entry $\tupleaccess{\ell}{i}$ indicates the amount of cost which is allowed to be collected to stay in the current level of dimension $i$. 
We call this cost the \emph{remainder}.
Formally, the state space is defined as:
\begin{equation*}\states_{\levelfunc_{\leveldegree}} \colonequals \states \times \{\ell \in \mathbb{N}^k \mid \forall 1 \leq i \leq k: 0 \leq \tupleaccess{\ell}{i} < \tupleaccess{\leveldegree}{i} \}\end{equation*}

The transitions $\transitions_{\levelfunc_{\leveldegree}}$ of the unfolding are defined such that for two states in the unfolding, the transition probability is the same as for the corresponding states in the original POMDP if their remainder vectors are compatible. In particular, that means that the transition models the correct transformation of the remainder.
This is the case if for each dimension, the new remainder is the old remainder minus the cost of the transition, modulo $\tupleaccess{\leveldegree}{i}$, effectively modelling that a level jump occurs every time a total of $\tupleaccess{\leveldegree}{i}$ costs has been incurred and the remainder stores the progress in the current level. We get:
\begin{equation*}\transitions_{\levelfunc_{\leveldegree}}(\tuple{s,\ell},a,\tuple{s',\ell'}) \colonequals
    \begin{cases}
        \transitions(s,a,s') & \text{if } \forall 1 \leq i \leq k : \tupleaccess{\ell'}{i} = \tupleaccess{\ell}{i} - \tupleaccess{\coststruct(s,a,s')}{i} \mod \tupleaccess{\leveldegree}{i},\\
        0 & \text{otherwise.}
    \end{cases}
\end{equation*}

In the unfolding, the observations are defined such that in addition to the observations of the original POMDP, we keep track of the level changes that occur when taking transitions. 
Thus, we consider copies of the original observations for each vector $\mathsf{j}$ of possible level changes in one step.
In particular, we observe that in dimension $i$, in one step a transition $\transition{s}{a}{s'}$ can increase the level by at most $\left\lceil \frac{\tupleaccess{\coststruct(s,a,s')}{i}}{\tupleaccess{\leveldegree}{i}} \right\rceil$.
Thus it suffices to consider vectors $\mathsf{j}$ where the value in each dimension is at most $\left\lceil \frac{\max_{s,s' \in \states, a \in \actions} \tupleaccess{\coststruct(s,a,s')}{i}}{\tupleaccess{\leveldegree}{i}} \right\rceil$, resulting in a finite set of observations. In particular, we get:
\begin{equation*}\observations_{\levelfunc_{\leveldegree}} \colonequals \observations \times \left\{\mathsf{j} \in \mathbb{N}^k \mid \forall 1 \leq i \leq k : 0 \leq \tupleaccess{\mathsf{j}}{i} \leq \left\lceil \frac{\max_{s,s' \in \states, a \in \actions} \tupleaccess{\coststruct(s,a,s')}{i}}{\tupleaccess{\leveldegree}{i}} \right\rceil\right\}\end{equation*}

$\obsfunction_{\levelfunc_{\leveldegree}}$, i.e.,~the observation function of the unfolding, is defined such that for every transition, the probability of observing a new observation $\tuple{z,\mathsf{j}}$ after a transition is the same as the probability to observe $z$ after taking the corresponding transition in the original POMDP exactly if $\mathsf{j}$ corresponds to the correct jumps in level in all dimensions.
To ensure this, in addition to the costs of the transition, we need to consider the remainders $\ell$ in the origin state $\tuple{s,\ell}$ as the smaller the value $\tupleaccess{\ell}{i}$, i.e.,~the deeper we already are in the current level, the fewer costs we need to collect to jump to the next level.
This results in the following definition:
\begin{equation*}
\obsfunction_{\levelfunc_{\leveldegree}}(\tuple{s,\ell},a,\tuple{s',\ell'})(\tuple{z,\mathsf{j}}) \colonequals
\begin{cases}
    \obsfunction(s,a,s')(z) & \text{if } \forall 1 \leq i \leq k : \tupleaccess{\mathsf{j}}{i} = \left\lceil \frac{\tupleaccess{\coststruct(s,a,s')}{i}-\tupleaccess{\ell}{i}}{\tupleaccess{\leveldegree}{i}} \right\rceil, \\
    0 & \text{otherwise.}
\end{cases}
\end{equation*}

Finally, as the initial state we consider the copy of the original initial state $\sinit$ where all remainders are $0$, i.e., $\tuple{\sinit,\tuple{0,\ldots,0}}$.
This captures the behaviour of the level function $\levelfunc_{\leveldegree}$ where if we consider the input to be a vector of total incurred costs, as soon as \emph{some} non-zero cost is collected in a dimension, the level jumps from $0$ to a higher value.

Using the above components, we can define a level unfolding POMDP with respect to $\levelfunc_{\leveldegree}$ as
$\levelfunc_{\leveldegree}(\pomdp) = \tuple{\mdp_{\levelfunc_{\leveldegree}}, \observations_{\levelfunc_{\leveldegree}}, \obsfunction_{\levelfunc_{\leveldegree}}}$ with $\mdp_{\levelfunc_{\leveldegree}} = \tuple{\states_{\levelfunc_{\leveldegree}}, \actions, \transitions_{\levelfunc_{\leveldegree}}, \tuple{\sinit, \tuple{0, \ldots, 0}}}$.

\section{Proofs for Main Results}
\subsection{Details on Path Probability Measure}
As a basis for further theoretical results, we recap the definition of the probability measure for paths.

Given POMDP $\pomdp = \pomdptuple$ with underlying MDP $\mdp = \mdptuple$ and a policy $\pol \in \pols{\pomdp}$, we define the functions \begin{equation*}\pol_a(\tau) \colonequals \begin{cases}
        1 & \text{ if } \pol(\tau) = a, \\
        0 & \text{ otherwise } 
    \end{cases}\end{equation*}
indicating if policy $\pol$ chooses action $a \in \actions$ when trace $\tau$ is observed.

Towards the probability measure for paths, we observe that the probability of a path $\finpath = s_0 a_1 \ldots s_n$ and an observation trace of compatible length $\obstrace = z_1 \ldots z_n$ occurring under a policy $\pol$ is
\begin{equation*}P^\pomdp_\pol(\finpath \land \obstrace) = \prod_{i=1}^n \transitions(s_{i-1},a_i,s_i) \cdot \pol_{a_i}(\obstrace[..{(i-1)}]) \cdot \obsfunction(s_{i-1},a_i,s_i)(z_i)\end{equation*}
where 
\begin{equation*}\obstrace[..{k}] \colonequals \begin{cases}
    z_1 \ldots z_k & \text{ if } k>0, \\
    \varepsilon & \text{ otherwise.}
\end{cases}\end{equation*}
The overall probability of a path is then the probability that it occurs with \emph{any} observation trace.
We get that overall for a finite path $\finpath = s_0 a_1\ldots s_n \in \finpaths{\pomdp}$, the probability of $\finpath$ under policy $\pol$ is 
\begin{equation*}\mathsf{Pr}_{\pol}^{\pomdp}( \{ \finpath \} ) = \sum_{z_1\ldots z_n \in \obstraces{\pomdp}} P^\pomdp_\pol(\finpath \land z_1\ldots z_n)\end{equation*}

Using the probability for a finite path, the probability measure for infinite paths and by extension (measurable) sets of infinite paths is defined using the standard cylinder set construction. We refer to \citet{baier2008} for details.

In the following, we identify LTL-like formulae for reachability with the sets of paths they describe, i.e.,~for a set $T \subseteq S$ and $\costtriple$,
we define
\begin{equation*}
\costEventually{\costtriplenobracket} T \colonequals \big\{\pi \in \mathit{Cyl}(\finpath) ~\big|~
     \last{\finpath} \in T \text{ and } \costtriple \text{ is active for } \finpath
    \big\}
\end{equation*}
where $\mathit{Cyl}(\finpath)$ is the set of infinite extensions of finite path $\finpath$.

\subsection{Proof of Theorem \ref{thm:unfolding}}
\setcounter{theorem}{1}
\begin{theorem}
\label{thm:unfolding_appdx}
    Given a POMDP $\pomdp$, set $T \subseteq S$ and cost bound $\costtriple$, it holds that for all policies $\pol \in \pols{\pomdp}$:
    \begin{equation*} \mathsf{Pr}_{\pol}^{\pomdp}\left(\costEventually{\costtriplenobracket} \ T\right) = \mathsf{Pr}_{\pol}^{\unfoldingmdp{\costtriplenobracket}{\pomdp}}\left(\costEventually{} \ \actBound_{\costtriplenobracket}(T) \right)\end{equation*}
\end{theorem}
\begin{proof}
    Let $\pomdp = \pomdptuple$ with $\mdp = \mdptuple$.
    Furthermore, $\unfoldingmdp{\costtriplenobracket}{\pomdp} = \left\langle \unfoldingmdp{\costtriplenobracket}{\mdp}, \observations, \obsfunction_\mathsf{un}\right\rangle$ with 
$\unfoldingmdp{\costtriplenobracket}{\mdp} = \left\langle \states \times \epochs_k(\einit), \actions, \transitions_\mathsf{un}, \langle \sinit, \einit\rangle\right\rangle$.
%    
    We define a mapping \begin{equation*}{f: \paths{\unfoldingmdp{\costtriplenobracket}{\pomdp}} \to \paths{\pomdp}}\end{equation*} with
    \begin{equation*}f(\langle s_0, \epoch_0\rangle a_1 \langle s_1, \epoch_1\rangle \ldots \langle s_n, \epoch_n\rangle) \colonequals s_0 a_1 \ldots s_n,\end{equation*}
    i.e.,~$f(\hat{\pi})$ is the path resulting from dropping the epoch component from $\hat{\pi}$.
%    
    We show that $f$ is bijective:
\begin{itemize}
\item 
    \underline{$f$ \emph{is injective:}}
% 
Consider two (finite, initial) paths
\begin{equation*}\hat{\pi} = \langle s_0,\epoch_0\rangle a_1 \langle s_1,\epoch_1\rangle a_2 \dots \langle s_n,\epoch_n\rangle \end{equation*} and \begin{equation*}\hat{\pi}' = \langle s_0',\epoch_0'\rangle a_1' \langle s_1',\epoch_1'\rangle a_2'\dots \langle s_m',\epoch_m'\rangle\end{equation*} of
 $\unfoldingmdp{\costtriplenobracket}{\pomdp}$ with  $\hat{\pi} \neq \hat{\pi}'$.
By definition, both paths start at the initial state $\langle \sinit, \einit\rangle$. We distinguish two cases:
\begin{itemize}
\item If $\hat{\pi}$ and $\hat{\pi}'$ have different lengths ($n \neq m$), then $|f(\hat{\pi})| = |\hat{\pi}| \neq |\hat{\pi}'| = |f(\hat{\pi}')|$ and thus $f(\hat{\pi}) \neq f(\hat{\pi}')$.
\item Otherwise, let $0 < i \le n=m$ be the first index where the paths disagree, i.e.,~$\langle s_{i-1},\epoch_{i-1}\rangle = \langle s_{i-1}',\epoch_{i-1}'\rangle$ and $a_{i} \neq a_{i}'$, $s_{i} \neq s_{i}'$, or $\epoch_{i} \neq \epoch_{i}'$.
If $a_i = a_i'$ and $s_i = s_i'$, we get $\epoch_i = \epoch_{i-1} \ominus \coststruct(s_{i-1},a_i,s_i) = \epoch_i'$.
	Therefore, either $a_{i} \neq a_{i}'$ or $s_{i} \neq s_{i}'$ must hold and we immediately get $f(\hat{\pi}) \neq f(\hat{\pi}')$.
\end{itemize}

\item
    \underline{$f$ \emph{is surjective:}}
%    
    We show that for all $\tilde{\pi} \in \paths{\pomdp}$, there exists ${\hat{\pi} \in \paths{\unfoldingmdp{\costtriplenobracket}{\pomdp}}}$ such that $f(\hat{\pi}) = \tilde{\pi}$.
    Let $\tilde{\pi} = s_0 a_1 s_1 a_2 \ldots s_n \in \paths{\pomdp}$.
    Consider \begin{equation*}\hat{\pi} = \langle s_0, \einit \rangle a_1 \langle s_1, \einit \ominus \coststruct(s_0,a_1,s_1) \rangle \ldots \langle s_n, (\ldots(\einit \ominus \coststruct(s_0,a_1,s_1)) \ominus \ldots )\ominus \coststruct(s_{n-1},a_n,s_n) \rangle\end{equation*}
    where $\hat{\pi}\in \paths{\unfoldingmdp{\costtriplenobracket}{\pomdp}}$ by definition of the unfolding MDP.
        
    We have $f(\hat{\pi}) = \tilde{\pi}$, thus $f$ is surjective.
\end{itemize}
    Therefore, $f$ is bijective.
%
Next, recall that $\pomdp$ and $\unfoldingmdp{\costtriplenobracket}{\pomdp}$ share the same set of policies $\pols{\pomdp} = \pols{\unfoldingmdp{\costtriplenobracket}{\pomdp}}$.

    We show that $f$ preserves the probability of measurable sets of (infinite) paths under a given policy $\pol \in \pols{\pomdp}$.
    As those sets can be constructed from cylinder sets of finite paths, we can focus on finite paths.

    We show that \begin{equation*}\mathsf{Pr}_{\pol}^{\unfoldingmdp{\costtriplenobracket}{\pomdp}}\left( \{ \finpath \} \right) = \mathsf{Pr}_{\pol}^{\pomdp}\left(\{ f(\finpath) \}\right)\end{equation*}

    \begin{align*}
        \mathsf{Pr}_{\pol}^{\unfoldingmdp{\costtriplenobracket}{\pomdp}}\left( \{ \finpath \} \right) 
        & = \sum_{z_1\ldots z_n \in \obstraces{\unfoldingmdp{\costtriplenobracket}{\pomdp}}} P^{\unfoldingmdp{\costtriplenobracket}{\pomdp}}_\pol(\finpath \land z_1\ldots z_n) \\
        & = \sum_{z_1\ldots z_n \in \obstraces{\unfoldingmdp{\costtriplenobracket}{\pomdp}}} \prod_{i=1}^n\transitions_\mathsf{un}(\langle s_{i-1}, \epoch_{i-1} \rangle,a_i,\langle s_i, \epoch_i \rangle) \cdot \\ 
        & \qquad \qquad  \pol_{a_i}(\obstrace[..{(i-1)}]) \cdot \obsfunction_\mathsf{un}(\langle s_{i-1}, \epoch_{i-1} \rangle,a_i,\langle s_i, \epoch_i \rangle)(z_i) \\
        & = \sum_{z_1\ldots z_n \in \obstraces{\pomdp}} \prod_{i=1}^n\transitions(s_{i-1},a_i,s_i) \cdot \\ 
        & \qquad \qquad  \pol_{a_i}(\obstrace[..{(i-1)}]) \cdot \obsfunction(s_{i-1},a_i,s_i)(z_i) \\
        & = \sum_{z_1\ldots z_n \in \obstraces{\pomdp}} P^{\pomdp}_\pol (f(\finpath) \land z_1\ldots z_n) \\
        & = \mathsf{Pr}_{\pol}^{\pomdp}\left( \{ f(\finpath) \} \right)
    \end{align*}

    It remains to show that $f$ correctly transforms the set of paths $\costEventually{} \ \actBound_{\costtriplenobracket}(T)$, i.e.,~
    $\costEventually{\costtriplenobracket} \ T = \{ f(\pi) \mid \pi \in \costEventually{} \ \actBound_{\costtriplenobracket}(T)\}$.

    Let $\pi \in \costEventually{} \ \actBound_{\costtriplenobracket}(T)$, i.e.,~$\pi \in \mathit{Cyl}(\finpath)$ for a $\finpath = \langle s_0, \epoch_0\rangle a_1 \ldots \langle s_n, \epoch_n\rangle$ such that
    $\last{\finpath} = \langle s_n, \epoch_n \rangle \in \actBound_{\costtriplenobracket}(T) = \{ \langle t, \epoch \rangle \in S \times \epochs_k(\einit) \mid t \in T \land \actBound_{\costtriplenobracket}(\epoch) = 1 \}$.
    Consider $f(\finpath) = s_0 a_1 \ldots s_n$. By definition we have $s_n \in T$.
    
    Consider further an arbitrary, but fixed dimension $1 \leq j \leq k$ of bound $\costtriple$. We distinguish two cases:
   
   \underline{$\tupleaccess{\costrel}{j} = (\leq)$}: as $\langle s_n, \epoch_n \rangle \in \actBound_{\costtriplenobracket}(T)$, we have $\actBound_{\costtriplenobracket}(\epoch_n) = 1$. Thus, $\tupleaccess{\epoch_n}{j} \in \mathbb{N}$ and in particular $\tupleaccess{\epoch_n}{j} \neq \bot$.
   By definition of $\transitions_\mathsf{un}$, we then know that for all transitions $\transition{\langle s_i, \epoch_i\rangle}{a_{i+1}}{\langle s_{i+1}, \epoch_{i+1}\rangle}$ along $\finpath$, $\tupleaccess{\epoch_i}{j} - \tupleaccess{\coststruct(s_i,a_{i+1},s_{i+1})}{j} \geq 0$ and therefore also that $\tupleaccess{\einit}{j} - \tupleaccess{\sum_{i=1}^n \coststruct(s_{i-1},a_i,s_i)}{j} \geq 0$. Furthermore,
    \begin{equation*}\tupleaccess{\einit}{j} - \tupleaccess{\sum_{i=1}^n \coststruct(s_{i-1},a_i,s_i)}{j} = \tupleaccess{\einit}{j} - \tupleaccess{\cumurew_{\coststruct}(f(\finpath))}{j}\end{equation*}
    so $\tupleaccess{\einit}{j} - \tupleaccess{\cumurew_{\coststruct}(f(\finpath))}{j} \geq 0$ and $\tupleaccess{\einit}{j} \geq \tupleaccess{\cumurew_{\coststruct}(f(\finpath))}{j}$, so bound $\costtriple$ is active in dimension $j$ for $f(\finpath)$.

    \underline{$\tupleaccess{\costrel}{j} = (>)$}: as $\langle s_n, \epoch_n \rangle \in \actBound_{\costtriplenobracket}(T)$, we have $\actBound_{\costtriplenobracket}(\epoch_n) = 1$. 
    Thus, ${\tupleaccess{\epoch_n}{j} = \bot}$.
    By definition of $\transitions_\mathsf{un}$, we then know that there is a transition $\transition{\langle s_i, \epoch_i\rangle}{a_{i+1}}{\langle s_{i+1}, \epoch_{i+1}\rangle}$ along $\finpath$ such that $\tupleaccess{\epoch_i}{j} - \tupleaccess{\coststruct(s_i,a_{i+1},s_{i+1})}{j} < 0$.
    
    Therefore also $\tupleaccess{\einit}{j} - \sum_{i=1}^n \tupleaccess{\coststruct(s_{i-1},a_i,s_i)}{j} < 0$ holds.
    Furthermore, by the argument from before, we then get $\tupleaccess{\einit}{j} < \tupleaccess{\cumurew_{\coststruct}(f(\finpath))}{j}$, so bound $\costtriple$ is active in dimension $j$ for $f(\finpath)$.
         
    Thus, for $f(\finpath)$, we have $\last{f(\finpath)} \in T$ and for all $1 \leq j \leq k$, $\costtriple$ is active for $f(\finpath)$ in dimension $j$, so for all $f(\pi) \in \mathit{Cyl}(f(\finpath))$,  $f(\pi) \in \costEventually{\costtriplenobracket} \ T$.

    Now let $\pi_\pomdp \in \costEventually{\costtriplenobracket} \ T$. By bijectivity of $f$ and with similar arguments as before, we get $f^{-1}(\pi_\pomdp ) \in \costEventually{} \ \actBound_{\costtriplenobracket}(T)$.
    
    Overall we get $\costEventually{\costtriplenobracket} \ T = \{ f(\pi) \mid \pi \in \costEventually{} \ \actBound_{\costtriplenobracket}(T)\}$.

    We conclude that \begin{equation*} \mathsf{Pr}_{\pol}^{\pomdp}\left(\costEventually{\costtriplenobracket} \ T\right) = \mathsf{Pr}_{\pol}^{\unfoldingmdp{\costtriplenobracket}{\pomdp}}\left(\costEventually{} \ \actBound_{\costtriplenobracket}(T) \right)\end{equation*}
    for all policies $\pol \in \pols{\pomdp}$.
\end{proof}

\subsection{Proof of Theorem \ref{thm:belunfisomorph}}
\begin{theorem}
\label{thm:belunfisomorph_appdx}
For cost bound $\costtriple$ and cost-aware POMDP $\pomdp$ we have
\begin{equation*}\beliefmdp{\unfoldingmdp{\costtriplenobracket}{\pomdp}} \cong \unfoldingmdp{\beliefmdp{\costtriplenobracket}}{\beliefmdp{\pomdp}}.\end{equation*}
\end{theorem}

Let $\pomdp = \pomdptuple$ with $\mdp = \mdptuple$ be a cost-aware POMDP with respect to $k$-dimensional cost structure $\coststruct$.

Furthermore, let \begin{equation*}\beliefmdp{\unfoldingmdp{\costtriplenobracket}{\pomdp}} = \langle \beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}}, \actions, \transitions_{\mathsf{un}}^B, \langle z_{\mathit{init}}, \dist_\mathit{init}\rangle \rangle\end{equation*}
with 
$\dist_\mathit{init}(\langle \sinit, \einit \rangle) = 1$
and 
\begin{equation*}\unfoldingmdp{\beliefmdp{\costtriplenobracket}}{\beliefmdp{\pomdp}} = \langle \beliefs^{\pomdp} \times \epochs_k(\einit), \actions, \transitions^{\beliefmdp{\pomdp}}_\mathsf{un}, \langle \binit, \einit \rangle \rangle.\end{equation*}


To show that $\beliefmdp{\unfoldingmdp{\costtriplenobracket}{\pomdp}} \cong \unfoldingmdp{\beliefmdp{\costtriplenobracket}}{\beliefmdp{\pomdp}}$,
we will show that we can identify each belief $\langle z, \dist \rangle$ over state-epoch tuples in $\beliefmdp{\unfoldingmdp{\costtriplenobracket}{\pomdp}}$ with a belief-epoch tuple $\langle \langle z, b \rangle, \epoch \rangle$ in $\unfoldingmdp{\beliefmdp{\costtriplenobracket}}{\beliefmdp{\pomdp}}$ by moving the probability distribution inside and the epoch out of the belief and vice versa.

We define a mapping $f: \beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}} \to \beliefs^{\pomdp} \times \epochs_k(\einit)$ with
\begin{equation*} f(\langle z, \dist \rangle) \colonequals \langle \langle z, \dist_{f} \rangle, \epoch \rangle\end{equation*} 
where $\dist_{f}(s) \colonequals \dist(\langle s, \epoch \rangle)$ and $\epoch$ is the epoch of some ${\langle s, \epoch \rangle \in \dist}$.

For $f$ to be well-defined, the epoch $\epoch$ must be unique for a fixed belief $\langle z, \dist \rangle$. As already stated in the main paper, this is indeed the case. We formalise the claim in the following lemma.

\begin{lemma}
\label{lem:uniqueness}
Given a belief $b = \langle z, \dist \rangle \in \beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}}$, for all states $\langle s, \epoch \rangle, \langle s', \epoch' \rangle \in \dist$ it holds that $\epoch = \epoch'$, i.e.,~the epoch for all states in $b$ is unique.
\end{lemma}
\begin{proof}
We show the claim by induction over the structure of \begin{equation*}\beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}} = \lim_{n\to \infty} \beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}}^n.\end{equation*}

For $n=0$, we have $\beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}}^0 = \{\langle z_\mathit{init}, \dist_\mathit{init} \rangle\}$ with $\dist_\mathit{init} (\langle \sinit, \einit \rangle) = 1$, so the claim holds.

Assume the claim holds for an arbitrary but fixed $n \in \mathbb{N}$. We show that then the claim also holds for $n+1$.
Consider $b = \langle z, \dist \rangle \in \beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}}^{n+1}$. If $b \in \beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}}^{n}$, the claim holds by assumption.
Otherwise we have that $b = \langle z, \dist \rangle \in \beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}}^{n+1} \setminus \beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}}^{n}$. Thus, $b = \successorbelief{b'}{a}{z}$ for some $b' = \langle z', \dist' \rangle \in \beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}}^{n}$ and $a \in \actions$.

Towards a contradiction, assume that there are two states $\langle s, \epoch \rangle, \langle s', \epoch' \rangle \in \dist$ such that $\epoch \neq \epoch'$. Let $\langle q,\tilde{\epoch} \rangle, \langle q',\tilde{\epoch} \rangle \in \dist'$ where $\tilde{\epoch}$ denotes the unique epoch of $b'$ and 
\begin{align*}
  \transitions_\mathsf{un}(\langle q,\tilde{\epoch} \rangle,a,\langle s,\epoch \rangle) > 0, & &  \transitions_\mathsf{un}(\langle q',\tilde{\epoch} \rangle,a,\langle s',\epoch' \rangle) > 0, \\
  \obsfunction_\mathsf{un}(\langle q,\tilde{\epoch} \rangle,a,\langle s,\epoch \rangle)(z) > 0, & & \obsfunction_\mathsf{un}(\langle q',\tilde{\epoch} \rangle,a,\langle s',\epoch' \rangle)(z) > 0,
\end{align*}
i.e.,~the states $\langle q,\tilde{\epoch} \rangle$ and $\langle q',\tilde{\epoch} \rangle$ respectively contribute to the probability of $\langle s, \epoch \rangle$ and $\langle s', \epoch' \rangle$ in $\dist$.

By definition of the unfolding, we also get $\obsfunction(q,a,s)(z) > 0$ and $\obsfunction(q',a,s')(z) > 0$.
As $\epoch \neq \epoch'$, we also have that $\tilde{\epoch} \ominus \coststruct(q,a,s) \neq \tilde{\epoch} \ominus \coststruct(q',a,s')$, implying that $\coststruct(q,a,s) \neq \coststruct(q',a,s')$. This, however, contradicts the cost-awareness of $\pomdp$. 

Therefore the claim also holds for $n+1$.

We conclude the uniqueness of the epoch in belief $b$.
\end{proof}

\begin{lemma}
\label{lem:welldef}
The mapping $f: \beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}} \to \beliefs^{\pomdp} \times \epochs_k(\einit)$ as defined above is well-defined.
\end{lemma}
\begin{proof}
By Lemma~\ref{lem:uniqueness}, we have that the epoch $\epoch$ used in the mapping is unambiguous.

It remains to show that indeed $f(b) \in \beliefs^{\pomdp} \times \epochs_k(\einit)$ for all $b \in \beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}}$.

We use induction over the structure of $\beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}}$. In particular, we show that if for a belief $b \in \beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}}$,  $f(b) \in \beliefs^{\pomdp} \times \epochs_k(\einit)$ holds, then for all successors $b' \in \transitions_{\mathsf{un}}^B(b,a)$ for some $a \in \actions$, $f(b') \in \transitions^{\beliefmdp{\pomdp}}_\mathsf{un}(f(b),a)$.

As the base case, consider $\langle z_{\mathit{init}}, \dist_\mathit{init}\rangle \in \beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}}$
with \begin{equation*}\dist_\mathit{init}(\langle \sinit, \einit \rangle) = 1.\end{equation*}

We get $f(\langle z_{\mathit{init}}, \dist_\mathit{init}\rangle) = \langle b_\mathit{init}, \einit \rangle$.

$\langle b_\mathit{init}, \einit \rangle \in {\beliefs^{\pomdp} \times \epochs_k(\einit)}$ by definition of $\beliefs^{\pomdp}$ and $\epochs_k(\einit)$, thus 
\begin{equation*}f(\langle z_{\mathit{init}}, \dist_\mathit{init}\rangle) \in {\beliefs^{\pomdp} \times \epochs_k(\einit)}.\end{equation*}

Now let $b = \langle z, \dist \rangle \in \beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}}$ such that the claim holds, i.e.,~\begin{equation*}f(b) \in \beliefs^{\pomdp} \times \epochs_k(\einit).\end{equation*} Let $\epoch$ denote the unique epoch of all $\langle s, \epoch \rangle \in \dist$ and 
let $f(b) = \langle \langle z, \dist_f \rangle, \epoch \rangle$ with $\dist_f(s) = \dist(\langle s, \epoch \rangle)$. Let $\beta = \langle z, \dist_f \rangle$.

We first show that $P(z'| b,a) = P(z'| \beta,a)$.
\begin{align*}
    P(z'| b,a) & = \sum_{\langle s, \epoch \rangle \in \dist} \dist(\langle s, \epoch \rangle) \cdot \smashoperator[lr]{\sum_{\langle s', \epoch' \rangle \in \states \times \epochs_k(\einit)}} \transitions_\mathsf{un}(\langle s, \epoch \rangle, a)(\langle s', \epoch' \rangle) \cdot \obsfunction_\mathsf{un}(\langle s, \epoch \rangle,a,\langle s', \epoch' \rangle)(z') \\
    & = \sum_{\langle s, \epoch \rangle \in \dist} \dist(\langle s, \epoch \rangle) \cdot \smashoperator[lr]{\sum_{\langle s', \epoch' \rangle \in \states \times \epochs_k(\einit)}} \quad \transitions_\mathsf{un}(\langle s, \epoch \rangle, a)(\langle s', \epoch' \rangle) \cdot \obsfunction(s,a,s')(z') \\
    & = \sum_{\langle s, \epoch \rangle \in \dist} \dist(\langle s, \epoch \rangle) \cdot \smashoperator[lr]{\sum_{s' \in \states}} \quad \transitions(s, a)(s') \cdot \obsfunction(s,a,s')(z')\\
    & = \sum_{s \in \dist_f} \dist_f(s) \cdot \smashoperator[lr]{\sum_{s' \in \states}} \quad \transitions(s, a)(s') \cdot \obsfunction(s,a,s')(z') \\
    & = P(z'| \beta,a)
\end{align*}

Let $b' = \langle z', \dist' \rangle \in  \transitions_{\mathsf{un}}^B(b,a)$ for some $a \in \actions$ with $z' \in \observations$.

Then by definition of the belief MDP, $b' = \successorbelief{b}{a}{z'}$ and the unique epoch $\epoch'$ of all states $\langle s', \epoch' \rangle \in \dist'$ is ${\epoch' = \epoch \ominus \coststruct(s,a,s')}$ for some $s \in \dist$ and $s' \in \dist'$.
Cost vector $\coststruct(s,a,s')$ is guaranteed to be unique as $\pomdp$ is cost-aware.

As $b' = \successorbelief{b}{a}{z'}$, we have for $\langle s', \epoch' \rangle \in \dist'$ 
\begin{align*}
    \dist'(\langle s', \epoch' \rangle) = &
    \frac{\sum_{\langle s,\epoch \rangle \in \dist} \dist(\langle s,\epoch \rangle) \cdot \transitions_\mathsf{un}(\langle s,\epoch \rangle,a)(\langle s',\epoch' \rangle) \cdot \obsfunction_\mathsf{un}(\langle s,\epoch \rangle,a,\langle s',\epoch' \rangle)(z')}{P(z'| b,a)} \\
    = & \frac{\sum_{\langle s,\epoch \rangle \in \dist} \dist_f(s) \cdot \transitions_\mathsf{un}(\langle s,\epoch \rangle,a)(\langle s',\epoch' \rangle) \cdot \obsfunction(s,a,s')(z')}{P(z'| b,a)} \\
    \overset{\epoch' = \epoch \ominus \coststruct(s,a,s')}{=} & \frac{\sum_{s\in \dist} \dist_f(s) \cdot \transitions(s,a)(s') \cdot \obsfunction(s,a,s')(z')}{P(z'| b,a)} \\
    = & \frac{\sum_{s\in \dist} \dist_f(s) \cdot \transitions(s,a)(s') \cdot \obsfunction(s,a,s')(z')}{P(z'| \beta,a)} \\
\end{align*}
Let $\beta' = \successorbelief{\beta}{a}{z'} = \langle z', \dist'_f \rangle$ be the successor belief of $\beta$ in $\beliefmdp{\pomdp}$. 

We have that $\coststruct^B (\beta,a,\beta') = \coststruct(s,a,s')$.
So in $\unfoldingmdp{\beliefmdp{\costtriplenobracket}}{\beliefmdp{\pomdp}}$, we have that
\begin{equation*}\transitions^{\beliefmdp{\pomdp}}_\mathsf{un}(\langle \beta, \epoch \rangle,a)(\langle \beta', \tilde{\epoch} \rangle) = \transitions^{\beliefmdp{\pomdp}}(\beta,a)(\beta') > 0\end{equation*} if and only if $\tilde{\epoch} = \epoch \ominus \coststruct^{B}(\beta,a,\beta')$.

Since $\coststruct^{B}(\beta,a,\beta') = \coststruct(s,a,s')$, we have $\epoch \ominus \coststruct^{B}(\beta,a,\beta') = \epoch \ominus \coststruct(s,a,s') = \epoch'$, and we get that ${\langle \beta', \epoch' \rangle \in \transitions^{\beliefmdp{\pomdp}}_\mathsf{un}(\langle \beta, \epoch \rangle,a)}$.

Additionally, we have that
\begin{equation*}\dist'_f(s') = \frac{\sum_{s\in \dist} \dist_f(s) \cdot \transitions(s,a)(s') \cdot \obsfunction(s,a,s')(z')}{P(z'| \beta,a)} = \dist'(\langle s', \epoch' \rangle)\end{equation*}
for all $s' \in \dist'_f$ and we conclude that $f(b') = \langle \beta', \epoch' \rangle$.

Thus, we have shown that for an arbitrary successor $b'$ of $b$, if $f(b) \in \beliefs^{\pomdp} \times \epochs_k(\einit)$, then also $f(b') \in \beliefs^{\pomdp} \times \epochs_k(\einit)$.

We conclude that $f(b) \in \beliefs^{\pomdp} \times \epochs_k(\einit)$ for all $b \in \beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}}$.

Therefore, $f$ is well-defined.
\end{proof}

\begin{lemma}
The mapping $f: \beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}} \to \beliefs^{\pomdp} \times \epochs_k(\einit)$ as defined above is a bijection.
\end{lemma}
\begin{proof}
\underline{$f$ is injective}:
    We show that
    $\langle z, \dist \rangle \neq \langle z', \dist' \rangle$ implies 
    \begin{equation*}f(\langle z, \dist \rangle) =  \langle \langle z, \dist_{f} \rangle, \epoch \rangle \neq f(\langle z', \dist' \rangle) = \langle \langle z', \dist'_{f} \rangle, \epoch' \rangle.\end{equation*}
    
   If $z \neq z'$, $\langle \langle z, \dist_{f} \rangle, \epoch \rangle \neq \langle \langle z', \dist'_{f} \rangle, \epoch' \rangle$ directly follows.
   
   If $z = z'$, then $\dist \neq \dist'$ must hold for $\langle z, \dist \rangle \neq \langle z', \dist' \rangle$ to hold.
    Recall that $\epoch$ and $\epoch'$ are the unique epochs of states in $\dist$ and $\dist'$, respectively.
    We distinguish two cases:
    \begin{itemize}
        \item $\epoch \neq \epoch'$. Then $\langle \langle z, \dist_{f} \rangle, \epoch \rangle \neq \langle \langle z', \dist'_{f} \rangle, \epoch' \rangle$ follows directly.
        \item $\epoch = \epoch'$. As $\dist \neq \dist' $, if $z = z'$ and $\epoch = \epoch'$, there exists an $\langle s, \epoch\rangle$ such that $\dist(\langle s, \epoch\rangle) \neq \dist'(\langle s, \epoch\rangle)$. Therefore, we have $\dist_{f}(s) \neq \dist'_{f}(s)$, establishing \begin{equation*}\langle \langle z, \dist_{f} \rangle, \epoch \rangle \neq \langle \langle z', \dist'_{f} \rangle, \epoch' \rangle.\end{equation*}
    \end{itemize}

 Thus $f$ is injective.\\
    
\underline{$f$ is surjective}: we show that for all $\langle \langle z, \dist_{f} \rangle, \epoch \rangle \in \beliefs^{\pomdp} \times \epochs_k(\einit)$, there exists a $\langle z, \dist \rangle \in \beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}}$ such that $f(\langle z, \dist \rangle) = \langle \langle z, \dist_{f} \rangle, \epoch \rangle$.

Given $\langle \langle z, \dist_{f} \rangle, \epoch \rangle \in \beliefs^{\pomdp} \times \epochs_k(\einit)$, let $b = \langle z, \dist \rangle$ with 
\begin{equation*}
\dist(\langle s,\epoch' \rangle) \colonequals \begin{cases} \dist_{f}(s) & \text{ if } \epoch' = \epoch, \\
    0 & \text{ otherwise.}
\end{cases}\end{equation*}
That $b \in \beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}}$ can be established analogously to the proof of Lemma~\ref{lem:welldef}.

We have $f(b) = \langle \langle z, \dist_{f} \rangle, \epoch \rangle$.

As $f$ is surjective and injective, $f$ is a bijection.
\end{proof}

We can now prove the theorem.

\begin{proof}[Proof of Theorem~\ref{thm:belunfisomorph}]
    The bijective function $f$ is the isomorphism establishing \begin{equation*}\beliefmdp{\unfoldingmdp{\costtriplenobracket}{\pomdp}} \cong \unfoldingmdp{\beliefmdp{\costtriplenobracket}}{\beliefmdp{\pomdp}}.\end{equation*}

    It remains to show that $f$ indeed preserves transition probabilities, i.e.,~
    \begin{equation*}\transitions_{\mathsf{un}}^B(b,a)(b') = \transitions^{\beliefmdp{\pomdp}}_\mathsf{un}(f(b),a)(f(b'))\end{equation*}

    Let $b = \langle z, \dist \rangle, b' = \langle z', \dist' \rangle  \in \beliefs_{\unfoldingmdp{\costtriplenobracket}{\pomdp}}$ and let $a \in \actions$.
    Furthermore, let $f(b) = \langle \langle z, \dist_f \rangle, \epoch \rangle = \langle \beta, \epoch \rangle $ and $f(b') = \langle \langle z', \dist'_f \rangle, \epoch' \rangle = \langle \beta', \epoch' \rangle$.

    We distinguish three cases:
    \begin{itemize}
    \item $b' \neq \successorbelief{b}{a}{z'}$ and $P(z'| b,a) = 0$. Then for all $\langle s,\epoch \rangle \in \dist$ and $\langle s',\epoch' \rangle \in P_\mathsf{un}(\langle s,\epoch \rangle,a)$,
    we have \begin{equation*}\obsfunction_\mathsf{un}(\langle s,\epoch \rangle,a,\langle s',\epoch' \rangle) (z') = 0\end{equation*} and thus already \begin{equation*}\obsfunction(s ,a, s') (z') = 0\end{equation*} for all $s \in \dist_f$. 
    Therefore, in belief MDP $\beliefmdp{\pomdp}$, $\beta'=\langle z', \dist'_f \rangle$ is already not a successor of $\beta=\langle z, \dist_f \rangle$ and thus 
    \begin{equation*}\transitions^{\beliefmdp{\pomdp}}_\mathsf{un}(f(b),a)(f(b')) = \transitions^{\beliefmdp{\pomdp}}(\beta,a)(\beta') = 0 = \transitions_{\mathsf{un}}^B(b,a)(b').\end{equation*}

    \item $b' \neq \successorbelief{b}{a}{z'}$ and $P(z'| b,a) > 0$. Then there exists a $\langle s',\epoch' \rangle \in \dist'$ such that 
    \begin{align*}
    & \dist'(\langle s',\epoch' \rangle) \\
     \neq & \frac{\sum_{\langle s,\epoch \rangle \in \dist} \dist(\langle s,\epoch \rangle) \cdot \transitions_\mathsf{un}(\langle s,\epoch \rangle,a)(\langle s',\epoch' \rangle) \cdot \obsfunction_\mathsf{un}(\langle s,\epoch \rangle,a,\langle s',\epoch' \rangle)(z')}{P(z' | b,a)} \\
     = & \frac{\sum_{\langle s,\epoch \rangle \in \dist} \dist(\langle s,\epoch \rangle) \cdot \transitions_\mathsf{un}(\langle s,\epoch \rangle,a)(\langle s',\epoch' \rangle) \cdot \obsfunction(s,a,s')(z')}{P(z' | b,a)}
    \end{align*}
    First consider the case if $\epoch' = \epoch \ominus \coststruct(s,a,s')$. Then
    \begin{align*}
        & \frac{\sum_{\langle s,\epoch \rangle \in \dist} \dist(\langle s,\epoch \rangle) \cdot \transitions_\mathsf{un}(\langle s,\epoch \rangle,a)(\langle s',\epoch' \rangle) \cdot \obsfunction(s,a,s')(z')}{P(z' | b,a)}\\
        = & \frac{\sum_{\langle s,\epoch \rangle \in \dist} \dist(\langle s,\epoch \rangle) \cdot \transitions(s,a)(s') \cdot \obsfunction(s,a,s')(z')}{P(z' | b,a)} \\
        = & \frac{\sum_{s \in \dist_{f}} \dist_{f}(s) \cdot \transitions(s,a)(s') \cdot \obsfunction(s,a,s')(z')}{P(z' | \beta,a)}\\
        \neq & \dist'(\langle s',\epoch' \rangle) = \dist'_{f}(s')
    \end{align*}
    
    If $\epoch' \neq \epoch \ominus \coststruct(s,a,s')$, then 
    \begin{align*}
        & \frac{\sum_{\langle s,\epoch \rangle \in \dist} \dist(\langle s,\epoch \rangle) \cdot \transitions_\mathsf{un}(\langle s,\epoch \rangle,a)(\langle s',\epoch' \rangle) \cdot \obsfunction(s,a,s')(z')}{P(z' | b,a)}\\
        = & \frac{\sum_{\langle s,\epoch \rangle \in \dist} \dist(\langle s,\epoch \rangle) \cdot 0 \cdot \obsfunction(s,a,s')(z')}{P(z' | b,a)} \\
        = & 0 \neq \dist'(\langle s',\epoch' \rangle) = \dist'_{f}(s')
    \end{align*}

    Thus in both cases, $\beta'$ is already not a successor belief of $\beta$, 
    meaning that \begin{equation*}\transitions^{\beliefmdp{\pomdp}}_\mathsf{un}(f(b),a)(f(b')) = 0 = \transitions_{\mathsf{un}}^B(b,a)(b').\end{equation*}
    \item $b' = \successorbelief{b}{a}{z'}$:
    The proof of Lemma~\ref{lem:welldef} already establishes that in this case $\transitions^{\beliefmdp{\pomdp}}_\mathsf{un}(f(b),a)(f(b')) = \transitions_{\mathsf{un}}^B(b,a)(b')$ holds.
    \end{itemize}

    In all cases, $f$ indeed preserves transition probabilities, i.e.,~
    \begin{equation*}\transitions_{\mathsf{un}}^B(b,a)(b') = \transitions^{\beliefmdp{\pomdp}}_\mathsf{un}(f(b),a)(f(b'))\end{equation*}
    holds.
    We conclude that $f$ establishes the isomorphism between $\beliefmdp{\unfoldingmdp{\costtriplenobracket}{\pomdp}}$ and $\unfoldingmdp{\beliefmdp{\costtriplenobracket}}{\beliefmdp{\pomdp}}$, i.e.,~we get that
    \begin{equation*}\beliefmdp{\unfoldingmdp{\costtriplenobracket}{\pomdp}} \cong \unfoldingmdp{\beliefmdp{\costtriplenobracket}}{\beliefmdp{\pomdp}}.\end{equation*}
\end{proof}

%% PROOF THEOREM LEVEL
\subsection{Proof of Theorem \ref{thm:lvl}}
Fix a POMDP $\pomdp=\pomdptuple$ with $\mdp=\mdptuple$, cost structure $\coststruct$, and a level function $\levelfunc_{\leveldegree}$.
Furthermore, let $\levelfunc_{\leveldegree}(\pomdp)$ be the level unfolding POMDP according to Def.~\ref{def:lvlunf} and define the \emph{level jump (cost) structure} $\levelcosts: {\states_{\levelfunc_{\leveldegree}} \times \actions \times \states_{\levelfunc_{\leveldegree}}} \to \mathbb{N}^k$ as \begin{equation*}\tupleaccess{\levelcosts(\tuple{s,\ell},a,\tuple{s',\ell'})}{i} \colonequals \left\lceil \frac{\tupleaccess{\coststruct(s,a,s')}{i}-\tupleaccess{\ell}{i}}{\tupleaccess{\leveldegree}{i}} \right\rceil.\end{equation*}

We will first show three lemmata that will help with the proof of Theorem~\ref{thm:lvl}.

\begin{lemma}
\label{lem:step}
Let $\finpath = \tuple{s_0, \ell_0}a_1 \tuple{s_1, \ell_1}a_2 \ldots a_n\tuple{s_n, \ell_n} \in \finpaths{\levelfunc_{\leveldegree}(\pomdp)}$ and $\tilde{\pi} = \finpath a_{n+1}\tuple{s_{n+1}, \ell_{n+1}}$ with $\transitions_{\levelfunc_\leveldegree}(\tuple{s_n, \ell_n},a_{n+1},\tuple{s_{n+1}, \ell_{n+1}})>0$ an extension of $\finpath$.
Then it holds that
\begin{equation*}
\tupleaccess{\leveldegree}{i} \cdot \tupleaccess{\cumurew_{\levelcosts}(\tilde{\pi})}{i} - \tupleaccess{\ell_{n+1}}{i} =
    \tupleaccess{\coststruct(s_{n}, a_{n+1}, s_{n+1})}{i} +\tupleaccess{\leveldegree}{i} \cdot \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} - \tupleaccess{\ell_n}{i}
\end{equation*}
\end{lemma}

\begin{proof}
In the following, we use the facts that
$(x \bmod y) = x - y\cdot\left\lfloor \frac{x}{y} \right\rfloor$
and
$\left\lfloor -x \right\rfloor = - \lceil x \rceil$.
We get:
\begin{align*}
     & \tupleaccess{\leveldegree}{i} \cdot \tupleaccess{\cumurew_{\levelcosts}(\tilde{\pi})}{i} - \tupleaccess{\ell_{n+1}}{i}\\
    = & \tupleaccess{\leveldegree}{i} \cdot \sum_{h=1}^{n+1} \left\lceil \frac{\tupleaccess{\coststruct(s_{h-1},a_{h},s_{h})}{i} -\tupleaccess{\ell_{h-1}}{i}}{\tupleaccess{\leveldegree}{i}} \right\rceil - \tupleaccess{\ell_{n+1}}{i} \\
    %
    = & \left(\tupleaccess{\leveldegree}{i} \cdot \sum_{h=1}^{n+1} \left\lceil \frac{\tupleaccess{\coststruct(s_{h-1},a_{h},s_{h})}{i} -\tupleaccess{\ell_{h-1}}{i}}{\tupleaccess{\leveldegree}{i}} \right\rceil\right) - \tupleaccess{\ell_{n+1}}{i} \\
    %
    = &\left(\tupleaccess{\leveldegree}{i} \cdot \sum_{h=1}^{n+1} \left\lceil \frac{\tupleaccess{\coststruct(s_{h-1},a_{h},s_{h})}{i} -\tupleaccess{\ell_{h-1}}{i}}{\tupleaccess{\leveldegree}{i}} \right\rceil\right) - \bigl((\tupleaccess{\ell_{n}}{i} - \tupleaccess{\coststruct(s_n,a_{n+1},s_{n+1})}{i}) \bmod \tupleaccess{\leveldegree}{i}\bigr)\\
    %
    = &\tupleaccess{\leveldegree}{i} \cdot \left\lceil \frac{\tupleaccess{\coststruct(s_{n},a_{n+1},s_{n+1})}{i} -\tupleaccess{\ell_{n}}{i}}{\tupleaccess{\leveldegree}{i}} \right\rceil + \left(\tupleaccess{\leveldegree}{i} \cdot \sum_{h=1}^{n} \left\lceil \frac{\tupleaccess{\coststruct(s_{h-1},a_{h},s_{h})}{i} -\tupleaccess{\ell_{h-1}}{i}}{\tupleaccess{\leveldegree}{i}} \right\rceil\right) \\
    & \qquad - \bigl((\tupleaccess{\ell_{n}}{i} - \tupleaccess{\coststruct(s_n,a_{n+1},s_{n+1})}{i}) \bmod \tupleaccess{\leveldegree}{i}\bigr)\\
    %
    = &\tupleaccess{\leveldegree}{i} \cdot \left\lceil \frac{\tupleaccess{\coststruct(s_{n},a_{n+1},s_{n+1})}{i} -\tupleaccess{\ell_{n}}{i}}{\tupleaccess{\leveldegree}{i}} \right\rceil + \tupleaccess{\leveldegree}{i} \cdot \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} -\\
    & \qquad \left(\tupleaccess{\ell_{n}}{i} -\tupleaccess{\coststruct(s_n,a_{n+1},s_{n+1})}{i} - \tupleaccess{\leveldegree}{i} \cdot \left\lfloor \frac{\tupleaccess{\ell_{n}}{i} -\tupleaccess{\coststruct(s_n,a_{n+1},s_{n+1})}{i}}{\tupleaccess{\leveldegree}{i}} \right\rfloor \right)\\
    %
    = &\tupleaccess{\leveldegree}{i} \cdot \left\lceil \frac{\tupleaccess{\coststruct(s_{n},a_{n+1},s_{n+1})}{i} -\tupleaccess{\ell_{n}}{i}}{\tupleaccess{\leveldegree}{i}} \right\rceil + \tupleaccess{\leveldegree}{i} \cdot \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} \\
    & \qquad - \left(\tupleaccess{\ell_{n}}{i} -\tupleaccess{\coststruct(s_n,a_{n+1},s_{n+1})}{i} + \tupleaccess{\leveldegree}{i} \cdot \left\lceil \frac{- \tupleaccess{\ell_{n}}{i} +\tupleaccess{\coststruct(s_n,a_{n+1},s_{n+1})}{i}}{\tupleaccess{\leveldegree}{i}} \right\rceil \right)\\
    %
    = &\tupleaccess{\leveldegree}{i} \cdot \left\lceil \frac{\tupleaccess{\coststruct(s_{n},a_{n+1},s_{n+1})}{i} -\tupleaccess{\ell_{n}}{i}}{\tupleaccess{\leveldegree}{i}} \right\rceil + \tupleaccess{\leveldegree}{i} \cdot \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} \\
    & \qquad - \tupleaccess{\ell_{n}}{i} +\tupleaccess{\coststruct(s_n,a_{n+1},s_{n+1})}{i} - \tupleaccess{\leveldegree}{i} \cdot \left\lceil \frac{\tupleaccess{\coststruct(s_n,a_{n+1},s_{n+1})}{i}-\tupleaccess{\ell_{n}}{i}}{\tupleaccess{\leveldegree}{i}} \right\rceil\\
    %
    = &\tupleaccess{\coststruct(s_n,a_{n+1},s_{n+1})}{i} +\tupleaccess{\leveldegree}{i} \cdot \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} - \tupleaccess{\ell_{n}}{i}\\
\end{align*}
\end{proof}

The following lemma formalises the relationship between the cost structure on the level unfolding and the level jump structure.
\begin{lemma}
\label{lem:costfromlvl}
    Given $\finpath = \tuple{s_0, \ell_0}a_1 \tuple{s_1, \ell_1}a_2 \ldots a_n\tuple{s_n, \ell_n} \in \finpaths{\levelfunc_{\leveldegree}(\pomdp)}$,
    we have that 
    \begin{equation*}\tupleaccess{\cumurew_{\coststruct_{\levelfunc_{\leveldegree}}}(\finpath)}{i} = \tupleaccess{\leveldegree}{i} \cdot \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} - \tupleaccess{\ell_n}{i}.\end{equation*}
\end{lemma}
\begin{proof}
    We prove the claim by induction on the length of $\finpath$. We denote by $\tilde{\pi}$ the prefix of $\finpath$ without the last transition, i.e.,~$\tilde{\pi} = \tuple{s_0, \ell_0}a_1\ldots a_{\lengthofpath{\finpath}-1}\tuple{s_{\lengthofpath{\finpath}-1}, \ell_{\lengthofpath{\finpath}-1}}$.
    
    Let $\lengthofpath{\finpath}=0$. Then
    \begin{align*}
        \tupleaccess{\leveldegree}{i} \cdot \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} - \tupleaccess{\ell_n}{i} 
        & = \tupleaccess{\leveldegree}{i} \cdot \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} - \tupleaccess{\ell_0}{i} \\
        & = \tupleaccess{\leveldegree}{i} \cdot 0 - \tupleaccess{\tuple{0, \ldots, 0}}{i} \\
        & = 0 \\
        & = \tupleaccess{\cumurew_{\coststruct_{\levelfunc_{\leveldegree}}}(\finpath)}{i}
    \end{align*}

    Assume the claim holds for an arbitrary, but fixed $n \in \mathbb{N}$.

    Let $\lengthofpath{\finpath}=n+1$
    \begin{align*}
        \tupleaccess{\cumurew_{\coststruct_{\levelfunc_{\leveldegree}}}(\finpath)}{i} 
        &= \sum_{h=1}^{n+1} \tupleaccess{\coststruct_{\levelfunc_{\leveldegree}}(\tuple{s_{h-1}, \ell_{h-1}}a_h\tuple{s_{h}, \ell_{h}})}{i}\\
        & = \tupleaccess{\coststruct_{\levelfunc_{\leveldegree}}(\tuple{s_{n}, \ell_{n}}a_{n+1}\tuple{s_{n+1}, \ell_{n+1}})}{i} + \sum_{h=1}^{n} \tupleaccess{\coststruct_{\levelfunc_{\leveldegree}}(\tuple{s_{h-1}, \ell_{h-1}}a_h\tuple{s_{h}, \ell_{h}})}{i}\\
        & = \tupleaccess{\coststruct_{\levelfunc_{\leveldegree}}(\tuple{s_{n}, \ell_{n}}a_{n+1}\tuple{s_{n+1}, \ell_{n+1}})}{i} + \tupleaccess{\cumurew_{\coststruct_{\levelfunc_{\leveldegree}}}(\tilde{\pi})}{i}\\
        & \overset{(IH)}{=} \tupleaccess{\coststruct_{\levelfunc_{\leveldegree}}(\tuple{s_{n}, \ell_{n}}a_{n+1}\tuple{s_{n+1}, \ell_{n+1}})}{i} + \tupleaccess{\leveldegree}{i} \cdot \tupleaccess{\cumurew_{\levelcosts}(\tilde{\pi})}{i} - \tupleaccess{\ell_n}{i}\\
        & = \tupleaccess{\coststruct(s_{n},a_{n+1},s_{n+1})}{i} + \tupleaccess{\leveldegree}{i} \cdot \tupleaccess{\cumurew_{\levelcosts}(\tilde{\pi})}{i} - \tupleaccess{\ell_n}{i}\\
        & \overset{\text{(Lem.~\ref{lem:step})}}{=} \tupleaccess{\leveldegree}{i} \cdot \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} - \tupleaccess{\ell_{n+1}}{i}
    \end{align*}

Thus the claim is shown for all lengths of $\finpath$.
\end{proof}

The following lemma shows that in the case that $\tupleaccess{\leveldegree}{i} \mid \tupleaccess{\costthresh}{i}$ for all $1 \leq i \leq k$, we can define a bound on the jump structure such that for an arbitrary finite path, the new bound is active if and only if the original cost bound is active.

\begin{lemma}
\label{lem:lvlbound}
Let $\levelfunc_{\leveldegree}(\pomdp)$, a cost bound $(\coststruct_{\levelfunc_{\leveldegree}} \costrel \costthresh)$, and $\leveltarget$ be given such that $\tupleaccess{\leveldegree}{i} \mid \tupleaccess{\costthresh}{i}$ for all $1 \leq i \leq k$. Furthermore, define
$\costthresh_{\leveldegree}$ with $\tupleaccess{\costthresh_{\leveldegree}}{i} \colonequals \frac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}}$ for all $1 \leq i \leq k$.
Then for $\finpath \in \finpaths{\levelfunc_{\leveldegree}(\pomdp)}$, $(\coststruct_{\levelfunc_{\leveldegree}} \costrel \costthresh)$ is active for $\finpath$ if and only if $(\levelcosts \costrel \costthresh_{\leveldegree})$ is active for $\finpath$.
\end{lemma}
\begin{proof}
    Fix a path $\finpath = \tuple{s_0, \ell_0}a_1 \ldots a_n\tuple{s_n, \ell_n} \in \finpaths{\levelfunc_\leveldegree(\pomdp)}$.
    To show that $(\coststruct_{\levelfunc_{\leveldegree}} \costrel \costthresh)$ is active for $\finpath$ if and only if $(\levelcosts \costrel \costthresh_{\leveldegree})$ is active for $\finpath$, we need to show for all dimensions $i$ that $(\coststruct_{\levelfunc_{\leveldegree}} \costrel \costthresh)$ is active in dimension $i$ for $\finpath$ if and only if $(\levelcosts \costrel \costthresh_{\leveldegree})$ is active in dimension $i$ for $\finpath$. Thus, we need to show that $\tupleaccess{\cumurew_{\coststruct_{\levelfunc_{\leveldegree}}}(\finpath)}{i} \tupleaccess{\costrel}{i} \tupleaccess{\costthresh}{i}$ if and only if $\tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} \tupleaccess{\costrel}{i} \tupleaccess{\costthresh_\leveldegree}{i}$ for all $1 \leq i \leq k$.
    
    Fix an arbitrary dimension $1 \leq i \leq k$. We get
    \begin{align*}
        && \tupleaccess{\cumurew_{\coststruct_{\levelfunc_{\leveldegree}}}(\finpath)}{i}  & \tupleaccess{\costrel}{i} \tupleaccess{\costthresh}{i} \\
        %
        \iff && \frac{\tupleaccess{\cumurew_{\coststruct_{\levelfunc_{\leveldegree}}}(\finpath)}{i}}{\tupleaccess{\leveldegree}{i}}  & \tupleaccess{\costrel}{i} \frac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}}\\
        %
        \overset{\text{Lem.~\ref{lem:costfromlvl}}}{\iff} && \frac{\tupleaccess{\leveldegree}{i} \cdot \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} - \tupleaccess{\ell_n}{i}}{\tupleaccess{\leveldegree}{i}} & \tupleaccess{\costrel}{i} \frac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}}\\
        %
        \iff && \frac{\tupleaccess{\leveldegree}{i} \cdot \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i}}{\tupleaccess{\leveldegree}{i}} - \frac{\tupleaccess{\ell_n}{i}}{\tupleaccess{\leveldegree}{i}} & \tupleaccess{\costrel}{i} \frac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}}\\
        %
        \iff && \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} - \frac{\tupleaccess{\ell_n}{i}}{\tupleaccess{\leveldegree}{i}} & \tupleaccess{\costrel}{i} \frac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}}\\
        %
        \overset{(*)}{\iff} && \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} & \tupleaccess{\costrel}{i} \frac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}}\\
    \end{align*}

It remains to show that equivalence $(*)$ holds.
Observe that by definition of $\states_{\levelfunc_\leveldegree}$ it always holds that $0 \leq \frac{\tupleaccess{\ell_n}{i}}{\tupleaccess{\leveldegree}{i}} < 1$. Furthermore, as $\tupleaccess{\leveldegree}{i} \mid \tupleaccess{\costthresh}{i}$, we get that $\frac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}} \in \mathbb{N}$. Also observe that $\tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} \in \mathbb{N}$.

We distinguish the two cases for $\tupleaccess{\costrel}{i}$.

\underline{$\tupleaccess{\costrel}{i} = (>)$}:
As $0 \leq \frac{\tupleaccess{\ell_n}{i}}{\tupleaccess{\leveldegree}{i}}$, it follows directly that
\begin{equation*}\tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} - \frac{\tupleaccess{\ell_n}{i}}{\tupleaccess{\leveldegree}{i}} 
> \frac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}} 
    \quad \Rightarrow \quad \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} > \frac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}}.
\end{equation*}
For 
\begin{equation*}\tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} - \frac{\tupleaccess{\ell_n}{i}}{\tupleaccess{\leveldegree}{i}} 
> \frac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}} 
    \quad \Leftarrow \quad \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} > \frac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}},
\end{equation*}
observe that 
\begin{align*}
    && \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} & > \frac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}} \\
    \Rightarrow && \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} & \geq \frac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}} + 1 \\
    \Rightarrow && \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} - 1 & \geq \frac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}} \\
    \Rightarrow && \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} - \underbrace{\frac{\tupleaccess{\ell_n}{i}}{\tupleaccess{\leveldegree}{i}}}_{<1} & > \frac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}}.
\end{align*}

Therefore, we conclude that
\begin{equation*}\tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} - \frac{\tupleaccess{\ell_n}{i}}{\tupleaccess{\leveldegree}{i}} 
> \frac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}} 
    \quad \iff \quad \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} > \frac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}}.
\end{equation*}

\underline{$\tupleaccess{\costrel}{i} = (\leq)$}:
By contraposition of the previous case, it follows that
\begin{equation*}\tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} - \frac{\tupleaccess{\ell_n}{i}}{\tupleaccess{\leveldegree}{i}} 
\leq \frac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}} 
    \quad \iff \quad \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} \leq \frac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}}.
\end{equation*}

Thus, we have shown that for all options for $\tupleaccess{\costrel}{i}$, it holds that 
\begin{equation*}\tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} - \frac{\tupleaccess{\ell_n}{i}}{\tupleaccess{\leveldegree}{i}} \tupleaccess{\costrel}{i} \frac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}} \quad \iff \quad \tupleaccess{\cumurew_{\levelcosts}(\finpath)}{i} \tupleaccess{\costrel}{i} \frac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}}.\end{equation*}

We conclude that $(\coststruct_{\levelfunc_{\leveldegree}} \costrel \costthresh)$ is active in dimension $i$ for $\finpath$ if and only if $(\levelcosts \costrel \costthresh_{\leveldegree})$ is active in $i$ for $\finpath$ for all $1 \leq i \leq k$ and thus $(\coststruct_{\levelfunc_{\leveldegree}} \costrel \costthresh)$ is active for $\finpath$ if and only if $(\levelcosts \costrel \costthresh_{\leveldegree})$ is active for $\finpath$.
\end{proof}

Finally, we prove Theorem~\ref{thm:lvl}.
\begin{theorem}
    Let $\levelfunc_{\leveldegree}(\pomdp)$
    such that for all $1 \leq i \leq k: \tupleaccess{\leveldegree}{i} \divides \tupleaccess{\costthresh}{i}$ and let
    $(\levelcosts \costrel \costthresh_{\leveldegree})$ be a cost bound for $\levelfunc_{\leveldegree}(\pomdp)$ with
    for all $1 \leq i \leq k{:}$ $\tupleaccess{\levelcosts(\tuple{s,\ell},a,\tuple{s',\ell'})}{i} \colonequals \left\lceil \myfrac{\big(\tupleaccess{\coststruct(s,a,s')}{i}-\tupleaccess{\ell}{i}\big)}{\tupleaccess{\leveldegree}{i}} \right\rceil$ and $\tupleaccess{\costthresh_{\leveldegree}}{i} \colonequals \myfrac{\tupleaccess{\costthresh}{i}}{\tupleaccess{\leveldegree}{i}}$.
    Then, $\levelfunc_{\leveldegree}(\pomdp)$ is cost-aware w.r.t.~$\levelcosts$ and
    \begin{equation*}\mathsf{Pr}_{\max}^{\levelfunc_{\leveldegree}(\pomdp)}\Big(\costEventually{\coststruct_{\levelfunc_{\leveldegree}} \costrel \costthresh} \ \leveltarget\Big) = 
    \mathsf{Pr}_{\max}^{\levelfunc_{\leveldegree}(\pomdp)}\Big(\costEventually{\levelcosts \costrel \costthresh_{\leveldegree}} \ \leveltarget \Big).\end{equation*}
\end{theorem}
\begin{proof}
    For the cost-awareness, we define for each observation $\tuple{z,\mathsf{j}}$ the vector $\mathsf{c}_{\tuple{z,\mathsf{j}}} = \mathsf{j}$.
    Fix an arbitrary transition $\transition{\tuple{s,\ell}}{a}{\tuple{s',\ell'}}$.
    We have by definition that $\tuple{z,\mathsf{j}} \in \supp{\obsfunction_{\levelfunc_{\leveldegree}}(\tuple{s,\ell},{a},\tuple{s',\ell'})}$ if and only if $\tupleaccess{\mathsf{j}}{i} = \left\lceil \frac{\tupleaccess{\coststruct(s,a,s')}{i}-\tupleaccess{\ell}{i}}{\tupleaccess{\leveldegree}{i}} \right\rceil$ for all $1 \leq i \leq k$ (and $\obsfunction(s,a,s')(z) > 0$).
    Additionally, we have that $\tupleaccess{\levelcosts(\tuple{s,\ell},a,\tuple{s',\ell'})}{i} = \left\lceil \frac{\tupleaccess{\coststruct(s,a,s')}{i}-\tupleaccess{\ell}{i}}{\tupleaccess{\leveldegree}{i}} \right\rceil = \tupleaccess{\mathsf{j}}{i}$ for all $1 \leq i \leq k$ and thus
    $\levelcosts(\tuple{s,\ell},a,\tuple{s',\ell'}) = \mathsf{j} = \mathsf{c}_{\tuple{z,\mathsf{j}}}$.
    Therefore, $\levelfunc_{\leveldegree}(\pomdp)$ is cost-aware w.r.t.~$\levelcosts$.

    We now show that $\mathsf{Pr}_{\max}^{\levelfunc_{\leveldegree}(\pomdp)}\Big(\costEventually{\coststruct_{\levelfunc_{\leveldegree}} \costrel \costthresh} \ \leveltarget\Big) = 
    \mathsf{Pr}_{\max}^{\levelfunc_{\leveldegree}(\pomdp)}\Big(\costEventually{\levelcosts \costrel \costthresh_{\leveldegree}} \ \leveltarget \Big)$ by showing that the two sets of paths are equal, i.e.,~that $\costEventually{\coststruct_{\levelfunc_{\leveldegree}} \costrel \costthresh} \ \leveltarget = 
    \costEventually{\levelcosts \costrel \costthresh_{\leveldegree}} \ \leveltarget$.

    \underline{$\costEventually{\coststruct_{\levelfunc_{\leveldegree}} \costrel \costthresh} \ \leveltarget \subseteq 
    \costEventually{\levelcosts \costrel \costthresh_{\leveldegree}} \ \leveltarget$}
    
    Let $\pi \in \costEventually{\coststruct_{\levelfunc_{\leveldegree}} \costrel \costthresh} \ \leveltarget$.
    Then $\pi \in \mathit{Cyl}(\finpath)$ for a $\finpath \in \finpaths{\levelfunc_{\leveldegree}(\pomdp)}$ where $\last{\finpath} \in \leveltarget$ and $(\coststruct_{\levelfunc_{\leveldegree}} \costrel \costthresh)$ is active for $\finpath$. Then, by Lemma~\ref{lem:lvlbound}, also $(\levelcosts \costrel \costthresh_{\leveldegree})$ is active for $\finpath$ and thus also $\pi \in \costEventually{\levelcosts \costrel \costthresh_\leveldegree} \ \leveltarget$.

    \underline{$\costEventually{\coststruct_{\levelfunc_{\leveldegree}} \costrel \costthresh} \ \leveltarget \supseteq 
    \costEventually{\levelcosts \costrel \costthresh_{\leveldegree}} \ \leveltarget$}
    
    Let $\pi \in \costEventually{\levelcosts \costrel \costthresh_\leveldegree} \ \leveltarget$.
    Then $\pi \in \mathit{Cyl}(\finpath)$ for a $\finpath \in \finpaths{\levelfunc_{\leveldegree}(\pomdp)}$ where $\last{\finpath} \in \leveltarget$ and $(\levelcosts \costrel \costthresh_\leveldegree)$ is active for $\finpath$. Then, by Lemma~\ref{lem:lvlbound}, also $(\coststruct_{\levelfunc_{\leveldegree}} \costrel \costthresh)$ is active for $\finpath$ and thus also $\pi \in \costEventually{\coststruct_{\levelfunc_{\leveldegree}} \costrel \costthresh} \ \leveltarget$.

We conclude that $\costEventually{\coststruct_{\levelfunc_{\leveldegree}} \costrel \costthresh} \ \leveltarget = 
    \costEventually{\levelcosts \costrel \costthresh_{\leveldegree}} \ \leveltarget$ and therefore $\mathsf{Pr}_{\max}^{\levelfunc_{\leveldegree}(\pomdp)}\left(\costEventually{\coststruct_{\levelfunc_{\leveldegree}} \costrel \costthresh} \ \leveltarget\right) =
    \mathsf{Pr}_{\max}^{\levelfunc_{\leveldegree}(\pomdp)}\left(\costEventually{\levelcosts \costrel \costthresh_{\leveldegree}} \ \leveltarget \right)
    $ holds.
\end{proof}

\section{Benchmark Problems}\label{app:benchmarkproblems}
We give a short overview of the models we use in our experimental evaluation.
Table~\ref{tab:instances} contains more information about the specific instances we consider, in particular the bounds on the different cost structures and the resulting number of epochs $|\epochs|$.
The bounds are chosen such that the resulting instances are challenging for the implementation while still resulting in non-trivial values for most configurations.

The files containing the models encoded in the \textsc{Prism} language are part of the code \& data appendix, located in the folder \texttt{models}. For an explanation of the format, we refer to the \textsc{Prism} manual.\footnote{https://www.prismmodelchecker.org/manual/ThePRISMLanguage/Introduction}

\begin{table}[t]
\caption{Overview of Benchmarking Instances}
\label{tab:instances}
\centering
\begin{tabular}{@{}rrrr@{}}
\toprule
Model             & $|S|$          & Bounds & $|\epochs|$ \\
\midrule
$\model{clean6}$ & $37$           & $\tupleaccess{\coststruct}{1} \leq 60, \tupleaccess{\coststruct}{2} > 5$ & $413$   \\
\midrule
$\model{clean12}$ & $73$           & $\tupleaccess{\coststruct}{1} \leq 120, \tupleaccess{\coststruct}{2} > 11$ & $1508$   \\
\midrule
$\model{incline}$ & $25$           & $\tupleaccess{\coststruct}{1} \leq 75, \tupleaccess{\coststruct}{2} \leq 21$ & $497$   \\
\midrule
$\model{obstcl}$  & $25$           & $\tupleaccess{\coststruct}{1} \leq 25,\tupleaccess{\coststruct}{2} \leq 7$ & $83$  \\
\midrule
\multirow{2}{*}{$\model{resrc}$} & $721$          & $\tupleaccess{\coststruct}{1}> 4, \tupleaccess{\coststruct}{2} > 4, \tupleaccess{\coststruct}{3} \leq 60$ & $2107$\\
   & $721$          & $\tupleaccess{\coststruct}{1}> 14, \tupleaccess{\coststruct}{2} > 14, \tupleaccess{\coststruct}{3} \leq 180$ & $4 \cdot 10^4$\\
   \midrule
\multirow{2}{*}{$\model{rover}$} & \multirow{2}{*}{$16$}          & $\tupleaccess{\coststruct}{1}> 199, \tupleaccess{\coststruct}{2} \leq 360, \tupleaccess{\coststruct}{3} \leq 200$ & $7 \cdot 10^5$ \\
  &           & $\tupleaccess{\coststruct}{1}> 599, \tupleaccess{\coststruct}{2} \leq 1080, \tupleaccess{\coststruct}{3} \leq 600$ & $2 \cdot 10^7$ \\
  \midrule
\multirow{2}{*}{$\model{serv}$} &  \multirow{2}{*}{$8 \cdot 10^4$}  & $\tupleaccess{\coststruct}{1} \leq 570$ & $40$ \\
& & $\tupleaccess{\coststruct}{1} \leq 1000$ & $68$  \\
\midrule
$\model{walk40}$ & $84$           & $\tupleaccess{\coststruct}{1} \leq 80$ & $82$   \\
\midrule
$\model{walk120}$ & $244$           & $\tupleaccess{\coststruct}{1} \leq 80$ & $82$   \\
\midrule
\multirow{2}{*}{$\model{water}$}   & \multirow{2}{*}{$34$}           & $\tupleaccess{\coststruct}{1} \leq 590, \tupleaccess{\coststruct}{2} > 49$ & $3 \cdot 10^4$\\
& & $\tupleaccess{\coststruct}{1} \leq 1790, \tupleaccess{\coststruct}{2} > 149$ & $3 \cdot 10^5$ \\
 \bottomrule
\end{tabular}
\end{table}

\paragraph{\model{clean}}
This model is a generalised version of the cleaning robot scenario described in Section~\ref{sec:intro}. A robot is placed in position $0$ of a hallway consisting of $N$ tiles, all initially dirty. 
In each step, it can decide between moving to the next tile, increasing its position by $1$, or attempting to clean the tile at the current position. 
A cleaning attempt is successful with probability $0.8$.
The robot can repeatedly attempt to clean the same tile.
Moving always consumes $1$ unit of energy, while a cleaning attempt requires either $2$ or $4$ units, each with probability $0.5$.
If the robot moves to position $N$, i.e.,~out of the hallway, it has reached its target position.
The robot can observe its position, but generally does not observe how many tiles it has already cleaned.

We use two cost dimensions to model the energy consumption of the robot and the number of tiles it has successfully cleaned. The objective is to reach the target position with a specified bound on the energy while successfully cleaning at least a specified number of tiles.
We consider two sizes, $N=6$ and $N=12$.

\paragraph{\model{incline}} The agent has to reach a target in a $5 \times 5$ grid of cells. The agent can move in any of the four cardinal directions. Each move is either \emph{uphill}, \emph{downhill} or neither (\emph{flat}). 
With probability $0.5$, an attempted move fails due to slipping. 
If the attempted move is uphill, the agent stays in place if it slips. Similarly, slipping downhill causes the agent to overshoot and move a cell further in the chosen direction. 
With a flat move, slipping has no effect. 
In case a move would lead out of bounds, the agent moves as far as possible in the direction and then stays in place. 
The agent does not observe its current position, but knows its starting position in the south-west corner of the grid. 
We consider a map where the incline is such that all moves north and east are uphill and all moves south and west are downhill.

We consider a cost model where downhill steps consume 1 unit of energy, flat moves 2 units and uphill moves 3 units. The objective is to reach the target in the north-east corner of the grid.
We are interested in reaching the goal within the energy budget (modelled in the first dimension) and a maximum number of steps (modelled in the second dimension).

\paragraph{\model{obstcl}} Similar to \model{incline}, the agent is supposed to reach a target in a $5 \times 5$ grid of cells. Only the outermost ring of cells can be traversed freely, the inner ring contains light forest and the center cell is dense forest. The agent cannot observe its position and it slips, i.e.,~stays in place when executing a move from light forest or dense forest with probability $0.25$ or $0.5$ respectively. Moves from free tiles, light forest tiles and dense forest tiles consume $1$, $2$ or $3$ units of energy each. Like before, the objective is to reach the goal within the energy budget and a maximum number of steps (modelled by a bound on $\tupleaccess{\coststruct}{1}$ and $\tupleaccess{\coststruct}{2}$ respectively).

\begin{figure}
    \centering
    \includegraphics[width=0.25\linewidth]{resource_gathering.pdf}
    \caption{Resource Gathering Scenario}
    \label{fig:resource}
\end{figure}
\paragraph{\model{resrc}} This benchmark is a variant of the resource gathering model from \citet{barrett2008}. The scenario is similar to a problem arising in many strategy games. We provide a depiction of the setting in Figure~\ref{fig:resource}. An agent, starting in a home base marked by $H$, is tasked with collecting two kinds of resources--\emph{gold} (depicted by the coin) and \emph{gems} (depicted by the gem)--in a grid environment.
In any step, the agent can move in any of the four cardinal directions. When it reaches either the gold or the gem location, it picks up one unit of the respective resource. To collect it, the agent needs to return the resource to the base. The agent can hold at most one unit of each resource.

An \emph{enemy} patrols the diagonal of the grid. It starts in the south-west corner and changes its position to one of the adjacent locations on the diagonal with each step of the agent. If the enemy is in one of the corners, it will certainly move to the adjacent location. When the agent and the enemy enter the same location at the same time, the agent loses all currently held resources and is teleported home without collecting anything.
The objective is to maximise the probability to collect a minimum amount of each resource (lower bounds on $\tupleaccess{\coststruct}{1}$ for gold and $\tupleaccess{\coststruct}{2}$ for gems) within a given step bound (upper bound on $\tupleaccess{\coststruct}{3}$).

\begin{table}[t]
    \caption{Values of the Tasks in the \model{rover} Problem}
    \label{tab:rover}
    \centering
    \begin{tabular}{l | l l l l}
        \hline
         Task & Time & Energy & Sci.~Value & Success Prob.  \\
         \hline\hline
         1 & $10$ & $\{3,5\}$ & $10$ & $\nicefrac{1}{2}$ \\
         2 & $5$ & $\{5,7\}$ & $10$ & $\nicefrac{3}{5}$ \\
         3 & $5$ & $3$ & $2$ & $\nicefrac{4}{5}$ \\
         4 & $10$ & $7$ & $30$ & $\nicefrac{1}{10}$ \\
         \hline
    \end{tabular}
\end{table}

\paragraph{\model{rover}} A partially observable version of the Mars rover task scheduling problem described in \citet{hartmanns2020} based on \citet{bresina2005}. The problem models the scheduling of a variety of experiments on Mars. Experiments have differing time and energy consumption and success probabilities. Upon success of an experiment, a certain scientific value is collected. Energy consumption for some tasks is subject to uncertainty; the consumed energy has a high or low value with probability $0.5$ each. The specific parameters for each task are given in Table~\ref{tab:rover}.

The agent can schedule several experiments each day. It does not directly observe whether a task has been successful or not. The objective is to maximise the probability of achieving at least a certain cumulative scientific value without exceeding both time and energy limits. Scientific value is modelled in $\tupleaccess{\coststruct}{1}$, while time and energy are modelled in $\tupleaccess{\coststruct}{2}$ and $\tupleaccess{\coststruct}{3}$ respectively.

\paragraph{\model{serv}} The \emph{service} domain is a partially observable variant of the care home scenario described in \citet{lacerda2017}. A robot is navigating a care home environment consisting of a central hallway with rooms adjacent to each side of the hallway. Overall, the map consists of 21 locations. The robot does not observe its location and each location change to an adjacent location can fail with probability $0.01$.

The robot's routine consists of the main task of checking whether three occupants of specified rooms want water and delivering it if it is desired. The robot can collect bottles of water at a central kitchen area. At any point, it can only carry at most two bottles. For delivery, the robot first has to check whether the occupant wants the water. With a probability of $0.2$, the occupant actually wants the water and the robot can deliver it.
In addition, the robot has the secondary task of interacting with four designated occupants.
Each action of the robot requires a certain amount of time. The central area, where the robot starts and which it has to cross to get to the kitchen, is crowded with a probability of $0.2$ every time the robot enters it, causing an additional time cost.

We are interested in the probability that the robot delivers water to where it is required within the time limit (bound on $\tupleaccess{\coststruct}{1}$).

\begin{figure}[t]
\centering\includegraphics{water.pdf}
\caption{Robot Navigation Task with Water Obstacles}
\label{fig:water}
\end{figure}

\paragraph{\model{water}} The \emph{water} problem considers an amphibious robot starting in the south-west corner of a regular grid consisting of nine cells.
In every step, the robot moves in any of the four cardinal directions, where a move out of bounds makes the robot stay in place.
The task is to visit the flag in the north-west corner and then return to the initial position multiple times.
A river runs through the center row of the map, modelled as one tile of \emph{deep water}, one tile of \emph{shallow water}, and one \emph{bridge} tile.
All other tiles are \emph{land} tiles.
With a probability of $0.5$, there are \emph{high water} conditions, making crossing the river without the bridge more difficult.
The robot is not able to observe the water conditions.
The energy consumption for each move depends on the current tile and the water conditions as outlined on the right of Figure~\ref{fig:water}. A move out-of-bounds still consumes the respective energy.

The agent collects one unit of cost/reward (modelled in dimension $\tupleaccess{\coststruct}{2}$) every time it reaches the target position.
The objective is to compute the probability that a minimum number of trips (bound on $\tupleaccess{\coststruct}{2}$) is completed within the given energy budget (bound in dimension $1$).

\paragraph{\model{walk}} An agent is starting at position $0$ of a hallway consisting of $N{+}1$ positions.
In each step, it can decide to either attempt to \emph{move} to the adjacent position $i{+}1$, \emph{observe} its current position $i$ for a cost of $1$ unit, or completely \emph{stop} the process.
A move action fails with probability $0.5$, resulting in the agent staying at position $i$.
In position $N$, a move will always make the agent stay in place.
The agent is only able to observe its current position if it executes an \emph{observe} action.
However, \emph{observe} actions are unreliable and fail with probability $0.9$.

The goal of the agent is to stop the process exactly in position $N{-}1$, leading to a target state.
If it stops the process in any other position, it is trapped and cannot fulfil its goal any more.

The cost-bounded objective is to reach the target state, i.e., stopping in position $N{-}1$, while using at most a specified number of \emph{observe} actions. We consider two sizes, $N{=}40$ and $N{=}120$.

\section{Considered Hyper-Parameters and Detailed Setup Information}
\label{sec:hyperparam}\label{app:setup}
For the experimental evaluation we consider 25 different hyper-parameter assignments each for \config{cut} and \config{discr}.
As described in the main paper, both methods are applied to yield finite abstractions of the belief MDP we analyse for the respective configuration \config{unfold}, \config{ca-unfold} or \config{ca-bel-seq}.

We have chosen the hyper-parameter values such that we expect them to result in MDPs that \storm{} can handle in a reasonable amount of time, with larger values included optimistically. 

The considered hyper-parameters are:
\begin{itemize}
    \item \config{cut}
    \begin{itemize}
    \item \textbf{Considered hyper-parameter:} \texttt{size-threshold}
    \item \textbf{Description:} The number of states up to which the belief MDP is explored. After the limit is reached, \emph{cut-offs} are applied to approximate the dynamics of the belief MDP beyond that point. Larger values result in more accurate approximations, but exploration and analysis of the resulting MDP typically take longer. For more information, we refer to \citet{bork2022}.

    \item \textbf{Considered values:} $2^{8}$, $2^{9}$, $2^{10}$, $2^{11}$, $2^{12}$, $2^{13}$, $2^{14}$, $2^{15}$, $2^{16}$, $2^{17}$, $2^{18}$, $2^{19}$, $2^{20}$, $2^{21}$, $2^{22}$, $2^{23}$, $2^{24}$, $2^{25}$, $2^{26}$, $2^{27}$, $2^{28}$, $2^{29}$, $2^{30}$, $2^{31}$, $2^{32}$
    \end{itemize}

    \item \config{discr}
    \begin{itemize}
    \item \textbf{Considered hyper-parameter:} \texttt{resolution}
    \item \textbf{Description:} The \emph{resolution} of the belief grid used for discretisation. The resolution describes how coarsely the belief space is approximated. With a resolution of $\mathtt{r}$, the space is discretised to beliefs only containing probabilities that are multiples of $\nicefrac{1}{\mathtt{r}}$. Typically, the higher the resolution, the better the approximation gets, at the cost of increased runtime. For more information, we refer to \citet{bork2020}.

    \item \textbf{Considered values:} $1$, $2$, $3$, $4$, $5$, $6$, $7$, $8$, $9$, $10$, $12$, $14$, $15$, $16$, $18$, $20$, $21$, $24$, $25$, $28$, $30$, $35$, $36$, $42$, $49$
    \end{itemize}
\end{itemize}

We elaborate on the setup used for our experimental evaluation.

\subsection{Implementation}
    \begin{itemize}
        \item based on \storm{} 1.9.0
        \item built using \emph{CMake} 3.26.3 and \emph{GCC} 12.3.0
        \item relevant dependency: \emph{Boost} 1.82
        \item for all relevant computations, in particular solving MDP queries, we use native implementations in \storm{}, i.e.,~no external libraries are used.
    \end{itemize}

\subsection{System}
We used several identical systems to conduct the benchmarking. We used the \emph{Slurm} workload manager, version 22.05.4, to enforce the limits on the number of used cores and the memory per instance.
\begin{itemize}
    \item CPU: Intel Xeon 8468 Sapphire @ 2.1 GHz, limited to 4 cores per instance. The implementation runs single-threaded.
    \item RAM: limited to 64 GB per instance
    \item OS: Rocky Linux 8.10
    \item no GPUs are used for our experiments
\end{itemize}


\section{Additional Results}\label{app:results}
\begin{table*}[pt]\centering%\setlength{\tabcolsep}{1mm}
\caption{Further Details for Experiments (\config{unfold})}\label{tab:add_results_unf}
\input{experiment_data/tableunf.tex}
\end{table*}
\begin{table*}[pt]\centering%\setlength{\tabcolsep}{1mm}
\caption{Further Details for Experiments (\config{ca-unfold})}\label{tab:add_results_caunf}
\input{experiment_data/tablecaunf.tex}
\end{table*}
\begin{table*}[pt]\centering%\setlength{\tabcolsep}{1mm}
\caption{Further Details for Experiments (\config{ca-bel-seq})}\label{tab:add_results_belseq}
\input{experiment_data/tablebelseq.tex}%
\end{table*}



\paragraph{Detailed Result Tables} Tables \ref{tab:add_results_unf}, \ref{tab:add_results_caunf}, and \ref{tab:add_results_belseq} 
extend our experimental data provided in the main paper. 
For each table, the first five columns are as in Table~\ref{tab:benchmark_info}, except that column `$k$' depicts the dimension of the cost bound.
The columns `time' and `result' in the \config{cut} and \config{discr} sections of the individual tables repeat the information from Table~\ref{tab:results}, where each of the three tables considers a different configuration---\config{unfold} in Table~\ref{tab:add_results_unf}, \config{ca-unfold} in Table~\ref{tab:add_results_caunf}, and \config{ca-bel-seq} in Table~\ref{tab:add_results_belseq}.
The remaining columns provide additional information concerning 
\begin{itemize}
    \item the number of states  of the transformed POMDP after incorporation of cost-awareness and/or unfolding (Columns `$|S_\mathsf{un}|$', `$|S_\mathsf{un}^\mathsf{ca}|$', and `$|S^\mathsf{ca}|$'),
\item the number of states of the considered belief MDP abstraction (Columns `$|\mathcal{B}^\mathsf{cut}|$' and `$|\mathcal{B}^\mathsf{discr}|$'), and
\item the hyper-parameter used for discretisation (Column `$\mathtt{r}$', see also Section~\ref{sec:hyperparam}).
\end{itemize}
Comparing unfolding and sequential epoch analysis on cost-aware POMDPs (\config{ca-unfold} vs. \config{ca-bel-seq}), we observe that the latter handles significantly smaller state spaces while usually achieving similar or tighter approximations in less time.



\paragraph{Additional Plots} Figure \ref{fig:plot} in the main paper shows the evolution of value bounds obtained over time for two benchmarks. We provide similar plots for the remaining benchmarks in Figures~\ref{fig:add0} to \ref{fig:add6}.

\paragraph{Log Files} As part of the supplementary material, we provide all raw log files generated for our experimental evaluation in the folder 
\begin{equation*}\texttt{code\_data\_appendix/logs/raw}.\end{equation*} 
They document runs of our implementation for all settings \config{unfold}, \config{ca-unfold} and \config{ca-bel-seq} for both \config{cut} and \config{discr} with the hyper-parameters described in Section~\ref{sec:hyperparam}.
In addition to the raw log files (.log) there are also JSON files containing the relevant information extracted from the logs.

The files are named according to the scheme:
\begin{equation*}\text{storm.}\underbrace{\text{CONFIG}}_{}\underbrace{\text{C/D}}_{}\underbrace{\text{PARAM}}_{}.\underbrace{\text{EXPERIMENT}}\_\underbrace{\text{INSTANCE}}\end{equation*}
where \text{CONFIG} is one of 
\begin{itemize}
    \item unf : \config{unfold},
    \item caunf : \config{ca-unfold},
    \item belseq : \config{ca-bel-seq},
\end{itemize}
and C/D indicates whether the run uses \config{cut} (c) or \config{discr} (d), followed by the considered hyper-parameter $\text{PARAM}$ where for (c) the parameter is the exponent, i.e.,~we apply a size threshold of $2^\text{PARAM}$.
$\text{EXPERIMENT} \in \{\text{main},\text{lvls},\text{bnds}\}$ denotes the kind of experiment the logfile is referring to, where the latter two refer to the runs for the plots at the bottom of Figure~\ref{fig:plot}. All other results belong to `\text{main}'.
Instances are named by the model identifier, followed by an (internally used) identifier of the bounded reachability query and the considered bound values. Note that the implementation uses non-strict inequalities for $>$.

\paragraph{Interactive Table} As a more convenient way to view all results obtained during our experiments, we provide interactive tables. 

The tables are given in the HTML files
\begin{itemize}
    \item \texttt{code\_data\_appendix/logs/table/table.html} (for the main experiments)
    \item \texttt{code\_data\_appendix/logs/lvlstable/table.html} (for the data used in the plot at the bottom left of Figure~\ref{fig:plot})
    \item \texttt{code\_data\_appendix/logs/bndstable/table.html} (for the data used in the plot at the bottom right of Figure~\ref{fig:plot})
\end{itemize}

and can be viewed in a web browser.
The columns of the tables are named similarly to the naming scheme for the log files. In addition, we provide columns indicating the best result obtained within $10$, $100$, $1000$ and $1800$ seconds.
The latter coincides with the values considered in the main paper.
Columns can be hidden for a more clearly arranged view. 
A result cell contains the computed approximation values (also indicating if it is an under- or over-approximation) and the runtime (walltime) in seconds. Clicking on a result shows an overview as well as the raw log used to obtain the result.



\begin{figure}[p]\centering
\defaulttimeresplotleft{clean_rbrmax2_N06-B1060-B205-__lvl11-__lvl20}{0.1}{1800}%
\defaulttimeresplotright{clean_rbrmax2_N12-B1120-B211-__lvl11-__lvl20}{0.1}{1800}%
\caption{Value bounds obtained  for \model{clean6}, $|\epochs|{=}413$ (left) and  \model{clean12}, $|\epochs|{=}1508$ (right).}
\label{fig:add0}
\end{figure}
\begin{figure}[p]\centering
\defaulttimeresplotleft{incline_rbrmax2_B175-B220}{0.1}{1800}%
\defaulttimeresplotright{obstcl_rbrmax2_B125-B27}{0.1}{1800}%
\caption{Value bounds obtained  for \model{incline}, $|\epochs|{=}497$ (left) and  \model{obstcl}, $|\epochs|{=}83$ (right).}
\label{fig:add1}
\end{figure}
\begin{figure}[p]\centering
\defaulttimeresplotleft{resrc_rbrmax3_B105-B205-B3060}{0.1}{1800}%
\defaulttimeresplotright{resrc_rbrmax3_B115-B215-B3180}{0.1}{1800}%
\caption{Value bounds obtained for \model{resrc}, $|\epochs|{=}2107$ (left) and  $|\epochs|{=}4{\cdot}10^4$ (right).}
\label{fig:add2}
\end{figure}
\begin{figure}[p]\centering
\defaulttimeresplotleft{rover_rbrmax3_B1200-B20360-B3200}{0.1}{1800}%
\defaulttimeresplotright{rover_rbrmax3_B1600-B21080-B3600}{0.1}{1800}%
\caption{Value bounds obtained  for \model{rover}, $|\epochs|{=}7{\cdot}10^5$ (left) and  $|\epochs|{=}2{\cdot}10^7$ (right).}
\label{fig:add3}
\end{figure}
\begin{figure}[p]\centering
\defaulttimeresplotleft{serv_rbrmax1_B10570}{0.1}{1800}%
\defaulttimeresplotright{serv_rbrmax1_B11000}{0.1}{1800}%
\caption{Value bounds obtained for \model{serv}, $|\epochs|{=}40$ (left) and  $|\epochs|{=}68$ (right).}
\label{fig:add4}
\end{figure}
\begin{figure}[p]\centering
\defaulttimeresplotleft{walk_rbrmax1_N040-B180}{0.1}{1800}
\defaulttimeresplotright{walk_rbrmax1_N120-B180}{0.1}{1800}
\caption{Value bounds obtained for \model{walk40}, $|\epochs|{=}82$ (left) and  \model{walk120}, $|\epochs|{=}82$ (right).}
\label{fig:add5}
\end{figure}
\begin{figure}[p]\centering
\defaulttimeresplotleft{water_rbrmax2_B10590-B2050}{0.1}{1800}
\defaulttimeresplotright{water_rbrmax2_B11790-B2150}{0.1}{1800}
% \includegraphics[width=0.99\textwidth]{plot_appendix_fig.pdf}
\caption{Value bounds obtained  for \model{water}, $|\epochs|{=}3{\cdot}10^4$ (left) and $|\epochs|{=}3{\cdot}10^5$ (right).}
\label{fig:add6}
\end{figure}
\end{document}
