\documentclass[accepted]{uai2025}

% \synctex=1
\input{0_packages}
\input{0_macros}

\sloppy

\title{Lower Bound on Howard Policy Iteration for Deterministic Markov Decision Processes}


% \author{Ali Asadi}{Institute of Science and Technology Austria (ISTA), Austria }{ali.asadi@ista.ac.at}{}{}
% \author{}{Institute of Science and Technology Austria (ISTA), Austria }{krishnendu.chatterjee@ist.ac.at}{}{}
% \author{Jakob de Raaij}{Harvard University, USA}{jderaaij@fas.harvard.edu}{}{}
\author[1]{\href{mailto:<ali.asadi@ista.ac.at>}{Ali Asadi}{}}
\author[1]{\href{mailto:<krishnendu.chatterjee@ist.ac.at>}{Krishnendu Chatterjee}{}}
\author[2]{\href{mailto:<jderaaij@fas.harvard.edu>}{Jakob de Raaij}{}}

\affil[1]{%
    Institute of Science and Technology Austria (ISTA)
}
\affil[2]{%
    Harvard University
}
%\keywords{Deterministic Markov Decision Processes, limit-average objectives, Howard’s policy iteration}

\begin{document}
\maketitle
\input{abstract}
\input{introduction}
\input{preliminaries}
\input{overview}
\input{lower_bound}
\input{extensions}

\section{Conclusion and Future Work} \label{sec:conclu}
In this work, we studied Howard's policy iteration algorithm for DMDPs with mean-payoff objectives. We constructed a family of examples with $2n$ vertices and $\calO(n^2)$ edges on which the algorithm requires $\Omega(n^2)$ iterations, thereby improving the lower bound on the number of iterations to $\widetilde{\Omega}(I)$ with respect to the input size $I$. There are several interesting directions for future work. In particular, Hansen's conjecture~\citep{hansen2012worst} on the number of iterations remains a major open problem. Furthermore, the practical performance of Howard's policy iteration, despite its high theoretical worst-case complexity, raises relevant and interesting questions. While our focus was on establishing an improved theoretical lower bound for this classical algorithm, these practical concerns highlight important directions for future research.

\begin{acknowledgements}
This research was partially supported by the ERC CoG 863818 (ForM-SMArt) grant and the Austrian Science Fund (FWF) 10.55776/COE12.
\end{acknowledgements}

\bibliography{refs}
\newpage
\onecolumn

\title{Lower Bound on Howard Policy Iteration for Deterministic Markov Decision Processes\\(Supplementary Material)}
\maketitle

\appendix

\input{appendix}

\end{document}
