

\section{Sequence of Policies}
\label{sec:policy-seq}
In this section, we illustrate the sequence of policies produced by Howard's policy iteration, first on the DMDP $P_3$ and then on the general $P_n$.

\subsection{The policies for our running example}
\begin{figure*}[h]
    \centering
    \begin{subfigure}{0.49\textwidth}
        \centering
        \input{examples/3_initpol_1}
        \caption{Policy $\initpol_{1}$}
    \end{subfigure}
    \hfill
    \begin{subfigure}{0.49\textwidth}
        \centering
        \input{examples/3_policy_1-1}
        \caption{Policy $\policy_{1,1}$}
    \end{subfigure}
    \begin{subfigure}{0.49\textwidth}
        \centering
        \input{examples/3_policy_1-2}
        \caption{Policy $\policy_{1,2}$}
    \end{subfigure}
    \hfill
    \begin{subfigure}{0.49\textwidth}
        \centering
        \input{examples/3_finalpol_1}
        \caption{Policy $\finalpol_1$}
    \end{subfigure}
    \caption{Part I: the sequence of policies appearing in Howard's policy iteration over our running example. Thick lines correspond to policy choices. Unlabeled (gray) edges have weight 0.}
    \label{fig:policy-sequence-1}
\end{figure*}
\begin{figure*}[hp]\ContinuedFloat
    \centering
    \begin{subfigure}{0.49\textwidth}
        \centering
        \input{examples/3_initpol_2}
        \caption{Policy $\initpol_2$}
    \end{subfigure}
    \hfill
    \begin{subfigure}{0.49\textwidth}
        \centering
        \input{examples/3_policy_2-1}
        \caption{Policy $\policy_{2,1}$}
    \end{subfigure}
    \hfill
    \begin{subfigure}{0.49\textwidth}
        \centering
        \input{examples/3_policy_2-2}
        \caption{Policy $\policy_{2,2}$}
    \end{subfigure}
    \hfill
    \begin{subfigure}{0.49\textwidth}
        \centering
        \input{examples/3_policy_2-3}
        \caption{Policy $\policy_{2,3}$}
    \end{subfigure}
    \hfill
    \begin{subfigure}{0.49\textwidth}
        \centering
        \input{examples/3_finalpol_2}
        \caption{Policy $\finalpol_2$}
    \end{subfigure}
    \hfill
    \begin{subfigure}{0.49\textwidth}
        \centering
        \input{examples/3_initpol_3}
        \caption{Policy $\initpol_3$}
    \end{subfigure}
    \hfill
    \begin{subfigure}{0.49\textwidth}
        \centering
        \input{examples/3_policy_3-1}
        \caption{Policy $\policy_{3,1}$}
    \end{subfigure}
    \hfill
    \begin{subfigure}{0.49\textwidth}
        \centering
        \input{examples/3_policy_3-2}
        \caption{Policy $\policy_{3,2}$}
    \end{subfigure}
    \caption{Part II: the sequence of policies appearing in Howard's policy iteration over our running example. Thick lines correspond to policy choices. Unlabeled (gray) edges have weight 0.}
    \label{fig:policy-sequence-2}
\end{figure*}


\newpage
\subsection{The policies in the general case}
\label{sec:general-pol}
We illustrate the sequence of policies that appear from policy $\initpol_i$ to policy $\initpol_{i+1}$. To keep the figures readable, we show only the edges of the current policy; edge weights and edges not in the policy are omitted.
\begin{figure*}[h]
    \centering
    \begin{subfigure}{0.99\textwidth}
        \centering
        \input{examples/n_initpol_i.tex}
        \caption{Policy $\initpol_{i}$}
    \end{subfigure} 
    \par\bigskip
    \begin{subfigure}{0.99\textwidth}
        \centering
        \input{examples/n_policy_i-1.tex}
        \caption{Policy $\policy_{i,1}$}
    \end{subfigure}
    \caption{Part I: policies appearing in Howard's policy iteration on $P_n$. Only edges in the policy are shown; edge weights are omitted.}
\end{figure*}


\begin{figure*}[tbp]\ContinuedFloat
    \centering
    \rotatebox{90}{%
        \begin{minipage}{0.85\textheight}  
            \centering
            \begin{subfigure}{0.8\textheight}
                \centering
                \input{examples/n_policy_i-j}
                \caption{Policy $\policy_{i,j}$}
            \end{subfigure}
            \par\bigskip
            \begin{subfigure}{0.8\textheight}
                \centering
                \input{examples/n_policy_i-j1}
                \caption{Policy $\policy_{i,j+1}$}
            \end{subfigure}
        \end{minipage}%
    }
    \caption{Part II: comparison of policies $\policy_{i,j}$ and $\policy_{i,j+1}$, with $j>1$, appearing in Howard's policy iteration on $P_n$. Only edges in the policy are shown; edge weights are omitted. The edges of $\policy_{i,j+1}$ that differ from $\policy_{i,j}$ are drawn in bold.}
    
\end{figure*}

\begin{figure*}[h]\ContinuedFloat
\centering
    \begin{subfigure}{0.99\textwidth}
        \centering
        \input{examples/n_policy_i-i}
        \caption{Policy $\policy_{i,i+1}$}
    \end{subfigure} 
    \par\bigskip
    \begin{subfigure}{0.99\textwidth}
        \centering
        \input{examples/n_finalpol_i1}
        \caption{Policy $\finalpol_{i}$}
    \end{subfigure}
    \par\bigskip
    \begin{subfigure}{0.99\textwidth}
         \centering
        \input{examples/n_initpol_i1}
        \caption{Policy $\initpol_{i+1}$}
    \end{subfigure}
    \caption{Part III: policies appearing in Howard's policy iteration on $P_n$. Only edges in the policy are shown; edge weights are omitted.}
\end{figure*}

% \begin{figure}
%     \centering
%     \input{examples/n_policy_i-i}
%     \caption{Policy $\policy_{i,i+1}$}
% \end{figure}

% \begin{figure}
%     \centering
%     \input{examples/n_finalpol_i1}
%     \caption{Policy $\finalpol_{i}$}
% \end{figure}

% \begin{figure*}
%     \centering
%     \input{examples/n_initpol_i1}
%     \caption*{Policy $\initpol_{i+1}$}
% \end{figure*}




