\section{Improved Lower Bound}
In this section, we construct a family of DMDPs with $2n$ vertices of size $\calO(n^2 \log n)$ on which Howard's algorithm performs $\Omega(n^2)$ iterations. 

\subsection{DMDP Construction}
Given a positive integer $n$, we construct a DMDP $P_n = (\Vertices_n, \Edges_n, \weight_n)$. We denote the set of vertices by $\Vertices_n = \{b_1, \ldots, b_n, t_1, \ldots, t_n  \}$. The ordering of vertices is $(t_1, b_1, \ldots, b_n, t_2, \ldots, t_n)$.  We denote the set of edges by
\begin{align*}
    \Edges_n 
    &\defas  \{(b_i, b_j) \mid 1 \le j < i \le n \} 
    \\  & \,\,\cup  \{(b_i, t_j) \mid 1 \le i, j \le n\}
    \\  &\,\,\cup \{ (t_i, b_j) \mid 1 \le j \le i \le n \} 
    \\  &\,\,\cup \{ (t_i, t_j) \mid 1 \le j \le i \le n \}\,.
\end{align*}
We now define the weight function as
% \[
%     w_n(\vertex, \otherver) \defas \begin{cases}
%         n(n+1)^2 + n  & \vertex = b_i \land \otherver = b_j \quad \text{for all } 1 \le j < i \le n\\
%         (j-2)(n+1)^2 + n  & \vertex = b_i \land \otherver = t_j \quad \text{for all } 1 \le i, j \le n\\
%         (n+1)^3 & \vertex = t_i \land \otherver = b_j \quad \text{for all } 1 \le j \le i \le n\\
%         (j-1)(n+1)^2 & \vertex = t_i \land \otherver = t_j \quad \text{for all } 1 \le j < i \le n\\
%         n(n+1)^2 + i-1 & \vertex = t_i \land \otherver = t_i \quad \text{for all } 1 \le i \le n\\
%         \end{cases}
% \]
\[
     w_n(\vertex, \otherver) \defas \begin{cases}
        (n+1)^2  & \vertex = b_i \land \otherver = b_j \\& \text{for all } 1 \le j < i \le n\\
        (n+1)^2 & \vertex = t_i \land \otherver = b_j \\& \text{for all } 1 \le j \le i \le n\\
        0  & \vertex = b_i \land \otherver = t_j \\& \text{for all } 1 \le i, j \le n\\
        0 & \vertex = t_i \land \otherver = t_j \\& \text{for all } 1 \le j < i \le n\\
        n(n+1) + i & \vertex = t_i \land \otherver = t_i \\& \text{for all } 1 \le i \le n\\
        \end{cases}
\]
\Cref{fig:dmdp} illustrates an example of the DMDP with $n=3$. 

% \begin{figure}[t]
% \input{examples/graph}
% \caption{Our running example.}
% \label{fig:dmdp}
% \end{figure}

\begin{figure}[t]
\centering
\resizebox{0.8\linewidth}{!}{%
    \input{examples/3_graph}
}
\caption{Our running example with $n=3$. Unlabeled (gray) edges have weight $0$.}
\label{fig:dmdp}
\end{figure}


\subsection{Policies}
We describe three families of policies that appear in Howard's algorithm. We first define the family of policies $\initpol_{i}$ for $1 \le i \le n$ as 
\[
    \initpol_{i}(\vertex) \defas \begin{cases}
        b_k & \vertex = t_k \qquad \text{for all } 1 \le k \le i-2\\
        t_{k} & \vertex = t_k \qquad \text{for all } k \in \{i-1, i\}\\
        t_{i} & \text{otherwise}
    \end{cases}
\]
We now define another family of policies $\policy_{i,j}$ for $1 \le i \le n$ and $1 \le j \le i+1$  as
\[
    \policy_{i,j}(\vertex) \defas \begin{cases}
        t_i & \vertex = t_i\\
        b_k & \vertex = t_k \qquad \text{for all } (1 \le k \le j \land k \neq i)\\
        & \phantom{\vertex = t_k} \qquad \text{or }  (k\le i-2 \land j=1)\\
        t_i & \vertex = b_1\\
        b_{k-1} & \vertex = b_k \qquad \text{for all } 2 \le k \leq j\\
        b_j & \text{otherwise}
    \end{cases}
\]
Finally, we define the last family of policies $\finalpol_{i}$ for $1 \le i \le n$ as
\[
    \finalpol_{i}(\vertex) \defas \begin{cases}
        t_k & \vertex = t_k \qquad \text{for all } k \in \{i, i+1\}\\
        b_k & \vertex = t_k \qquad \text{for all } 1 \le k < i\\
        t_i & \vertex = b_1\\
        b_{k-1} & \vertex = b_k \qquad \text{for all } 2 \le k \leq i+2\\
        b_{i+2} & \text{otherwise}
    \end{cases}
\]
For more intuition, the policies $\initpol_{2}$, $\policy_{2,1}$, $\policy_{2,3}$, and $\finalpol_{2}$ over our running example are illustrated in \Cref{fig:policy-families}. An illustration of all policies for the running example can be found in \Cref{sec:policy-seq}. Moreover, the outline of policies that appear in the general DMDP $P_n$ is illustrated in \Cref{sec:general-pol}.

\begin{figure*}[t]
    \centering
    \begin{subfigure}{0.4\textwidth}
        \centering
        \resizebox{0.8\linewidth}{!}{%
            \input{examples/3_initpol_2}%
        }
        \caption{Policy $\initpol_2$}
    \end{subfigure}
    \hfill
    \begin{subfigure}{0.4\textwidth}
        \centering
        \resizebox{0.8\linewidth}{!}{%
            \input{examples/3_policy_2-1}%
        }
        \caption{Policy $\policy_{2,1}$}
    \end{subfigure}
    \hfill
    \begin{subfigure}{0.4\textwidth}
        \centering
        \resizebox{0.8\linewidth}{!}{%
            \input{examples/3_policy_2-3}%
        }
        \caption{Policy $\policy_{2,3}$}
    \end{subfigure}
    \hfill
    \begin{subfigure}{0.4\textwidth}
        \centering
        \resizebox{0.8\linewidth}{!}{%
            \input{examples/3_finalpol_2}%
        }
        \caption{Policy $\finalpol_2$}
    \end{subfigure}
    \caption{Examples of policies. Thick lines correspond to policy choices. Unlabeled (gray) edges have weight 0.}
    \label{fig:policy-families}
\end{figure*}


\subsection{Howard's Algorithm on $P_n$}
Given the DMDP $P_n$, we show that the sequence of policies that appear in Howard's algorithm is as follows. 
\begin{align}
    &\initpol_1 \to \policy_{1, 1} \to \policy_{1, 2} \to \finalpol_{1} \notag\\
    &\to \initpol_2 \to \policy_{2, 1} \to \policy_{2, 2} \to \policy_{2, 3} \to \finalpol_{2}\notag\\
    &\mathrel{\vdots}\label{eq:howard-seq}\\
    &\to \initpol_{n-1} \to \policy_{n-1, 1} \to \ldots \to \policy_{n-1,n} \to \finalpol_{n-1}\notag\\
    &\to \initpol_{n} \to \policy_{n, 1} \to \ldots \to \policy_{n, n-1}\notag
\end{align}


\paragraph{Intuition.} The $n$ highest mean-payoff cycles in the graph are the self-loops of $t_1$ through $t_n$. At a high level, the algorithm ``finds'' those cycles one after another, taking roughly $i$ iterations to find the cycle at $t_i$ after finding the cycle at $t_{i-1}$. This happens because whenever the next-best cycle is found, all ``progress'' the algorithm made so far in the rest of the graph is lost.
In policies $\initpol_{i}$, the lasso-shaped runs of all vertices (except $t_{i-1}$) end up in the cycle at $t_i$. Now, in the $(\policy_{i,j})_j$ chain of policies, the vertices keep adding an edge of weight $(n+1)^2$ to the path of their run by including more of the $b$-vertices (the ``deceleration lane''). Since the weight of the deceleration lane edges is greater than the weight of the self-loops, the $t$-vertices only ``realize'' they can do better than their current run by using their self-loop when they can no longer improve their path using the deceleration lane. However, by the tie-breaking rule, the vertices only ever add one additional vertex of the deceleration lane to their path per iteration, so it takes all $i+1$ iterations from the $(\policy_{i,j})_j$ chain until the best improvement by appraisal for vertex $t_{i+1}$ is to use its self-loop, which happens in the iteration to $\finalpol_i$ (vertices $t_{i+2},\dots,t_n$ can still do more deceleration lane improvements, so they do not use their self-loop yet). In the next iteration, all vertices in the deceleration lane as well as $t_{i+2},\dots,t_n$ ``realize'' that in their current run they do not end up in the highest mean-payoff cycle, since a new, better cycle formed at $t_{i+1}$. Thus, instead of doing the next improvement in the deceleration lane, they all choose their edge directly to the now-best cycle at $t_{i+1}$. Thus, all ``progress'' in the deceleration lane is lost.
This continues until the cycle at $t_n$ is formed, when the algorithm does a final run through the deceleration lane before halting in the optimal policy.

\paragraph{Formal Argument.} In the following, we formally show that given a policy in the sequence, the Bellman operator returns the next policy in the sequence. The initial policy is $\initpol_1$, because $t_1$ is the vertex with lowest index in the ordering and all vertices have outgoing edges to $t_1$. 

\begin{lemma}
\label{lem:howard-sequence}
    The following assertions hold:
    \begin{compactenum}
        \item\label{item:init-to-pol} for $1 \le i \le n$, it holds that $\B(\initpol_{i}) = \policy_{i, 1}$; 
        \item\label{item:pol-to-pol} for \(1 \le i \le n\) and \(1 \le j \le \begin{cases} 
        i & 1 \le i \le n-1 \\ 
        n-2 & i = n 
    \end{cases}\), it holds that \(\mathcal{B}(\policy_{i,j}) = \policy_{i,j+1}\);
        \item\label{item:pol-to-final}  for $1 \le i \le n-1$, it holds that $\B(\policy_{i,i+1}) = \finalpol_{i}$;
         \item\label{item:final-to-init}  for $1 \le i \le n-1$, it holds that $\B(\finalpol_{i}) = \initpol_{i+1}$; and
        
        \item\label{item:pol-to-end} $\B(\policy_{n,n-1}) = \policy_{n,n-1}$. 
    \end{compactenum}
\end{lemma}



\begin{proof}
    We formally prove the first two items of the result. We abbreviate the proofs of the rest, where details can be filled in similarly.
    For notational simplicity, we denote by \[\vali{i} = w_n(t_i,t_i)= n(n+1)+i\] the weight of the self-looping edge of vertex $t_i$ and by \[\wmax=(n+1)^2\] the weight of all other non-zero edges. 
    % By the construction of $P_n$, 
    The following inequalities will be essential throughout the proof:
    \begin{compactitem}
        \item it holds that 
        \begin{equation}
        \label{eq:pol-to-pol1} 
        \wmax > \vali{n} > \dots > \vali{i}>\dots>\vali{1}>0\,;
        \end{equation}
        \item
        for all $0\le k\leq n$ and $1\leq i\leq n$, it holds that 
         \begin{equation}
            \label{eq:pol-to-pol2} 
            k\wmax < (k+1)\vali{i}\,;  
         \end{equation}
        \item for all $k\leq n+1$ and $1\leq i\leq n$, it holds that 
        \begin{equation}
            \label{eq:pol-to-pol3} 
            (k-1)\wmax - k\vali{i} < k\wmax - (k+1)\vali{i}\,.
        \end{equation}
    \end{compactitem}

    \paragraph{Proof of \Cref{item:init-to-pol}.}
     We first give a quick intuitive rationale: In policy $\initpol_{i}$, all vertices that have an edge leading directly to $t_i$, the highest-value cycle of value $\vali{i}$, use it. Vertex $t_{i-1}$ loops to itself, and the vertices $t_k$, $k\leq i-2$, that have no edge directly to $t_i$, use the edge to $b_{k}$ (which uses the edge to $t_i$). Since the self-loop at $t_i$ is the highest-value cycle, all vertices will pick their next edge so that they end up there, and between their options choose to maximize the weight of the path to $t_i$, minus $\vali{i}$ times the path length. The vertices that currently use the edge directly to $t_i$ (other than $t_i$ itself and $b_1$, which has no edge to a $b$-vertex), and $t_{i-1}$, can improve this quantity of the path to $t_i$ by using one of their edges to any of the $b$-vertices, thus adding an edge of weight $\wmax>\vali{i}$ to their path. By the tie-breaking rule, they pick the edge to $b_1$. The remaining vertices, $t_k$, for $k\leq i-2$, in this iteration cannot increase the average weight of their path to $t_i$, so they do not change the edge they use.

      We now formalize this intuition. First, we compute the mean-payoff for the policy $\initpol_i$. For all vertices $\vertex \in \Vertices \setminus \{ t_{i-1} \}$, the cycle of the lasso-shaped play $P^{\initpol_i}_\vertex$ is of the form $\langle t_i \rangle$.
    Since $\initpol_i(t_{i-1}) = t_{i-1}$, the cycle of $P^{\initpol_i}_{t_{i-1}}$ is of the form $\langle t_{i-1} \rangle$. Therefore, the mean-payoff for the policy $\initpol_i$ is 
    \begin{equation}
    \label{eq:mp-initpol}
        \val^{\initpol_i}(\vertex) = \begin{cases}
            \vali{i-1} & \vertex = t_{i-1}\\
            \vali{i}  & \text{otherwise}\,.
        \end{cases}
    \end{equation}
    We now compute the potential function $\pot^{\initpol_i}$. For vertices $t_{i-1}$ and $t_i$, the potential is 0, because the path of their lasso-shaped play is of length 0. For $1 \le k \le i-2$, the path of the play $P^{\initpol_i}_{t_k}$ is of the form $\langle t_k, b_k, t_i \rangle$. Therefore, we have
    \begin{align*}
        \pot^{\initpol_i}(t_k) 
        &= (\wmax - \vali{i})+ (0 - \vali{i} )\\
        &= \wmax - 2\vali{i} \,.
    \end{align*}
    For $1 \le k \le n$, the path of the play $P^{\initpol_i}_{b_k}$ is of form $\langle b_k, t_i \rangle$. Since the weight of the edge $(b_k, t_i)$ is $0$, the potential of the vertex $b_k$ is $\pot^{\initpol_i}(b_k) = - \vali{i}$.
    For $i < k$, the path of the play $P^{\initpol_i}_{t_k}$ is of form $\langle t_k, t_i \rangle$. Since the weight of the edge $(t_k, t_i)$ is $0$, the potential of the vertex $t_k$ is $\pot^{\initpol_i}(t_k) = - \vali{i}$.
    To consolidate the previous computations, the potential function for the policy $\initpol_i$ is
    \begin{equation}
    \label{eq:pot-initpol}    
        \pot^{\initpol_i}(\vertex) = \begin{cases}
            0& \vertex = t_k \quad \forall k \in \{i-1, i\}\\
            \wmax - 2\vali{i} & \vertex = t_k \quad \forall  k \le i-2\\
            - \vali{i}& \vertex = b_k \quad \forall k \le n\\
            - \vali{i}& \vertex = t_k \quad \forall  k>i
        \end{cases}
    \end{equation}
    We are now ready to show that \(\B(\initpol_i)(\vertex) = \policy_{i,1}(\vertex)\) for all \(\vertex \in \Vertices_n\). Recall that the appraisal for edge \((\vertex, \otherver)\) under \(\initpol_i\) is:
    \[
        \appr^{\initpol_i}(\vertex, \otherver) = \left(\val^{\initpol_i}(\otherver), \, \weight_n(\vertex, \otherver) - \val^{\initpol_i}(\otherver) + \pot^{\initpol_i}(\otherver)\right),
    \]
    compared lexicographically with tie-breaking as described in \Cref{Section: Preliminaries}. 
    By \Cref{eq:mp-initpol} we know that the first element of the appraisal is maximized by all $\otherver\in \Edges(\vertex)\setminus\{t_{i-1}\}$. Thus, the second element of the appraisal, $\apprtwo^{\initpol_i}(\vertex, \otherver)$, is deciding. We now proceed by calculating (tediously, but unavoidably) $\apprtwo^{\initpol_i}(\vertex, \otherver)$ for all $\vertex\in\Vertices$ and $\otherver\in\Edges(\vertex)\setminus\{t_{i-1}\}$.

    For all $j \le i-2$, $\vertex = t_j$, we get that 
    \[\apprtwo^{\initpol_i}(\vertex, \otherver)
    =\begin{cases} 
        \wmax - 3\vali{i} & \otherver=t_k, k\le j-1 \\
        \wmax + \vali{j} - 3\vali{i} & \otherver=t_j \\
        \wmax - 2\vali{i} & \otherver=b_k, k\le j
      \end{cases}\,.\] This is maximized by $\otherver=b_k$ for any $k\le j$. By the tie-breaking rule $t_j$ favors $\initpol_i(t_j)=b_j$ if possible, so that $\B(\initpol_{i})(t_j)=b_j=\policy_{i,1}(t_j).$

       For $\vertex = t_{i-1}$, we get that 
    \[\apprtwo^{\initpol_i}(\vertex, \otherver)
    =\begin{cases} 
              \wmax - 3\vali{i} & \otherver=t_k, k\le i-2 \\
              \wmax - 2\vali{i} & \otherver=b_k, k\le i-1 \\
      \end{cases}\,.\] This is maximized by $\otherver=b_k$ for any $k\le i-1$. Since $\initpol_i(t_{i-1})=t_{i-1}$,  the tie-breaking rule will favor based on the vertex ordering, so that $\B(\initpol_{i})(t_{i-1})=b_1=\policy_{i,1}(t_{i-1}).$

       For $\vertex = t_{i}$, we get that 
    \[\apprtwo^{\initpol_i}(\vertex, \otherver)
    =\begin{cases} 
            \wmax - 3\vali{i} & \otherver=t_k, k\le i-2 \\
            0 &\otherver=t_i \\ 
            \wmax - 2\vali{i} &\otherver=b_k, k\le i  \\
      \end{cases}\,.\] This is maximized by $\otherver=t_i$. Thus, it holds that $\B(\initpol_{i})(t_{i})=t_i=\policy_{i,1}(t_i).$

     For all $j > i$, $\vertex = t_j$, we get that 
    \[\apprtwo^{\initpol_i}(\vertex, \otherver)
    =\begin{cases} 
             \wmax - 3\vali{i}& \otherver=t_k, k\le i-2 \\
              -\vali{i}& \otherver=t_i  \\
              -2\vali{i}& \otherver=t_k, i<k<j  \\
              \vali{j}- 2\vali{i}& \otherver=t_j  \\
              \wmax - 2\vali{i}& \otherver=b_k, k\le j  \\
      \end{cases}\,.\] This is maximized by $\otherver=b_k$ for any $k\le j$.
      % (we don't need to worry about $t_i$ or $t_j$ since $i<j\le n$)
      Since $\initpol_i(t_{j})=t_{i}$, the tie-breaking rule will favor based on the vertex ordering, so that $\B(\initpol_{i})(t_{j})=b_1=\policy_{i,1}(t_j).$

       For all $1\le j \le n$, $\vertex = b_j$, we get that 
      \[\apprtwo^{\initpol_i}(\vertex, \otherver)
    =\begin{cases} 
             \wmax - 3\vali{i}& \otherver=t_k, k\le i-2 \\
             -\vali{i}& \otherver=t_i \\
             -2\vali{i}& \otherver=t_k, k>i \\
             \wmax - 2\vali{i}& \otherver=b_k, k< j \\
      \end{cases}\,.\] For $j\ge 2$, this is maximized by $\otherver=b_k$ for any $k< j$. Since $\initpol_i(b_{j})=t_{i}$, the tie is broken by the vertex ordering, so that $\B(\initpol_{i})(b_{j})=b_1=\policy_{i,1}(b_j).$ For $j=1$, it is maximized by $\otherver=t_i$, so $\B(\initpol_{i})(b_1)=t_i=\policy_{i,1}(b_1).$
      Thus, we conclude that $\B(\initpol_{i}) = \policy_{i,1}$.
     
    % We now formalize this intuition. First, we compute the mean-payoff for the policy $\initpol_i$. For all vertices $\vertex \in \Vertices \setminus \{ t_{i-1} \}$, the cycle of the lasso-shaped play $P^{\initpol_i}_\vertex$ is of form $\langle
    % t_i \rangle$. Since $\initpol_i(t_{i-1}) = t_{i-1}$, therefore the cycle of $P^{\initpol_i}_{t_{i-1}}$ is of form $\langle t_{i-1} \rangle$. Therefore, the mean-payoff for the policy $\initpol_i$ is 
    % \begin{equation}
    % \label{eq:mp-initpol}
    %     \val^{\initpol_i}(\vertex) = \begin{cases}
    %         n(n+1) + i - 1 & \vertex = t_{i-1}\\
    %         n(n+1) + i  & \text{otherwise}\,.
    %     \end{cases}
    % \end{equation}
    % We now compute the potential function $\pot^{\initpol_i}$. Fo vertices $t_{i-1}$ and $t_i$, the potential is 0, because the path of their lasso-shaped play is of length 0. For $1 \le k \le i-2$, the path of the play $P^{\initpol_i}_{t_k}$ is of form $\langle t_k, b_k, t_i \rangle$. Therefore, we have
    % \begin{align*}
    %     \pot^{\initpol_i}(t_k) 
    %     &= \left ((n+1)^2+1 - \left (n(n+1)+i \right ) \right ) \\&+ \left (0 - \left (n(n+1) + i \right ) \right)\\
    %     &= -n^2-2i+2\,.
    % \end{align*}
    % For $1 \le k \le n$, the path of the play $P^{\initpol_i}_{b_k}$ is of form $\langle b_k, t_i \rangle$. Since the weight of the edge $(b_k, t_i)$ is $0$, the potential of the vertex $b_k$ is
    % \begin{align*}
    %     \pot^{\initpol_i}(b_k) &= - \left(n(n+1)+i\right)\,.
    % \end{align*}
    % For $i < k$, the path of the play $P^{\initpol_i}_{t_k}$ is of form $\langle t_k, t_i \rangle$. Since the weight of the edge $(t_k, t_i)$ is $0$, the potential of the vertex $t_k$ is
    % \begin{align*}
    %     \pot^{\initpol_i}(t_k) &= - \left(n(n+1)+i\right)\,.
    % \end{align*}
    % To consolidate the previous computations, the potential function for the policy $\initpol_i$ is
    % \begin{equation}
    % \label{eq:pot-initpol}    
    %     \pot^{\initpol_i}(\vertex) = \begin{cases}
    %         0 & \vertex = t_k \quad \forall k \in \{i-1, i\}\\
    %         -n^2-2i+2 & \vertex = t_k \quad \forall  k \le i-2\\
    %         - \left(n(n+1)+i\right)& \vertex = b_k \quad \forall k \le n\\
    %         - \left(n(n+1)+i\right)& \vertex = t_k \quad \forall  k>i
    %     \end{cases}
    % \end{equation}
    % We are now ready to show that \(\B(\initpol_i)(\vertex) = \policy_{i,1}(\vertex)\) for all \(\vertex \in \Vertices_n\). Recall that the appraisal for edge \((\vertex, \otherver)\) under \(\initpol_i\) is:
    % \[
    %     \appr^{\initpol_i}(\vertex, \otherver) = \left(\val^{\initpol_i}(\otherver), \, \weight_n(\vertex, \otherver) - \val^{\initpol_i}(\otherver) + \pot^{\initpol_i}(\otherver)\right),
    % \]
    % compared lexicographically with tie-breaking as described in \Cref{Section: Preliminaries}. 
    % By \Cref{eq:mp-initpol} we know that the first element of the appraisal is maximized by all $\otherver\in \Edges(\vertex)\backslash\{t_{i-1}\}$. Thus, the second element of the appraisal, $\apprtwo^{\initpol_i}(\vertex, \otherver)$, is deciding. We now precede by, unfortunately quite tediously but inevitability, calculating $\apprtwo^{\initpol_i}(\vertex, \otherver)$ for all $\vertex\in\Vertices$ and $\otherver\in\Edges(\vertex)\backslash\{t_{i-1}\}$.

    % For all $j \le i-2$, $\vertex = t_j$, we get that 
    % \[\apprtwo^{\initpol_i}(\vertex, \otherver)
    % =\begin{cases} 
    %     -2n^2-n-2i+1, & \otherver=t_k, k\le j-1 \\
    %     -n^2+j-2i+1, & \otherver=t_j \\
    %     -n^2-i+1, & \otherver=b_k, k\le j
    %   \end{cases}\,.\] This is maximized by $\otherver=b_k$ for any $k\le j$. By the tie-breaking rule $t_j$ favors $\initpol_i(t_j)=b_j$ if possible, so that $\B(\initpol_{i})(t_j)=b_j.$

    %    For $\vertex = t_{i-1}$, we get that 
    % \[\apprtwo^{\initpol_i}(\vertex, \otherver)
    % =\begin{cases} 
    %           -2n^2-n-2i+1 & \otherver=t_k, k\le i-2 \\
    %           -n^2-i+1 & \otherver=b_k, k\le i-1 \\
    %   \end{cases}\,.\] This is maximized by $\otherver=b_k$ for any $k\le i-1$. Since $\initpol_i(t_{i-1})=t_{i-1}$,  the tie-breaking rule will favor based on the vertex ordering, so that $\B(\initpol_{i})(t_{i-1})=b_1.$

    %    For $\vertex = t_{i}$, we get that 
    % \[\apprtwo^{\initpol_i}(\vertex, \otherver)
    % =\begin{cases} 
    %         -2n^2-n-2i+1 & \otherver=t_k, k\le i-2 \\
    %         0 &\otherver=t_i \\ 
    %         -n^2-i+1&\otherver=b_k, k\le i  \\
    %   \end{cases}\,.\] This is maximized by $\otherver=t_i$ so that $\B(\initpol_{i})(t_{i})=t_i.$

    %  For all $j > i$, $\vertex = t_j$, we get that 
    % \[\apprtwo^{\initpol_i}(\vertex, \otherver)
    % =\begin{cases} 
    %          -2n^2-n-2i+1& \otherver=t_k, k\le i-2 \\
    %           -n^2-n-1& \otherver=t_i  \\
    %           -2n^2-2n-i-1& \otherver=t_k, i<k<j  \\
    %           -n^2-n+j-i-1& \otherver=t_j  \\
    %           -n^2-i+1& \otherver=b_k, k\le j  \\
    %   \end{cases}\,.\] This is maximized by $\otherver=b_k$ for any $k\le j$ 
    %   % (we don't need to worry about $t_i$ or $t_j$ since $i<j\le n$)
    %   . Since $\initpol_i(t_{j})=t_{i}$, the tie-breaking rule will favor based on the vertex ordering, so that $\B(\initpol_{i})(t_{j})=b_1.$

    %    For all $1\le j \le n$, $\vertex = b_j$, we get that 
    %   \[\apprtwo^{\initpol_i}(\vertex, \otherver)
    % =\begin{cases} 
    %          -2n^2-n-2i+1& \otherver=t_k, k\le i-2 \\
    %          -n^2-n-1& \otherver=t_i \\
    %          -2n^2-2n-i-1& \otherver=t_k, k>i \\
    %          -n^2-i+1& \otherver=b_k, k< j \\
    %   \end{cases}\,.\] For $j\ge 2$, this is maximized by $\otherver=b_k$ for any $k< j$. Since $\initpol_i(t_{j})=t_{i}$, the tie-breaking rule will favor based on the vertex ordering, so that $\B(\initpol_{i})(b_{j})=b_1.$ For $j=1$, it is maximized for $u=t_i$, so $\B(\initpol_{i})(b_1)=t_i.$

    %   Thus, we conclude that $\B(\initpol_{i}) = \policy_{i,1}$.

    \paragraph{Proof of \Cref{item:pol-to-pol}.}
    We first give a quick intuitive rationale: In $\policy_{i,j}$, all paths end up in the same cycle, the self-loop at $t_i$ with value $\vali{i}$. Thus, in the iteration to $\policy_{i,j+1}$, the other vertices choose their next edge solely to maximize the weight of their path to $t_i$, minus $\vali{i}$ times the path length. The optimal path for that is using as much of the deceleration lane $\langle b_n,\dots,b_1\rangle$ as possible, since its edges have weight $\wmax > \vali{i}$. The vertices $b_2,\dots,b_{j+1}$ already move to their predecessor (and $b_1$ moves to $t_i$), so for all vertices that can (except $t_i$), it is optimal to pick the edge to $b_{j+1}$ (which they do, by the tie-breaking rule) to maximize the number of deceleration lane edges in their path. Thus, in $\policy_{i,j+1}$ the vertices that can, all pick the edge to $b_{j+1}$ (except $t_i$), while the other vertices cannot further improve their path and hence pick the same edge as in $\policy_{i,j}$.
    
    Formally, we consider policy $\policy_{i,j}$ for any $1\leq i \le n$ and $1 \le j \le \begin{cases} 
        i & 1 \le i \le n-1 \\ 
        n-2 & i = n 
    \end{cases}$. For all vertices $\vertex\in\Vertices$, the cycle of the lasso-shaped play $P^{\policy_{i,j}}_\vertex$ is $\langle t_i \rangle$. Therefore, for all vertices $\vertex\in\Vertices$, it holds that $\val^{\policy_{i,j}}(\vertex) = \vali{i}$. Hence, $\B(\policy_{i,j})(\vertex)$ solely depends on $\apprtwo^{\policy_{i,j}}(\vertex,\otherver)$.
    We proceed by calculating $\apprtwo^{\policy_{i,j}}(\vertex,\otherver)$ for all $\vertex\in\Vertices$ and $\otherver\in\Edges(\vertex)$. 
    We will first assume $j>1$, since $\policy_{i,j}$ is slightly different at $j=1$. We will treat this special case at the end.
    
    For $\vertex = t_{i}$, we get that \begin{multline*}
    \apprtwo^{\policy_{i,j}}(\vertex, \otherver)
    \\ =\begin{cases} 
            k\wmax  - (k+2)\vali{i} & \otherver=t_k, k\le j \\
            j\wmax  - (j+2)\vali{i} & \otherver=t_k, j<k<i \\
            0 &\otherver=t_i \\ 
            k\wmax  - (k+1)\vali{i}&\otherver=b_k, k\le j  \\
            (j+1)\wmax  - (j+2)\vali{i}&\otherver=b_k, j<k\le i 
      \end{cases}\,.\end{multline*} By \Cref{eq:pol-to-pol1,eq:pol-to-pol2,eq:pol-to-pol3}, we know that this is maximized by $\otherver=t_i$ so that $\B(\policy_{i,j})(t_{i})=t_i = \policy_{i,j+1}(t_i).$

      For $j<h<i$, $\vertex = t_{h}$, we get that  \begin{multline*}\apprtwo^{\policy_{i,j}}(\vertex, \otherver)
    \\ =\begin{cases} 
            k\wmax  - (k+2)\vali{i} & \otherver=t_k, k\le j \\
            j\wmax  - (j+2)\vali{i} & \otherver=t_k, j<k<h \\
            j\wmax + \vali{h} - (j+2)\vali{i} & \otherver=t_h \\
            k\wmax  - (k+1)\vali{i}&\otherver=b_k, k\le j  \\
            (j+1)\wmax  - (j+2)\vali{i}&\otherver=b_k, j<k\le h 
      \end{cases}\,.\end{multline*} By \Cref{eq:pol-to-pol1,eq:pol-to-pol2,eq:pol-to-pol3}, this is maximized by $\otherver=b_k$ for all $j<k\leq h$. Since $\policy_{i,j}(t_h)=b_j$, tie-breaking is done by vertex order so that $\B(\policy_{i,j})(t_{h})=b_{j+1} = \policy_{i,j+1}(t_h).$  

      For $h\leq j, h<i$, $\vertex = t_{h}$, we get that  \begin{multline*}\apprtwo^{\policy_{i,j}}(\vertex, \otherver)
    \\ =\begin{cases} 
            k\wmax  - (k+2)\vali{i} & \otherver=t_k, k< h \\
            h\wmax + \vali{h} - (h+2)\vali{i} & \otherver=t_h \\
            k\wmax  - (k+1)\vali{i}&\otherver=b_k, k\le h  \\
      \end{cases}\,.\end{multline*} By \Cref{eq:pol-to-pol1,eq:pol-to-pol2,eq:pol-to-pol3}, this is maximized by $\otherver=b_h$ so that $\B(\policy_{i,j})(t_{h})=b_{h} = \policy_{i,j+1}(t_h).$  

    For $i< h$, $\vertex = t_{h}$, we get that  \begin{multline*}\apprtwo^{\policy_{i,j}}(\vertex, \otherver)
    \\ =\begin{cases} 
            k\wmax  - (k+2)\vali{i} & \otherver=t_k, k\le j \\
            j\wmax  - (j+2)\vali{i} & \otherver=t_k, j<k<h, k\neq i \\
            -\vali{i} & \otherver=t_i \\
            j\wmax + \vali{h} - (j+2)\vali{i} & \otherver=t_h \\
            k\wmax  - (k+1)\vali{i}&\otherver=b_k, k\le j  \\
            (j+1)\wmax  - (j+2)\vali{i}&\otherver=b_k, j<k\le h 
      \end{cases}\,.\end{multline*} By \Cref{eq:pol-to-pol1,eq:pol-to-pol2,eq:pol-to-pol3}, this is maximized by $\otherver=b_k$ for all $j<k\leq h$. Since $\policy_{i,j}(t_h)=b_j$, tie-breaking is done by vertex order so that $\B(\policy_{i,j})(t_{h})=b_{j+1}=\policy_{i,j+1}(t_h).$ 

        For $1\leq h \leq n$, $\vertex = b_{h}$, we get that  \begin{multline*}\apprtwo^{\policy_{i,j}}(\vertex, \otherver)
    \\ =\begin{cases} 
            k\wmax  - (k+2)\vali{i} & \otherver=t_k, k\le j \\
            j\wmax  - (j+2)\vali{i} & \otherver=t_k, j<k, k\neq i \\
            -\vali{i} & \otherver=t_i \\
            k\wmax  - (k+1)\vali{i}&\otherver=b_k, k\le j, k<h  \\
            (j+1)\wmax  - (j+2)\vali{i}&\otherver=b_k, j<k<h 
      \end{cases}\,.\end{multline*} By \Cref{eq:pol-to-pol1,eq:pol-to-pol2,eq:pol-to-pol3}, for $j+1\ge h$, this is maximized by $\otherver=b_{h-1}$, so that $\B(\policy_{i,j})(b_{h})=b_{h-1}=\policy_{i,j+1}(b_h).$ For $j+1<h$, this is maximized by $b_{k}$ for all $j<k<h$. Since $\policy_{i,j}(b_h)=b_j$, tie-breaking is done by vertex order so that $\B(\policy_{i,j})(b_{h})=b_{j+1}=\policy_{i,j+1}(b_h).$ 

       Consider $j=1$. $\apprtwo^{\policy_{i,j}}(\vertex, \otherver)$ generally is unchanged from the case $j>1$, except for $\otherver=t_k$ for $1<k\le i-2$. In these cases, due to the one extra edge in the path of run $P^{\policy_{i,j}}_\vertex$,  $\apprtwo^{\policy_{i,j}}(\vertex, \otherver)$ is greater by $\wmax - \vali{i}$. This does not change which $\otherver$'s maximize $\apprtwo^{\policy_{i,j}}(\vertex, \otherver)$, so the results for $\B(\policy_{i,j})$ outlined above for $j>1$ hold for $j=1$ as well.
       
       We conclude that $\B(\policy_{i,j})=\policy_{i,j+1}$.

     % For $\vertex = t_{i}$, we get that $\apprtwo^{\initpol_i}(t_{i}, t_{i}) = 0$. For all $\otherver\neq t_i$, we see that $\apprtwo^{\initpol_i}(t_{i}, \otherver)\geq 0$ for some 
     
     
     % For $\B(\policy_{i,j})(t_i)\neq t_i$, it needs to hold that . This would imply another cycle including $t_i$ of average weight at least $\val^{\policy_{i,j}}(\vertex) = n(n+1)+1$. Note that the weight of  edge to $t_i$ is $0$ and that 

     
    
    
    \paragraph{Proof of \Cref{item:pol-to-final}.}
    This case is highly similar to \Cref{item:pol-to-pol}. In the iteration on $\policy_{i,i+1}$, all vertices except $t_{i+1}$ follow the same pattern as for the iteration from $\policy_{i,j}$ to $\policy_{i,j+1}$ for $j\leq i$. Vertex $t_{i+1}$, though, can no longer use an edge to a higher $b_k$ to add another edge of weight $\wmax>\vali{i}$ to its path to $t_i$. However, (differently to the vertices $t_k$ for $k < i$), the edge to itself has a value higher than the value of its current run's cycle $\langle t_i \rangle$, i.e. $\vali{i+1}>\vali{i}$, so it will use this edge to add it to its path. Thus, in the iteration from $\policy_{i,i+1}$ to $\finalpol_i$, all vertices will behave as in \Cref{item:pol-to-pol}, except for $t_{i+1}$, which picks its self-loop.
    Formally, for all $\vertex,\otherver \in \Vertices$, $\appr^{\policy_{i,j}}(\vertex, \otherver)$ for $j=i+1$ follows the same pattern as for $j\leq i$. We can check that for all vertices $\vertex \neq t_{i+1}$, the pattern of which $\otherver$'s maximize it also stays unchanged, so that $\B(\policy_{i,i+1})(\vertex) = {\policy_{i,i+2}}(\vertex) = {\finalpol_{i}}(\vertex)$\footnote{Note that policy $\policy_{i,i+2}$ does not appear in the algorithm, but we use it here to illustrate this point.} for $\vertex \neq t_{i+1}$. 
    For $\vertex = t_{i+1}$, by \Cref{eq:pol-to-pol1,eq:pol-to-pol2,eq:pol-to-pol3} $\apprtwo^{\policy_{i,i+1}}(t_{i+1}, \otherver)$ (and thus $\appr^{\policy_{i,i+1}}(t_{i+1}, \otherver)$) is maximized by $\otherver=t_{i+1}$ so that $\B(\policy_{i,i+1})(t_{i+1}) = t_{i+1} = {\finalpol_{i}(t_{i+1})}.$
    
    \paragraph{Proof of \Cref{item:final-to-init}.}
    Intuitively, in $\finalpol_{i}$, the best cycle is $\langle t_{i+1} \rangle$ but currently no vertex other than $t_{i+1}$ ends up there. Thus, $t_{i+1}$ has the uniquely highest value, so all vertices with edges to $t_{i+1}$ will pick them. For vertices that do not have an edge to $t_{i+1}$, namely $t_k$ for $k\leq i$, this iteration step still is identical to the iteration from $\policy_{i,i}$ to $\policy_{i,i+1}$ in which they did not change the edge they use.
    Formally, for all vertices $\vertex \in \Vertices \setminus \{ t_{i+1} \}$, the cycle of the lasso-shaped play $P^{\finalpol_i}_\vertex$ is of form $\langle
    t_i \rangle$. The cycle of $P^{\finalpol_i}_{t_{i+1}}$ is of form $\langle t_{i+1} \rangle$. Hence, the mean-payoff for the policy $\finalpol_i$ is 
    \begin{equation*}
    %\label{eq:mp-finalpol}
        \val^{\finalpol_i}(\vertex) = \begin{cases}
            \vali{i+1} & \vertex = t_{i+1}\\
            \vali{i}  & \text{otherwise}\,.
        \end{cases}
    \end{equation*}
    Since $\val^{\finalpol_i}(\vertex)$ is uniquely maximized by $\vertex = t_{i+1}$, $\B(\finalpol_{i})(\vertex) = t_{i+1}$ for all $\vertex$ where $t_{i+1}\in\Edges(\vertex)$, i.e., for $\vertex=b_k, 1\le k\le n$ and $\vertex=t_k, i+1\le k\le n$. 
    For the remaining vertices, $\vertex=t_k, 1\le k\le i$, their appraisal function $\appr^{\finalpol_{i}}(\vertex, \otherver)$ is still the same as $\appr^{\policy_{i,i}}(\vertex, \otherver)$. Thus, $\B(\finalpol_{i})(t_k)= \B(\policy_{i,i})(t_k)=\policy_{i,i+1}(t_k)= \initpol_{i+1}(t_k)$.
    
    \paragraph{Proof of \Cref{item:pol-to-end}.}
    Again, this case is very similar to \Cref{item:pol-to-pol}. In the general case for the iteration from $\policy_{i,j}$ to $\policy_{i,j+1}$, only vertices $t_k$ with $k\geq j+1, k\neq i$ and $b_k$ with $k\geq j+2$ change the edge they use. Since we reached the end of the deceleration lane and the highest-value self-loop is being used, there are no more vertices that match these criteria, so no vertex changes the edge it uses. 
    Formally, $\appr^{\policy_{i,j}}(\vertex, \otherver)$ for $i=j+1=n$ follows the same pattern as for all values of $i,j$ considered in \Cref{item:pol-to-pol}. For all vertices $\vertex \in\Vertices$, the pattern of which $\otherver$'s maximize it also stays unchanged, so that $\B(\policy_{n,n-1}) = {\policy_{n,n}}={\policy_{n,n-1}}$.
\end{proof}

% 
    % $\vertex=t_{j}$, $j\leq i-2$, $\vertex=t_{i-1}$, $\vertex=t_i$, $\vertex=t_j$, $j>i$, and $\vertex=b_{j}$,
    % 
    % \Cref{tab:init-to-pol_t1,tab:init-to-pol_t2,tab:init-to-pol_t3,tab:init-to-pol_t4,tab:init-to-pol_t5}, contain for all $\otherver\in\Edges(\vertex)\backslash\{t_{i-1}\}$ the values of the second term of $\appr^{\initpol_i}(\vertex, \otherver)$.
% \begin{table}[h]
%         \centering
%         \begin{tabular}{c|c}
%             $\otherver\in\Edges(\vertex)\backslash\{t_{i-1}\}$ &  $\weight_n(\vertex, \otherver) -\val^{\initpol_i}(\otherver) + \pot^{\initpol_i}(\otherver)$\\ \hline
%              $t_k, k\le j-1$ & $-2n^2-n-2i+1$ \\
%              $t_j$ & $-n^2+j-2i+1$ \\
%              $b_k, k\le j$ & $-n^2-i+1$ \\
%         \end{tabular}
%         \caption{$\vertex = t_j$ for all $j \le i-2$}\label{tab:init-to-pol_t3}

%         \bigskip
%         \begin{tabular}{c|c}
%             $\otherver\in\Edges(\vertex)\backslash\{t_{i-1}\}$ &  $\weight_n(\vertex, \otherver) -\val^{\initpol_i}(\otherver) + \pot^{\initpol_i}(\otherver)$\\ \hline
%              $t_k, k\le i-2$ & $-2n^2-n-2i+1$ \\
%              $b_k, k\le i-1$ & $-n^2-i+1$ \\
%         \end{tabular}
%         \caption{$\vertex = t_{i-1}$}\label{tab:init-to-pol_t2}
        
%         \bigskip
%         \begin{tabular}{c|c}
%             $\otherver\in\Edges(\vertex)\backslash\{t_{i-1}\}$ &  $\weight_n(\vertex, \otherver) -\val^{\initpol_i}(\otherver) + \pot^{\initpol_i}(\otherver)$\\ \hline
%              $t_k, k\le i-2$ & $-2n^2-n-2i+1$ \\
%              $t_i$ & 0 \\ 
%              $b_k, k\le i$ & $-n^2-i+1$ \\
%         \end{tabular}
%         \caption{$\vertex = t_i$}\label{tab:init-to-pol_t1}

%         \bigskip
%         \begin{tabular}{c|c}
%             $\otherver\in\Edges(\vertex)\backslash\{t_{i-1}\}$ &  $\weight_n(\vertex, \otherver) -\val^{\initpol_i}(\otherver) + \pot^{\initpol_i}(\otherver)$\\ \hline
%              $t_k, k\le i-2$ & $-2n^2-n-2i+1$ \\
%              $t_i$ & $-n^2-n-1$ \\
%              $t_k, i<k<j$ & $-2n^2-2n-i-1$ \\
%              $t_j$ & $-n^2-n+j-i-1$ \\
%              $b_k, k\le j$ & $-n^2-i+1$ \\
%         \end{tabular}
%         \caption{$\vertex = t_j$ for all $j>i$}\label{tab:init-to-pol_t4}

%         \bigskip
%         \begin{tabular}{c|c}
%             $\otherver\in\Edges(\vertex)\backslash\{t_{i-1}\}$ &  $\weight_n(\vertex, \otherver) -\val^{\initpol_i}(\otherver) + \pot^{\initpol_i}(\otherver)$\\ \hline
%              $t_i$ & $-n^2-n-1$ \\
%              $t_k, k\le i-2$ & $-2n^2-n-2i+1$ \\
%              $t_k, k>i$ & $-2n^2-2n-i-1$ \\
%              $b_k, k< j$ & $-n^2-i+1$ \\
%         \end{tabular}
%         \caption{$\vertex = b_j$ for all $1\le j \le n$}\label{tab:init-to-pol_t5}
        
%     \end{table}
% 
    % \subparagraph{Case 1: \(\vertex = b_1\)}
    % The outgoing edges of $b_1$ is of form $(b_1, t_j)$ for all $1 \le j \le n$. By \Cref{eq:mp-initpol}, we have
    % $\val^{\initpol_i}(t_{i-1})$ is strictly smaller than $\val^{\initpol_i}(t_j)$. Hence, $(b_1, t_{i-1})$ cannot be chosen by the Bellman operator. Among the remaining edges $(b_1, t_j)$ with $j \neq i-1$, the first coordinate of the appraisal $\val^{\initpol_i}(t_j)$ coincide. Therefore, we consider the second coordinate. A straightforward, but tedious algebraic comparison shows that the unique maximum occurs at $j = i$. Hence, the Bellman operator outputs $t_i$, given $b_1$.
    % 
    % \subparagraph{Case 2: \(v = b_{k}\)}
    % We now consider the general case of $b_k$. Similar to the case of $b_1$, we can show that among outgoing edges of form $(b_k, t_j)$, the maximum occurs at $j=i$. However, there are edges of form $(b_k, b_j)$ for all $1 \le j < k$. By comparing the second coordinate of the appraisal, the best outgoing edge is $(b_k, b_1)$. 
    % 
    % \subsubsection*{Case 3: \(v = t_k\) (\(1 \leq k \leq i-2\))}
    % \begin{align*}
    % \text{Outgoing edges:} &\quad (t_k, b_j)\ (1 \leq j \leq k), (t_k, t_j)\ (1 \leq j \leq k) \\
    % \text{Appraisals:} &\quad 
    % \begin{cases}
    % \text{All } b_j \text{ edges:} & \val^{\initpol_i}(b_j) = n(n+1)^2 + i - 1 \\
    % \text{Lex order:} & \text{Prefer } b_j \text{ over } t_j
    % \end{cases} \\
    % \text{Conclusion:} &\quad \B(\initpol_i)(t_k) = b_1 = \sigma_{i,1}(t_k)
    % \end{align*}
    % 
    % \subsubsection*{Case 4: \(v = t_k\) (\(k > i\))}
    % \begin{align*}
    % \text{Outgoing edges:} &\quad (t_k, b_j)\ (1 \leq j \leq k), (t_k, t_j)\ (1 \leq j \leq k) \\
    % \text{Appraisals:} &\quad 
    % \begin{cases}
    % \text{Edge to } b_1: & \text{Second component } > \text{ edge to } t_j \\
    % \text{Tie-breaker:} & \text{Selects } b_1
    % \end{cases} \\
    % \text{Conclusion:} &\quad \B(\initpol_i)(t_k) = b_1 = \sigma_{i,1}(t_k)
    % \end{align*}
    % 
    % \subsubsection*{Case 5: \(v = b_1\)}
    % \begin{align*}
    % \text{Outgoing edges:} &\quad (b_1, t_j)\ (1 \leq j \leq n) \\
    % \text{Appraisals:} &\quad 
    % \begin{cases}
    % \text{Edge to } t_i: & \text{Maximizes } \val
    % \end{cases} \\
    % \text{Conclusion:} &\quad \B(\initpol_i)(b_1) = t_i = \sigma_{i,1}(b_1)
    % \end{align*}
    
    % \subsubsection*{Case 6: \(v = b_k\) (\(k \geq 2\))}
    % \begin{align*}
    % \text{Outgoing edges:} &\quad (b_k, b_j)\ (1 \leq j < k), (b_k, t_j)\ (1 \leq j \leq n) \\
    % \text{Appraisals:} &\quad 
    % \begin{cases}
    % \text{Edges to } b_j: & \text{Same } \val, \text{tie-breaker selects } b_1
    % \end{cases} \\
    % \text{Conclusion:} &\quad \B(\initpol_i)(b_k) = b_1 = \sigma_{i,1}(b_k)
    % \end{align*}
    
    % \subsection*{Final Conclusion}
    % For all \(v \in V_n\), \(\B(\initpol_i)(v) = \sigma_{i,1}(v)\). Thus, \(\B(\initpol_i) = \sigma_{i,1}\). \qedhere
    % \end{proof}
    
    % \subsection*{Appendix: Second Component Derivations}
    % For edge \((t_k, t_j)\) where \(k > i\):
    % \begin{equation*}
    % \begin{aligned}
    % & (j-1)(n+1)^2 - \val^{\initpol_i}(t_j) + \pot^{\initpol_i}(t_j) \\
    % &= (j-1)(n+1)^2 - [n(n+1)^2 + i - 1] + [(i-n-1)(n+1)^2 - i + 1] \\
    % &= (j - 2n - 2)(n+1)^2 - 2i + 2
    % \end{aligned}
    % \end{equation*}
    
    % For edge \((t_k, b_j)\):
    % \begin{equation*}
    % \begin{aligned}
    % & (n+1)^3 - \val^{\initpol_i}(b_j) + \pot^{\initpol_i}(b_j) \\
    % &= (n+1)^3 - [n(n+1)^2 + i - 1] + [(i-n-2)(n+1)^2 + n - i + 1] \\
    % &= (i - n - 1)(n+1)^2 + n - 2i + 2
    % \end{aligned}
    % \end{equation*}
    
    % We proceed by conditioning on the vertex $\vertex$.
    % \subparagraph{Case $\vertex = b_1$.} The outgoing edges of the vertex $b_1$ is $\Edges(b_1) = \{t_1, \ldots, t_n\}$. By \Cref{eq:mp-initpol}, the mean-payoff of all vertices in $\Edges(b_1)$ except $t_{i-1}$ is the same, and the mean-payoff of $t_{i-1}$ is smaller than the rest. Recall that the first coordinate of the appraisal of an edge is the mean-payoff of the edge tail. Therefore, the vertex $b_1$ is indifferent to $\Edges(b_1) \setminus \{ t_{i-1} \}$ with respect to the first coordinate of the appraisal. Thus, we consider the second coordinate of the appraisal.  The second coordinate of the appraisal of the edge $(b_k, t_\ell)$ is 
    % \begin{align*}
    %     w_n(b_1, t_\ell) &- \val^{\initpol_i}(t_\ell) + \pot^{\initpol_i}(t_\ell)\\
    %     &= n(n+1)^2 + n - (n(n+1)^2 + i - 1) + (i - n - 2)(n+1)^2 + n - i + 1 \\
    %     &= (i - n - 2)(n+1)^2 + 2(n - i + 1) \,.
    % \end{align*}
    % We compute the second coordinate of appraisal for all the outgoing edges of the vertex $b_1$, except the edge to $t_{i-1}$, and show that the edge $(b_1, t_i)$ achieves the maximum.
    % % We now compute the appraisal of all the edges. For $1 \le \ell < k \le n$, the appraisal of the edge $(b_k, b_\ell)$ is 
    % \begin{align*}
    %     \appr^{\initpol_i}&(b_k, b_\ell)\\
    %     &= \left ( \val^{\initpol_i}(b_\ell), w_n(b_k, b_\ell) - \val^{\initpol_i}(b_\ell) + \pot^{\initpol_i}(b_\ell) \right )\\
    %     &= \left (n(n+1)^2 + i - 1, n(n+1)^2 + n - (n(n+1)^2 + i - 1) + (i - n - 2)(n+1)^2 + n - i + 1  \right )\\
    %     &= \left (n(n+1)^2 + i - 1, (i - n - 2)(n+1)^2 + 2(n - i + 1) \right )\,.
    % \end{align*}
    % For $1 \le k, \ell \le n$ and $\ell \not\in \{i-1, i\}$, the appraisal of the edge $(b_k, t_\ell)$ is 
    % \begin{align*}
    %     \appr^{\initpol_i}&(b_k, t_\ell)\\
    %     &= \left ( \val^{\initpol_i}(t_\ell), w_n(b_k, t_\ell) - \val^{\initpol_i}(t_\ell) + \pot^{\initpol_i}(t_\ell) \right )\\
    %     &= \left (n(n+1)^2 + i - 1, n(n+1)^2 + n - (n(n+1)^2 + i - 1) + (i - n - 2)(n+1)^2 + n - i + 1  \right )\\
    %     &= \left (n(n+1)^2 + i - 1, (i - n - 2)(n+1)^2 + 2(n - i + 1) \right )\,.
    % \end{align*} 



\begin{proof}[Proof of \Cref{thm:main-result}]
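    The constructed DMDP has $2n$ vertices and $\frac{n(n-1)}{2} + n^2 + \frac{n(n+1)}{2} + \frac{n(n+1)}{2} = \frac{5n^2 + n}{2}$ edges. The absolute value of every weight is $\calO(n^2)$, so each weight can be encoded with $\calO(\log n)$ bits. Therefore, the size of the DMDP is $\calO(n^2\log n)$.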
    
    \Cref{lem:howard-sequence} shows that if Howard's algorithm starts with the policy $\initpol_1$, it iterates over all policies in the sequence shown in \Cref{eq:howard-seq}. Therefore, the length of the sequence is 
    \begin{equation*}
        2n + \sum_{i=1}^{n} (i+1) -3 = \frac{n^2 + 7n - 6}{2}\,,
    \end{equation*}
    where the equality follows from the formula for the sum of an arithmetic series and algebraic rearrangement of terms. Since this length is $\Omega(n^2)$, the result follows.
\end{proof}


\subsection{Experimental Evaluation}
To validate our lower bound example, we implemented both Howard's policy iteration and the example in Python. The experimental evaluation confirms the sequence of policies shown by the theoretical analysis. The full implementation is publicly available at \url{https://doi.org/10.5281/zenodo.14823415}.



    