\section{Computation Issue}\label{sec:efficient_update}
% In this section, we present the solutions to the optimization problems of both \LSOMD and \LSFTRL.
In this section, we present the solutions to the optimization problem of the update of \LSFTRL in Eq. \eqref{update:FTRL} and the computation of the ``balanced transition'' $p^\star$ in Eq. \eqref{eq:p_star}.

\subsection{\LSFTRL Update}\label{app:efficient_upd_ftrl}
% \subsubsection{Reducing to an OMD-like Update}
% \subsection{Efficient Update for \LSOMD}\label{app:efficient_upd_omd}
To solve the update of \LSFTRL in Eq. \eqref{update:FTRL}, we first present an OMD-like update as well as its solution. We then show that the solution to the OMD-like update is equivalent to the \LSFTRL update, which provides the final optimization solution to the \LSFTRL update.

To begin with, we first introduce the OMD-like update, which leverages a list of learning rates $\eta\coloneqq(\eta_h(x_h))_{x_h\in\gX_h,h\in[H]}$ adaptive to each infoset and a generalized potential function defined as follows (not to be confused with the negative entropy potential function $\Psi_h(\mu)=\sumlevel \mu_{1:h}(x_h,a_h)\log(\mu_{1:h}(x_h,a_h))$ used in \LSFTRL):
\[\psi_{\eta}(\mu)=\sumH\sumlevel\frac{\mu_{1:h}(x_h,a_h)}{\eta_h(x_h)}\log\left(\frac{\mu_{1:h}(x_h,a_h)}{\sum_{a^\prime_h\in\gA}\mu_{1:h}(x_h,a^\prime_h)}\right)\,.\]
By the fact that for any $\mu\in\Pi_{\max}$, the derivative of $\psi_{\eta}(\mu)$ satisfies
\begin{equation*}
    \nabla_{x_h,a_h}\psi_{\eta}(\mu)=\frac{1}{\eta_h(x_h)}\log(\mu_h(a_h|x_h)) \,, 
\end{equation*}
one can see that $\psi_{\eta}(\mu)$ induces the 
following  distance-generating function, which is a
generalized version of the \textit{dilated} entropy distance-generating function of \citet{kozuno2021learning}:
% \[D_{\psi_{\eta}}(\mu^1\Vert\mu^2)=\sumH\sumlevel \frac{\mu_{1:h}^1(x_h,a_h)}{\eta_h(x_h)} \log\frac{\mu^1_h(a_h|x_h)}{\mu^2_h(a_h|x_h)}\,.\]
\begin{align}\label{eq:dilated_DGF}
    D_{\psi_{\eta}}(\mu^1\Vert\mu^2)=\sumH\sumlevel \frac{\mu_{1:h}^1(x_h,a_h)}{\eta_h(x_h)} \log\frac{\mu^1_h(a_h|x_h)}{\mu^2_h(a_h|x_h)}\,.
\end{align}
The OMD-like update in accordance with the generalized dilated entropy distance-generating function in Eq. \eqref{eq:dilated_DGF} is defined as
\begin{align}\label{update:omd_generalized}
    \mu^{t+1}&=\argmin_{\mu\in \Pi_{\max}} \left\langle\mu,\hat{\ell}^{t}\right\rangle+D_{\psi_{\eta}}(\mu\Vert\mu^{t})\nonumber\\
    &=\argmin_{\mu\in \Pi_{\max}} \left\langle\mu,\hat{\ell}^{t}\right\rangle+\sumH\sumlevel \frac{\mu_{1:h}(x_h,a_h)}{\eta_h(x_h)} \log\frac{\mu_h(a_h|x_h)}{\mu^t_h(a_h|x_h)}\,.
\end{align}

% To begin with, we introduce 
% a generalized version of OMD update in Eq. \eqref{eq:OMD_update}, which leverages learning rates adaptive to each infoset. Specifically, given any list of learning rates $\eta\coloneqq(\eta_h(x_h))_{x_h\in\gX,h\in[H]}$, the generalized potential function is defined as 
% \[\Psi_{\eta}(\mu)=\sumH\sumlevel\frac{\mu_{1:h}(x_h,a_h)}{\eta_h(x_h)}\log\left(\frac{\mu_{1:h}(x_h,a_h)}{\sum_{a^\prime_h\in\gA}\mu_{1:h}(x_h,a^\prime_h)}\right)\,.\]
% By the fact that for all positive $\mu\in\Pi_{\max}$, the derivative of $\Psi_{\eta}(\mu)$ satisfies
% \begin{equation*}
%     \nabla_{x_h,a_h}\Psi_{\eta}(\mu)=\frac{1}{\eta_h(x_h)}\log(\mu_h(a_h|x_h)) \,, 
% \end{equation*}
% one can see that $\Psi_{\eta}(\mu)$ induces the 
% generalized \textit{dilated} entropy distance-generating function
% \[D_{\Psi_{\eta}}(\mu^1\Vert\mu^2)=\sumH\sumlevel \frac{\mu_{1:h}^1(x_h,a_h)}{\eta_h(x_h)} \log\frac{\mu^1_h(a_h|x_h)}{\mu^2_h(a_h|x_h)}\,.\]
% The generalized version of OMD update in \Eqref{eq:OMD_update} is given as follows:
% \begin{align*}
%     \mu^{t+1}&=\argmin_{\mu\in \Pi_{\max}} \left\langle\mu,\hat{\ell}^{t}\right\rangle+D_{\Psi_{\eta}}(\mu\Vert\mu^{t})\nonumber\\
%     &=\argmin_{\mu\in \Pi_{\max}} \left\langle\mu,\hat{\ell}^{t}\right\rangle+\sumH\sumlevel \frac{\mu_{1:h}(x_h,a_h)}{\eta_h(x_h)} \log\frac{\mu_h(a_h|x_h)}{\mu^t_h(a_h|x_h)}\,.\label{update:omd_generalized}
% \end{align*}
% 1. \zhao{TBF: why learning rates and $p^\star$ are comparable?} \chen{[Re:]Basically, the methods in \citet{farina20stochastic,bai2022nearoptimal,Fiegel2023adapting} is adding a weighted term in dilated entropy generating function. And it turns out their weighted term all vary with X. c.f. sec 5 in \citet{Fiegel2023adapting} for 'Dilated entropy and policy update'.}
% We remark that $\eta=\{\eta_h(x_h)\}_{x_h\in\gX_h,h\in[H]}$ also generalizes the of ``balanced transitions" used in \citet{farina20stochastic,bai2022nearoptimal,Fiegel2023adapting}.

The solution to \Eqref{update:omd_generalized} is given in the following proposition. Notice that the solution to similar optimization problems of
previous works \citep{kozuno2021learning,bai2022nearoptimal,Fiegel2023adapting} critically relies on the sparsity of their importance-weighted loss estimator, which only permits non-zero loss estimates along the experienced trajectory $\{(x^t_h,a^t_h)\}_{h\in[H]}$. 
In contrast, the solution to \Eqref{update:omd_generalized} in the following proposition solves the optimization problem of OMD with generalized dilated entropy distance-generating function and the loss estimator with non-zero loss estimates for all infoset-action pairs $(x,a)\in\gX\times\gA$.

\begin{proposition}\label{prop:update}
    The solution to the OMD-like update in Eq. \eqref{update:omd_generalized} satisfies
    \begin{align*}
&\mu_h^{t+1}(a_h|x_h)\\
=&\mu_h^t(a_h|x_h)\exp\left\{-\eta_h(x_h)\hatellxa+\sum_{x_{h+1}\in C(x_h,a_h)}\frac{\eta_h(x_h)}{\eta_{h+1}(x_{h+1})}\log Z_{h+1}^t(x_{h+1})-\log Z_h^t(x_h)\right\}\,,
\end{align*}
where
\begin{align}\label{eq:partition_Z}
Z_{h}^t(x_{h})&=\sum_{a_h\in\gA}\mu_h^t(a_h|x_h)\exp\left\{-\eta_h(x_h)\hatellxa
+\sum_{x_{h+1}\in C(x_h,a_h)}\frac{\eta_h(x_h)}{\eta_{h+1}(x_{h+1})}\log Z_{h+1}^t(x_{h+1})\right\}\,,
\end{align}
and, for notational convenience, we adopt the convention that every $(x_H,a_H)\in \gX_H\times\gA$ has a unique descendant $x_{H+1}$ with $Z_{H+1}^t(x_{H+1})=1$.
\end{proposition}
\begin{proof}
First note that
\begin{align}\label{equ:opt_object}
&\left\langle\mu,\hat{\ell}^{t}\right\rangle+D_{\psi_{\eta}}(\mu\Vert\mu^{t}) \notag\\
=&\sum_{h=1}^H{\sumlevel{\mu _{1:h}(x_h,a_h)\left[ \hat{\ell }_{h}^{t}\left( x_h,a_h \right) +\frac{1}{\eta_h(x_h)}\log \frac{\mu _h(a_h|x_h)}{\mu _{h}^{t}(a_h|x_h)} \right]}}
\notag\\
=&\sum_{h=1}^H{\sum_{x_h\in\gX_h}{\mu _{1:h-1}(x_h)\left[ \left< \mu _h(\cdot |x_h),\hat{\ell }_{h}^{t}\left( x_h,\cdot \right) \right> +\frac{\KL\left( \mu _h(\cdot |x_h)\Vert\mu _{h}^{t}(\cdot |x_h) \right)}{\eta_h(x_h)} \right]}} \,. 
\end{align}

We now prove the proposition via backward induction over $h=H,\ldots,1$. 

When $h=H$, for any $x_H\in\gX_H$, 
Eq. \eqref{equ:opt_object} shows that
% \zhao{TBF: check the following equations}
% \begin{align*}
%   &  \quad \mu _{H}^{t+1}(a_H|x_H) \propto_{a_H} \mu _{H}^{t}(a_H|x_H)\exp \left\{ -\eta_h(x_h) \hat{\ell }_{H}^{t}(x_H, a_H) \right\} \\
%   & = \mu _{H}^{t}(a_H|x_H)\exp \left\{ -\eta_h(x_h)  \hat{\ell }_{H}^{t}(x_H, a_H) -\log Z_{H}^{t}(x_H) \right\},
% \end{align*}
% \begin{align*}
%   &  \quad \mu _{H}^{t+1}(a_H|x_H) \propto_{a_H} \mu _{H}^{t}(a_H|x_H)\exp \left\{ -\eta_h(x_h) \hat{\ell }_{H}^{t}(x_H, a_H) \right\} \\
%   & = \mu _{H}^{t}(a_H|x_H)\exp \left\{ -\eta_h(x_h)  \hat{\ell }_{H}^{t}(x_H, a_H) -\log Z_{H}^{t}(x_H) \right\},
% \end{align*}
\begin{align*}
\mu _{H}^{t+1}(a_H|x_H) = \mu _{H}^{t}(a_H|x_H)\exp \left\{ -\eta_H(x_H)  \hat{\ell }_{H}^{t}(x_H, a_H) -\log Z_{H}^{t}(x_H) \right\},
\end{align*}
where $Z_H^t(x_H)=\sum_{a_H\in\gA}\mu_H^t(a_H|x_H)\exp\{-\eta_H(x_H)\hat{\ell }_{H}^{t}(x_H, a_H)\}$ is a normalization factor. 

Fix some $h\in[H-1]$. Now suppose the induction hypothesis holds from step $h+1$ to $H$ and consider the $h$-th step. Using the induction hypothesis, 
one can see that \Eqref{equ:opt_object} can be rewritten as
\begin{align*}
    &\sum_{h^\prime=1}^H{\sum_{(x_{h^\prime},a_{h^\prime})\in\gX_{h^\prime}\times\gA}{\mu _{1:h^\prime}(x_{h^\prime},a_{h^\prime})\left[ \hat{\ell }_{h^\prime}^{t}\left( x_{h^\prime},a_{h^\prime} \right) +\frac{1}{\eta_{h^\prime}(x_{h^\prime})}\log \frac{\mu _{h^\prime}(a_{h^\prime}|x_{h^\prime})}{\mu _{h^\prime}^{t}(a_{h^\prime}|x_{h^\prime})} \right]}}\\
=&\sum_{h^\prime=1}^H{\sum_{x_{h^\prime}\in\gX_{h^\prime}}{\mu _{1:h^\prime-1}(x_{h^\prime})\left[ \left< \mu _{h^\prime}(\cdot |x_{h^\prime}),\hat{\ell }_{h^\prime}^{t}\left( x_{h^\prime},\cdot \right) \right> +\frac{\KL\left( \mu _{h^\prime}(\cdot |x_{h^\prime})\Vert\mu _{h^\prime}^{t}(\cdot |x_{h^\prime}) \right)}{\eta_{h^\prime}(x_{h^\prime})} \right]}}
\\
=&\sum_{h^\prime=1}^h{\sum_{x_{h^\prime}\in\gX_{h^\prime}}{\mu _{1:h^\prime-1}(x_{h^\prime})\left[ \left< \mu _{h^\prime}(\cdot |x_{h^\prime}),\hat{\ell }_{h^\prime}^{t}\left( x_{h^\prime},\cdot \right) \right> +\frac{\KL\left( \mu _{h^\prime}(\cdot |x_{h^\prime})\Vert\mu _{h^\prime}^{t}(\cdot |x_{h^\prime}) \right)}{\eta_{h^\prime}(x_{h^\prime})} \right]}}
\\
&+\sum_{h^\prime=h+1}^H{\left[ \sum_{x_{h^\prime+1}\in\gX_{h^\prime+1}}\frac{\mu_{1:h^\prime}(x_{h^\prime+1})}{\eta_{h^\prime+1}(x_{h^\prime+1})}\log Z^t_{h^\prime+1}(x_{h^\prime+1})- \sum_{x_{h^\prime}\in\gX_{h^\prime}}\frac{\mu_{1:h^\prime-1}(x_{h^\prime})}{\eta_{h^\prime}(x_{h^\prime})}\log Z^t_{h^\prime}(x_{h^\prime})\right]}
\\
=&\sum_{h^\prime=1}^h{\sum_{x_{h^\prime}\in\gX_{h^\prime}}{\mu _{1:h^\prime-1}(x_{h^\prime})\left[ \left< \mu _{h^\prime}(\cdot |x_{h^\prime}),\hat{\ell }_{h^\prime}^{t}\left( x_{h^\prime},\cdot \right) \right> +\frac{\KL\left( \mu _{h^\prime}(\cdot |x_{h^\prime})\Vert\mu _{h^\prime}^{t}(\cdot |x_{h^\prime}) \right)}{\eta_{h^\prime}(x_{h^\prime})} \right]}}
\\
&-\sum_{x_{h+1}\in\gX_{h+1}}\frac{\mu_{1:h}(x_{h+1})}{\eta_{h+1}(x_{h+1})}\log Z^t_{h+1}(x_{h+1})\\
=&\sum_{h^\prime=1}^{h-1}{\sum_{x_{h^\prime}\in\gX_{h^\prime}}{\mu _{1:h^\prime-1}(x_{h^\prime})\left[ \left< \mu _{h^\prime}(\cdot |x_{h^\prime}),\hat{\ell }_{h^\prime}^{t}\left( x_{h^\prime},\cdot \right) \right> +\frac{\KL\left( \mu _{h^\prime}(\cdot |x_{h^\prime})\Vert\mu _{h^\prime}^{t}(\cdot |x_{h^\prime}) \right)}{\eta_{h^\prime}(x_{h^\prime})} \right]}}
\\
&+\sum_{x_h\in\gX_h}\mu_{1:h-1}(x_h)\left[ \underbrace{\left< \mu _h(\cdot |x_h),\hat{\ell }_{h}^{t}\left( x_h,\cdot \right)-\sum_{x_{h+1}\in C(x_h,\cdot)}\frac{\log Z_{h+1}^t(x_{h+1})}{\eta_{h+1}(x_{h+1})} \right> +\frac{\KL\left( \mu _h(\cdot |x_h)\Vert\mu _{h}^{t}(\cdot |x_h) \right)}{\eta_h(x_h)}}_{\heartsuit } \right]\,.
\end{align*}
By minimizing $(\heartsuit)$ with respect to $\mu_h(\cdot|x_h)$ for each $x_h\in\gX_h$, one can derive that
\begin{align*}
&\mu_h^{t+1}(a_h|x_h)\\
=&\mu_h^t(a_h|x_h)\exp\left\{-\eta_h(x_h)\hatellxa+\sum_{x_{h+1}\in C(x_h,a_h)}\frac{\eta_h(x_h)}{\eta_{h+1}(x_{h+1})}\log Z_{h+1}^t(x_{h+1})-\log Z_h^t(x_h)\right\}\,,
\end{align*}
where
\begin{align*}
Z_{h}^t(x_{h})&=\sum_{a_h\in\gA}\mu_h^t(a_h|x_h)\exp\left\{-\eta_h(x_h)\hatellxa+\sum_{x_{h+1}\in C(x_h,a_h)}\frac{\eta_h(x_h)}{\eta_{h+1}(x_{h+1})}\log Z_{h+1}^t(x_{h+1})\right\}\,.
\end{align*}
The proof is thus concluded.
\end{proof}

% Setting $\eta_h(x)\equiv\eta$ for all $x\in \gX$ in Proposition~\ref{prop:update}
% immediately implies the update procedure for \LSOMD, detailed in Algorithm \ref{algo:upomd}. 
In what follows, for notational convenience, we denote $J^t_h(x_h,a_h)=-\eta_h(x_h)\hatellxa
+\sum_{x_{h+1}\in C(x_h,a_h)}\frac{\eta_h(x_h)}{\eta_{h+1}(x_{h+1})}\log Z_{h+1}^t(x_{h+1})$ as the surrogate loss.

% ------------------------- 2024.05.22 -------------------------
% \begin{algorithm}[!thb]
% \caption{\UPOMD}\label{algo:upomd}
% \begin{algorithmic}[1]
% \STATE \textbf{Input:} Tree-like structure of $\gX\times \gA$, $\hat{\mu}^t$ given by update Eq. \eqref{eq:OMD_update}, fixed learning rates $\eta$ and the loss estimates $\left\{ \hatellxa\right\}_{(x_h,a_h)\in \gX\times\gA}$.
%     \STATE \textbf{Initialization:} For all $x_{H}$ in $\gX_H$, initialize $Z^t(x_{H+1})= 1$.
% \FOR{ $h=H$ to $1$} 
%     \FOR{$x_{h}$ in $\gX_h$} 
%         \STATE Compute $ J^t_h(x_h,a_h)= -\eta\hatellxa+\sum_{x_{h+1}\in C(x_h,a_h)}\log Z_{h+1}^t(x_{h+1})$.
%         \STATE Compute $ Z_{h}^t(x_{h})=\sum_{a_h\in\gA}\hat{\mu}_h^t(a_h|x_h)\exp\left(J^t_h(x_h,a_h)\right)$.
%         \FOR{ $a_{h}$ in $\gA$} 
%             \STATE Compute $ \hat{\mu}_h^{t+1}(a_h|x_h)=\hat{\mu}_h^t(a_h|x_h)\exp\left(J^t_h(x_h,a_h)-\log Z_h^t(x_h)\right)$.
%         \ENDFOR
%     \ENDFOR
% \ENDFOR
% \end{algorithmic}
% \end{algorithm}
% ------------------------- 2024.05.22 -------------------------

% \subsection{\LSFTRL Algorithm}\label{sec:app:lsftrl_pseudo_code}
% This section presents the pseudocode of \LSFTRL, detailed in Algorithm \ref{algo:lsftrl}.

\begin{algorithm}[!thb]
\caption{\UPFTRL}\label{algo:upftrl}
\begin{algorithmic}[1]
\STATE \textbf{Input:} Tree-like structure of $\gX\times\gA$, fixed learning rates $\eta$, ``balanced transition'' $p^\star$ and cumulative loss estimates $\left\{ \hat{L}^t_h(x_h,a_h)\right\}_{(x_h,a_h)\in \gX\times\gA}$.
\STATE \textbf{Initialization:} For all $x_{H}$ in $\gX_H$, initialize $Z^t_{H+1}(x_{H+1})= 1$. Set adaptive learning rates $\eta^\star$ according to~\Eqref{eq:eta_star}. Set base policy $\mu^\star$ according to~\Eqref{eq:mu_star}.
\FOR{ $h=H$ to $1$} 
    \FOR{ $x_{h}$ in $\gX_h$}
        \STATE For each $a_h\in\gA$, compute $ J^t_h(x_h,a_h)= -\eta^\star_h(x_h)\hat{L}^t_h(x_h,a_h)+\sum_{x_{h+1}\in C(x_h,a_h)}\frac{\eta^\star_h(x_h)}{\eta^\star_{h+1}(x_{h+1})}\log Z_{h+1}^t(x_{h+1})$.
        \STATE Compute $Z_{h}^t(x_{h})=\sum_{a_h\in\gA}\mu_h^\star(a_h|x_h)\exp\left(J^t_h(x_h,a_h)\right)$.
        \FOR{ $a_{h}$ in $\gA$}
            \STATE Compute $ \mu_h^{t+1}(a_h|x_h)=\mu_h^\star(a_h|x_h)\exp\left(J^t_h(x_h,a_h)-\log Z_h^t(x_h)\right)$.
        \ENDFOR
    \ENDFOR
\ENDFOR
\end{algorithmic}
\end{algorithm}

% \subsection{Efficient Update for \LSFTRL}\label{app:efficient_upd_ftrl}
To solve the update of \LSFTRL, we follow the same idea as \citet{Fiegel2023adapting} that translates the update of FTRL into an OMD-like update. Specifically, Proposition F.2 of \citet{Fiegel2023adapting} shows that the update of \Eqref{update:FTRL} is equivalent to the solution to the following optimization problem:
% \label{eq:update_ftrl2}
\begin{align*}
    \mu^{t+1} =\argmin_{\mu\in \Pi_{\max}} \left\langle\mu,\hat{L}^{t}\right\rangle + D_{\psi_{\eta^\star}}\left(\mu\Vert\mu^\star\right)\,,
\end{align*}
where $\eta^\star$ and $\mu^\star$ satisfy
\begin{align}\label{eq:eta_star}
\eta^\star_h(x_h)= \frac{\eta}{(H-h+1)p^\star_{1:h}(x_h)}\,,
\end{align}
and
\begin{align}\label{eq:mu_star}
    \mu^{\star}= \argmin_{\mu\in\Pi_{\max}}\sumH\Psi_h\left(p^\star_{1:h}\cdot\mu_{1:h}\right)\,,
\end{align}
and recall that
$D_{\psi_{\eta^\star}}\left(\mu^1\Vert\mu^0\right)=\sum_{h=1}^H \sum_{\left(x_h, a_h\right) \in \gX_h\times\gA} \frac{\mu_{1: h}^1\left(x_h, a_h\right)}{\eta_h^\star\left(x_h\right)} \log \frac{\mu_h^1\left(a_h| x_h\right)}{\mu_h^0\left(a_h|x_h\right)}$.
Note that $\mu^\star$ can be computed efficiently via backward dynamic programming.

Then, combined with the solution to the OMD-like update in Proposition \ref{prop:update}, the solution to the update of \LSFTRL can be obtained by replacing $\hat{\ell}^t$, $\mu^t$, and $\eta$ with $\hat{L}^t$, $\mu^{\star}$, and $\eta^\star$, respectively, in Eq. \eqref{update:omd_generalized}, the details of which are presented in Algorithm \ref{algo:upftrl}.

\begin{algorithm}[!thb]
\caption{\MAXLAMBDA}\label{algo:maxlambda}
\begin{algorithmic}[1]\label{algo:compute_pstar}
\STATE \textbf{Input:} Tree-like structure of $\gX\times \gA$\,.
\STATE \textbf{Initialization:} Transition array $p[\cdot]$ of size $X$\,; auxiliary array $f[\cdot]$ of size $X$, $C[\cdot,\cdot]$ of size $X\times A$\,. For all $x_H$ in $\gX_H$, initialize $ f[x_H]= 1 $\,.
\FOR{$h=H-1$ to $1$} 
    \FOR{$x_h$ in $\gX_h$}
        \FOR{$a_h$ in $\gA$}
            \STATE Compute $C[x_{h},a_h]= \sum_{x_{h+1}\in C(x_h,a_h)}f[x_{h+1}] $\,,
        \ENDFOR
        \STATE Compute $ f[x_h]= \max_{a\in \gA} C[x_h,a]$\,.
    \ENDFOR
\ENDFOR
\FOR{$x_1$ in $\gX_1$}
    \STATE Compute $p[x_1]= \frac{f[x_1]}{\sum_{x^\prime_1\in\gX_1}f[x^\prime_1]}$\,.
\ENDFOR
\FOR{ $h=1$ to $H-1$}
    \FOR{ $x_h, a_h$ in $\gX_h\times \gA$}
        \FOR{$x_{h+1}$ in $C(x_h,a_h)$}
            \STATE Compute $p[x_{h+1}]= p[x_h]\cdot\frac{f[x_{h+1}]}{\sum_{x^\prime_{h+1}\in C(x_h,a_h)}f[x^\prime_{h+1}]}$\,.
        \ENDFOR
    \ENDFOR
\ENDFOR
\STATE \textbf{return} $p$.
\end{algorithmic}
\end{algorithm}

\subsection{Computation of Balanced Transition $p^\star$}\label{app:maxlambda} 
This section presents Algorithm \ref{algo:compute_pstar}, which computes the balanced transition $p^\star$ defined in Eq. \eqref{eq:p_star} via backward dynamic programming.

