% \documentclass{uai2022} % for initial submission
\documentclass[accepted]{uai2022} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2022} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2022} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
% \swap[<sep>]{<a>}{<b>} expands to <b><sep><a>; the optional <sep> defaults to "-".
\newcommand{\swap}[3][-]{#3#1#2} % just an example

%% my preamble
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{amsthm}
\usepackage{dsfont}
\usepackage{subcaption}
% \usepackage{algorithmic}
\usepackage{algorithm}
\usepackage{algpseudocode}
\usepackage{hyperref}

\algrenewcommand\algorithmicindent{1.0em}%
% arg max / arg min operators.
% \mathrm replaces the obsolete plain-TeX font switch \rm; the trailing \limits
% keeps subscripts set underneath the operator in every math style, as before.
\newcommand*{\argmax}{\mathop{\mathrm{arg~max}}\limits}
\newcommand*{\argmin}{\mathop{\mathrm{arg~min}}\limits}
% Theorem-like environments. `theorem` is numbered within sections; every other
% environment below reuses the `theorem` counter, so all of them share a single
% numbering sequence.
\newtheorem{theorem}{Theorem}[section]
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{assumption}[theorem]{Assumption}
\newtheorem{remark}[theorem]{Remark}

\allowdisplaybreaks[1]

%%% HELPER CODE FOR DEALING WITH EXTERNAL REFERENCES
\usepackage{xr}
\makeatletter
% \addFileDependency{<file>}
%   Writes "(<file>)" to the terminal/log, passes <file> to the kernel's
%   \@addtofilelist, and prints "No file <file>." when the file does not exist.
%   (Presumably the log entry lets build tools that scan the log track <file>
%   as a dependency -- confirm against your build setup.)
%   Every line ends with % so that no space tokens end up in the macro body;
%   otherwise they would be typeset if the macro were used outside the preamble.
\newcommand*{\addFileDependency}[1]{%
  \typeout{(#1)}%
  \@addtofilelist{#1}%
  \IfFileExists{#1}{}{\typeout{No file #1.}}%
}
\makeatother

% \myexternaldocument{<name>}
%   Makes the labels of the external document <name> available through xr's
%   \externaldocument, and registers <name>.tex and <name>.aux as file
%   dependencies. Line ends are protected with % so that the macro body
%   contains no stray space tokens.
\newcommand*{\myexternaldocument}[1]{%
    \externaldocument{#1}%
    \addFileDependency{#1.tex}%
    \addFileDependency{#1.aux}%
}
%%% END HELPER CODE
\myexternaldocument{abe_504}

\title{Mutation-Driven Follow the Regularized Leader for Last-Iterate Convergence in Zero-Sum Games (Supplementary Material)}

% The standard author block has changed for UAI 2022 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is authomatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:<abe\_kenshi@cyberagent.co.jp>?Subject=Your UAI 2022 paper}{Kenshi Abe}{}}
\author[2]{Mitsuki Sakamoto}
\author[2]{Atsushi Iwasaki}
% Add affiliations after the authors
\affil[1]{%
    CyberAgent, Inc.
}
\affil[2]{%
    University of Electro-Communications
}
  
\begin{document}
\onecolumn
\maketitle

\appendix
\section{Unbiased Estimator for FTRL and O-FTRL under Bandit Feedback}
\label{sec:appendix_lattimore_estimator}
For FTRL and O-FTRL under bandit feedback, we use the following unbiased estimator of $q_i^{\pi^t}$, which was proposed by \citet{lattimore2020bandit}:
\begin{align*}
    \hat{q}_i^{\pi^t}(a_i) = u_{\max} - \frac{u_{\max} - u_i(a_1^t, a_2^t)}{\pi_i^t(a_i^t)}\mathds{1}[a_i = a_i^t].
\end{align*}
This estimator takes values in $(-\infty, u_{\max}]$ while the standard importance-weighted estimator takes values in $(-\infty, \infty)$.

\section{Sensitivity Analysis on Mutation Parameters}
\label{sec:sensitivity_analysis_mu}
In this section, we investigate the performance of M-FTRL with a fixed reference strategy with varying $\mu\in \{10^{-3}, 5\times 10^{-3}, 10^{-2}, 10^{-1}, 1\}$.
We set the reference strategy to $c_i=\left(\frac{1}{|A_i|}\right)_{a_i\in A_i}$, and set the learning rate to $\eta=10^{-1}$.
The initial strategy profile $\pi^0$ is generated uniformly at random in $\prod_{i=1}^2\Delta^{\circ}(A_i)$ for each instance.
We conduct experiments on BRPS under full-information feedback.
Figure~\ref{fig:compare_mu} shows the average exploitability of $\pi^t$ over $100$ instances.
This result highlights the trade-off between the convergence rate and exploitability, as shown in Theorem~\ref{thm:expoitability_bound}.

\begin{figure}[h!]
    \centering
    \includegraphics[width=0.5\linewidth]{figs/appendix/brps_full_feedback_exploitability_compare_mu.pdf}
    \caption{
    Exploitability of $\pi^t$ for M-FTRL with a fixed reference strategy in BRPS under full-information feedback.
    }
    \label{fig:compare_mu}
\end{figure}

\section{Additional Lemmas}
\begin{lemma}
\label{lem:convex_conjugate}
For any $\pi\in \prod_{i=1}^2\Delta(A_i)$, $\pi^t$ updated by M-FTRL satisfies that:
\begin{align*}
    D_{\psi}(\pi, \pi^t) = \sum_{i=1}^2\left(\max_{p\in \Delta(A_i)}\left\{\left\langle z_i^t, p\right\rangle - \psi_i(p)\right\}-\langle z_i^t, \pi_i\rangle + \psi_i(\pi_i)\right).
\end{align*}
\end{lemma}

\begin{lemma}
\label{lem:stationary_point_rmd}
Let $\pi^{\mu}\in \prod_{i=1}^2\Delta(A_i)$ be a stationary point of (\ref{eq:rmd}).
For a player $i\in \{1, 2\}$, if $c_i\in \Delta^{\circ}(A_i)$ and $\mu>0$, then we also have $\pi_i^{\mu}\in \Delta^{\circ}(A_i)$.
\end{lemma}

\section{Proofs}
\subsection{Proof of Theorem \ref{thm:rmd}}
\label{sec:appendix_proof_thm_rmd}
\begin{proof}[Proof of Theorem \ref{thm:rmd}]
By the method of Lagrange multipliers, we have:
\begin{align*}
    \pi_i^t(a_i) = \frac{\exp\left(z_i^t(a_i)\right)}{\sum_{a_i'\in A_i}\exp\left(z_i^t(a_i')\right)}.
\end{align*}
Therefore, the time derivative of $\pi_i^t(a_i)$ is given as follows:
\begin{align*}
    \frac{d}{dt}\pi_i^t(a_i) &= \frac{\frac{d}{dt}\exp\left(z_i^t(a_i)\right)}{\sum_{a_i'\in A_i}\exp\left(z_i^t(a_i')\right)} - \frac{\exp\left(z_i^t(a_i)\right)\frac{d}{dt}\left(\sum_{a_i'\in A_i}\exp\left(z_i^t(a_i')\right)\right)}{\left(\sum_{a_i'\in A_i}\exp\left(z_i^t(a_i')\right)\right)^2} \\
    &= \frac{\exp\left(z_i^t(a_i)\right)\frac{d}{dt}z_i^t(a_i)}{\sum_{a_i'\in A_i}\exp\left(z_i^t(a_i')\right)} - \frac{\exp\left(z_i^t(a_i)\right)\left(\sum_{a_i'\in A_i}\exp\left(z_i^t(a_i')\right)\frac{d}{dt}z_i^t(a_i')\right)}{\left(\sum_{a_i'\in A_i}\exp\left(z_i^t(a_i')\right)\right)^2} \\
    &= \pi_i^t(a_i)\frac{d}{dt}z_i^t(a_i) - \pi_i^t(a_i)\sum_{a_i'\in A_i}\pi_i^t(a_i')\frac{d}{dt}z_i^t(a_i').
\end{align*}
From the definition of $z_i^t(a_i)$, we have:
\begin{align*}
    \frac{d}{dt}z_i^t(a_i)=q^{\pi^t}_i(a_i)+\frac{\mu}{\pi_i^t(a_i)}\left(c_i(a_i)-\pi_i^t(a_i)\right).
\end{align*}
By combining these equalities, we get:
\begin{align*}
    \frac{d}{dt}\pi_i^t(a_i) =& \pi_i^t(a_i)\left(q_i^{\pi^t}(a_i) +  \frac{\mu}{\pi_i^t(a_i)}\left(c_i(a_i)-\pi_i^t(a_i)\right) - \sum_{a_i'\in A_i}\pi_i^t(a_i')\left(q_i^{\pi^t}(a_i') + \frac{\mu}{\pi_i^t(a_i')}\left(c_i(a_i')-\pi_i^t(a_i')\right)\right)\right) \\
    =& \pi_i^t(a_i)\left(q_i^{\pi^t}(a_i) - v_i^{\pi^t}\right) + \mu\left(c_i(a_i)-\pi_i^t(a_i)\right) - \mu\pi_i^t(a_i)\sum_{a_i'\in A_i}\left(c_i(a_i')-\pi_i^t(a_i')\right) \\
    =& \pi_i^t(a_i)\left(q_i^{\pi^t}(a_i) - v_i^{\pi^t}\right) + \mu\left(c_i(a_i)-\pi_i^t(a_i)\right).
\end{align*}
\end{proof}

\subsection{Proof of Lemma \ref{lem:bregman_div}}
\label{sec:appendix_proof_lem_bregman_div}
\begin{proof}[Proof of Lemma \ref{lem:bregman_div}]
Let us define $\psi_i^{\ast}(z_i)=\max_{p\in \Delta(A_i)}\left\{\left\langle z_i, p\right\rangle - \psi_i(p)\right\}$.
Then, from Lemma \ref{lem:convex_conjugate}, the time derivative of $D_{\psi}(\pi, \pi^t)$ is given as:
\begin{align*}
    \frac{d}{dt}D_{\psi}(\pi, \pi^t) &= \sum_{i=1}^2\frac{d}{dt}\left(\max_{p\in \Delta(A_i)}\left\{\left\langle z_i^t, p\right\rangle - \psi_i(p)\right\}-\langle z_i^t, \pi_i\rangle + \psi_i(\pi_i)\right) \\
    &= \sum_{i=1}^2\frac{d}{dt}\left(\psi_i^{\ast}(z_i^t)-\langle z_i^t, \pi_i\rangle\right) \\
    &= \sum_{i=1}^2\left(\left\langle \frac{d}{dt}z_i^t, \nabla\psi_i^{\ast}(z_i^t)\right\rangle - \left\langle \frac{d}{dt}z_i^t, \pi_i\right\rangle\right) \\
    &= \sum_{i=1}^2\left\langle \frac{d}{dt}z_i^t, \nabla\psi_i^{\ast}(z_i^t) - \pi_i\right\rangle.
\end{align*}
From the maximizing argument of \citep{shalev2011online}, we have $\nabla \psi_i^{\ast}(z_i)=\argmax_{p\in \Delta(A_i)}\left\{\left\langle z_i, p\right\rangle - \psi_i(p)\right\}$ and then $\nabla \psi_i^{\ast}(z_i^t)=\pi_i^t$.
Furthermore, from the definition of $z_i^t(a_i)$, we have $\frac{d}{dt}z_i^t(a_i)=q^{\pi^t}_i(a_i)+\frac{\mu}{\pi_i^t(a_i)}\left(c_i(a_i)-\pi_i^t(a_i)\right)$.
Then,
\begin{align*}
    \frac{d}{dt}D_{\psi}(\pi, \pi^t) &= \sum_{i=1}^2\left\langle \frac{d}{dt}z_i^t, \pi_i^t - \pi_i\right\rangle \\
    &= \sum_{i=1}^2\sum_{a_i\in A_i}\left(q_i^{\pi^t}(a_i) + \frac{\mu}{\pi_i^t(a_i)}\left(c_i(a_i)-\pi_i^t(a_i)\right)\right) \left(\pi_i^t(a_i) - \pi_i(a_i)\right) \\
    &= \sum_{i=1}^2\sum_{a_i\in A_i}\left(\pi_i^t(a_i) - \pi_i(a_i)\right)\left(q_i^{\pi^t}(a_i) + \mu\left(\frac{c_i(a_i)}{\pi_i^t(a_i)}-1\right)\right)  \\
    &= \sum_{i=1}^2\sum_{a_i\in A_i}\left(\pi_i^t(a_i) - \pi_i(a_i)\right)\left(q_i^{\pi^t}(a_i) + \mu\frac{c_i(a_i)}{\pi_i^t(a_i)}\right)  \\
    &= \sum_{i=1}^2\left(v_i^{\pi^t} - v_i^{\pi_i, \pi_{-i}^t} + \mu\sum_{a_i\in A_i}\left(\pi_i^t(a_i) - \pi_i(a_i)\right) \frac{c_i(a_i)}{\pi_i^t(a_i)}\right) \nonumber\\
    &= - \sum_{i=1}^2v_i^{\pi_i, \pi_{-i}^t} + 2\mu - \mu\sum_{i=1}^2\sum_{a_i\in A_i}c_i(a_i)\frac{\pi_i(a_i)}{\pi_i^t(a_i)} \nonumber\\
    &= \sum_{i=1}^2v_i^{\pi_i^t, \pi_{-i}} + 2\mu - \mu\sum_{i=1}^2\sum_{a_i\in A_i}c_i(a_i)\frac{\pi_i(a_i)}{\pi_i^t(a_i)},
\end{align*}
where the sixth equality follows from $\sum_{i=1}^2v_i^{\pi^t}=0$ and $\mu\sum_{a_i\in A_i}\pi_i^t(a_i) \frac{c_i(a_i)}{\pi_i^t(a_i)}=\mu\sum_{a_i\in A_i}c_i(a_i)=\mu$, and the last equality follows from $v_1^{\pi_1,\pi_2^t}=-v_2^{\pi_1,\pi_2^t}$ and $v_2^{\pi_1^t,\pi_2}=-v_1^{\pi_1^t,\pi_2}$ by the definition of two-player zero-sum games.
\end{proof}

\subsection{Proof of Lemma \ref{lem:rmd_property}}
\label{sec:appendix_proof_rmd_property}
\begin{proof}[Proof of Lemma \ref{lem:rmd_property}]
By using the ordinary differential equation (\ref{eq:rmd}), we have for all $i\in \{1, 2\}$ and $a_i\in A_i$:
\begin{align*}
    &\pi_i^{\mu}(a_i)\left(q_i^{\pi^{\mu}}(a_i) - v_i^{\pi^{\mu}}\right) + \mu\left(c_i(a_i)-\pi_i^{\mu}(a_i)\right)  = 0.
\end{align*}
Then, we get:
\begin{align*}
    q_i^{\pi^{\mu}}(a_i) = v_i^{\pi^{\mu}} - \frac{\mu}{\pi_i^{\mu}(a_i)}\left(c_i(a_i)-\pi_i^{\mu}(a_i)\right).
\end{align*}
Note that from Lemma \ref{lem:stationary_point_rmd}, $\frac{1}{\pi_i^{\mu}(a_i)}$ is well-defined.
Then, for any $\pi_i'\in \Delta(A_i)$ we have:
\begin{align*}
    v_i^{\pi_i',\pi_{-i}^{\mu}} &= \sum_{a_i\in A_i}\pi_i'(a_i)q_i^{\pi^{\mu}}(a_i)\\
    &= v_i^{\pi^{\mu}} - \mu\sum_{a_i\in A_i}\frac{\pi_i'(a_i)}{\pi_i^{\mu}(a_i)}\left(c_i(a_i)-\pi_i^{\mu}(a_i)\right) \\
    &= v_i^{\pi^{\mu}} + \mu - \mu\sum_{a_i\in A_i}c_i(a_i)\frac{\pi_i'(a_i)}{\pi_i^{\mu}(a_i)}.
\end{align*}
\end{proof}

\subsection{Proof of Theorem \ref{thm:bregman_div}}
\label{sec:appendix_proof_thm_bregman_div}
\begin{proof}[Proof of Theorem \ref{thm:bregman_div}]
First, we prove the first part of the theorem.
By setting $\pi=\pi^{\mu}$ in Lemma \ref{lem:bregman_div} and $\pi'=\pi^t$ in Lemma \ref{lem:rmd_property}, we have:
\begin{align*}
    \frac{d}{dt}D_{\psi}(\pi^{\mu}, \pi^t) =&  \sum_{i=1}^2 v_i^{\pi_i^t, \pi_{-i}^{\mu}} + 2\mu - \mu\sum_{i=1}^2\sum_{a_i\in A_i}c_i(a_i)\frac{\pi_i^{\mu}(a_i)}{\pi_i^t(a_i)} \\
    =& \sum_{i=1}^2v_i^{\pi^{\mu}} + 4\mu - \mu\sum_{i=1}^2\sum_{a_i\in A_i}c_i(a_i)\left(\frac{\pi_i^t(a_i)}{\pi_i^{\mu}(a_i)}+\frac{\pi_i^{\mu}(a_i)}{\pi_i^t(a_i)}\right) \\
    =& 4\mu - \mu\sum_{i=1}^2\sum_{a_i\in A_i}c_i(a_i)\left(\frac{\pi_i^t(a_i)}{\pi_i^{\mu}(a_i)}+\frac{\pi_i^{\mu}(a_i)}{\pi_i^t(a_i)}\right) \\
    =& 4\mu - \mu\sum_{i=1}^2\sum_{a_i\in A_i}c_i(a_i)\left(\left(\sqrt{\frac{\pi_i^t(a_i)}{\pi_i^{\mu}(a_i)}}-\sqrt{\frac{\pi_i^{\mu}(a_i)}{\pi_i^t(a_i)}}\right)^2 + 2\right) \\
    =& - \mu\sum_{i=1}^2\sum_{a_i\in A_i}c_i(a_i)\left(\sqrt{\frac{\pi_i^t(a_i)}{\pi_i^{\mu}(a_i)}}-\sqrt{\frac{\pi_i^{\mu}(a_i)}{\pi_i^t(a_i)}}\right)^2,
\end{align*}
where the third equality follows from $\sum_{i=1}^2v_i^{\pi^{\mu}}=0$ by the definition of zero-sum games.

Next, we prove the second part of the theorem.
From the first part of the theorem, we have:
\begin{align}
\label{eq:J_dot_KL}
    \frac{d}{dt}D_{\psi}(\pi^{\mu}, \pi^t) &= - \mu\sum_{i=1}^2\sum_{a_i\in A_i}c_i(a_i)\left(\frac{\pi_i^t(a_i)}{\pi_i^{\mu}(a_i)}+\frac{\pi_i^{\mu}(a_i)}{\pi_i^t(a_i)} - 2\right) \nonumber\\
    &\leq - \mu\sum_{i=1}^2\left(\min_{a_i\in A_i}\frac{c_i(a_i)}{\pi_i^{\mu}(a_i)}\right)\sum_{a_i\in A_i}\pi_i^{\mu}(a_i)\left(\frac{\pi_i^t(a_i)}{\pi_i^{\mu}(a_i)}+\frac{\pi_i^{\mu}(a_i)}{\pi_i^t(a_i)} - 2\right) \nonumber\\
    &= - \mu\sum_{i=1}^2\left(\min_{a_i\in A_i}\frac{c_i(a_i)}{\pi_i^{\mu}(a_i)}\right)\sum_{a_i\in A_i}\frac{(\pi_i^t(a_i)-\pi_i^{\mu}(a_i))^2}{\pi_i^t(a_i)} \nonumber\\
    &\leq - \mu\sum_{i=1}^2\left(\min_{a_i\in A_i}\frac{c_i(a_i)}{\pi_i^{\mu}(a_i)}\right)\ln \left(1 + \sum_{a_i\in A_i}\frac{(\pi_i^t(a_i)-\pi_i^{\mu}(a_i))^2}{\pi_i^t(a_i)}\right) \nonumber\\
    &= - \mu\sum_{i=1}^2\left(\min_{a_i\in A_i}\frac{c_i(a_i)}{\pi_i^{\mu}(a_i)}\right)\ln \left(\sum_{a_i\in A_i}\pi_i^{\mu}(a_i)\frac{\pi_i^{\mu}(a_i)}{\pi_i^t(a_i)}\right) \nonumber\\
    &\leq - \mu\sum_{i=1}^2\left(\min_{a_i\in A_i}\frac{c_i(a_i)}{\pi_i^{\mu}(a_i)}\right)\sum_{a_i\in A_i}\pi_i^{\mu}(a_i)\ln \left(\frac{\pi_i^{\mu}(a_i)}{\pi_i^t(a_i)}\right) \nonumber\\
    &= - \mu\sum_{i=1}^2\left(\min_{a_i\in A_i}\frac{c_i(a_i)}{\pi_i^{\mu}(a_i)}\right)\mathrm{KL}(\pi_i^{\mu}, \pi_i^t) \leq - \mu\left(\min_{i\in \{1,2\}, a_i\in A_i}\frac{c_i(a_i)}{\pi_i^{\mu}(a_i)}\right)\sum_{i=1}^2\mathrm{KL}(\pi_i^{\mu}, \pi_i^t),
\end{align}
where the second inequality follows from $x \geq \ln(1+x)$ for all $x \geq 0$, and the third inequality follows from the concavity of the $\ln(\cdot)$ function and Jensen's inequality for concave functions.
On the other hand, when $\psi_i(p)=\sum_{a_i\in A_i}p(a_i)\ln p(a_i)$, $D_{\psi_i}(\pi_i^{\mu}, \pi_i^t)=\mathrm{KL}(\pi_i^{\mu}, \pi_i^t)$.
Thus, we have $D_{\psi}(\pi^{\mu}, \pi^t)=\sum_{i=1}^2 \mathrm{KL}(\pi_i^{\mu}, \pi_i^t)$.
From this fact and (\ref{eq:J_dot_KL}), we have:
\begin{align*}
    \frac{d}{dt}\mathrm{KL}(\pi^{\mu}, \pi^t) \leq - \mu\left(\min_{i\in \{1,2\}, a_i\in A_i}\frac{c_i(a_i)}{\pi_i^{\mu}(a_i)}\right)\mathrm{KL}(\pi^{\mu}, \pi^t).
\end{align*}
\end{proof}

\section{Proofs of Additional Lemmas}
\subsection{Proof of Lemma \ref{lem:convex_conjugate}}
\begin{proof}[Proof of Lemma \ref{lem:convex_conjugate}]
First, for any $\pi\in \prod_{i=1}^2\Delta(A_i)$,
\begin{align}
    \label{eq:bregman_div}
    D_{\psi}(\pi, \pi^t) = \sum_{i=1}^2D_{\psi_i}(\pi_i, \pi_i^t) = \sum_{i=1}^2\left(\psi_i(\pi_i) - \psi_i(\pi_i^t) - \left\langle \nabla\psi_i(\pi_i^t), \pi_i - \pi_i^t\right\rangle\right).
\end{align}
From the assumptions on $\psi_i$ and the first-order necessary conditions for the optimization problem of $\argmax_{p\in \Delta(A_i)}\left\{\left\langle z_i^t, p\right\rangle - \psi_i(p)\right\}$, for $\pi_i^t=\argmax_{p\in \Delta(A_i)}\left\{\left\langle z_i^t, p\right\rangle - \psi_i(p)\right\}$, there exists $\lambda\in \mathbb{R}$ such that
\begin{align*}
    z_i^t - \nabla \psi_i(\pi_i^t) = \lambda \mathbf{1}.
\end{align*}
Therefore, we have:
\begin{align}
    \label{eq:inner_product}
    \left\langle z_i^t, \pi_i - \pi_i^t \right\rangle = \left\langle \lambda \mathbf{1} + \nabla \psi_i(\pi_i^t), \pi_i - \pi_i^t \right\rangle = \left\langle \nabla \psi_i(\pi_i^t), \pi_i - \pi_i^t \right\rangle.
\end{align}
By combining (\ref{eq:bregman_div}) and (\ref{eq:inner_product}):
\begin{align*}
    D_{\psi}(\pi, \pi^t) &= \sum_{i=1}^2\left(\psi_i(\pi_i) - \psi_i(\pi_i^t) - \left\langle z_i^t, \pi_i - \pi_i^t \right\rangle\right) \\
    &= \sum_{i=1}^2\left(\left\langle z_i^t, \pi_i^t\right\rangle - \psi_i(\pi_i^t) -\langle z_i^t, \pi_i\rangle + \psi_i(\pi_i)\right) \\
    &= \sum_{i=1}^2\left(\max_{p\in \Delta(A_i)}\left\{\left\langle z_i^t, p\right\rangle - \psi_i(p)\right\}-\langle z_i^t, \pi_i\rangle + \psi_i(\pi_i)\right).
\end{align*}
\end{proof}

\subsection{Proof of Lemma \ref{lem:stationary_point_rmd}}
\begin{proof}[Proof of Lemma \ref{lem:stationary_point_rmd}]
Suppose, for the sake of contradiction, that there exist $i\in \{1,2\}$ and $a_i\in A_i$ such that $\pi^{\mu}_i(a_i)=0$.
Then, for such $i$ and $a_i$, we have:
\begin{align*}
    \frac{d}{dt}\pi_i^{\mu}(a_i)=\pi_i^{\mu}(a_i)\left(q_i^{\pi^{\mu}}(a_i) - v_i^{\pi^{\mu}}\right) + \mu\left(c_i(a_i)-\pi_i^{\mu}(a_i)\right) = \mu c_i(a_i) > 0.
\end{align*}
Here the strict inequality uses $\mu>0$ and $c_i\in \Delta^{\circ}(A_i)$. This contradicts the fact that $\frac{d}{dt}\pi_i^{\mu}(a_i)=0$, which holds because $\pi^{\mu}$ is a stationary point.
Therefore, for all $i\in \{1, 2\}$ and $a_i\in A_i$, we have $\pi_i^{\mu}(a_i) > 0$.
\end{proof}

\bibliography{abe_504}

\end{document}
