%\documentclass{uai2023} % for initial submission
\documentclass[accepted]{uai2023} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like

%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2023} % ptmx math instead of Computer
% Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2023} % newtx fonts (improves upon
 % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

\usepackage{amssymb}
\usepackage{amsthm}
% Theorem-like environments (amsthm).
% `theorem' is numbered within sections; every other environment below is
% declared with [theorem] and therefore shares the `theorem' counter.
% The environments are grouped by the amsthm style in effect when they are
% declared: plain, definition, and remark.
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{corollary}[theorem]{Corollary}
\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{assumption}[theorem]{Assumption}
\theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}

% for cross referencing the main text
% PLEASE ONLY USE xr IN THE SUPPLEMENTARY MATERIAL. 
% In the main paper, hard code any cross-reference to the supplementary material. 
\usepackage{xr}
\makeatletter
% \addFileDependency{<file name with extension>}
%
% Announces <file> as a build dependency of this document:
%   * writes "(<file>)" to the log,
%   * appends <file> to the list reported by \listfiles,
%   * writes "No file <file>." to the log when <file> does not exist (yet).
% Every line inside the definition ends with `%' so that the macro expands to
% no stray space tokens, wherever it is used.
\newcommand*{\addFileDependency}[1]{% argument=file name and extension
  % latexmk will find this if $recorder=0. However, in that case it will
  % ignore #1 if it is a .aux or .pdf file etc. and it exists! If it doesn't
  % exist, it will appear in the list of dependents regardless.
  \typeout{(#1)}%
  % Write the following if you want it to appear in \listfiles
  % --- although not really necessary and latexmk doesn't use this.
  \@addtofilelist{#1}%
  % latexmk will find this message if #1 doesn't exist (yet).
  \IfFileExists{#1}{}{\typeout{No file #1.}}%
}
\makeatother

% \myexternaldocument{<basename>}
%
% Passes <basename> to xr's \externaldocument and then registers both
% <basename>.tex and <basename>.aux through \addFileDependency.
\newcommand*{\myexternaldocument}[1]{%
\externaldocument{#1}%
\addFileDependency{#1.tex}%
\addFileDependency{#1.aux}%
}
\myexternaldocument{huang_541}

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
% \swap[<sep>]{<x>}{<y>} expands to <y><sep><x>; the optional <sep> defaults to `-'.
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\setcounter{equation}{10}

\title{A Near-optimal High-probability Swap-Regret Upper Bound for
Multi-agent Bandits in Unknown General-sum Games (Supplementary material)}

% The standard author block has changed for UAI 2023 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{{Zhiming Huang}{}}
\author[1]{Jianping Pan}

% Add affiliations after the authors
\affil[1]{%
    Department of Computer Science\\
    University of Victoria\\
    BC, Canada
}
  
  \begin{document}
  
\onecolumn %% Remove this line if a two-column layout is desired for the supplement
\maketitle

%This Supplementary Material should be submitted as a separate file. Please do not append the Supplementary Material to the main paper~\ref{eq:correlated}. 

%Fig. \ref{fig:pitt} and Eq \ref{eq:example} in the main paper can be cross referenced using \texttt{xr}. 

\appendix
We start by introducing the notations that will be used in the proofs of Lemma~\ref{lm2} and Theorem~\ref{thm:regret3}. As the proofs are for each individual agent $n$, without confusion, we drop the subscript $n$ in some notations for brevity. 

Recall that $\mathcal{G}_t$ is the $\sigma$-algebra generated by the history information of all agents up to round $t$, i.e., $\mathcal{G}_{t}:= \sigma \left(\{a_n^1,r_n^1,\ldots,a_n^{t},r_n^{t}\}_{n \in \mathcal{N}} \right)$, and let $\mathbf{E}_{t} [\cdot] := \mathbf{E}[\cdot \mid \mathcal{G}_t]$ be the expectation conditioned on the history information by the end of round $t$. 
%Similarly, denote by $\mathcal{G}_{n,t}:= \sigma \left(\{a_n^1,r_n^1,\ldots,a_n^{t},r_n^{t}\} \right)$ the $\sigma$-algebra generated by the history information of agent $n$ till round $t$, and let $\mathbf{E}_{n,t}:=\mathbf{E}[\cdot | \mathcal{G}_{n,t}]$.
Recall that $y_{a}^t:= 1- u_n^t(a;\mathbb{A}_{-n}^t)$ is the instantaneous loss function if
agent $n$ plays arm $a \in A_n$ in round $t$, and thus $Y_{a,a^\prime}^t:= \frac{\mathbf{1}[a_n^t = a^\prime] p_a^t q_{a,a^\prime}^t  y_{a^\prime}^t} {p_{a^\prime}^t}$ and $\hat{Y}_{a,a^\prime}^t = \frac{Y_{a,a^\prime}^t}{q_{a,a^\prime}^t + \gamma_t}$.
Denote by $\hat{L}_a^T:= \sum\limits_{t=1}^T \sum\limits_{a^\prime \in A_n}q_{a,a^\prime}^t \hat{Y}_{a,a^\prime}^t$ and ${L}_{a}^T:=\sum\limits_{t=1}^T \sum\limits_{a^\prime \in A_n} Y_{a,a^\prime}^t$.



%the cumulative estimated and actual loss  by meta-distribution $Q_{a}^t$ over $T$ rounds, respectively. 
\section{Proof of Lemma~\ref{lm2}}\label{pflm1}
\begin{proof}

Recall that $\tilde{Y}_{a,a^\prime}^t:= \mathbf{1}[a_n^t = a] y_{a^\prime}^t$. We first prove that the process $\{Z_t\}_{t\geq 0}$, where $Z_t  := \exp\left\{{\sum\limits_{s=1}^t \sum\limits_{a \in A_n} \sum\limits_{a^\prime \in A_n}  \beta_{a,a^\prime}^s \left(\hat{Y}_{a,a^\prime}^s - \tilde{Y}_{a,a^\prime}^s \right) }\right\}$ for $t>0$ and $Z_0 = 1$, is a supermartingale with respect to filtration $\{\mathcal{G}_t\}_{t\geq 0}$ for all $a \in A_n$, i.e., $\mathbf{E}\left[Z_t | \mathcal{G}_{t-1} \right] \leq Z_{t-1}$. 
%Denote by $\mathbf{E}_{n,t}$
%Denote by $\mathbf{P}^t$ be the joint distribution over actions of all agents. As each agent runs independently, given $\mathcal{G}_{t-1}$, $\mathbf{P}^t = \prod\limits_{n\in \mathcal{N}}P_n^t$.
%Denote by $\mathbf{E}_{P_n^t}$ the expectation only conditioned on the selection distribution $P_n^t$.
Denote by $\mathbb{A}_{-n}^t$ the actions of all agents except for agent $n$ in round $t$. Then, we have that
\begin{equation}\label{eq:martdiff}
\resizebox{1.0\hsize}{!}{$
    \begin{aligned}
        &\mathbf{E}_{t-1} \left[ \exp\left\{ \sum_{a\in A_n} \sum_{a^\prime \in A_n} \beta_{a,a^\prime}^t \left(\hat{Y}_{a,{a^\prime}}^t - \tilde{Y}_{a,{a^\prime}}^t \right) \right\}\right] 
        = \mathbf{E}_{t-1} \left[ \frac{\exp\left\{ \sum\limits_{a\in A_n} \sum\limits_{a^\prime \in A_n} \beta_{a,a^\prime}^t \hat{Y}_{a,{a^\prime}}^t \right\}}{\exp\left\{ \sum\limits_{a\in A_n} \sum\limits_{a^\prime \in A_n} \beta_{a,a^\prime}^t  \tilde{Y}_{a,{a^\prime}}^t \right\}}\right] 
        = \mathbf{E}_{t-1} \left[ \frac{\exp\left\{ \sum\limits_{a\in A_n} \sum\limits_{a^\prime \in A_n} \beta_{a,a^\prime}^t \hat{Y}_{a,{a^\prime}}^t \right\}}{\exp\left\{ \sum\limits_{a\in A_n} \sum\limits_{a^\prime \in A_n} \beta_{a,a^\prime}^t \mathbf{1}[a_n^t = a]  y_{a^\prime}^t\right\}}\right] \\
        & = \mathbf{E}_{t-1} \left[ \mathbf{E}_{t-1} \left[\frac{\exp\left\{ \sum\limits_{a\in A_n} \sum\limits_{a^\prime \in A_n} \beta_{a,a^\prime}^t \hat{Y}_{a,{a^\prime}}^t \right\}}{\exp\left\{  \sum\limits_{a\in A_n} \sum\limits_{a^\prime \in A_n} \beta_{a,a^\prime}^t \mathbf{1}[a_n^t = a]    y_{a^\prime}^t\right\}} \mid \mathbb{A}_{-n}^t \right]\right]  
        \leq \mathbf{E}_{t-1} \left[ \frac{\mathbf{E}_{t-1} \left[\exp\left\{ \sum\limits_{a\in A_n} \sum\limits_{a^\prime \in A_n} \beta_{a,a^\prime}^t \hat{Y}_{a,{a^\prime}}^t \right\} \mid \mathbb{A}_{-n}^t \right]}{\exp\left\{  \sum\limits_{a\in A_n} \sum\limits_{a^\prime \in A_n} \beta_{a,a^\prime}^t p_a^t   y_{a^\prime}^t\right\}}\right],
    \end{aligned}
    $}
\end{equation}
where the third equality is due to the law of total expectation, and the final inequality holds because $y_{a^\prime}^t$ is determined given $\mathbb{A}_{-n}^t$ and $\beta_{a,a^\prime}^t$ is $\mathcal{G}_{t-1}$-measurable. Denote by $\mathbf{E}_{n,t-1}[\cdot]:= \mathbf{E}_{t-1}\left[ \cdot \mid  \mathbb{A}_{-n}^t \right]$.
Then, we show that $\mathbf{E}_{n,t-1} \left[\exp\left\{ \sum\limits_{a\in A_n} \sum\limits_{a^\prime \in A_n} \beta_{a,a^\prime}^t \hat{Y}_{a,{a^\prime}}^t \right\}\right] \leq \exp\left\{  \sum\limits_{a\in A_n} \sum\limits_{a^\prime \in A_n} \beta_{a,a^\prime}^t p_a^t y_{a^\prime}^t\right\}$ as follows:

\begin{equation*}
\resizebox{1.0\hsize}{!}{$
    \begin{aligned}
        &\mathbf{E}_{n,t-1} \left[ \exp \left\{ \sum_{a\in A_n} \sum_{a^\prime \in A_n} \beta_{a,a^\prime}^t \hat{Y}_{a,{a^\prime}}^t \right\} \right] = \mathbf{E}_{n,t-1} \left[ \exp \left\{ \sum_{a\in A_n} \sum_{a^\prime \in A_n} \beta_{a,a^\prime}^t \frac{p_a^t \mathbf{1}[a_n^t = a^\prime ] q_{a,a^\prime}^t y_{a^\prime}^t}{p_{a^\prime}^t (q_{a,a^\prime}^t + \gamma_t)} \right\} \right]\\
        &\leq \mathbf{E}_{n,t-1} \left[\sum_{a\in A_n} p_{a}^t \exp \left\{  \sum_{a^\prime \in A_n} \beta_{a,a^\prime}^t \frac{ \mathbf{1}[a_n^t = a^\prime ] q_{a,a^\prime}^t y_{a^\prime}^t}{p_{a^\prime}^t (q_{a,a^\prime}^t + \gamma_t)} \right\} \right] 
        \leq \mathbf{E}_{n,t-1} \left[\sum_{a\in A_n} p_{a}^t \exp \left\{  \sum_{a^\prime \in A_n} \frac{\beta^t_{a,a^\prime}}{2\gamma_t} \frac{ 2 \gamma_t \mathbf{1}[a_n^t = a^\prime ] q_{a,a^\prime}^t y_{a^\prime}^t}{p_{a^\prime}^t (q_{a,a^\prime}^t + \gamma_t \mathbf{1}[a_n^t = a^\prime ] q_{a,a^\prime}^t y_{a^\prime}^t )} \right\} \right]\\
        & = \mathbf{E}_{n,t-1} \left[\sum_{a\in A_n} p_{a}^t \exp \left\{  \sum_{a^\prime \in A_n} \frac{\beta_{a,a^\prime}^t}{2\gamma_t} \frac{ 2 \gamma_t \mathbf{1}[a_n^t = a^\prime ]  y_{a^\prime}^t/p_{a^\prime}^t}{ 1 + \gamma_t \mathbf{1}[a_n^t = a^\prime ]  y_{a^\prime}^t /p_{a^\prime}^t} \right\} \right] \leq \mathbf{E}_{n,t-1} \left[\sum_{a\in A_n} p_{a}^t \exp \left\{  \sum_{a^\prime \in A_n} \frac{\beta^t_{a,a^\prime}}{2\gamma_t} \log(1+ 2\gamma_t \mathbf{1}[a_n^t =a^\prime]y_{a^\prime}^t/p_{a^\prime}^t) \right\} \right] \\
        &\leq \mathbf{E}_{n,t-1} \left[\sum_{a\in A_n} p_{a}^t \exp \left\{  \sum_{a^\prime \in A_n}  \log(1+ \beta_{a,a^\prime}^t \mathbf{1}[a_n^t =a^\prime]y_{a^\prime}^t/p_{a^\prime}^t) \right\} \right] = \mathbf{E}_{n,t-1} \left[\sum_{a\in A_n} p_{a}^t \prod_{a^\prime \in A_n}  (1+ \beta_{a,a^\prime}^t \mathbf{1}[a_n^t =a^\prime]y_{a^\prime}^t/p_{a^\prime}^t) \right].
    \end{aligned}
    $}
\end{equation*}
where the first inequality is due to Jensen's inequality, the second inequality is due to that $0\leq \mathbf{1}[a_n^t = a^\prime ] q_{a,a^\prime}^t y_{a^\prime}^t \leq 1$, the third inequality is due to the fact that $\frac{z}{1+z/2}\leq \log(1+z)$ for all $z>0$, and the last inequality is due to the inequality $x\log(1+y)\leq \log(1+xy)$ for all $y>-1$ and $x\in[0,1]$.
As $\mathbf{1}[a_n^t =a^\prime]\mathbf{1}[a_n^t =a^{\prime\prime}]=0$ for any $a^\prime \neq a^{\prime\prime}$, the last term in the above display can be further bounded as follows:
\begin{equation*}
    \begin{aligned}
        &\mathbf{E}_{n,t-1} \left[\sum_{a\in A_n} p_{a}^t \prod_{a^\prime \in A_n} (1+ \beta_{a,a^\prime}^t \mathbf{1}[a_n^t =a^\prime]y_{a^\prime}^t/p_{a^\prime}^t) \right] = \mathbf{E}_{n,t-1} \left[\sum_{a\in A_n} p_{a}^t (1+   \sum_{a^\prime \in A_n} \beta_{a,a^\prime}^t \mathbf{1}[a_n^t =a^\prime]y_{a^\prime}^t/p_{a^\prime}^t) \right] \\
        &=\mathbf{E}_{n,t-1} \left[ 1+   \sum_{a \in A_n}\sum_{a^\prime \in A_n} p_a^t \beta_{a,a^\prime}^t \mathbf{1}[a_n^t =a^\prime]y_{a^\prime}^t/p_{a^\prime}^t \right]  = 1 + \sum_{a\in A_n}\sum_{a^\prime\in A_n} p_a^t \beta_{a,a^\prime}^t y_{a^\prime}^t \leq \exp\left\{\sum_{a\in A_n}\sum_{a^\prime\in A_n} \beta_{a,a^\prime}^t p_{a}^t y_{a^\prime}^t\right\},
    \end{aligned}
\end{equation*}
where the inequality is due to $1+x\leq \exp\{x\}$ for any $x\in \mathbb{R}$.
Therefore, we have shown that $\mathbf{E}_{n,t-1} \left[\exp\left\{ \sum\limits_{a\in A_n} \sum\limits_{a^\prime \in A_n} \beta_{a,a^\prime}^t \hat{Y}_{a,{a^\prime}}^t \right\}\right] \leq \exp\left\{\sum\limits_{a\in A_n}\sum\limits_{a^\prime\in A_n} \beta_{a,a^\prime}^t p_{a}^t y_{a^\prime}^t\right\}$, which
indicates that (\ref{eq:martdiff}) is bounded by $1$. Thus, 
$$\mathbf{E}_{t-1} \left[ Z_t \right] = \mathbf{E}_{t-1} \left[ \exp\left\{ \sum_{a\in A_n} \sum_{a^\prime \in A_n} \beta_{a,a^\prime}^t \left(\hat{Y}_{a,{a^\prime}}^t - \tilde{Y}_{a,{a^\prime}}^t \right) \right\}\right] \cdot Z_{t-1} \leq Z_{t-1},$$
which shows that $\{Z_t\}_{t\geq 0}$ is a supermartingale with respect to the filtration $\{\mathcal{G}_t\}_{t\geq 0}$. Thus, we have $\mathbf{E}\left[{Z_T}\right]\leq \mathbf{E}\left[Z_{T-1}\right] \leq \cdots \leq \mathbf{E}\left[Z_0\right] = 1$. By Markov's inequality, we have 
\begin{equation*}
\begin{aligned}
    \Pr\left(\sum\limits_{t=1}^T \sum\limits_{a \in A_n} \sum\limits_{a^\prime \in A_n} \beta_{a,a^\prime}^t \left(\hat{Y}_{a,a^\prime}^t - \tilde{Y}_{a,a^\prime}^t \right) \geq \epsilon \right) &\leq \mathbf{E}\left[ \exp \left\{\sum\limits_{t=1}^T \sum\limits_{a \in A_n} \sum\limits_{a^\prime \in A_n} \beta_{a,a^\prime}^t \left(\hat{Y}_{a,a^\prime}^t - \tilde{Y}_{a,a^\prime}^t \right) \right\} \right] \cdot \exp\{-\epsilon\}\\ 
    &\leq \exp\{-\epsilon\}.    
\end{aligned}
\end{equation*}
Then, the lemma follows by solving $\exp\{-\epsilon\} = \delta$ for $\epsilon$.

\end{proof}

\section{Proof of Theorem~\ref{thm:regret3}}\label{pftm13}
\begin{proof}
%and recall that $\hat{L}_{a,a^\prime}^T:= \sum\limits_{t=1}^T \frac{Y_{a,a^\prime}^t}{q_{a,a^\prime}^t + \gamma_t}$.
By the relationship between $P_n^t$ and $Q_{a}^t$, the following equation holds:
\begin{equation}\label{eq:la}
\begin{aligned}
    &\sum_{a \in  A_n} {L}_{a}^T = \sum_{a \in A_n} \sum\limits_{t=1}^T \sum\limits_{a^\prime \in A_n}  Y_{a,a^\prime}^t 
    =\sum\limits_{t=1}^T \sum\limits_{a^\prime \in A_n} \sum_{a \in A_n}  \frac{\mathbf{1}[a_n^t = a^\prime] p_{a}^t q_{a,a^\prime}^t}{p_{a^\prime}^t} y_{a^\prime}^t\\
     &= \sum\limits_{t=1}^T \sum\limits_{a^\prime \in A_n}\mathbf{1}[a_n^t = a^\prime] y_{a^\prime}^t
    = \sum\limits_{t=1}^T \sum\limits_{a \in A_n}\mathbf{1}[a_n^t = a] y_{a}^t.
\end{aligned}
\end{equation}

%In addition to Lemma~\ref{lm2}, we need the following two lemmas.
%Lemma~\ref{lm1} gives a bound for the gap between the total loss received by a meta-distribution and the action-specific loss, and the proof of this lemma can be found in Appendix~\ref{pflm1}. 
%Lemma~\ref{lm:azu} states the Azuma's inequality for martingale sequences.

% \begin{lemma}\label{lm1}
% For any $k, a^\prime \in  A_n$, and a non-increasing $\eta_t>0$, we have that 
% $$\sum\limits_{a \in A_n} \left(\hat{L}_a^t - \hat{L}_{a,a^\prime}^t \right)
%     \leq \frac{\log(K_n)}{\eta_T} +  \sum\limits_{t=1}^T \sum\limits_{a \in A_n}\frac{\eta_t}{2} \sum\limits_{a^\prime \in A_n} \hat{Y}_{a,a^\prime}^t.$$
% \end{lemma}
%Denote by $L_{a,a^\prime}^T:= \sum\limits_{t=1}^T Y_{a,a^\prime}^t$, we have the following inequality held:


The regret defined in (\ref{eq:swap}) can be rewritten in the loss form and can be decomposed as follows:
\begin{equation}\label{eq:conversion}
\begin{aligned}
    &  R_{n}^{\rm swa} (T,\mathcal{F})
    = \max\limits_{F \in \mathcal{F}}\sum_{t=1}^T \sum_{a \in A_n} \mathbf{1}[a_n^t = a]  y_{a}^t - \sum_{t=1}^T \sum_{a \in A_n} \mathbf{1}[a_n^t = a] y_{F(a)}^t\\
    &  = \max_{F\in \mathcal{F}} \sum_{a\in A_n} {L}_a^T - \sum_{a\in A_n}\tilde{L}_{a,F(a)}^T
     = \underbrace{\sum_{a\in A_n} ({{L}_{a}^T -  \hat{L}_{a}^T})}_{=:\rm (a)}  + \underbrace{\sum_{a\in A_n}({\hat{L}_{a}^T  - \hat{L}_{a,F(a)}^T})}_{=: \rm (b)} + \underbrace{\sum_{a\in A_n}(\hat{L}_{a,F(a)}^T - \tilde{L}_{a,F(a)}^T)}_{=: \rm (c)},
\end{aligned}
\end{equation}
where the second equality is due to (\ref{eq:la}) and the definition of $\Tilde{L}_{a,F(a)}^T:=\sum\limits_{t=1}^T \mathbf{1}[a_n^t=a]y_{F(a)}^t$.
% \begin{equation*}
    %         R_{n}^{4==========T} (T,\mathcal{F}) \leq \max\limits_{F \in \mathcal{F}} \sum_{a \in A_n} \left({L}_a^T- \tilde{L}_{a,F(a)}^T\right) + \sqrt{2T \log \frac{2}{\delta}}.
% \end{equation*}
% Decompose ${L}_a^T-\tilde{L}_{a,F(a)}^T$ as follows:
% $$ {L}_a^T-\tilde{L}_{a,F(a)}^T = \underbrace{({{L}_{a}^T - \hat{L}_{a}^T})}_{=:\rm (a)}  + \underbrace{({\hat{L}_{a}^T  - \hat{L}_{a,F(a)}^T})}_{=: \rm (b)} + \underbrace{(\hat{L}_{a,F(a)}^T - \tilde{L}_{a,F(a)})}_{=:(c)}$$.

We first show how to bound (a). By definition of ${L}_{a}^T$ and $\hat{L}_{a}^T$, we have that
\begin{equation*}
    \begin{aligned}
        &{L}_{a}^T - \hat{L}_{a}^T
        =\sum\limits_{t=1}^T \sum\limits_{a^\prime \in A_n} Y_{a,a^\prime}^t - \sum\limits_{t=1}^T \sum\limits_{a^\prime \in A_n}q_{a,a^\prime}^t \hat{Y}_{a,a^\prime}^t = \sum\limits_{t=1}^T \sum\limits_{a^\prime \in A_n}  Y_{a,a^\prime}^t \left(  1 - \frac{q_{a,a^\prime}^t}{q_{a,a^\prime}^t + \gamma_t} \right)
        = \sum\limits_{t=1}^T  \gamma_t \sum\limits_{a^\prime \in A_n} \hat{Y}_{a,a^\prime}^t.
    \end{aligned}
\end{equation*}
Thus, (a) is bounded by $\sum\limits_{t=1}^T  \gamma_t \sum\limits_{a\in A_n} \sum\limits_{a^\prime \in A_n} \hat{Y}_{a,a^\prime}^t$.

Then, we show how to bound (b). Let $W_n^t :=  \prod\limits_{a \in A_n} \sum\limits_{a^\prime \in A_n}   \exp{( -\eta_{t+1} \hat{L}_{a,a^\prime}^{t})}$, and we have that $W_n^0 =   \prod\limits_{a \in A_n} \sum\limits_{a^\prime \in A_n}  \exp{(0)}= (K_n)^{K_n}$. Note that
$W_n^T = W_n^0\frac{W_n^1}{W_n^0}\ldots\frac{W_n^T}{W_n^{T-1}}= (K_n)^{K_n} \prod\limits_{t=1}^T \frac{W_n^t}{W_n^{t-1}}.$
Then we have
\begin{equation}\label{eq:expl21}
\begin{aligned}
    &\exp{(- \sum\limits_{a \in A_n} \eta_{T+1} \hat{L}_{a,F(a)}^T)} = \prod\limits_{a \in A_n}\exp{(-  \eta_{T+1} \hat{L}_{a,F(a)}^T)}
    \leq  \prod\limits_{a \in A_n} \sum\limits_{a^\prime \in A_n} \exp{(-  \eta_{T+1} \hat{L}_{a,a^\prime}^T)}
    = (K_n)^{K_n} \prod\limits_{t=1}^T \frac{W_n^t}{W_n^{t-1}},
\end{aligned}
\end{equation}
where the inequality holds because $\exp{( -\eta_{T+1} \hat{L}_{a,a^\prime}^T)}\geq 0$ for all $a, a^\prime \in A_n$.
% Recall that $\hat{L}_{w,w^\prime}^t = \hat{L}_{w,w^\prime}^{t-1} + \hat{Y}_{w,w^\prime}^t$, so we have that
% \begin{equation*}
% \begin{aligned}
%     W_n^t &=  \prod\limits_{w\in W_n}\sum\limits_{w^\prime  \in W_n}  \exp{( -\eta_t \hat{L}_{w,w^\prime}^t)}= \prod\limits_{w\in W_n}\sum\limits_{w^\prime  \in W_n}  \exp{( -\eta_t  \hat{L}_{w,w^\prime}^{t-1})}\exp{( -\eta_t \hat{Y}_{w,w^\prime}^t)}\\
% \end{aligned}
% \end{equation*}
Then, by the definition of $q_{a,a^\prime}^t$ in (\ref{update-meta22}), 
we obtain that
%By rewriting the ratio in the product part above in terms of $Q_w^T$,
\begin{equation}\label{eq:wt21}
\begin{aligned}
     \frac{W_n^t}{W_n^{t-1}} & = \frac{\prod\limits_{a\in A_n}\sum\limits_{a^\prime  \in A_n}  \exp{( -\eta_t  \hat{L}_{a,a^\prime}^{t-1})}\exp{( -\eta_t \hat{Y}_{a,a^\prime}^t)}}{\prod\limits_{a\in A_n}\sum\limits_{a^\prime  \in A_n}  \exp{( -\eta_t  \hat{L}_{a,a^\prime}^{t-1})}}  \\
     &=  \prod\limits_{a\in A_n}\sum\limits_{a^\prime  \in A_n} \frac{ \exp{( -\eta_t  \hat{L}_{a,a^\prime}^{t-1})}}{\sum\limits_{a^\prime  \in A_n}  \exp{( -\eta_t  \hat{L}_{a,a^\prime}^{t-1})}}\exp{( -\eta_t \hat{Y}_{a,a^\prime}^t)} \\
     &= \prod\limits_{a\in A_n}\sum\limits_{a^\prime  \in A_n} q_{a,a^\prime}^t \exp{( -\eta_t \hat{Y}_{a,a^\prime}^t)} \leq \prod\limits_{a\in A_n}\sum\limits_{a^\prime  \in A_n} q_{a,a^\prime}^t \exp{( -\eta_T \hat{Y}_{a,a^\prime}^t)}\\
     &\leq \prod\limits_{a\in A_n} \left(\sum\limits_{a^\prime  \in A_n} q_{a,a^\prime}^t -\eta_T \sum\limits_{a^\prime  \in A_n} q_{a,a^\prime}^t \hat{Y}_{a,a^\prime}^t + \frac{\eta_T^2}{2}\sum\limits_{a^\prime  \in A_n} q_{a,a^\prime}^t {(\hat{Y}_{a,a^\prime}^t)}^2 \right)\\
     &\leq \prod\limits_{a\in A_n} \exp{\left(-\eta_T \sum\limits_{a^\prime  \in A_n} q_{a,a^\prime}^t \hat{Y}_{a,a^\prime}^t + \frac{\eta_T^2}{2}\sum\limits_{a^\prime  \in A_n} q_{a,a^\prime}^t {(\hat{Y}_{a,a^\prime}^t)}^2\right)}\\
     & = \exp{\left(-\eta_T \sum\limits_{a \in A_n}\sum\limits_{a^\prime  \in A_n} q_{a,a^\prime}^t \hat{Y}_{a,a^\prime}^t + \frac{\eta_T^2}{2}\sum\limits_{a  \in A_n}\sum\limits_{a^\prime  \in A_n} q_{a,a^\prime}^t {(\hat{Y}_{a,a^\prime}^t)}^2\right)},
\end{aligned}
\end{equation}
where the first inequality is due to that $\eta_t$ is a non-increasing parameter, the second inequality is due to that
$\exp{(x)}\leq 1+x+\frac{x^2}{2}$ for any $x \leq 0$, and the third inequality is due to that $1+x\leq \exp{(x)}$ for any $x \in \mathbb{R}$. 
Combining (\ref{eq:wt21}) and (\ref{eq:expl21}),
% \begin{equation*}
% \begin{aligned}
%     &\exp{(-\sum\limits_{w \in W_n} \eta_t \hat{L}_{w,F(w)}^T)} \\ &\leq C_n^{C_n} \exp{ \left(-\eta_t \sum_{t=1}^T \sum\limits_{w \in W_n}\sum_{w^\prime \in W_n} q_{w,w^\prime}^t \hat{Y}_{w,w^\prime}^t + \frac{\eta_t^2}{2} \sum_{t=1}^T \sum_{w^\prime \in W_n} q_{w,w^\prime}^t \left( \hat{Y}_{w,w^\prime}^t\right)^2 \right)}.
% \end{aligned}
% \end{equation*}
and taking the logarithm for both sides of the above inequality, we have that
\begin{equation*}
\begin{aligned}
    -\sum\limits_{a \in A_n} \eta_T \hat{L}_{a,F(a)}^T &\leq {K_n}\log{(K_n)} - \sum\limits_{a \in A_n} \eta_T \underbrace{\sum\limits_{t=1}^T\sum\limits_{a^\prime \in A_n}  q_{a,a^\prime}^t \hat{Y}_{a,a^\prime}^t}_{=:\hat{L}_a^T \ (\text{by definition of $\hat{L}_a^T$})} +  \frac{\eta_T^2}{2} \sum\limits_{t=1}^T \sum\limits_{a\in A_n}\sum\limits_{a^\prime \in A_n} q_{a,a^\prime}^t \left( \hat{Y}_{a,a^\prime}^t\right)^2.
\end{aligned}
\end{equation*}
Dividing both sides by $\eta_T >0$, with rearrangement, we have
\begin{equation}\label{eq:lalaa21}
\begin{aligned}
    \sum\limits_{a \in A_n} \hat{L}_a^T - \sum\limits_{a \in A_n} \hat{L}_{a,F(a)}^T &\leq \frac{K_n\log(K_n)}{\eta_T} + \frac{\eta_T}{2} \sum\limits_{t=1}^T \sum\limits_{a\in A_n} \sum\limits_{a^\prime \in A_n} q_{a,a^\prime}^t \left( \hat{Y}_{a,a^\prime}^t\right)^2\\
    &\leq \frac{K_n\log(K_n)}{\eta_T} +  \sum\limits_{t=1}^T \frac{\eta_t}{2} \sum\limits_{a\in A_n} \sum\limits_{a^\prime \in A_n} \hat{Y}_{a,a^\prime}^t,
\end{aligned}
\end{equation}
where the second inequality is due to that $\eta_t$ is a non-increasing parameter and the fact that $q_{a,a^\prime}^t \hat{Y}_{a,a^\prime}^t \leq 1$.
Combining with the bound of (a), we have
\begin{equation*}
\begin{aligned}
   \sum_{a \in A_n} \left({L}_{a}^T - \tilde{L}_{a,F(a)}^T \right) & \leq  \frac{K_n\log(K_n)}{\eta_T} +  \sum_{t=1}^T \left(\frac{\eta_t}{2} + \gamma_t \right) \sum_{a \in A_n}\sum\limits_{a^\prime \in A_n}  \hat{Y}_{a,a^\prime}^t + \sum_{a\in A_n} \left( \hat{L}_{a,F(a)}^T - \tilde{L}_{a,F(a)}^T \right).
\end{aligned}
\end{equation*}
Let $\gamma_t = \eta_t/2$. By invoking Lemma~\ref{crl:1}, with probability at least $1-\delta$, the following inequality holds:
\begin{equation*}
    \begin{aligned}
     \sum_{a \in A_n} \left({L}_a^T-\tilde{L}_{a,F(a)}^T\right) 
    &\leq  \frac{K_n\log(K_n)}{\eta_T} + \sum_{t=1}^T {\eta_t} \left(\sum_{a \in A_n}\sum\limits_{a^\prime \in A_n} \tilde{Y}_{a,a^\prime}^t \right) +  \log (\frac{1}{\delta})+ \frac{1}{\eta_T} \log (\frac{K_n^{K_n}}{\delta} )\\ &
    \leq  \frac{K_n\log(K_n) + K_n \log (K_n / \delta)}{\eta_T} + \sum_{t=1}^T \eta_t K_n  +  \log (\frac{1}{\delta}),
    \end{aligned}
\end{equation*}
where the last inequality is due to that  
$\sum\limits_{a \in A_n}\sum\limits_{a^\prime\in A_n} 
\tilde{Y}_{a,a^\prime}^ t  = \sum\limits_{a \in A_n}\sum\limits_{a^\prime\in A_n} \mathbf{1}[a_n^t = a] y_{a^\prime}^t \leq K_n$ and $\log (\frac{K_n^{K_n}}{\delta} ) \leq K_n \log (K_n/\delta) $ for $\delta \in (0,1)$.

Letting   $\eta_t = \sqrt{\frac{\log(K_n)}{t}}$, we have
\begin{equation*}
    \begin{aligned}
        &R_n^{\rm swa}(T,\mathcal{F}) \leq 
         2K_n\sqrt{T \log (K_n)} + K_n\sqrt{\log(K_n)}\sum_{t=1}^T\sqrt{\frac{1}{t}} +  \left(1+K_n\sqrt{\frac{T}{\log K_n}}\right) \log (\frac{1}{\delta}).
    \end{aligned}
\end{equation*}

When $\eta_t = \sqrt{\frac{\log(K_n) + \log(K_n /\delta)}{t}}$, the above inequality becomes
\begin{equation*}
    R_n^{\rm swa}(T,\mathcal{F}) \leq K_n \sqrt{T (\log(K_n) + \log(K_n/\delta))} + K_n\sqrt{\log(K_n) + \log(K_n/\delta)} \sum_{t=1}^T \sqrt{\frac{1}{t}} + \log (\frac{1}{\delta}).
\end{equation*}



Theorem~\ref{thm:regret3} follows by $\sum\limits_{t=1}^T \sqrt{\frac{1}{t}} \leq 2\sqrt{T}$.
\end{proof}

% \section{Proof of Theorem~\ref{thm:correlatedequalibrium3}}\label{sec:proofofequilibrium}
% \begin{proof}

% Recall that $r_{\left(k, k^{\prime}\right), n}^{t}:= \mathbf{1}\left[a_n^t = a\right] \left(r_n^t(k;\mathbb{K}_{-n}) - r_n^t(a^\prime;\mathbb{K}_{-n}) \right) $ is the instantaneous internal regret in round $t$. Let $\delta^\prime \in (0,1)$. As internal regret is bounded by swap regret,
% by (\ref{eq:thm11}) in Theorem~\ref{thm:regret3}, we have that with probability at least $1-\delta^\prime$, the instantaneous internal regret for agent $n$ is bounded as follows
% \begin{equation}\label{eq:internalregret}
%     R_n^{\rm int} (T) \leq 3K_n \sqrt{T (2\log(K_n) + \log(K_n/\delta^\prime))} +  \log \frac{1}{\delta^\prime}
% \end{equation}

% Thus, by using the union bound for all agents and letting $\delta = \delta^\prime/N$, we have that with probability $1-\delta$:
% \begin{equation*}
% \begin{aligned}
%      \sum\limits_{\mathbb{A}:a_n = a} \hat{\mathbf{P}}(\mathbb{K})\left(r_{n}\left(a^{\prime} ;{\mathbb{A}_{-n}}\right)-r_{n}\left(\mathbb{A}\right)\right)
%     =  \frac{1}{T}\sum\limits_{t=1}^T r_{\left(a, a^{\prime}\right), n}^{t} =
%     O(\max\limits_{n\in\mathcal{1}}K_n\sqrt{\frac{\log (K_n N/\delta)}{T}}),
% \end{aligned}
% \end{equation*}
% which coincides with the definition of the $\epsilon$-correlated equilibrium in (\ref{eq:correlated}) with $\epsilon = O(\max\limits_{n\in\mathcal{N}}K_n\sqrt{\frac{\log (K_n N/\delta)}{T}})$.

% On the other hand, by (\ref{eq:internalregret}), we have
% \begin{equation*}
%     \sum_{T=1}^{\infty} \Pr\left(\frac{R_n^{\rm int}(T)}{T} \leq 3K_n \sqrt{\frac{2\log(K_n) + \log(K_n/\delta^\prime)}{T}} + \frac{1}{T} \log \frac{1}{\delta^\prime}\right) \geq 
% \end{equation*}
% \end{proof}


%\bibliography{uai2023-template}

\end{document}
