\section{Preliminaries and Problem Setup}
We consider the optimization problem of finding the equilibrium $\x^* \in \xxx$\footnote{In this paper, multiple agents' strategies are denoted by bold lowercase letters, e.g., $\x$ or $\x_{-i}$. The $i$-th agent's strategy is denoted by the non-bold, subscripted $x_i$.} of a game played by multiple agents, defined as follows:
\begin{equation}\label{eq:NE}
    x^*_i \in \arg \max_{x_i \in \xxx_i} u_i(x_i, \x^*_{-i}), \quad \forall i \in \uSpace
\end{equation}
where $\uSpace = \{1,\cdots, n\}$ denotes the set of agents, $\xxx_i$ is the action set of agent $i$ ($\xxx=\xxx_1 \times\cdots\times\xxx_n$), and $u_i(x_i, \x^*_{-i})$ is agent $i$'s utility function, where $x_i$ represents agent $i$'s action and $\x^*_{-i}$ denotes the actions of all agents other than $i$. \mh{Our paper specifically focuses on finite games, which involve a finite number of players and a finite number of actions for each player. It is well established, as demonstrated by \cite{nash1950non}, that every finite game possesses at least one Nash equilibrium, a result commonly known as the Nash existence theorem.}

The problem setup is a repeated game among $n$ agents or players. Each agent $i$ has an action set $\xxx_i\subseteq \rr^{d_i}$ and a utility function $\utilF:\xxx= \xxx_1\times \cdots \times \xxx_n \rightarrow [0, 1]$. We refer to the actions of \textit{all} agents, $\x = (x_1, \cdots, x_n)$, as an action profile. The Nash Equilibrium (NE) $\x^* \in \xxx$ is defined in \eqref{eq:NE}. Given any action profile $\x$, we define a loss function $f:\xxx \rightarrow \rr$ as follows.
\begin{equation}\label{eq:loss}
    f(\x) = \sum_{i \in \uSpace} \Big[ \max_{x'_i \in \xxx_i} \utilF(x'_i,\x_{-i}) - \utilF(\x) \Big]
\end{equation}
Note that $f(\x) \ge 0$ for all $\x \in \xxx$, and any NE $\x^* \in \arg\min_{\x \in \xxx} f(\x)$ satisfies $f(\x^*)=0$. An approximate Nash equilibrium $\x$ is called an \textit{$\epsilon$-NE} \citep{2137c69e-3c1d-390b-9337-e9522a64edcf,10.1145/779928.779933}, where each agent's strategy, given the other agents' strategies, has suboptimality at most $\epsilon$, i.e., $\max_{x'_i \in \xxx_i} \utilF(x'_i,\x_{-i}) - \utilF(\x) \le \epsilon, \forall i \in \uSpace$.

\begin{example}\label{example:saddle}
    We consider a two-player game from \cite{al2018approximating,paruchuri2008playing} as a running example, where the utility functions of the two players are defined as $u_1(x_1, x_2) = (x_2-x_2^*)^2 - (x_1 - x_1^*)^2$ and $u_2(x_1, x_2) = (x_1 - x_1^*)^2 - (x_2-x_2^*)^2$. The NE is $\x^* = (x_1^*, x_2^*) = (0.5, 0.5)$. We illustrate agent 1's utility function and the loss function \eqref{eq:loss} in \figref{fig:example1}.
\end{example}


\begin{figure*}[tbh]
     \centering
     \begin{subfigure}[t]{0.32\textwidth}
        \centering
        \includegraphics[width=\textwidth]{figures/agent1_u.png}
         \caption{Agent 1's utility in Example~\ref{example:saddle}. The left plot represents agent 1's partial maximum utility from \eqref{eq:loss} given agent 2's strategy $x_2$.}
         \label{fig:agent1}
     \end{subfigure}
     % \hfill
     \quad
     \begin{subfigure}[t]{0.3\textwidth}
         \centering
         \includegraphics[width=\textwidth,trim=0 -0.2cm 0 0, clip]{figures/loss_func.pdf}
         \caption{Heatmap showing the loss function \eqref{eq:loss} of Example \ref{example:saddle}. The optimal loss of $0$ is attained at the NE (0.5, 0.5).
        }
         \label{fig:loss_function}
     \end{subfigure}
     \quad
     \begin{subfigure}[t]{0.3\textwidth}
         \centering         \includegraphics[width=\textwidth,clip]{figures/roi.pdf}
         \caption{LCB on Example \ref{example:saddle}'s loss function posterior with 10 initialization points. Unmasked area indicates ROI defined by \eqref{eq:roi}.}
         \label{fig:roi}
     \end{subfigure}

        \caption{Function visualizations of Example~\ref{example:saddle}, where the $x$-axis (i.e., $x_1$) represents agent 1's action and the $y$-axis (i.e., $x_2$) represents agent 2's action. Agent 2's utility is symmetric to that shown in \figref{fig:agent1} and is therefore omitted from this plot.
        \figref{fig:agent1} shows that a rational agent's utility-maximization strategy (i.e., Utility Maxima) differs greatly from the minimizer of the loss function
        (i.e., NE $(0.5, 0.5)$), which highlights the novelty and difficulty of optimizing our loss function \eqref{eq:loss}. \figref{fig:roi} illustrates how our optimization algorithm gains efficiency by reducing the search space.}
        \label{fig:example1}
\end{figure*}


Our objective is to minimize the unknown function \eqref{eq:loss}, given only query access to the objective function.
Specifically, at every time step $t$, we can query an action profile $\x^t$ and observe each agent's corresponding utility $\y^t$, where $y^t_i = \utilF(\x^t) + \epsilon_i$ and $\epsilon_i \sim \nnn(0, \sigma^2)$. We denote a sequence of function evaluations (FEs) as $\ddd^{1:t}=\{(\x^1,\y^1); \cdots; (\x^t,\y^t)\}$. We define 
\begin{equation}
    \globalf(\instance^t) - f(\x^*) = \globalf(\instance^t)
\end{equation}
as the regret, since $f(\x^*)=0$ for any NE. We want to achieve no-regret learning of the NE:
\[\lim_{T\rightarrow\infty}\frac{1}{T}\sum_{t=1}^{T}\globalf(\instance^t) = 0.\]
\reviseFx{The definition of no-regret learning of Nash equilibrium generalizes the no-regret notion in games discussed by \cite{jafari2001no, daskalakis2021near}, and resembles the common notion of no-regret in the Bayesian optimization literature \citep{srinivas2009gaussian, chowdhury2017kernelized}.}
For every agent $i \in \uSpace$, we model their utility function $\utilF: \xxx\rightarrow [0,1]$ as a GP, which is a probability distribution over functions, i.e., \[\utilF(\x) \sim \gp\big(\mu_{\utilF}(\cdot),k_{\utilF}(\cdot,\cdot)\big),\] specified by its mean $\mu_{\utilF}(\cdot)$ and covariance (or kernel) $k_{\utilF}(\cdot, \cdot)$. The corresponding hyper-parameters are denoted by $\theta_{\utilF}$. We assume every agent has the same GP prior $\gp(0, k(\x, \x'))$ for their utility function. Given a history of observations $\ddd^{1:t}$, the posterior distribution under a $\gp(0, k(\x, \x'))$ prior is also Gaussian, with mean and variance functions updated as follows.
\begin{equation}\label{eq:gp_posterior}
    \begin{split}
        \mu_{u_i, t}(\x) &= \mathbf{k}^t_{u_i}(\x)^\top (\mathbf{K}^t_{u_i} + \sigma^2 \mathbf{I})^{-1} \y_i^{1:t} \\
        \sigma_{u_i, t}(\x)^2 &= k_{u_i}(\x, \x) - \mathbf{k}^t_{u_i}(\x)^\top (\mathbf{K}^t_{u_i} + \sigma^2 \mathbf{I})^{-1}\mathbf{k}^t_{u_i}(\x)
    \end{split}
\end{equation}
where $\mathbf{k}^t_{u_i}(\x) = [k_{u_i}(\x^j, \x)]_{j\in[t]}$, $\y_i^{1:t} = [y_i^1, \cdots, y_i^t]$, and $\mathbf{K}^t_{u_i}=[k_{u_i}(\x^j, \x^{l})]_{j\in[t], l\in[t]}$ is the kernel matrix.
