\title{Supplementary Material}
\maketitle
\appendix
% \vspace{-4cm}
\section{Additional Related Work}\label{sec:additional_related}

\subsection{Learning Stackelberg games}
More broadly, our research is also related to learning equilibria in game theory. Besides the Nash equilibrium studied in this paper, another well-studied setting is the Stackelberg game \citep{von2004leadership,Gan2023RobustSE}. Specifically, Stackelberg games model a two-step sequential decision-making process between two agents, a leader and a follower. This canonical model for strategic leader-follower interactions has been adopted for many real-world applications, such as contract design, optimal pricing, security resource allocation, and optimal traffic routing \citep{bolton2004contract,dawkins2021limits,paruchuri2008playing,roth2016watch,roughgarden2001stackelberg,yang2014adaptive}. Learning the Stackelberg equilibrium has also been extensively studied in the literature \citep{letchford2009learning,peng2019learning,dawkins2022first,Han2023LearningIO}. It would be interesting to study the learning of Stackelberg equilibria via Gaussian processes.

\subsection{BO with multiple structured utility functions} Within the scope of Bayesian optimization tasks, it is common to tackle multiple unknowns, as in the learning of equilibria, where the algorithm needs to deal with multiple unknown utility functions. The most related literature in the realm of BO is on optimizing function networks, where the objective function to be optimized can be decomposed into multiple unknown nodes in a known directed acyclic graph (DAG) \citep{astudillo2021bayesian, buathong2023bayesian}. Similarly, \cite{sussex2022model} proposes to optimize the intervention on the causal graph with an extension of \UCB, a canonical acquisition function in BO, and offers a corresponding theoretical guarantee on the convergence. These works assume that each node on the DAG representing the unknown function can be captured by a separate GP and assume independence between different nodes. However, when transferring our objective into a DAG, we are dealing with highly related nodes, as will be illustrated in the following section. The reason is that some components of the ultimate objective are partial maximizations of others. Also, unlike in the graph-based BO works, we do not observe the partial maximization and, therefore, cannot update the GPs for all the nodes with corresponding observations. This gap in the assumptions and in the evaluation process hinders the direct application of the graph-based BO methods.

\section{Proofs}\label{sec: proof}

\subsection{Proof of \lemref{lem: roi}}
\begin{proof}
Similar to Lemma 5.1 of \cite{srinivas2009gaussian}, given a constant $\beta=2\log(\uSpaceNum\vert \discreteSet \vert T/ \delta)$, with probability at least $1-\delta$, $\forall \instance\in \discreteSet, \forall t\geq 1, \forall g \in \{\utilF\}_{i\in \uSpace} \cup \{\partU\}_{i\in \uSpace}$,
$$\vert g(\instance) - \mu_{g, t-1}(\instance)\vert \leq \beta^{1/2}\sigma_{g, t-1}(\instance)$$
    
Note that we also take the union bound over $ g \in \{\utilF\}_{i\in \uSpace} \cup \{\partU\}_{i\in \uSpace}$.

Then, we have $\forall t \leq T, \instance\in \discreteSet$
$$\Pr{\UCB_{\partU, t}(\instance) = \max_{\partX}\UCB_{\utilF, t}(\partX, \minusIX) \geq \max_{\partX} \utilF(\partX, \minusIX) = \partU(\minusIX)} \geq 1-\delta
$$
and at the same time
$$\Pr{\LCB_{\partU, t}(\instance) = \max_{\partX}\LCB_{\utilF, t}(\partX, \minusIX) \leq \max_{\partX} \utilF(\partX, \minusIX) = \partU(\minusIX)} \geq 1-\delta
$$

This justifies the definition of \eqnref{eq:partialUCB} and \eqnref{eq:partialLCB}. 

As a result, we also have $\forall t \leq T, \instance\in \discreteSet$ 
$$\Pr{\UCB_{\globalf, t}(\instance) \geq \globalf(\instance) \geq \globalf(\instance^*) \geq \LCB_{\globalf, t}(\instance^*)} \geq 1-\delta
$$

By the definition of the threshold $\UCB_{\globalf, t, \min}$ we have
$\forall t \leq T$,
$$
\Pr{\UCB_{\globalf, t, \min} > \LCB_{\globalf, t}(\instance^*) } \geq 1-\delta
$$

By the definition of $\globalf$, we have $\forall \instance$, $\globalf(\instance)\geq 0$.


Hence we have $\forall t \leq T, \forall i \in \uSpace$
$$
\Pr{\instance^* \in \roi^t} \geq 1-\delta
$$
\end{proof}
    
\subsection{Proof of \thmref{thm: simReg}}
\begin{proof}
    The following proof shows that the width of the interval at $t$ is bounded. For brevity, we denote $\alpha_t \defeq \max_{\instance \in \searchSpace} {\acqF(\instance, \searchSpace)}$.
    
    With probability at least $1-\delta$, $\forall T\geq t\geq 1$, we first have 
    $$\globalf(\instance^*) \in [\LCB_{\globalf,t,\min}, \UCB_{\globalf,t,\min}]$$
     and then 
    $$
    \UCB_{\globalf,t,\min} - \LCB_{\globalf,t,\min} \leq \alpha_t$$
   
    By Lemmas 5.1, 5.2 and 5.4 of \citet{srinivas2009gaussian}, with $\beta=2\log(\uSpaceNum \vert \discreteSet \vert T /\delta)$, $\forall g \in \{\utilF\}_{i\in \uSpace}$, we have $\sum_{t=1}^{T} (2\beta^{1/2}\sigma_{g, t-1}(\instance^t))^2 \leq {C_1\beta\maxInfo_{g, T}}$. Then the following holds with probability at least $1-\delta$: 
    \begin{align*}
        \sum_{t=1}^{T} \alpha_t^2 
        & \leq \sum_{t=1}^{T} (\UCB_{\globalf,t}(\instance^t) - \LCB_{\globalf,t}(\instance^t))^2\\
        & \leq \sum_{t=1}^{T} ((\uSpaceNum + 1) \sum_{g \in \{\utilF\}_{i\in \uSpace}}2\beta^{1/2}\sigma_{g, t-1}(\instance^{t}))^2\\
        &= (\uSpaceNum+1)^2\sum_{t=1}^{T} \sum_{g \in \{\utilF\}_{i\in \uSpace}} (2\beta^{1/2}\sigma_{g, t-1}(\instance^{t}))^2\\
        &\leq (\uSpaceNum+1)^2\sum_{g \in \{\utilF\}_{i\in \uSpace}}C_1\beta\maxInfo_{g, T}\\
        &= (\uSpaceNum+1)^2 C_1\beta\maxInfoARISE_T
    \end{align*}
    where $C_1= 8/\log(1+\sigma^{-2})$.
    The second line holds for two reasons. First, we have $\forall g \in \{\utilF\}_{i\in \uSpace}$, $\UCB_{g,t}(\instance^t) - \LCB_{g,t}(\instance^t) \leq 2\beta^{1/2}\sigma_{g, t-1}(\instance^{t})$. Also, we have $\forall g \in \{\partU\}_{i\in \uSpace}$, $\UCB_{g,t}(\instance^t) - \LCB_{g,t}(\instance^t) \leq \sum_{{i\in \uSpace}}{\UCB_{\utilF,t}(\instance^t) - \LCB_{\utilF,t}(\instance^t)}$ since $\instance^t$ maximizes \acqF.
    The last line holds due to the definition in \eqref{eq:gammaT}. By Cauchy-Schwarz, we have with probability at least $1-\delta$:
    $$
    \frac{1}{T}(\sum_{t=1}^{T} \alpha_t)^2 \leq (\uSpaceNum+1)^2C_1\beta\maxInfoARISE_T
    $$

    By the monotonicity assumed in \assref{apt: mono_ci}, $\forall g \in \uSpace$, $\forall 1 \leq t_1 < t_2 \leq T$, we have $\alpha_{t_2} \leq \alpha_{t_1}$. Therefore with probability at least $1-\delta$:
    \begin{align*}
        \vert CI_{\globalf^*, T}\vert &\leq \UCB_{\globalf,T,\min} - \LCB_{\globalf,T,\min}\\
        &\leq
        \alpha_T\\
        &\leq \sqrt{\frac{(\uSpaceNum+1)^2 \beta C_1\widehat{\maxInfo_T}}{T}}        
    \end{align*}
    For brevity, we denote $\hat{C}_1=8(\uSpaceNum+1)^2/\log(1+\sigma^{-2})$. Then, as long as $T \geq \frac{\beta \maxInfoARISE_T \hat{C}_1}{\epsilon^2}$, we have with probability at least $1-\delta$
    $$\vert CI_{\globalf^*, T}\vert \leq \epsilon$$

\end{proof}

\subsection{Proof of \thmref{thm: eNE}}
The following results bound the simple regret of the proposed \algoref{alg:main} with additional mild assumptions.

Different from the proof of \thmref{thm: simReg}, we are optimizing the acquisition on the ROI rather than the global search space. Consequently, the key bound
\begin{align*}
    \sum_{t=1}^{T} \alpha_t^2 
    & \leq \sum_{t=1}^{T} (\UCB_{\globalf,t}(\instance^t) - \LCB_{\globalf,t}(\instance^t))^2\\
    & \leq \sum_{t=1}^{T} ((\uSpaceNum + 1) \sum_{g \in \{\utilF\}_{i\in \uSpace}}2\beta^{1/2}\sigma_{g, t-1}(\instance^{t}))^2
\end{align*}
no longer holds. Instead, we can only establish the analogous bound for $\hat{\alpha}_t \defeq \max_{\instance\in\discreteROI}{\acqF(\instance, \discreteROI)}$:
\begin{align*}
    \sum_{t=1}^{T} \hat{\alpha}_t^2 
    & = \sum_{t=1}^{T} (\UCB_{\globalf,t}(\instance^t, \discreteROI) - \LCB_{\globalf,t}(\instance^t, \discreteROI))^2\\
    & \leq \sum_{t=1}^{T} ((\uSpaceNum + 1) \sum_{g \in \{\utilF\}_{i\in \uSpace}}2\beta^{1/2}\sigma_{g, t-1}(\instance^{t}))^2
\end{align*}
Similarly, by Cauchy-Schwarz, we have
\begin{align*}
    \sum_{g \in \{\utilF\}_{i\in \uSpace}}{\UCB_{g,t}(\instance^t) - \LCB_{g,t}(\instance^t)}
    &\leq \sqrt{\frac{\beta C_1\widehat{\maxInfo_T}}{T}}        
\end{align*}
where $C_1= 8/\log(1+\sigma^{-2})$. With the assumed monotonicity, we have with probability at least $1-\delta$:
\begin{align*}
    \hat{\alpha}_T 
    &\defeq \max_{\instance\in\discreteROI}\UCB_{\globalf, T}(\instance, \discreteROI) - \LCB_{\globalf, T}(\instance, \discreteROI)\\
    &\leq \sqrt{\frac{(\uSpaceNum+1)^2 \beta C_1\widehat{\maxInfo_T}}{T}}   
\end{align*}

Since we are assuming that after $T\geq \frac{\beta \maxInfoARISE_T \hat{C}_1}{\epsilon^2}$ iterations, $\forall \instance \in \discreteROI$, it holds that $\UCB_{\utilF,t}(\minusIX, \discreteROI) = \UCB_{\utilF,t}(\minusIX, \discreteSet)$ and $\LCB_{\utilF,t}(\minusIX, \discreteROI) = \LCB_{\utilF,t}(\minusIX, \discreteSet)$, we have $\alpha_T = \hat{\alpha}_T\leq \sqrt{\frac{(\uSpaceNum+1)^2 \beta \widehat{\maxInfo_T}C_1}{T}} = \sqrt{\frac{\beta \widehat{\maxInfo_T}\hat{C}_1}{T}} \leq \epsilon$.

In summary, we have with probability at least $1-\delta$:
$$\globalf(\instance^T)\leq \UCB_{\globalf, T, \min} \leq \sqrt{\frac{\beta \maxInfoARISE_T \hat{C}_1}{T}}\leq \epsilon$$

\section{Efficient Constrained Optimization}\label{sec: copt}
We propose to accelerate the candidate selection in the high-dimensional space by formulating the ROI identification and the acquisition function optimization in lines 4 and 5 of \algoref{alg:main} together as a conventional constrained optimization problem and solving it efficiently with an off-the-shelf tool. 

We first solve for $\UCBit_{\globalf, t, \min}$,

\begin{align*}
    \UCBit_{\globalf, t, \min} %&
    = \min_{\instance\in\searchSpace}\UCBit_{\globalf, t}(\instance) %\\
    ~~\text{s.t. }~~\text{ } \LCB_{\globalf, t-1}(\instance) %&
    \leq \UCBit_{\globalf, t-1, \min}
\end{align*}
then identify the candidate $\instance^t$ to be evaluated:
\begin{align*}
    \instance^t &= \argmax_{\instance\in\searchSpace}\acqF(\instance, \searchSpace) %\\
    ~~\text{s.t.}~~\text{ } \LCB_{\globalf, t}(\instance) %&
    \leq 0
\end{align*}

Since the above calculation of $\acqF(\instance, \roi^t)$ requires a marginal maximum of $\UCB_{\partU, t}$ and $\LCB_{\partU, t}$ for each agent $i \in \uSpace$, making the optimization a nested optimization problem, we propose the following approximation inspired by the reparametrization trick by \cite{sussex2022model}:

\begin{align*}
    \UCB_{\partU, t}(\minusIX, \searchSpace) &= \repaF_{i,t, \UCB}(\instance)\max_{\instance\in \searchSpace}\UCB_{\utilF, t}(\instance) \\
        \textsc{      \quad  }\LCB_{\partU, t}(\minusIX, \searchSpace) 
    &= \UCB_{\partU, t}(\minusIX, \searchSpace) - \repaF_{i, t, \LCB}(\instance)2\beta^{1/2}\max_{\instance\in \searchSpace}\sigma_{\utilF, t-1}(\instance)
\end{align*}


where $\repaF_{i,t, \UCB}(\instance) \in[0, 1]$ and $\repaF_{i,t, \LCB}(\instance) \in[0, 1]$ are learned with regression models (e.g., a neural network) that allow gradient-based optimization with respect to $\instance$. Here, $\max_{\instance\in \searchSpace}\sigma_{\utilF, t-1}(\instance)$ and $\max_{\instance\in \searchSpace}\UCB_{\utilF, t}(\instance)$ are easy to obtain by applying an off-the-shelf optimizer to the posterior. The regression models could be trained on related scenarios where the utility functions are known or cheap to evaluate so that the Gaussian process could be updated arbitrarily without incurring significant costs for training models for $\repaF_{i,t, \UCB}$ and $\repaF_{i,t, \LCB}$.


\section{Choice of $\beta$}
We follow the convention from \cite{srinivas2009gaussian} of using practical $\beta$ values that differ from the theoretical ones to achieve better empirical performance. We choose $\beta=1$ for Hotelling and $\beta=2$ otherwise. We showcase the sensitivity of $\algname$ to the choice of $\beta$ in \figref{fig: beta}. Note that although we choose $\beta$ differently from the theoretical value in \thmref{thm: simReg} with $\delta=0.05$, $\beta$ is unlike a typical hyper-parameter: each choice of value corresponds to a different confidence level of the error bound.

\begin{figure*}[tbh]
    \vspace{-2mm}
     \centering
        \includegraphics[trim={.2cm .1cm .4cm 0.1cm}, width=\textwidth]{figures/summary_beta.pdf}
    \vspace{-3mm}
    \caption{Experimental results on choices of $\beta$. The theoretical value is defined as in \thmref{thm: simReg}. In each plot, the $x$-axis denotes the number of function evaluations.
    The curves show the $\globalf(\instance^t)$ values averaged over at least ten independent trials. The shaded area denotes the standard error. The observation perturbation is sampled from $\normal{(0, 0.01)}$, while the simple regrets shown in the figures do not count the noise.}
    \label{fig: beta}
    \vspace{-2mm}
\end{figure*}



\section{Additional Results on 3-player Games}\label{sec:additional_results}
\reviseFx{In the following, we incorporate additional experimental results for the Hotelling and Budget Allocation games, specifically examining scenarios with three players.}

\begin{figure*}[tbh]
    \vspace{-2mm}
    \centering
    \includegraphics[trim={.2cm .1cm .4cm 0.1cm}, width=0.6\textwidth]{figures/summary_regrets_uai_rebuttal.pdf}
    \vspace{-3mm}
    \caption{Experimental results on Hotelling and Budget Allocation games when there are 3 players involved, where the $x$-axis denotes the number of function evaluations. The curves show the $\globalf(\instance^t)$ values averaged over at least ten independent trials, and the shaded area denotes the standard error. The observation perturbation is sampled from $\normal{(0, 0.01)}$, while the simple regrets shown in the figures do not count the noise. The theoretical value is defined as in \thmref{thm: simReg}.}
    \label{fig: 3p_reg}
    \vspace{-2mm}
\end{figure*}

\reviseFx{Consistent with our previous results, \figref{fig: 3p_reg} shows that \algname{} outperforms or at least matches the performance of the best baseline method.}

