\section{Continuous Search Space}\label{sec:continuous}
\subsection{Theoretical Results}
In the following, we introduce an additional assumption that bounds the complexity of the unknown functions as members of an RKHS and enables the performance analysis when applying \algname on the continuous search space \searchSpace\xspace instead of the discretization \discreteSet.


\begin{assumption} \label{apt: rkhs_gp}
The objective and constraints all lie in the RKHS $\rkhs_k$ corresponding to the kernel $k(\instance, \instance')$, and the corresponding norm is bounded by \rkhsB. Formally, $\globalf:\searchSpace\rightarrow \reals$ is a member of the RKHS of real-valued functions on \searchSpace\xspace with kernel $k$, with RKHS norm $\Vert \globalf\Vert_k \leq\rkhsB$. Similarly, $\cFunc_\conIdx:\searchSpace\rightarrow \reals$ is a member of the RKHS of real-valued functions on \searchSpace\xspace with kernel $k$, with RKHS norm $\Vert \cFunc_\conIdx\Vert_k \leq\rkhsB$, for all $\conIdx\in\conSpace$.
\end{assumption}

Then, we can derive a result analogous to \lemref{lem: roi}.

\begin{lem0}\label{lem: cont_roi}
    Under the assumptions above, the regions of interest $\roi_{t}$, as defined in \eqref{eq:roi}, contain the global optimum with high probability. Formally, for all $\delta \in (0,1)$, $T\geq t\geq 1$, and the search space $\searchSpace$ that contains the optimum $\instance^* = \argmax_{\instance\in \searchSpace}f(\instance)$ where $\cFunc_\conIdx(\instance^*) > \epsilon_{\cFunc}$ for all $\conIdx\in\conSpace$ and $\beta_t^{1/2}= B + \sigma\sqrt{2(\widehat{\maxInfo_T}+1+\ln(2(M+1)/\delta))}$, we have $\Pr{\instance^* \in \roi_t} \geq 1-\delta$.
\end{lem0}

\begin{proof}
    Similar to Theorem~2 of \cite{chowdhury2017kernelized}, with probability at least $1-\delta/2$, $\forall \instance\in \searchSpace, \forall T \geq t\geq 1, \forall g \in \{\globalf\} \cup \{\cFunc_\conIdx\}_{\conIdx\in\conSpace}$,
    $$\vert g(\instance) - \mu_{g, t-1}(\instance)\vert \leq \beta_{t}^{1/2}\sigma_{g, t-1}(\instance)$$
    
    Note that we also take the union bound on $ g \in \{\globalf\} \cup \{\cFunc_\conIdx\}_{\conIdx\in\conSpace}$.

    First, by the definition $S_{\cFunc, t} \defeq \bigcap_{\conIdx\in\conSpace} S_{\cFunc_\conIdx, t}$, we have $\forall t \leq T, \forall \instance \in S_{\cFunc, t}, \forall \conIdx\in\conSpace$
    $$
    \Pr{\cFunc_\conIdx(\instance) \geq \LCB_{\cFunc_\conIdx, t}(\instance) > 0} \geq 1-\delta/2
    $$
    meaning that, with probability at least $1-\delta/2$, $\instance$ lies in the feasible region. At the same time, we have, $\forall t \leq T$, $\forall \conIdx\in\conSpace$, given $\cFunc_\conIdx(\instance) > 0$,
    $$
    \Pr{\UCB_{\globalf, t}(\instance^*) \geq \globalf(\instance^*) \geq \globalf(\instance) \geq \LCB_{\globalf, t}(\instance)} \geq 1-\delta/2
    $$
    Given the mutual independence between the objective $\globalf$ and the constraints $\cFunc_\conIdx$, and by the definition of the threshold $\LCB_{\globalf,t, \max}$, we have
    $\forall t \leq T$, when $\exists \instance \in  S_{\cFunc, t}$,
    $$
    \Pr{\UCB_{\globalf, t}(\instance^*) > \LCB_{\globalf, t, \max} } \geq 1-\delta
    $$

    Note when $ S_{\cFunc, t} = \emptyset$, $\LCB_{\globalf,t, \max}=-\infty$, we have $\Pr{\UCB_{\globalf, t}(\instance^*) > \LCB_{\globalf, t, \max} } = 1$.

    {In summary, we've shown that with probability at least $1-\delta$, $\instance^* \in \roi_{\globalf, t}$.}

    Next, by the definition of $\instance^* = \argmax_{\instance\in \searchSpace}f(\instance)$ s.t.\ $\cFunc_\conIdx(\instance^*) > \epsilon_{\cFunc}$, we have $\forall t \leq T, \forall \conIdx\in\conSpace$
    $$
    \Pr{\UCBit_{\cFunc_\conIdx, t}(\instance^*) \geq \cFunc_\conIdx(\instance^*) > 0} \geq 1-\delta/2
    $$
    meaning that, with probability at least $1-\delta/2$, $\instance^* \in \roi_{\cFunc_\conIdx, t}$. In general, we have $\forall t \leq T, \forall \conIdx\in\conSpace$
    $$
    \Pr{\instance^* \in  \roi_{t}} \geq 1-\delta
    $$

\end{proof}

\begin{remark}
The proof of \lemref{lem: cont_roi} substitutes the $\beta$ in the proof of \lemref{lem: roi} and, with the additional assumption on the complexity of the unknown functions in \assref{apt: rkhs_gp}, alleviates the need for a discretization $\discreteSet$. Note that $\beta_t^{1/2}= B + \sigma\sqrt{2(\widehat{\maxInfo_T}+1+\ln(2(M+1)/\delta))}$ is larger than the original value in Theorem~2 of \cite{chowdhury2017kernelized}, to make sure that $\beta_t$ is the same for all $T \geq t\geq 1$ and to guarantee a union bound over $g \in \{\globalf\} \cup \{\cFunc_\conIdx\}_{\conIdx\in\conSpace}$. In the following, since $\beta_t$ is constant, we denote it by $\beta$.
\end{remark}


Then, we can directly extend \thmref{thm: width} to the case where the acquisition functions are maximized on $\roi_t$ instead of $\discreteROI$ as in line 9 of \algoref{alg:main}, and on $\roi_t \cap U_{\cFunc_\conIdx, t}$ instead of $\discreteROI \cap U_{\cFunc_\conIdx, t}$ as in line 8 of \algoref{alg:main}. The proof is identical to that in \appref{sec:proof} except for the different $\beta$ and search space.

\begin{algorithm}
    \caption{\textbf{\underline{CO}}nstrained \textbf{\underline{B}}O through \textbf{\underline{A}}daptive \textbf{\underline{R}}egion of Interest Acquisition on Continuous Space (\algname-CS)}
    \label{alg:main_cont}
        \begin{algorithmic}[1]
            \STATE \textbf{Input}: Search space $\searchSpace$, initial observation $\Selected_0$, horizon $T$, confidence factor $\delta$, confidence coefficient $\beta$;
            \FOR{$t = 1$ to $T$}
                \STATE Update the posteriors of $\GP_{\globalf,t}$ and $\GP_{\cFunc_\conIdx, t}$ according to \eqref{eq:posterior_mean} and \plaineqref{eq:posterior_covar}
                 
                \STATE Identify ROIs $\roi_t$, and undecided sets $U_{\cFunc_\conIdx, t}$ 
    
                \FOR{$\conIdx\in\conSpace$}
                    \IF{$U_{\cFunc_\conIdx, t} \neq \emptyset$}
                    \STATE Candidate for learning of each constraint: \\
                    $\instance_{\cFunc_\conIdx, t} \leftarrow \argmax_{\instance \in \roi_t \cap U_{\cFunc_\conIdx, t}} \acqC{(\instance)}$ 
                    (\plaineqref{eq:acqC})
                    \STATE  $\gG \leftarrow \gG \cup \cFunc_{\conIdx, t}$
                    \ENDIF
                \ENDFOR
                
                \STATE Candidate for optimizing the objective: \\
                $\instance_{\globalf, t} \leftarrow \argmax_{\instance \in \roi_t} \acqF{(\instance)}$ as in \eqref{eq:acqF}
                \STATE $\gG \leftarrow \gG \cup \globalf$
    
                \STATE Maximize the acquisition from different aspects: \\
                $g_t \leftarrow \argmax_{g \in \gG} \alpha_{g, t}{(\instance_{g, t})} $
    
                \STATE Pick the candidate to evaluate: $\instance_t \leftarrow \instance_{g_t, t}$  
    
                \STATE Update the observation set\\
                $ \Selected_t \leftarrow \Selected_{t-1} \cup \{(\instance_t, y_{\globalf, t}, \{y_{\cFunc_\conIdx, t}\}_{\conIdx\in\conSpace})\}$
                
            \ENDFOR
        \end{algorithmic}
\end{algorithm}


\begin{theorem}\label{thm: cont_width}
 Under the aforementioned assumptions, with a constant $\beta^{1/2}_t \defeq \beta^{1/2}= B + \sigma\sqrt{2(\widehat{\maxInfo_T}+1+\ln(2(M+1)/\delta))}$ and the acquisition function from \algoref{alg:main_cont}, there exists an $\epsilon_\globalf \leq \epsilon_\cFunc$ such that after $T \geq \frac{\beta \widehat{\maxInfo_T} C_1}{\epsilon_\globalf^2}$ iterations, we have $\Pr{\vert CI_{\globalf^*, T}\vert \leq \epsilon_\globalf, \globalf^* \in CI_{\globalf^*, T }} \geq 1 - \delta$.
    Here, $C_1 = 8/\log(1+\sigma^{-2})$.
\end{theorem}

\subsection{Efficient Discretization}

{Targeting a continuous search space demands additional consideration when implementing a practical ROI identification, or requires better coverage of the dense search space by the discretization used for \algname in practice. This problem is more pronounced in high-dimensional tasks. Here, we briefly discuss potential remedies if we still resort to an efficient discretization. Random linear projection has been used for discretizing the search space to mitigate the dependency on the dimensionality while, with high probability, preserving the original geometry \citep{dasgupta1999learning, nayebi2019framework}. To efficiently discretize the dense search space for \algname (in high-dimensional applications), one option is to apply the random projection and its reverse studied by \citet{nayebi2019framework}, which shows strong empirical performance when combined with other BO algorithms and offers the following theoretical guarantee.}

\begin{defn}{($\varepsilon$-subspace embedding \citep{nayebi2019framework})}
Given a matrix $V \in \mathbb{R}^{D \times d_e}$ with orthonormal columns, an integer $d \leq D$ and an approximation parameter $\varepsilon \in (0,1)$, an $\varepsilon$-subspace embedding for $V$ is a map $H\colon \mathbb{R}^D \rightarrow \mathbb{R}^d$ such that $\forall\,\instance \in \mathbb{R}^{d_e}$:

\begin{equation*}
    (1 - \varepsilon) \Vert V\instance\Vert_2^2 \leq \Vert HV\instance \Vert_2^2 \leq (1 + \varepsilon) \Vert V\instance\Vert_2^2
\end{equation*}
\end{defn}

\begin{theorem}{(Theorem 2 of \citet{nayebi2019framework})}\label{thm:hesbo}
Consider a Gaussian process that acts directly in the unknown active subspace of dimension $d_e$ with mean and variance functions $\mu(\cdot), \sigma^2(\cdot)$. Let $\hat{\mu}(\cdot), \hat{\sigma}^2(\cdot)$ be their approximations using an $\varepsilon$-subspace embedding for the active subspace. Then we have for every $\instance \in X$

1. $\vert \mu(\instance) - \hat{\mu}(\instance)\vert \leq 5\varepsilon \Vert \instance\Vert \Vert X - \hat{f}\Vert$

2. $\sigma^2(\instance) - \hat{\sigma}^2(\instance) \leq 12\varepsilon \Vert \instance\Vert^2 $

\end{theorem}

{For a comprehensive overview of the treatments of high-dimensional search spaces for BO, we refer to the recent survey by \citet{binois2022survey}. Besides random projection \citep{nayebi2019framework, wang2016bayesian, letham2020re}, variable selection \citep{hellsten2023high}, tree-structured partitioning \citep{eriksson2021high}, and Markov Chain Monte Carlo sampling \citep{yi2024improving} on the search space could all be applied as plugins to improve the discretization efficiency for \algname.}

\subsection{Case Study}
{We illustrate the effectiveness of integrating Hashing-enhanced Subspace BO (HeSBO) \citep{nayebi2019framework} into CBO algorithms. We construct the following 40-dimensional CBO task that makes any grid discretization containing a feasible candidate on the original embedding intractable. 
 }
\paragraph*{Ackley-40D-2C}
The Ackley-40D-2C function is a variant of the task we study in \secref{sec: exps}: $f(\instance) = 20\exp{(-0.2\sqrt{1/d\sum_{i=1}^d{x_i^2}})} + \exp{(1/d\sum_{i=1}^d{\cos(2\pi x_i)})} + 20 + \exp(1)$, with $d=5$ and $\instance \in \bracket{-5, 10}^{40}$. We construct two constraints to enforce a feasible area taking up less than $0.6\%$ of the search space. The first constraint is $\cFunc_1(\instance) = 1-\sum_{i=1}^5{x_i}$. The second constraint is $\cFunc_2(\instance) = 6 - \sum_{i=1}^5{x_i^2}$.

We find that \algname fails in the original search space $\bracket{-5, 10}^{40}$ due to the intractability of any discretization containing sufficient feasible candidates. We integrate HeSBO into all the tested CBO algorithms to allow the algorithms to operate on a 5-dimensional embedding space $\bracket{-1, 1}^{5}$. \algname relies on a random sample of $200{,}000$ candidates. The simple regret curves are shown in \figref{fig:exps:hesbo}. Though the point-wise comparison of \algname is not tractable in the original 40-dimensional search space, integrating HeSBO allows \algname to optimize the high-dimensional CBO toy problem efficiently. \Tabref{table: walltime_hesbo} shows that \algname can efficiently optimize over the embedding space, benefiting from the reduced dimensionality and the ROI identification, which further reduces the computational cost dynamically.

\begin{figure*}[t]
    \centering
        {
      \includegraphics[trim={0cm 0cm 0cm 0cm}, width=.5\textwidth]{./fig/simple_regret_hesbo_neurips.pdf}
    }
  \caption{The figure illustrates the simple regret for Ackley-40D-2C. All the tested algorithms rely on the low-dimensional embedding in HeSBO. The results are collected from 15 independent trials. The shaded area denotes the standard error. 
  }   
  \label{fig:exps:hesbo}
  \end{figure*}

\begin{table}
  \centering
  \begin{tabular}{lcccc}
      \toprule
      \textbf{Problem} & \textbf{\algname} & \textbf{CMES-IBO} & \textbf{SCBO} & \textbf{cEI} \\
      \midrule
      Ackley-40D-2C & 75.87 & 281.47 & 34.33 & 185.07 \\
      \bottomrule
  \end{tabular}
  \caption{Average wall time (sec) of different CBO methods, collected from 15 independent trials.}\label{table: walltime_hesbo}
\end{table}