% In this section, we provide upper bounds of the cumulative regret of \Cref{alg:qmc-kernel-ucb}. 
% We first briefly review Mercer's theorem, and then we present the main claims of the regret upper bounds.
% Finally, we provide a proof sketch of the main results.
In this section, we provide upper bounds of the cumulative regret of \Cref{alg:qmc-kernel-ucb}.
We present the main claims of the regret upper bounds in Sec.~\ref{subsec:statements}, and provide a proof sketch of the main result in Sec.~\ref{subsec:sketch-proof}.
Recall that the complete proofs are provided in Appendix~\ref{sec:appendix_proofs}.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{comment}
\subsection{Mercer's Theorem}
As we stated in the introduction, our regret bounds involve the decay rate of the eigenvalues of the Mercer operator.
Here, following \cite[Chapter 4.5]{steinwart2008support}, we briefly review the theoretical properties of the Mercer operator.
% In this section,we give some difinitions on RKHS and Mercer’s Theorem.  \par
% Let$\mathcal{X}$ be a compact metric space, $k :\mathcal{X} \times \mathcal{X}  \rightarrow \mathbb{R}$.The Hilbert space $\mathcal{H}$ that has functions on $\mathcal{X}$ equipped with an inner product$<>$ is called reproducing kernel Hilbert space
% (RKHS). 
Let $\cX$ be a measurable space and $\nu$ be a finite measure on $\cX$, and $k: \cX \times \cX \rightarrow \RR$ be a measurable kernel.
We denote by $L_2(\nu)$ the space of square-integrable functions on $\cX$ with respect to the measure $\nu$.
We define an integral operator $\cT_k: L_2(\nu) \rightarrow L_2(\nu)$ called the Mercer operator  by
$f \mapsto \int_{\cX} k(\cdot, x)f(x) d\nu(x)$.
Since $\cT_k$ is compact, positive, and self-adjoint, 
by the spectral theorem, 
there exists an orthonormal basis $\{\psi_i\}_{i \in I}$ of $L_2(\nu)$
such that for any $f \in L_2(\nu)$, 
$\cT_k$ has the following expansion 
$\cT_k f = \sum_{i \in I} \lambda_i \langle \psi_i, f \rangle_{L_2(\nu)}\psi_i$.
Here, $\{\lambda_i\}_{i \in I}$ is a set of non-zero eigenvalues of $\cT_k$ with $\lambda_1 \ge \lambda_2 \ge \cdots > 0$.
We refer to \cite[Theorem 4.49 and 4.51]{steinwart2008support} for the following form of Mercer's theorem.
\begin{thm}[Mercer's Theorem]
    \label{thm:mercer}
    Let $\{\psi_i\}_{i \in I}$ and $\{\lambda_i\}_{i \in I}$ be defined as above.
    Assume that $\cX$ is a compact metric space, $k: \cX \times \cX \rightarrow \RR$ is a continuous kernel, and $\nu$ is a finite Borel measure with $\supp \nu = \cX$.
    Then, we have the following expansion:
    \begin{equation*}
        k(x, x') = \sum_{i \in I} \lambda_i \psi_i(x)\psi_i(x'), \quad x, x' \in \cX.
    \end{equation*}
    Here, the convergence is absolute and uniform.
    Moreover, $\{\lambda_i^{1/2}\psi_i\}_{i \in I}$ forms an orthonormal basis of $\cH_k$.
    % $\cH_k$ has the following characterization as a subspace of $L_2(\lambda)$.
    % \begin{equation*}
    %     \cH_k = \left\{f = \sum_{i \in I} a_i \lambda_i ^{1/2}\psi_i: \|f\|_{\cH_k}^2 = \sum_{i \in I} a_i^2 < \infty\right\}.
    % \end{equation*}
    % That is, 
    % $\{\lambda_i^{1/2}\psi_i\}_{i \in I}$ forms an orthonormal basis of $\cH_k$.
\end{thm}

% We introduce a formal definition of polynomial and exponential eigendecay of the kernel $k$ 
% \citep[Definition 1]{vakili2021information}.
% \begin{dfn}[Polynomial and Exponential Eigendecay]
%     \label{def:eigendecay}
%     Let $\{\lambda_i\}_{i \in I}$ be the eigenvalues of the Mercer operator 
%     with $\lambda_1 \ge \lambda_2 \ge \cdots >0$ and $I \subseteq \ZZ_{\ge 1}$ as in Theorem \ref{thm:mercer}.
%     \begin{enumerate}
%         \item Let $\beta_p > 1$. We say $k$ has $\beta_p$ polynomial eigendecay 
%         if there exists a constant $C_p > 0$ such that $\lambda_n \le C_p n^{-\beta_p}$ for all $n \in I$.
%         \item Let $\beta_e > 0$. We say $k$ has $\beta_e$ exponential eigendecay if there exist constants
%         $C_{e, 1}, C_{e, 2} > 0$ such that $\lambda_n \le C_{e, 1} \exp(-C_{e, 2} n^{\beta_e})$ for all $n \in I$.
%     \end{enumerate}
% \end{dfn}

To discuss the theoretical property of \Cref{alg:qmc-kernel-ucb}, we introduce the following formal characteristic of eigendecay as defined in \citet[Definition~11]{chatterji2019online} and \citet[Definition~1]{vakili2021information}:
\begin{dfn}[Eigen-decay]
    \label{def:eigendecay}
    Let $\{\lambda_i\}_{i \in I}$ be the eigenvalues of the Mercer operator 
    with $\lambda_1 \ge \lambda_2 \ge \cdots >0$ and $I \subseteq \ZZ_{\ge 1}$ as in Theorem \ref{thm:mercer}.
    \begin{enumerate}
        \item Let $C_p > 0$ and $\beta_p > 1$ be constants. 
        We say a kernel $k$ has a $(C_p, \beta_p)$ polynomial eigendecay, if for all $n\in I$, we have $\lambda_n \leq C_p n^{-\beta_p}$.
        \item Let $C_{e,1}, C_{e,2} > 0$ and $\beta_e > 0$ be constants.
        We say a kernel $k$ has a $(C_{e,1}, C_{e,2}, \beta_{e})$ exponential eigendecay, if for all $n\in I$, we have $\lambda_n \leq C_{e, 1} \exp(-C_{e, 2} n^{\beta_{e}})$.
    \end{enumerate}
    If we ignore constants $C_p, C_{e, 1}, C_{e, 2}$, then we simply say $k$ has a $\beta_p$ polynomial eigendecay or $\beta_e$ exponential eigendecay.
\end{dfn}

We provide examples of eigendecay of kernels in the case when $\cX$ is a compact subset of $\RR^d$.
It is known that a Mat\'ern kernel with a smoothness parameter $\nu>0$ has $(2\nu + d)/d$ polynomial eigendecay 
\citep[Theorem 15]{santin2016approximation}.
If $k$ is an SE or RQ kernel, then $k$ has $1/d$ exponential eigendecay.
The latter statement follows from \citep[Theorem 15]{santin2016approximation} and \citep[Theorem 11.22]{wendland2004scattered}.
\end{comment}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Main Results}
\begin{comment}
We are now ready to present the main theorem on the regret bound of the proposed \Cref{alg:qmc-kernel-ucb}.
\end{comment}
Besides the assumptions introduced in Sec. \ref{sec:problem-formulation}, 
we make the following assumptions.
\label{subsec:statements}
\begin{assump}
    \label{assump:upper-bd}
    % Let $N(\epsilon, \delta)$ be 
    % the number of queries of quantum reward oracles of the QMC.
    % Then, we assume there exists $\qmclbc > 0$ such that $N(\epsilon, \delta) \ge \frac{\qmclbc}{\epsilon}\log (1/\delta)$,
    % i.e., $N(\epsilon, \delta) = \Theta\left(\frac{1}{\epsilon}\log\left(\frac{1}{\delta}\right)\right)$.
    (a) $k$ is a Mercer kernel, i.e., there exist a sequence of functions $\{\psi_i\}_{i \in I} \subset \cH_k$ and 
    positive numbers $\{\lambda_i\}_{i\in I}$ with $\lambda_1 \ge \lambda_2 \ge \cdots$ satisfying 
    the statement of Theorem \ref{thm:mercer}.
    (b) There exists a constant $\psibar > 0$ such that $\|\psi_{i}\|_{\infty} \le \psibar$ for any $i \in I$.
    (c) There exists a constant $\overline{k} > 0$ such that $\sup_{x, x' \in \cX}|k(x, x')| \le \overline{k}$.
    (d)
    % Let $\phi:\mathcal{X}\to\mathcal{H}_k$ be the feature map of the RKHS with kernel $k$ defined in Sec. \ref{sec:problem-formulation}.
    We assume that there exists $S>0$ such that
    \begin{math}
        % &\|\phi(x)\|_{\bullet} \leq L \quad (\forall x\in\mathcal{X}),\\
        \|\theta^{\ast}\|_{\mathcal{H}_k} \leq S,
    \end{math}
    where $\theta^{\ast}$ is the ground truth vector that determines the mean reward function $\mu$.
\end{assump}
Here, assumptions (a), (b), (c) are assumed in the previous work \citep{vakili2021information}
and the assumption (d) is a boundedness condition, which is standard in the bandit literature.


We note that along with the standard boundedness assumptions,
we assume the reward function $\mreward$ is normalized so that $\mreward(x) \in [0, 1]$ for any $x \in \cX$ in \Cref{subsec:reward-function}.
Since the standard boundedness assumptions imply that the reward function is bounded\footnote{By the standard boundedness assumptions,
the Cauchy--Schwarz inequality, and the reproducing property,
we see that
 $|\mu(x)| = |\langle \theta^\ast, \phi(x)\rangle| \le \|\theta^\ast\|_{\cH_k}
\|\phi(x)\|_{\cH_k} \le \|\theta^\ast\|_{\cH_k} \sqrt{\overline{k}}$.},
after normalization (or affine transformation) of rewards,
the assumption $\mreward(\cX)\subseteq [0, 1]$ can be satisfied.
% Since Proposition \ref{prop:conf-bd} implies the width of the confidence interval of the estimation $\muw_s(x)$ 
% is given as $O(\totnst \sigmaw_s(x))$ with a high probability, 
% it is essential to bound $m$,
% where $\totnst$ is the total number of stages of Algorithm \ref{alg:qmc-kernel-ucb}.


First, we introduce a regret upper bound of Algorithm \ref{alg:qmc-kernel-ucb} using the total number $\totnst$ of stages.
By Lemma \ref{lem:qmc}, Proposition \ref{prop:conf-bd}, and a standard proof technique for UCB-type algorithms, 
we can easily show that the cumulative regret of Algorithm \ref{alg:qmc-kernel-ucb} 
is bounded as follows:
\begin{prop}
    \label{prop:regret-using-m}
    Let $\totnst$ be the total number of stages and
    let $\tradeoff$ be the tradeoff parameter of Algorithm \ref{alg:qmc-kernel-ucb}.
    We assume that $\nstin \ge \totnst$, where $\nstin$ is the parameter of  
    Algorithm \ref{alg:qmc-kernel-ucb}.
    Then, with probability at least $1-\delta$, cumulative regret $R(T)$ of the algorithm is bounded by
    \begin{equation*}
        R(T) = O\left(m (\tradeoff^{-1} + \sqrt{m}) \log(\nstin/\delta) \right) .
    \end{equation*} 
\end{prop}
We remark that the total number $\totnst$ depends on $\tradeoff$.
If we take a large $\tradeoff$, then the number of oracle queries by the QMC method will be smaller,
and the total number $\totnst$ of stages will be larger.
By Proposition \ref{prop:regret-using-m}, we have to appropriately select $\tradeoff$ and 
provide an upper bound of $\totnst$.

The following proposition provides upper bounds of $\totnst$.
\begin{prop}
    \label{prop:totnst-ub}
    Assume $T > 1, \nstin \ge e$ and
    let $\totnst$ be the total number of stages of Algorithm \ref{alg:qmc-kernel-ucb}.
    \begin{enumerate}
        \item Suppose that $k$ has a $(C_p, \beta_p)$ polynomial eigendecay.
        We take $\eta$ as 
        \begin{equation*}
            \eta = T^{-\frac{1}{1 + \beta_p}}.
        \end{equation*}
        Then, there exists a constant $c_p > 0$ depending only on 
        $C_p, \beta_p, \reg, \overline{k}, \overline{\psi}$ satisfying the following inequality:
        \begin{equation*}
            \totnst \le c_p T^{\frac{2}{1 + \beta_p}} \log^{1-\beta_p^{-1}}(T).
        \end{equation*}
        \item Suppose that $k$ has a $(C_{e, 1}, C_{e, 2}, \beta_e)$ exponential eigendecay.
        We take $\eta = 1$.
        Then, there exists a constant $c_e > 0$ depending only on 
        $C_{e, 1}, C_{e, 2}, \beta_e, \reg, \overline{k}, \overline{\psi}$ 
        satisfying the following inequality:
        \begin{equation*}
            \totnst \le c_e \log^{1 + 1/\beta_e}\left(T\right).
        \end{equation*}
    \end{enumerate}
\end{prop}
In Proposition \ref{prop:totnst-ub}, 
in the case of the polynomial eigendecay, we select $\eta$ so that $\eta^{-1}$ and the upper bound of 
$\sqrt{m}$ have 
the same order of $T$ and in the case of the exponential decay, we select $\eta = 1$.
We note that upper bounds provided in Proposition \ref{prop:totnst-ub} have similarity to upper bounds of the maximum 
information gain $\gamma_T$ \citep[Corollary 1]{vakili2021information}.
Actually, \cite{dai2023quantum} showed that the total number $m$ of stages with the tradeoff parameter $\eta = 1$
has the same bound as $\gamma_{T^2}$.
Due to the appropriate choice of the tradeoff parameter, our result (Proposition \ref{prop:totnst-ub})
improves their result $\widetilde{O}(T^{2/\beta_p})$ in the case of the polynomial eigendecay.

% More precisely, although upper bounds of $m$ depends on the constant $\qmclbc$,
% our proof (or Proposition \ref{prop:totnst-ub}) shows that $m$ has the same upper bound as $\gamma_{T^2}$
% if $k$ has polynomial or exponential eigendecay.

Therefore, 
by Proposition \ref{prop:totnst-ub} and Proposition \ref{prop:regret-using-m},
we obtain the following theorem, which is the main result of this paper.
\begin{thm}[Upper Bounds of Algorithm \ref{alg:qmc-kernel-ucb}]
    \label{thm:upper-bd}
    Assume $T > 1$.
    Suppose that Assumption \ref{assump:upper-bd} holds.
    \begin{enumerate}
        \item Suppose that the kernel $k$ has a $\beta_p$ polynomial eigendecay. 
        Let $\eta$ and $c_p$ be as in Proposition \ref{prop:totnst-ub}.
        Then, 
        with probability at least $1-\delta$,
        the cumulative regret of Algorithm \ref{alg:qmc-kernel-ucb} with 
        $\nstin = c_p \eta^{-2}$ is bounded as 
        \begin{equation*}
            R(T) = O\left(
                T^{\frac{3}{ 1 + \beta_p}}
                \log^{3(1 - \beta_p^{-1})/2}(T)
            \log\left(\frac{T}{\delta}
            \right)\right).
        \end{equation*}
        \item Suppose that the kernel $k$ has a $\beta_e$ exponential eigendecay.
        Then with probability at least $1 - \delta$,
        the cumulative regret of Algorithm \ref{alg:qmc-kernel-ucb} with 
        $\nstin =  c_e \log^{1 + 1/\beta_e}\left(T\right)$ and $\tradeoff = 1$
        is bounded as 
        \begin{equation*}
            R(T) = O \left( 
            \log^{3(1 + \beta_e^{-1})/2} (T) \log\left(\frac{\log T }{\delta} \right)
            \right),
        \end{equation*}
        where $c_e$ is the constant provided in Proposition \ref{prop:totnst-ub}.
    \end{enumerate}
\end{thm}
Theorem \ref{thm:upper-bd} indicates that our regret upper bound 
exponentially improves
that of the classical algorithms \citep{valko2013finite,vakili2021information} if $k$ has an exponential eigendecay.
Moreover, our regret upper bound is better than the classical bounds
if $k$ has a polynomial eigendecay with large $\beta_p$.
\strevision{
Moreover, we note that our regret upper bound improves that of \citet{dai2023quantum} in the case of polynomial eigendecay. 
More precisely, in the case of a $\beta_p$-polynomial eigendecay, while the regret upper bound of \citet{dai2023quantum} is given as 
$\widetilde{O}\left( T^{\frac{3}{\beta_p}} \log\left(\frac{1}{\delta} \right)\right)$,
that of ours is $\widetilde{O}\left( T^{\frac{3}{1 + \beta_p}} \log\left(\frac{1}{\delta} \right)\right)$.
We also note that our regret bound is better than 
the regret bound $\widetilde{O}(T^{1/2 + 1/\beta_p})$ of GP-UCB \citep{srinivas2010gaussian} whenever GP-UCB has sublinear regret 
(i.e., $\beta_p > 2$), while that of Q-GP-UCB \citep{dai2023quantum} is not necessarily better than GP-UCB.
}

% Theorem \ref{thm:upper-bd} assumes that we know the constants $c_p, c_e$ explicitly. 
% However, these constants depend on parameters of the kernel $k$ (e.g., length scales) and 
% an explicit computation would be difficult. 
% We note that by Proposition \ref{prop:regret-using-m}  
% the cumulative regret $R(T)$ with the parameter $\nstin = T$ is bounded as $O(m^2 \log (T/\delta))$
% since $T$ is a trivial bound of the total number of stages.
% In the case of exponential eigendecay, this regret bound incurs an additional $\log T$ factor compared to 
% that provided in Theorem \ref{thm:upper-bd}.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Sketch of the Proof}
\label{subsec:sketch-proof}
% In this section, we provide a sketch of the proof of Theorem \ref{thm:upper-bd}.
% Similar to the linear case \citep{wan2023quantum}, we divide our proof into two stages. 
% First, assuming the total number $\totnst$ of stages of Algorithm \ref{alg:qmc-kernel-ucb} is at most $\nstin$, we show that the cumulative regret is bounded as $R(T) = O(m^2 \log (M/\delta))$. 
As previously mentioned, 
providing upper bounds of the total number of stages $\totnst$ is a key step in proving the main result, Theorem~\ref{thm:upper-bd}.
In this section, we provide a sketch of the proof of Proposition \ref{prop:totnst-ub}.
Following \citep{wan2023quantum}, we first relate $m$ to the log-determinant $\qinfgain$ of the positive operator 
$\reg^{-1}V_{m}$.
% TODO: rewrite this 
Then, by considering a projection onto a finite dimensional subspace of $\cH_k$,
we provide an upper bound of $\qinfgain$.


% By Lemma \ref{prop:regret-using-m}, to prove Theorem \ref{thm:upper-bd}
% it is sufficient to provide an upper bound of the total number of stages $\totnst$.
% In the linear case, \cite{wan2023quantum} proved that the total number of bounded as $m = O(D\log (T))$,
% where $D$ is the dimension of the space (RKHS).
% However, if the decay rate of eigenvalues of the Mercer operator $\cT_k$ is fast, then
% the uncertainty $\epsilon_s$ in the estimation of $\mu(x_s)$ decreases fast as 
% the stage $s$ proceeds.
% Since in each stage QMC calls reward oracles $O(1/\epsilon_s)$ times, 
% the total number of stages could be much smaller than $D\log(T)$ if the decay rate is fast.
% Therefore, the bound $m = O(D\log(T))$ by \citep{wan2023quantum} is too large for our objective.
% We resolve this issue by considering a projection into a smaller dimensional subspace of the RKHS $\cH_k$.
% Such a proof technique is also used for analysis of upper bounds of maximum information gain
% \citep{vakili2021information}. 
% In the following, we adapt their proof technique to our setting and provide upper bounds of the total number of stages.


Let $K_\totnst, W_\totnst  \in \RR^{\totnst \times \totnst}$ be matrices and 
$\pwp_m: \cH_k \rightarrow \cH_k$ be the positive semi-definite operator defined in Sec.~\ref{sec:method}, i.e.,
\begin{equation*}
    \pwp_m = \sum_{s=1}^m \epsilon_s^{-2} \phi(x_s)\phi(x_s)^\trn.
\end{equation*}
We define $\qinfgain > 0$ by 
\begin{equation}
    \label{eq:gamma-def} 
    \begin{split}
    \qinfgain
    &= \log \det\left(I_\totnst+\reg^{-1}K'\right) \\
    &= \log \det \left(I_{\cH_k} + \reg^{-1} \pwp_m \right),
    \end{split}
\end{equation}
where $K' = W_\totnst^{1/2}K_\totnst W_\totnst^{1/2}$.
% $V_{m}|_{\cG_m}: \cG_m \rightarrow \cG_m$ is the restriction of $V_{m}$ on $\cG_m$.
% We note that the RHS \eqref{eq:gamma-def} is well-defined since $\cG_m$ is finite dimensional,
We note that \eqref{eq:gamma-def} holds by the Weinstein-Aronszajn identity.
If the weight matrix is the identity matrix, then the definition of $\qinfgain$ is almost identical to 
that of the maximum information gain $\infgain_T$ defined as 
\begin{math}
    \infgain_T = \sup_{\xi_1, \dots, \xi_T \in \cX}\log \det \left(I_T + K(\boldsymbol{\xi}) \right),
\end{math}
where $K(\boldsymbol{\xi}) = (k(\xi_i, \xi_j))_{1\le i, j \le T} \in \RR^{T \times T}$.
However, unlike $\gamma_T$, $\qinfgain$ depends on the matrix size $\totnst$ trivially.
More precisely, it can be proved that $\qinfgain = \totnst \log 2$ (Lemma \ref{lem:gamma-m}).
Therefore, to bound $\totnst$, it is sufficient to bound $\qinfgain$.
If the RKHS $\cH_k$ is finite dimensional, \cite[Lemma 2]{wan2023quantum} provides an upper bound of 
$\qinfgain$ of the form $O(\dim \cH_k \log(T))$. 
However, this bound is vacuous since $\dim \cH_k$ can be infinite.
A similar issue arises when deriving an upper bound of the maximum information gain $\infgain_T$.
\cite{vakili2021information} resolved the issue by considering a projection of $\cH_k$ to a finite dimensional subspace
and we take a similar approach.

We recall that a set of functions $\{\lambda_i^{1/2} \psi_i\}_{i \in I}$ forms an orthonormal basis of the RKHS $\cH_k$
(Theorem \ref{thm:mercer}),
where $I=\{1, 2, \cdots, \dim \cH_k \}$ if $\cH_k$ is finite dimensional and 
$I = \ZZ_{\ge 1}$ otherwise. 
For a positive integer $D$, 
we define an orthogonal projection $\cP_D: \cH_k \rightarrow \cH_k$
by $f \mapsto \sum_{i=1}^{D} \langle f, \lambda_i^{1/2}\psi_i \rangle_{\cH_k} \lambda_i^{1/2}\psi_i$.
Then, $\cP_D(f)$ gives an approximation of $f$ in the finite dimensional subspace $\cP_D(\cH_k)$.
To bound $\qinfgain$, one can mimic the proof of \citep[Theorem 3]{vakili2021information};
however, we provide a more general result.
Below, we show that our upper bounds of $\qinfgain$ (Corollary \ref{cor:gamma-bound}) can be derived from 
the following proposition.
We also note that the proof of the following proposition provides a simple alternative proof of \citep[Theorem 3]{vakili2021information}.
\begin{prop}
    \label{prop:log-det-ineq}
    Let $\pi: \cH_k \rightarrow \cH_k$ be a projection operator of finite rank
    and $\frankop: \cH_k \rightarrow \cH_k$ be a positive semi-definite operator of finite-rank.
    We assume that the range (image) $\Ran \pi$ of $\pi$ is $D$-dimensional with $D < \infty$.
    Then, the following inequality holds:
    \begin{multline*}
        \log \det (I + U)  
        \le  D \log \left(1 + \frac{\Tr U \pi}{D}\right) + 
       \Tr U(I - \pi).
    \end{multline*}
\end{prop}

We apply Proposition \ref{prop:log-det-ineq} to the case when 
$\pi = \cP_D, U = \reg^{-1} \pwp_m$.
We can bound $\Tr \pwp_m \cP_D$ by 
$\Tr \pwp_m \cP_D \le \Tr \pwp_m = \Tr W_m^{1/2} K_m W_m^{1/2} \le \overline{k}\Tr W_m$.
To compute $\Tr \pwp_m(I - \cP_D)$, it is sufficient to compute 
$t(i, x) := \lambda^{1/2}_i \psi_i^\trn \phi(x) \phi(x)^\trn \lambda_i^{1/2}\psi_i$ for each $i \in I$ and $x \in \cX$.
By the reproducing property, we have $t(i, x) = \lambda_i \psi_i^2(x) \le \lambda_i \psibar^2$.
Therefore, 
\begin{align*}
    &\Tr \pwp_m (I - \cP_D)  = \sum_{i > D} \sum_{s = 1}^m\epsilon_s^{-2}t(i, x_s)\\
    &\le \sum_{i > D}\lambda_i \psibar^2  \sum_{s=1}^m\epsilon_s^{-2} = 
    \delta_D \Tr W_m,
\end{align*}
where $\delta_D$ is defined as $\sum_{i \in I, i> D} \lambda_i \psibar^2$.
Thus, we obtain the following.
\begin{cor}
    \label{cor:gamma-bound}
    We define $E$ by $\sum_{s=1}^{m}\epsilon_s^{-2}$, i.e., $E = \Tr W_m$.
    Then, for any $D \in \ZZ_{\ge 1}$, the following inequality holds:
   \begin{equation*} 
        \qinfgain \leq D\log\left(1+\frac{\overline{k}}{D\reg}E \right)+
        \frac{\delta_D}{\reg}E.
   \end{equation*} 
\end{cor}
Then, by Corollary \ref{cor:gamma-bound} and using the same argument as the proof of \cite[Corollary 1]{vakili2021information},
we can bound $\qinfgain$ in terms of $E$.
Since the total number of oracle queries of Algorithm \ref{alg:qmc-kernel-ucb} is limited up to $T$,
we have $T \gtrsim \eta^{-1}\sum_{s=1}^m \epsilon_s^{-1} \ge \eta^{-1}\sqrt{E}$.
Therefore, we can bound $\qinfgain$ in terms of $\eta T$.
By selecting $\eta^{-1}$ to be of the same order as an upper bound of $\sqrt{\qinfgain}$,
we can provide an upper bound of $\qinfgain$ in terms of $T$.
By $\qinfgain = \totnst \log 2$ (Lemma \ref{lem:gamma-m}), we have the assertion of Proposition \ref{prop:totnst-ub}.