
Corollaries~\ref{Corollary: Order greater than 2} and~\ref{Corollary: Finite Dimension} demonstrate that if the Vendi score order $\alpha$ is greater than $2$ or the kernel feature map dimension $d$ is finite, then the Vendi score can converge to the population Vendi with $n=O(d)$ samples. However, the theoretical results do not apply to an order $1\le \alpha < 2$ when the kernel map dimension is infinite, e.g., the original order-1 Vendi score \citep{friedman_vendi_2023} with a Gaussian kernel. Our numerical observations indicate that a standard sample size below 20000 could be insufficient for the convergence of the order-1 Vendi score (Figure~\ref{fig:kernel_convergence}). To address this gap, here we define the truncated Vendi score by truncating the eigenspectrum of the kernel matrix, and then show that the existing kernel approximation algorithms for the Vendi score concentrate around this modified Vendi score.
\begin{definition}\label{Definition: population Vendi}
Consider the normalized kernel matrix $\frac{1}{n}K$ of samples $x_1,\ldots ,x_n$. Then, for an integer parameter $t \ge 1$, consider the top-$t$ eigenvalues of $\frac{1}{n}K$: $\lambda_1\ge \lambda_2\ge \cdots \ge \lambda_t$. Define $S_t = \sum_{i=1}^t \lambda_i$ and consider the truncated probability sequence $[{\lambda}^{\scriptscriptstyle \text{trunc}}_1,\ldots ,{\lambda}^{\scriptscriptstyle \text{trunc}}_t]$, obtained by spreading the residual mass $1-S_t$ uniformly over the $t$ entries so that they sum to one:
\[
  {\lambda}^{\scriptscriptstyle \text{trunc}}_i= \lambda_i + \frac{1-S_t}{t}\quad \text{for } i=1,\ldots ,t.
\]
We define the order-$\alpha$  $t$-truncated Vendi score as
\begin{equation*}
    \mathrm{Vendi}_\alpha^{(t)}(x_1,\ldots ,x_n) := \exp\Bigl(\frac{1}{1-\alpha}\log\Bigl(\sum_{i=1}^t \bigl({\lambda}^{\scriptscriptstyle \text{trunc}}_i\bigr)^{\alpha}\Bigr)\Bigr).
\end{equation*}
Notably, for order $\alpha=1$, the $t$-truncated Vendi score is:
\begin{equation*}
    \mathrm{Vendi}_1^{(t)}(x_1,\ldots ,x_n) := \exp\Bigl(\sum_{i=1}^t {\lambda}^{\scriptscriptstyle \text{trunc}}_i\log\frac{1}{{\lambda}^{\scriptscriptstyle \text{trunc}}_i}\Bigr)
\end{equation*}
\end{definition}

\begin{remark}\label{Remark: truncated Vendi statistic}
The above definition of the $t$-truncated Vendi score leads to the definition of the $t$-truncated population Vendi $\mathrm{Vendi}_\alpha^{(t)}(P_X)$, where the same truncation process is applied to the eigenspectrum of the population kernel covariance matrix $\widetilde{C}_X$.
 Note that the truncated Vendi score is a statistic, i.e., a function of the random samples $x_1,\ldots ,x_n$, whereas the truncated population Vendi is deterministic and a characteristic of the population distribution $P_X$.
\end{remark}
In Definition~\ref{Definition: population Vendi}, we find the probability model with the minimum $\ell_2$-norm difference from the $t$-dimensional vector $[\lambda_1,\ldots ,\lambda_t]$ that contains only the top-$t$ eigenvalues. Then, we use the order-$\alpha$ entropy of this probability model to define the order-$\alpha$ $t$-truncated population Vendi. Our next result shows that, for every kernel function, this population quantity can be estimated by the $t$-truncated Vendi score using $n=O(t)$ samples.
\begin{theorem}\label{Thm: truncated Vendi guarantee}
Consider the setting in Theorem~\ref{Thm: eigenvalue convergence}. Then, for every $n\ge 2+8\log (1/\delta)$, the difference between the $t$-truncated population Vendi and the empirical $t$-truncated Vendi score of samples $x_1,\ldots ,x_n$ is bounded as follows with probability at least $1-\delta$:
\begin{align*}
    &\Bigl\vert\, \mathrm{Vendi}^{(t)}_\alpha\bigl(x_1,\ldots,x_n\bigr)^{\frac{1-\alpha}{\alpha}} - \mathrm{Vendi}^{(t)}_\alpha\bigl(P_X\bigr)^{\frac{1-\alpha}{\alpha}}\, \Bigr\vert \\
    \le\:  &\sqrt{\frac{32\max\{1,t^{2-\alpha}\}\log\bigl(2/\delta\bigr)}{n}}
\end{align*}
\end{theorem}
\begin{proof}
    We defer the proof to the Appendix.
\end{proof}
As implied by Theorem~\ref{Thm: truncated Vendi guarantee}, the $t$-truncated population Vendi can be estimated using $O(t)$ samples, i.e., the truncation parameter $t$ plays the role of the bounded dimension of a finite-dimensional kernel map. Our next theorem shows that the estimates produced by the Nyström method \citep{friedman_vendi_2023} and the FKEA method \citep{ospanov_fkea_2024}, which reduce the computational cost of Vendi scores, have a bounded difference from the truncated population Vendi.

\begin{theorem}\label{Thm: Nyström, FKEA}
    Consider the setting of Theorem~\ref{Thm: eigenvalue convergence}. (a) Assume that the kernel function is shift-invariant and the FKEA method with $t$ random Fourier features is used to approximate the Vendi score. Then, for every $\delta$ satisfying $n\ge 2+8\log (1/\delta)$, with probability at least $1-\delta$: 
    \begin{align*}
    &\Bigl\vert \mathrm{FKEA}\text{-}\mathrm{Vendi}^{(t)}_\alpha\bigl(x_1,\ldots,x_n\bigr)^{\frac{1-\alpha}{\alpha}}- \mathrm{Vendi}^{(t)}_\alpha\bigl(P_X\bigr)^{\frac{1-\alpha}{\alpha}} \Bigr\vert \\
    &\, \le \sqrt{\frac{128\max\{1,t^{2-\alpha}\}\log\bigl(3/\delta\bigr)}{\min\{n,t\}}}
\end{align*}
(b) Assume that the Nyström method is applied with parameter $t$ for approximating the kernel function. Then, if for some $r\ge 1$, the $r$th-largest eigenvalue of the kernel matrix $K$ satisfies ${\lambda}_{r} \le \tau$ and $t\ge r\tau \log(n)$, the following holds with probability at least $1-\delta - 2n^{-3}$:
\begin{align*}
    &\Bigl\vert \mathrm{ Nystrom}\text{-}\mathrm{Vendi}^{(t)}_\alpha\bigl(x_1,\ldots,x_n\bigr)^{\frac{1-\alpha}{\alpha}} - \mathrm{Vendi}^{(t)}_\alpha\bigl(P_X\bigr)^{\frac{1-\alpha}{\alpha}}\Bigr\vert  \\
    & \ \le \mathcal{O}\Bigl(\sqrt{\frac{\max\{1,t^{2-\alpha}\}\log\bigl(2/\delta\bigr)t\tau^2\log(n)^2}{n}}\Bigr)
\end{align*}
\end{theorem}
\begin{proof}
    We defer the proof to the Appendix.
\end{proof}
%Theorem~\ref{Thm: Nyström, FKEA} shows FKEA and Nyström methods with parameter $t$ lead to estimating $t$-truncated population Vendi.%, connecting the defined statistic to the existing algorithms for computing the Vendi score. 

