\newpage
\onecolumn

\appendix
\section{Notation Table}
\label{appendix-table}

We summarize the symbols frequently used throughout the main paper in Table~\ref{tab-notation}.

\input{tables/notations}

\section{Missing Proofs}
\label{appendix-proofs}

We provide the proofs of theorems and lemmas that are missing from the main paper.

\subsection{Proof of Lemma~\ref{lm-grr-mldp}}

\begin{proof}
Let $H = [h_1, \cdots, h_L]$, $H' = [h'_1, \cdots, h'_L] \in [1, R]^{L}$ be two length-$L$ integer sequences and $\bm{y} = [y_1, \cdots, y_L] \in [1, R]^{L}$ be any possible output sequence of $\mathcal{M}_{\mathrm{GRR}}$.
We define a function $\phi(h, h')$ to indicate whether two integers in the same position of $H$ and $H'$ differ, that is,
\begin{displaymath}
  \phi(h, h') =
  \begin{cases}
    1 & \text{if } h \neq h', \\
    0 & \text{otherwise}.
  \end{cases}
\end{displaymath}
Based on the procedure of the GRR mechanism, we have
\begin{align*}
  & \Pr[\mathcal{M}_{\mathrm{GRR}}(H) = \bm{y}] = \prod_{i=1}^{L} p^{1 - \phi(y_i, h_i)} q^{\phi(y_i, h_i)}, \\
  & \Pr[\mathcal{M}_{\mathrm{GRR}}(H') = \bm{y}] = \prod_{i=1}^{L} p^{1 - \phi(y_i, h'_i)} q^{\phi(y_i, h'_i)},
\end{align*}
where $p = \frac{e^{\gamma}}{e^{\gamma} + R - 1}$ and $q = \frac{1}{e^{\gamma} + R - 1}$.

Consequently, we have
\begin{multline*}
    \ln\big(\tfrac{\Pr[\mathcal{M}_{\mathrm{GRR}}(H)= \bm{y}]}{\Pr[\mathcal{M}_{\mathrm{GRR}}(H') = \bm{y}]}\big) = \ln\Big(\prod_{i=1}^{L} (\tfrac{p}{q})^{\phi(y_i, h'_i) - \phi(y_i, h_i)}\Big) \leq \ln\Big(\prod_{i=1}^{L} e^{\gamma \cdot \phi(h_i, h'_i)}\Big) = \ln(e^{\gamma d_{\mathrm{hash}}(H, H')}) = \gamma d_{\mathrm{hash}}(H, H')
\end{multline*}
because $\phi(y_i, h'_i) - \phi(y_i, h_i) \leq \phi(h_i, h'_i)$ for any $i \in \{1, \cdots, L\}$ and $d_{\mathrm{hash}}(H, H') = \sum_{i=1}^{L} \phi(h_i, h'_i)$.
Therefore, we prove that $\mathcal{M}_{\mathrm{GRR}}$ on any integer sequence of length $L$ and range $R$ provides $(\gamma d_{\mathrm{hash}}, 0)$-mLDP.
\end{proof}

\subsection{Proof of Lemma~\ref{lm-EXP-bound}}

\begin{proof}
  According to \citet{DatarIIM04}, the collision probability of $\bm{x}$ and $\bm{x}'$ under the $2$-stable LSH scheme is $k(\bm{x}, \bm{x}')$ in Eq.~\ref{eq-l2kernel}.
  After rehashing the original hash values randomly in the range $[1, R]$, the probability that two non-colliding hash values collide is $\frac{1}{R}$.
  Meanwhile, any colliding hash values must still collide after rehashing.
  Consequently, the collision probability of $h(\bm{x})$ and $h(\bm{x}')$ after rehashing is $k(\bm{x}, \bm{x}') + \frac{1}{R} (1 - k(\bm{x}, \bm{x}'))$.
  For the hash values $h(\bm{x})$ and $h(\bm{x}')$ after rehashing, we have
  $\Pr_{h \sim \mathcal{H}}[h(\bm{x}) \neq h(\bm{x}')] = \tfrac{R - 1}{R} \cdot (1 - k(\bm{x}, \bm{x}'))$.
  That is, a random variable that indicates if $h(\bm{x}) \neq h(\bm{x}')$ follows a Bernoulli distribution with success probability $\frac{R - 1}{R} \cdot (1 - k(\bm{x}, \bm{x}'))$.
  Based on the definition of $d_{\mathrm{hash}}(\cdot, \cdot)$ and the fact that the LSH functions are independent of each other, $X$ follows a binomial distribution $\mathcal{B}\big(L, \frac{R - 1}{R} \cdot (1-k(\bm{x},\bm{x}'))\big)$.
\end{proof}

\subsection{Proof of Theorem~\ref{thm-mldp}}
\label{appendix-thm-mldp}

\begin{mdframed}
\begin{proposition}[Chernoff Bound \citep{Chernoff52}]
\label{chernoff}
  Let $X$ be a random variable drawn from a binomial distribution $\mathcal{B}(L, p)$. Then, for all $0 < s < 1 - p$, we have
  $\Pr\left[X \geq L(p + s)\right] \leq \exp\big(- L \cdot D_{\mathrm{KL}}(p + s \parallel p)\big) \leq \exp(-2Ls^2)$,
  where $D_{\mathrm{KL}}(p + s \parallel p) = (p + s) \ln{\frac{p + s}{p}} + (1 - p - s) \ln{\frac{1 - p - s}{1 - p}}$.
\end{proposition}
\end{mdframed}

\begin{proof}
  Since $X \sim \mathcal{B}\big(L, \frac{R - 1}{R} \cdot (1 - k(\bm{x},\bm{x}'))\big)$ by Lemma~\ref{lm-EXP-bound}, we have the following inequality from Proposition~\ref{chernoff}:
  \begin{equation}\label{eq-chernoff}
    \Pr\big[X \geq L\big(\tfrac{R - 1}{R} \cdot (1-k(\bm{x},\bm{x}')) + s \big) \big] \leq \exp(-2 L s^2).
  \end{equation}
  By setting $s = \sqrt{\frac{\ln(1/\eta)}{2L}}$ in Eq.~\ref{eq-chernoff} for any $\eta \in (0, 1)$, we have
  $\Pr[\gamma X \geq \gamma L(\tfrac{R - 1}{R} \cdot (1-k(\bm{x},\bm{x}')) + \textstyle \sqrt{\frac{\ln(1/\eta)}{2L}})] \leq \eta$.
  Since $1 - k(\bm{x},\bm{x}') \leq \frac{c d(\bm{x}, \bm{x}')}{\omega}$ for any $c \geq 0.8$, we further have
  \begin{equation}\label{eq-chernoff-2}
    \Pr\big[\gamma X \geq \gamma L\big(\tfrac{c (R - 1) d(\bm{x}, \bm{x}')}{\omega R} + \textstyle \sqrt{\frac{\ln(1/\eta)}{2L}} \big) \big] \leq \eta.
  \end{equation}
  Since $\mathcal{L}_{\bm{x}, \bm{x}'} \leq \gamma X$ as shown in Eq.~\ref{eq-log-loss}, we obtain that
  \begin{equation}\label{eq-chernoff-3}
    \Pr\big[\mathcal{L}_{\bm{x}, \bm{x}'} \geq \tfrac{\gamma c L(R - 1) d(\bm{x}, \bm{x}')}{\omega R} + \gamma \textstyle \sqrt{\frac{L\ln(1/\eta)}{2}} \big] \leq \eta.
  \end{equation}
  Based on Definition~\ref{def-mldp}, we prove that the LSH+GRR mechanism provides $(d_{\chi}, \eta)$-mLDP, where $d_{\chi}(\bm{x}, \bm{x}') = \frac{\gamma c L(R - 1)}{\omega R} \cdot d(\bm{x}, \bm{x}') + \gamma\sqrt{\frac{L\ln(1/\eta)}{2}}$, for any $c \geq 0.8$ and $\eta \in (0, 1)$.
\end{proof}

\subsection{Proof of Corollary~\ref{col-mldp}}

\begin{proof}
  When applying the Chernoff bound with the Kullback--Leibler divergence in Proposition~\ref{chernoff} to $X$, we have
  \begin{equation}\label{eq-chernoff-kl}
    \Pr[X \geq L(p + s)] \leq \exp\big(-L \cdot D_{\mathrm{KL}}(p + s \parallel p)\big).
  \end{equation}
  By applying the same procedure as for the proof of Theorem~\ref{thm-mldp} on Eq.~\ref{eq-chernoff-kl}, we obtain
  $$\Pr[\mathcal{L}_{\bm{x}, \bm{x}'} \geq \gamma L(\tfrac{c (R - 1) d(\bm{x}, \bm{x}')}{\omega R} + s) ] \leq \exp(-L \cdot D_{\mathrm{KL}}(p + s \parallel p))$$
  and conclude the proof.
\end{proof}

\subsection{Proof of Corollary~\ref{thm-ldp}}

\begin{proof}
  For any $\bm{x}, \bm{x}' \in \mathbb{R}^m$, we have $d_{\mathrm{hash}}(H(\bm{x}), H(\bm{x}')) \leq L$.
  According to \citet{ChatzikokolakisABP13}, if a mechanism provides $(\gamma d_{\mathrm{hash}}, 0)$-mLDP, then it will also provide $\gamma \max_{H, H'} d_{\mathrm{hash}}(H, H')$-LDP.
  Therefore, the LSH+GRR mechanism satisfies $\gamma L$-LDP, which equals the total privacy budget of using the GRR mechanism with a privacy parameter $\gamma$ sequentially $L$ times.
\end{proof}

\subsection{Proof of Lemma~\ref{lm-unbiasedness}}

\begin{proof}
  Due to the relationships between the $2$-stable LSH scheme and the $l_2$-LSH kernel, the original collision probability $p_0(\bm{x}, \bm{q})$ of $\bm{x}$ and $\bm{q}$ before rehashing is exactly $k(\bm{x}, \bm{q})$.
  As already analyzed in the proof of Lemma~\ref{lm-EXP-bound}, the collision probability $p_1(\bm{x}, \bm{q})$ of $\bm{x}$ and $\bm{q}$ after rehashing becomes
  \begin{equation}\label{collide-rehash}
    p_1(\bm{x}, \bm{q}) =  k(\bm{x}, \bm{q}) + \tfrac{1}{R} (1 - k(\bm{x}, \bm{q})).
  \end{equation}
  Next, if $\bm{x}$ and $\bm{q}$ collide, their collision probability after performing the GRR mechanism will be $\frac{e^{\gamma}}{e^{\gamma} + R - 1}$; otherwise, their probability of collision after performing the GRR mechanism will be $\frac{1}{e^{\gamma} + R - 1}$.
  Therefore, the collision probability $p_2(\bm{x}, \bm{q})$ of $\bm{x}$ and $\bm{q}$ after performing the GRR mechanism is
  \begin{equation}\label{collide-grr}
    p_2(\bm{x}, \bm{q}) = \tfrac{p_1(\bm{x}, \bm{q}) \cdot e^{\gamma}}{e^{\gamma} + R - 1} + \tfrac{1 - p_1(\bm{x}, \bm{q})}{e^{\gamma} + R - 1}.
  \end{equation}
  Based on Eqs.~\ref{collide-rehash} and~\ref{collide-grr}, we have
  \begin{equation}\label{eq-correction}
    \begin{aligned}
      k(\bm{x}, \bm{q})
      & = \big(\tfrac{(e^\gamma + R - 1) \cdot p_2(\bm{x}, \bm{q}) - 1}{e^\gamma - 1} - \tfrac{1}{R} \big) \cdot \tfrac{R}{R - 1} \\
      & = \tfrac{(e^\gamma + R - 1) (R \cdot p_2(\bm{x}, \bm{q}) - 1)}{(e^\gamma - 1) (R - 1)}.
    \end{aligned}
  \end{equation}
  According to the sketch construction procedure, $\mathbb{E}[\mathcal{S}_\mathcal{D}[i, h_i(\bm{q})]] = \sum_{\bm{x} \in \mathcal{D}} p_2(\bm{x}, \bm{q})$ by the linearity of expectation.
  By replacing $p_2(\bm{x}, \bm{q})$ in Eq.~\ref{eq-correction} with $\mathcal{S}_\mathcal{D}[i, h_i(\bm{q})]$, summing up the results for all $\bm{x} \in \mathcal{D}$, and considering Algorithm~\ref{alg-2}, we have
  \begin{equation}\label{eq-unbiased}
    \mathbb{E}\big[\widehat{\mathcal{S}}_\mathcal{D}[i, h_i(\bm{q})]\big] = \textstyle \sum_{\bm{x} \in \mathcal{D}} k(\bm{x}, \bm{q}) = n \mathrm{KDE}_{\mathcal{D}}(\bm{q}).
  \end{equation}
  According to Algorithm~\ref{alg-2}, we have
  \begin{equation}\label{eq-var}
      \mathrm{Var}\big[ \widehat{\mathcal{S}}_\mathcal{D}[i, h_i(\bm{q})] \big] = \big(\tfrac{(e^\gamma + R - 1)R}{(e^\gamma - 1)(R-1)}\big)^2 \cdot \mathrm{Var}\big[ \mathcal{S}_\mathcal{D}[i, h_i(\bm{q})]\big].
  \end{equation}
  To compute $\mathrm{Var}\big[ \mathcal{S}_\mathcal{D}[i, h_i(\bm{q})] \big]$, we define a random variable $I(\bm{x}, \bm{q})$ to indicate if $\widehat{h}_i(\bm{x}) = h_i(\bm{q})$, that is,
  \begin{equation}
    I(\bm{x}, \bm{q}) =
    \begin{cases}
    1 & \text{if } \widehat{h}_i(\bm{x}) = h_i(\bm{q}),\\
    0 & \text{otherwise}.
    \end{cases}
  \end{equation}
  We can see that $I(\bm{x}, \bm{q})$ is a Bernoulli variable with success probability $p_2(\bm{x}, \bm{q})$ and $ \mathcal{S}_\mathcal{D}[i, h_i(\bm{q})] = \sum_{\bm{x} \in \mathcal{D}} I(\bm{x}, \bm{q})$.
  Since $\mathrm{Var}[X] = \mathbb{E}[X^2] - \mathbb{E}[X]^2$ for any random variable $X$,
  \begin{equation}\label{eq-var-1}
    \mathrm{Var}\big[ \mathcal{S}_\mathcal{D}[i, h_i(\bm{q})] \big] \leq \mathbb{E}\big[ (\textstyle \sum_{\bm{x} \in \mathcal{D}} I(\bm{x}, \bm{q}))^2 \big].
  \end{equation}
  Then, we acquire that
  \begin{equation}\label{eq-var-2}
    \begin{aligned}
      & \mathbb{E}\big[ (\textstyle \sum_{\bm{x} \in \mathcal{D}} I(\bm{x}, \bm{q}))^2 \big] = \textstyle \sum_{\bm{x} \in \mathcal{D}}\sum_{\bm{x}' \in \mathcal{D}} \mathbb{E}\big[I(\bm{x}, \bm{q}) I(\bm{x}', \bm{q}) \big] \\
      & \leq \textstyle \sum_{\bm{x} \in \mathcal{D}}\sum_{\bm{x}' \in \mathcal{D}} \sqrt{\mathbb{E}\big[ I^2(\bm{x}, \bm{q}) \big] \mathbb{E}\big[I^2(\bm{x}', \bm{q}) \big]} \\
      & = \textstyle \sum_{\bm{x} \in \mathcal{D}}\sum_{\bm{x}' \in \mathcal{D}} \sqrt{\mathbb{E}\big[ I(\bm{x}, \bm{q}) \big] \mathbb{E}\big[I(\bm{x}', \bm{q}) \big]} \\
      & = \big(\textstyle \sum_{\bm{x} \in \mathcal{D}} \sqrt{p_2(\bm{x}, \bm{q})} \big)^2,
    \end{aligned}
  \end{equation} 
  where the inequality follows from the Cauchy--Schwarz inequality.
  By combining Eqs.~\ref{eq-var-1} and~\ref{eq-var-2}, we have
  \begin{equation}\label{eq-var-bound}
    \mathrm{Var}\big[ \mathcal{S}_\mathcal{D}[i, h_i(\bm{q})] \big] \leq \big(\textstyle  \sum_{\bm{x} \in \mathcal{D}} \sqrt{p_2(\bm{x}, \bm{q})} \big)^2.
  \end{equation}
  According to Eq.~\ref{eq-correction}, we have
  \begin{equation}\label{eq-p2}
    p_2(\bm{x}, \bm{q}) = \big( \tfrac{e^{\gamma}}{e^{\gamma} + R - 1} -\tfrac{1}{R} \big) \cdot k(\bm{x}, \bm{q}) + \tfrac{1}{R}.
  \end{equation}
  By substituting Eq.~\ref{eq-p2} into Eq.~\ref{eq-var-bound} and letting $t_1 = \frac{e^{\gamma}}{e^{\gamma} + R - 1} -\frac{1}{R}$ and $t_2 = \frac{1}{R}$, we further obtain that
  \begin{equation}\label{eq-var-4}
    \begin{aligned}
      \mathrm{Var}\big[\mathcal{S}_\mathcal{D}[i, h_i(\bm{q})]\big]
      & \leq \big(\textstyle \sum_{\bm{x} \in \mathcal{D}} \sqrt{t_1 \cdot k(\bm{x}, \bm{q}) + t_2} \big)^2 \\
      & \leq \big(\sqrt{t_1} \cdot \widetilde{K}(\bm{q}) + \sqrt{t_2} \big)^2,
    \end{aligned}
  \end{equation}
  where $\widetilde{K}(\bm{q}) = \sum_{\bm{x} \in \mathcal{D}} \sqrt{k(\bm{x}, \bm{q})}$.
  By combining Eq.~\ref{eq-var-4} with Eq.~\ref{eq-var}, we finally acquire Eq.~\ref{estimator-var}
  and conclude the proof.
\end{proof}

\subsection{Proof of Theorem~\ref{thm-approx}}

\begin{proof}
  The median-of-means technique has been widely used to estimate the expected value of a random variable $X$ within an additive error $\alpha > 0$ with a failure probability of at most $\eta \in (0, 1)$.
  By applying Chebyshev's inequality and the Chernoff bound, we find that when $L = O\big(\frac{\mathrm{Var}[X] \cdot \log(1/\eta)}{\alpha^2} \big)$ samples are drawn from the distribution of $X$, the median-of-means estimator $\widehat{X}$ will satisfy that $\Pr\big[ |\widehat{X} - \mathbb{E}[X]| \leq \alpha \big] \geq 1 - \eta$.
  Based on Lemma~\ref{lm-unbiasedness}, we find that the expected value of each $\frac{1}{n} \cdot \widehat{\mathcal{S}}_\mathcal{D}[i, h_i(\bm{q})]$ is $\mathrm{KDE}_{\mathcal{D}}(\bm{q})$ and its variance is bounded.
  In addition, since $0 < \sqrt{\frac{e^{\gamma}}{e^{\gamma} + R - 1} -\frac{1}{R}} < 1$, $0 \leq \widetilde{K}(\bm{q}) \leq n$, and $\frac{1}{\sqrt{R}} < 1$ (for $R > 1$), the variance of $\frac{1}{n} \cdot \widehat{\mathcal{S}}_\mathcal{D}[i, h_i(\bm{q})]$ can be simplified as 
  $$\mathrm{Var}[\tfrac{1}{n} \cdot \widehat{\mathcal{S}}_\mathcal{D}[i, h_i(\bm{q})]] \leq (\tfrac{(e^\gamma + R - 1)R}{(e^\gamma - 1)(R - 1)})^2 \cdot (\tfrac{n + 1}{n})^2.$$
  Then, since $(\frac{R}{R - 1})^2 \leq 4$ and $(\frac{n + 1}{n})^2 \leq 4$, we have
  $$\mathrm{Var}[\tfrac{1}{n} \cdot \widehat{\mathcal{S}}_\mathcal{D}[i, h_i(\bm{q})]] = O((\tfrac{e^\gamma + R - 1}{e^\gamma - 1})^2).$$
  Therefore, we conclude that the median of means of $L = O((\tfrac{e^\gamma + R - 1}{e^\gamma - 1})^2 \cdot \tfrac{\log(1/\eta)}{\alpha^2})$ independent estimators in the form of $\frac{1}{n} \cdot \widehat{\mathcal{S}}_\mathcal{D}[i, h_i(\bm{q})]$ is an $(\alpha, \eta)$-approximation of $\mathrm{KDE}_{\mathcal{D}}(\bm{q})$.
\end{proof}

\subsection{Proof of Theorem~\ref{thm-circdep}}
\begin{proof}
    First, by simplifying Eq.~\ref{eq-gamma}, we get $\gamma = \frac{\varepsilon}{O(L+\sqrt{L\log (1/\eta)})}$, since $r$ and $\omega$ are data-dependent constants and $0.5 \leq \frac {R-1}{R} <1$. Taking $\varepsilon = O(\frac{\log(1/\eta)}{\alpha^{2}})$ and $L = O(\frac{\log (1/\eta)}{\alpha^{2}})$ into the above equation, we have
    \begin{equation}\label{eq-circdep-1}
      \gamma = \frac{O(\frac{\log(1/\eta)}{\alpha^{2}})}{O(\frac{\log(1/\eta)}{\alpha^{2}}) + O(\frac{\log(1/\eta)}{\alpha})}
       = \frac{O(\frac{\log(1/\eta)}{\alpha^{2}})}{O(\frac{\log(1/\eta)}{\alpha^{2}})}
       = O(1).
     \end{equation}
     Given that $\gamma = O(1)$ and $R = O(1)$, we can also simplify $L = O(\frac{\log(1/\eta)}{\alpha^{2}})$ in Theorem~\ref{thm-approx}.
     This means that Eq.~\ref{eq-gamma} and Theorem~\ref{thm-approx} hold at the same time when $\varepsilon = O(\frac{\log(1/\eta)}{\alpha^{2}})$, $L = O(\frac{\log(1/\eta)}{\alpha^{2}})$, and $R = O(1)$, and thus the circular dependence between $\gamma$ and $L$ is resolved.
\end{proof}

\section{Additional Experimental Results}
\label{appendix-experiments}

\subsection{Effect of Privacy Radius on mLDP-KDE}
\label{appendix-subsec-radius}

We tested two schemes to decide the value of $r$ in the existing literature on mLDP: (1) setting $r$ to the maximum of the $x$-th percentile distance of a point from its neighbors for some $x \in (0, 100]$ \citep{Chatzikokolakis15} and (2) setting $r$ to the average distance from a point to its $t$-nearest neighbors for some $t \in \mathbb{Z}^{+}$ \citep{FernandesKM21}.

Initially, we set $r$ as the maximum of the 10th percentile distance between a point and its neighbors.
Unfortunately, as presented in Table~\ref{appendix-tab-radius}, such settings of $r$ resulted in sub-par KDE quality on most data sets, which is attributable to their highly skewed distributions, where a few outliers are distant from most other points and substantially increase the value of $r$.
We then shifted our focus to adjust the value of $r$ based on the average, rather than the maximum, distance from a point to its $t$-nearest neighbors for $t = 1, 10, 100, 1000, 10000$.
This yields more promising results, as detailed in Table~\ref{appendix-tab-radius}.
These findings suggest that \textsc{mLDP-KDE} can achieve high-quality KDE results while offering a reasonable level of privacy protection where each point is, on average, indistinguishable from $100$--$10{,}000$ other points in the data set.
By default, we set the value of $r$ w.r.t.~$t = 100$ on each data set in the remaining experiments.

\begin{table}[ht]
\small
\centering
\caption{MSEs of \textsc{mLDP-KDE} when the privacy radius $r$ is set to the average distance from a point to its $t$-nearest neighbors for $t \in \{1, 10, 100, 1000, 10000\}$ (rounded to two or three significant figures) or the maximum of the 10th percentile distance of a point from its neighbors.}
\label{appendix-tab-radius}
\vspace{-1em}
\begin{tabular}{|c|c|c|c|c|c|}
\hline
\multirow{2}{*}{\textbf{Data Set}} & \multirow{2}{*}{\textbf{$t$}} & \multirow{2}{*}{\textbf{$r$}} & \multicolumn{3}{c|}{\textbf{MSE}} \\ \cline{4-6}
& & & $\varepsilon = 1$ & $\varepsilon = 5$ & $\varepsilon = 20$ \\ \hline
\multirow{6}{*}{\textbf{CodRNA}} 
& 1 & 0.01 & 0.0021  & 0.0006  & 0.0003 \\ \cline{2-6}
& 10 & 0.055 & 0.0018 & 0.0007 & 0.0003 \\ \cline{2-6}
& 100 & 0.1 & 0.0016 & 0.0009 & 0.0005 \\ \cline{2-6}
& 1,000 & 0.15 & 0.0044 & 0.00095 & 0.0006 \\ \cline{2-6}
& 10,000 & 0.2 & 0.0044 & 0.00142 & 0.0006 \\ \cline{2-6}
& (max of 10th percentile) & 0.8494 & 0.0268 & 0.00174 & 0.0009 \\ \hline
\multirow{6}{*}{\textbf{CovType}} 
& 1 & 0.01 & 0.0003  & 0.0002 & 6$\times 10^{-5}$\\ \cline{2-6}
& 10 & 0.06 & 0.0003 & 0.0001 & 8$\times 10^{-5}$\\ \cline{2-6}
& 100 & 0.1 & 0.0023 & 0.0001 & 8$\times 10^{-5}$\\ \cline{2-6}
& 1,000 & 0.3 & 0.0065 & 0.0004 & 0.0001\\ \cline{2-6}
& 10,000 & 0.5 & 0.0144 & 0.0005 & 0.0001\\ \cline{2-6}
& (max of 10th percentile) & 1.7803 & 0.0520 & 0.0085 & 0.0012\\ \hline
\multirow{6}{*}{\textbf{RCV1}}
& 1 & 0.01 & 0.0008 & 0.0006 & 0.0002\\ \cline{2-6}
& 10 & 0.055 & 0.0013 & 0.0007 & 0.0002\\ \cline{2-6}
& 100 & 0.2 & 0.0058 & 0.0008 & 0.0006\\ \cline{2-6}
& 1,000 & 0.35 & 0.006 & 0.0008 & 0.0006\\ \cline{2-6}
& 10,000 & 0.5 & 0.022 & 0.0013 & 0.0007\\ \cline{2-6}
& (max of 10th percentile) & 0.8565 & 0.013 & 0.0013 & 0.0008\\ \hline
\multirow{6}{*}{\textbf{Yelp}} 
& 1 & 0.001 & 0.0008 & 0.00034 & 0.00018\\ \cline{2-6}
& 10 & 0.00175 & 0.0014 & 0.00038 & 7$\times 10^{-5}$\\ \cline{2-6}
& 100 & 0.0025 & 0.0016 & 0.00056 & 7$\times 10^{-5}$\\ \cline{2-6}
& 1,000 & 0.00375 & 0.0013 & 0.00041 & 8$\times 10^{-5}$\\ \cline{2-6}
& 10,000 & 0.005 & 0.0015 & 0.00073 & 0.0001\\ \cline{2-6}
& (max of 10th percentile) & 8.4035 & 0.0295 & 0.0154 & 0.0101\\ \hline
\multirow{6}{*}{\textbf{SYN}} 
& 1 & 0.072 & 0.0034 & 0.00027 & 0.0001\\ \cline{2-6}
& 10 & 0.088 & 0.0036 & 0.00035 & 0.0001\\ \cline{2-6}
& 100 & 0.107 & 0.0037 & 0.0008 & 0.0001\\ \cline{2-6}
& 1,000 & 0.142 & 0.0036 & 0.0008 & 0.0001\\ \cline{2-6}
& 10,000 & 0.177 & 0.0037 & 0.0004 & 0.0001\\ \cline{2-6}
& (max of 10th percentile) & 11.2314 & 0.0082 & 0.0038 & 0.0024\\ \hline
\end{tabular}
\end{table}

\subsection{Time Efficiency}
\label{appendix-subsec-efficiency}

The first row of Figure~\ref{fig-time} presents the construction time of each method on five data sets by varying the privacy budget from $1$ to $20$.
We observe that \textsc{RACE} generally has the longest construction time.
Compared to non-sketch methods, \textsc{RACE} takes a longer time for $L = 1{,}000$ LSH computations than $m \leq 100$ perturbation operations per point.
\textsc{RACE} also builds sketches much slower than \textsc{mLDP-KDE} in most cases because its sketch sizes are much larger than those of \textsc{mLDP-KDE}.
\textsc{mLDP-KDE} exhibits a longer construction time than other algorithms only when the privacy budget is higher and the sketch size is larger.
Due to the additional GRR procedure, \textsc{mLDP-KDE} becomes slower than \textsc{RACE} when $L > 400$ (resp.~$L = 1{,}000$ for \textsc{RACE}).
\textsc{DM-KDE}, \textsc{PM-KDE}, \textsc{SW-KDE}, and \textsc{GI-KDE} take less construction time than sketch-based methods, and their construction times are barely affected by $\varepsilon$ because their perturbation procedures are the same for all values of $\varepsilon$.

The second row of Figure~\ref{fig-time} depicts the query time of each method for $\varepsilon = 1$ to $20$.
\textsc{mLDP-KDE} shows a substantial improvement in query efficiency compared to other algorithms. 
Its query time is more than four orders of magnitude lower than that of \textsc{DM-KDE}, \textsc{PM-KDE}, \textsc{SW-KDE}, and \textsc{GI-KDE}, which compute the KDE by evaluating the kernel functions for all perturbed points.
Compared to \textsc{RACE}, \textsc{mLDP-KDE} achieves a query time that is almost an order of magnitude lower on all data sets except Yelp because it uses much smaller values of $L$ in the sketch and requires fewer LSH computations.
On the Yelp data set, the query efficiency of \textsc{mLDP-KDE} is lower than that of \textsc{RACE} when $\varepsilon > 30$ since the values of $L$ in their sketches are close to each other but \textsc{mLDP-KDE} requires additional computations for correction.

In summary, although \textsc{mLDP-KDE} may not always have a notable advantage in construction time compared to other algorithms, its query time efficiency is exceptionally high, which aligns with the complexity analysis outlined in Section \ref{sec-theory}.
Given that the sketch and other data structures are constructed just once, trading a longer construction time for a significantly lower query time is justifiable.

\begin{figure}[ht]
  \centering
  \includegraphics[width=0.64\textwidth]{figures/legend_eps.pdf}
  \\
  \vspace{1mm}
  \includegraphics[width=0.192\textwidth]{figures/CodRNA_ConstructionTime_epsilon.pdf}
  \hfill
  \includegraphics[width=0.192\textwidth]{figures/CovType_ConstructionTime_epsilon.pdf}
  \hfill
  \includegraphics[width=0.192\textwidth]{figures/RCV1_ConstructionTime_epsilon.pdf}
  \hfill
  \includegraphics[width=0.192\textwidth]{figures/Yelp_ConstructionTime_epsilon.pdf}
  \hfill
  \includegraphics[width=0.192\textwidth]{figures/SYN_ConstructionTime_epsilon.pdf}
  \\
  \includegraphics[width=0.192\textwidth]{figures/CodRNA_QueryTime_epsilon.pdf}
  \hfill
  \includegraphics[width=0.192\textwidth]{figures/CovType_QueryTime_epsilon.pdf}
  \hfill
  \includegraphics[width=0.192\textwidth]{figures/RCV1_QueryTime_epsilon.pdf}
  \hfill
  \includegraphics[width=0.192\textwidth]{figures/Yelp_QueryTime_epsilon.pdf}
  \hfill
  \includegraphics[width=0.192\textwidth]{figures/SYN_QueryTime_epsilon.pdf}
  \\
  \vspace{-1em}
  \caption{Construction time and query time of all algorithms for KDE under LDP/mLDP with varying privacy budget $\varepsilon \in \{1, 2.5, 5, \cdots, 20\}$.}
  \label{fig-time}
\end{figure}

\subsection{Sketch Size and Communication Cost}
\label{appendix-subsec-sketch-size-effect}

We show the sketch size and communication cost of \textsc{mLDP-KDE} with different privacy budgets on each data set in Figure~\ref{fig-eps-bar}.
We find that both measures increase with the privacy budget $\varepsilon$.
The size of the \textsc{mLDP-KDE} sketch increases naturally with $\varepsilon$ according to our privacy analysis in Section~\ref{sec-alg}.
For comparison, the \textsc{RACE} sketch size is always $8 \times 10^5$ bytes.
The communication cost, which includes the transmission of the LSH parameters from the server to all clients and the hash sequences from all clients to the server, also increases with the privacy budget $\varepsilon$, since it is linear in the value of $L$.
The communication costs of non-sketch methods are equal to the data set sizes listed in Table~\ref{tab:datasets}.
As can be seen, sketch methods incur higher overhead in communication than non-sketch ones for $L > m$.

\begin{figure}[ht]
  \centering
  \includegraphics[width=0.192\textwidth]{figures/CodRNA_SketchSize_CommunicationCost.pdf}
  \hfill
  \includegraphics[width=0.192\textwidth]{figures/CovType_SketchSize_CommunicationCost.pdf}
  \hfill
  \includegraphics[width=0.192\textwidth]{figures/RCV1_SketchSize_CommunicationCost.pdf}
  \hfill
  \includegraphics[width=0.192\textwidth]{figures/Yelp_SketchSize_CommunicationCost.pdf}
  \hfill
  \includegraphics[width=0.192\textwidth]{figures/SYN_SketchSize_CommunicationCost.pdf}
  \\
  \vspace{-1em}
  \caption{Sketch size and communication cost of \textsc{mLDP-KDE} with varying privacy budget $\varepsilon \in \{1, 5, 10, 15, 20\}$.}
  \label{fig-eps-bar}
\end{figure}

\subsection{Additional Scalability Test}
\label{appendix-subsec-scalability}

We test the scalability of different methods on the SYN data sets with numbers of points $n$ ranging from $10^4$ to $10^6$ (and $m = 50$) and dimensionalities $m$ ranging from $5$ to $50$ (and $n = 10^5$).
Figure~\ref{fig-mn-time} shows the results for construction and query time.
Since the construction time and query time of all methods except \textsc{mLDP-KDE} are not affected by $\varepsilon$, their results for $\varepsilon = 1, 5, 20$ are combined in Figure~\ref{fig-mn-time}.
The construction time of each method increases almost linearly with $n$.
In terms of query time, \textsc{mLDP-KDE} and \textsc{RACE} are not affected by $n$, whereas non-sketch methods exhibit a linear increase with $n$.
For different values of $m$, \textsc{mLDP-KDE} also outperforms all competitors in terms of construction and query time.
Moreover, the time efficiency of \textsc{mLDP-KDE} depends mainly on the value of $\varepsilon$ but does not show obvious changes in different dimensions.

\begin{figure}[ht]
  \centering
  \includegraphics[width=0.48\textwidth]{figures/legend_n.pdf}
  \\
  \vspace{1mm}
  \includegraphics[width=0.2\textwidth]{figures/ConstructionTime_n.pdf}
  \hspace{1mm}
  \includegraphics[width=0.2\textwidth]{figures/QueryTime_n.pdf}
  \hspace{3mm}
  \includegraphics[width=0.2\textwidth]{figures/ConstructionTime_m.pdf}
  \hspace{1mm}
  \includegraphics[width=0.2\textwidth]{figures/QueryTime_m.pdf}
  \\
  \vspace{-1em}
  \caption{Construction and query time of all methods on the SYN data set with varying data set size $n$ from $10^4$ to $10^6$ and dimensionality $m$ from $5$ to $50$.}
  \label{fig-mn-time}
\end{figure}

\subsection{Visualized Results}
\label{appendix-subsec-heatmap}

To verify that the KDE distributions generated by \textsc{mLDP-KDE} closely approximate the exact distributions, we visualize the KDE distributions produced by different methods.
We draw 2D heat maps of KDE distributions utilizing t-SNE for dimensionality reduction.
These heat maps, presented in Figure~\ref{appendix-fig-heatmap}, indicate that \textsc{GI-KDE} completely fails to preserve the exact KDE distributions in all cases, but \textsc{mLDP-KDE} generally preserves the exact KDE distributions in most cases when $\varepsilon = 5$ and always does when $\varepsilon = 20$.
These results further substantiate the effectiveness of \textsc{mLDP-KDE} over \textsc{GI-KDE}.

\begin{figure}[!ht]
  \centering
  \includegraphics[width=0.16\textwidth]{figures/CodRNA_heatmap_exact.pdf}
  \includegraphics[width=0.16\textwidth]{figures/CodRNA_heatmap_race.pdf}
  \includegraphics[width=0.16\textwidth]{figures/CodRNA_heatmap_gi_kde_e_5.pdf}
  \includegraphics[width=0.16\textwidth]{figures/CodRNA_heatmap_gi_kde_e_20.pdf}
  \includegraphics[width=0.16\textwidth]{figures/CodRNA_heatmap_mldp_kde_e_5.pdf}
  \includegraphics[width=0.16\textwidth]{figures/CodRNA_heatmap_mldp_kde_e_20.pdf}
  \\
  \vspace{1mm}
  \includegraphics[width=0.16\textwidth]{figures/CovType_heatmap_exact.pdf}
  \includegraphics[width=0.16\textwidth]{figures/CovType_heatmap_race.pdf}
  \includegraphics[width=0.16\textwidth]{figures/CovType_heatmap_gi_kde_e_5.pdf}
  \includegraphics[width=0.16\textwidth]{figures/CovType_heatmap_gi_kde_e_20.pdf}
  \includegraphics[width=0.16\textwidth]{figures/CovType_heatmap_mldp_kde_e_5.pdf}
  \includegraphics[width=0.16\textwidth]{figures/CovType_heatmap_mldp_kde_e_20.pdf}
  \\
  \vspace{1mm}
  \includegraphics[width=0.16\textwidth]{figures/RCV1_heatmap_exact.pdf}
  \includegraphics[width=0.16\textwidth]{figures/RCV1_heatmap_race.pdf}
  \includegraphics[width=0.16\textwidth]{figures/RCV1_heatmap_gi_kde_e_5.pdf}
  \includegraphics[width=0.16\textwidth]{figures/RCV1_heatmap_gi_kde_e_20.pdf}
  \includegraphics[width=0.16\textwidth]{figures/RCV1_heatmap_mldp_kde_e_5.pdf}
  \includegraphics[width=0.16\textwidth]{figures/RCV1_heatmap_mldp_kde_e_20.pdf}
  \\
  \vspace{1mm}
  \includegraphics[width=0.16\textwidth]{figures/Yelp_heatmap_exact.pdf}
  \includegraphics[width=0.16\textwidth]{figures/Yelp_heatmap_race.pdf}
  \includegraphics[width=0.16\textwidth]{figures/Yelp_heatmap_gi_kde_e_5.pdf}
  \includegraphics[width=0.16\textwidth]{figures/Yelp_heatmap_gi_kde_e_20.pdf}
  \includegraphics[width=0.16\textwidth]{figures/Yelp_heatmap_mldp_kde_e_5.pdf}
  \includegraphics[width=0.16\textwidth]{figures/Yelp_heatmap_mldp_kde_e_20.pdf}
  \\
  \vspace{1mm}
  \includegraphics[width=0.16\textwidth]{figures/SYN_heatmap_exact.pdf}
  \includegraphics[width=0.16\textwidth]{figures/SYN_heatmap_race.pdf}
  \includegraphics[width=0.16\textwidth]{figures/SYN_heatmap_gi_kde_e_5.pdf}
  \includegraphics[width=0.16\textwidth]{figures/SYN_heatmap_gi_kde_e_20.pdf}
  \includegraphics[width=0.16\textwidth]{figures/SYN_heatmap_mldp_kde_e_5.pdf}
  \includegraphics[width=0.16\textwidth]{figures/SYN_heatmap_mldp_kde_e_20.pdf}
  \\
  \vspace{-1em}
  \caption{2D heat maps for the KDE distributions provided by different methods on each data set, where t-SNE is used for dimensionality reduction.}
  \label{appendix-fig-heatmap}
\end{figure}

\section{Generalizations to Other LSH Kernels}
\label{appendix-general-lsh}

We discuss how to extend \textsc{mLDP-KDE} to support other LSH kernels beyond the $l_2$-LSH kernel for the Euclidean distance.

\paragraph{$l_1$-LSH Kernel}
By replacing the $2$-stable LSH scheme with the $1$-stable LSH scheme \citep{DatarIIM04}, where the random vector $\bm{a}$ in each LSH function is drawn from the Cauchy distribution instead of the Gaussian distribution, the \textsc{mLDP-KDE} framework can be applied directly to the $l_1$-LSH kernel for the Manhattan distance.
The approximation bound and the complexity of \textsc{mLDP-KDE} are not affected after adaptation.

For the privacy analysis on the $l_1$-LSH kernel, denoted as
$$k_{l_1}(\bm{x}, \bm{x}') = \frac{2}{\pi} \arctan\big(\frac{\omega}{\|\bm{x} - \bm{x}'\|_1}\big) - \frac{\|\bm{x} - \bm{x}'\|_1}{\pi\omega} \ln\big(1 + \frac{\omega^2}{\|\bm{x} - \bm{x}'\|_1^2} \big),$$
where $\|\bm{x} - \bm{x}'\|_1$ is the $l_1$-distance of $\bm{x}$ and $\bm{x}'$, we have a slightly weaker bound $1 - k_{l_1}(\bm{x}, \bm{x}') \leq \tfrac{c_1}{\omega} \cdot \|\bm{x} - \bm{x}'\|_1 + c_2$ for any $c_1 \geq 1.2$ and $c_2 \geq 0.1$.
Accordingly, the term $\frac{c (R-1)}{\omega R} \cdot d(\bm{x}, \bm{x}')$ in Theorem~\ref{thm-mldp} and Corollary~\ref{col-mldp} should be replaced by $\frac{c_1 (R-1)}{\omega R} \cdot d(\bm{x}, \bm{x}') + \frac{c_2 (R-1)}{R}$ so that the LSH+GRR mechanism still provides mLDP.

\paragraph{Angular Kernel}
To extend \textsc{mLDP-KDE} to the angular kernel \citep{ColemanS20, LeiWL0ZGD21}, we replace the $2$-stable LSH scheme with the SRP-LSH scheme. 
A function $h_{\mathrm{srp}}: \mathbb{R}^{m} \mapsto \{+1, -1\}$ in the SRP-LSH family is defined as $h_{\mathrm{srp}}(\bm{x}) = \mathrm{sign}(\bm{a} \cdot \bm{x})$, where $\bm{a}$ is also drawn from the standard $m$-dimensional Gaussian distribution.
The angular kernel is defined as $k_{\mathrm{ang}}(\bm{x}, \bm{x}') = 1 - \frac{\theta(\bm{x}, \bm{x}')}{\pi}$, where $\theta(\bm{x}, \bm{x}')$ is the angle between $\bm{x}$ and $\bm{x}'$.
We can see that, unlike $l_1$- and $l_2$-LSH functions, the output of each SRP-LSH function is binary.
As such, by mapping the output values $\{-1, +1\}$ to the range $[1, 2]$, rehashing is not needed.
Accordingly, the GRR mechanism is reduced to the special case of $R = 2$, i.e., the randomized response (RR) mechanism.

For privacy analysis, by removing rehashing and due to $k_{\mathrm{ang}}(\bm{x}, \bm{x}') = 1 - \frac{\theta(\bm{x}, \bm{x}')}{\pi}$, we refine Theorem~\ref{thm-mldp} for the angular kernel as follows:
The LSH+RR mechanism provides $(d_{\chi}, \eta)$-mLDP, where $d_{\chi}(\bm{x}, \bm{x}') = \frac{\gamma L}{\pi} \cdot \theta(\bm{x}, \bm{x}') + \gamma\sqrt{\frac{L \ln(1/\eta)}{2}}$.
Corollary~\ref{col-mldp} holds by setting $p = \frac{\theta(\bm{x}, \bm{x}')}{\pi}$ and replacing the term $\frac{c (R-1)}{\omega R} \cdot d(\bm{x}, \bm{x}')$ with $\frac{\theta(\bm{x}, \bm{x}')}{\pi}$.
Then, the unbiased estimator in Algorithm~\ref{alg-2} for the angular kernel becomes
% $\widehat{\mathcal{S}}_\mathcal{D}[i, h_i(\bm{q})] = \tfrac{(e^\gamma + 1) \cdot \mathcal{S}_\mathcal{D}[i, h_i(\bm{q})] - n}{e^\gamma - 1}$,
\begin{displaymath}
  \widehat{\mathcal{S}}_\mathcal{D}[i, h_i(\bm{q})] = \tfrac{(e^\gamma + 1) \cdot \mathcal{S}_\mathcal{D}[i, h_i(\bm{q})] - 1}{e^\gamma - 1},
\end{displaymath}
with its variance being bounded by
% $(\tfrac{e^{\gamma} + 1}{e^{\gamma} - 1})^2 \cdot (\sqrt{\tfrac{e^{\gamma} - 1}{e^{\gamma} + 1}} \widetilde{K}(\bm{q}) + \sqrt{\tfrac{1}{e^{\gamma} + 1}})^2$.
\begin{displaymath}
  \big(\tfrac{e^{\gamma} + 1}{e^{\gamma} - 1} \big)^2 \cdot \big(\sqrt{\tfrac{e^{\gamma} - 1}{e^{\gamma} + 1}} \widetilde{K}(\bm{q}) + \sqrt{\tfrac{1}{e^{\gamma} + 1}} \big)^2.
\end{displaymath}
Moreover, the number of independent rows required to guarantee an $(\alpha, \eta)$-approximate KDE in Theorem~\ref{thm-approx} remains $L = O\big((\frac{e^\gamma + 1}{e^\gamma - 1})^2 \cdot \frac{\log(1/\eta)}{\alpha^2}\big)$.
Finally, the complexity of \textsc{mLDP-KDE} does not change after performing all the above adaptations.

\paragraph{General LSH Kernel}
We finally analyze how \textsc{mLDP-KDE} can support KDE on general LSH kernels.
Consider an LSH kernel defined by an LSH family $\mathcal{H}$ on a metric distance $d(\cdot, \cdot)$, where each $h \in \mathcal{H}$ maps a data point $\bm{x}$ to an integer $h(\bm{x})$. The range of $h(\cdot)$ can be bounded or unbounded; in either case, the hash values are rehashed to the range $[1, R]$.
The basic property of $\mathcal{H}$ is that closer data points have a higher probability of mapping to the same hash value, which is formalized as follows.
\begin{definition}[$(r_1, r_2, p_1, p_2)$-LSH family \citep{IndykM98}]\label{def-lsh-family}
  A hash family $\mathcal{H}$ is $(r_1, r_2, p_1, p_2)$-LSH w.r.t.~a metric distance $d(\cdot, \cdot)$, where $r_1 < r_2$ and $p_1 > p_2$, if for two points $\bm{x}$ and $\bm{x}'$:
  \begin{itemize}
    \item If $d(\bm{x}, \bm{x}') \leq r_1$, then $\Pr_{h \in \mathcal{H}}[h(\bm{x}) = h(\bm{x}')] \geq p_1$;
    \item If $d(\bm{x}, \bm{x}') \geq r_2$, then $\Pr_{h \in \mathcal{H}}[h(\bm{x}) = h(\bm{x}')] \leq p_2$.
  \end{itemize}
\end{definition}
According to \citet{ColemanS20}, the only additional requirement for any $(r_1, r_2, p_1, p_2)$-LSH family to form a positive semi-definite radial kernel in the form of $k(\bm{x}, \bm{x}') = f(d(\bm{x}, \bm{x}'))$ based on the collision probability function $f(\cdot)$ is that $f(\cdot)$ is monotonically decreasing.
Intuitively, by applying any $(r_1, r_2, p_1, p_2)$-LSH family, \textsc{mLDP-KDE} can be used for the corresponding LSH kernel.
Due to the definition of the LSH kernel, the approximation bounds of \textsc{mLDP-KDE} are naturally satisfied.
For privacy analysis, unlike for a specific LSH kernel, since the relationship between $k(\bm{x}, \bm{x}')$ and $d(\bm{x}, \bm{x}')$ is unavailable, we can only provide probabilistic LDP bounds when $d(\bm{x}, \bm{x}')$ is in the ranges of $(0, r_1]$, $(r_1, r_2]$, and $(r_2, +\infty)$.
Assuming that $\Pr_{h \in \mathcal{H}}[h(\bm{x}) = h(\bm{x}')] = p_1$ when $d(\bm{x}, \bm{x}') = r_1$ and $\Pr_{h \in \mathcal{H}}[h(\bm{x}) = h(\bm{x}')] = p_2$ when $d(\bm{x}, \bm{x}') = r_2$, we have
\begin{align*}
  & \Pr\big[\mathcal{L}_{\bm{x}, \bm{x}'} \geq \gamma L (\tfrac{R - 1}{R} (1 - p_1) + s) \big] \leq \exp(-2Ls^2), \\
  & \Pr\big[\mathcal{L}_{\bm{x}, \bm{x}'} \geq \gamma L (\tfrac{R - 1}{R} (1 - p_2) + s) \big] \leq \exp(-2Ls^2)
\end{align*}
when $d(\bm{x}, \bm{x}') \leq r_1$ and $d(\bm{x}, \bm{x}') \leq r_2$, respectively.
In this way, similar results to those of Theorem~\ref{thm-mldp} and Corollary~\ref{col-mldp} can be obtained by replacing $s$ with the corresponding values.
When $d(\bm{x}, \bm{x}') > r_2$, \textsc{mLDP-KDE} still provides $\gamma L$-LDP by Corollary~\ref{thm-ldp}.

\subsection{Experimental Evaluation for Other LSH Kernels}

To demonstrate the generalizability of \textsc{mLDP-KDE}, we evaluate the MSEs of different methods for the $l_1$-LSH kernel and the angular kernel with privacy budgets $\varepsilon \in \{1, 2.5, 5, \cdots, 20\}$.

For the $l_1$-LSH kernel, we run all methods except \textsc{GI-KDE}, which is specific to the Euclidean distance, on the CovType data set.
The values of $\omega$ and $r$ are set to $2.5$ and $0.05$ for the $l_1$-LSH kernel because the average Manhattan distance between the points is about five times greater than the average Euclidean distance.
As shown in the first plot of Figure~\ref{fig-l1-ang-MSE}, \textsc{mLDP-KDE} performs consistently and significantly better than the other algorithms.
However, its MSEs for the $l_1$-LSH kernel are generally higher than those for the $l_2$-LSH kernel because of the larger constants in the privacy bound.

For the angular kernel, we carry out experiments on the Yelp data set and compare all methods with two additional baselines, \textsc{FKM-LL-RACE} and \textsc{FKM-LR-RACE}, which integrate the two LSH schemes under mLDP proposed by \citet{FernandesKM21} with RACE.
From the second plot in Figure~\ref{fig-l1-ang-MSE}, we still observe that \textsc{mLDP-KDE} outperforms all other algorithms by a large margin.
The advantage of \textsc{mLDP-KDE} over \textsc{FKM-LL-RACE} and \textsc{FKM-LR-RACE} further confirms its effectiveness for KDE problems compared to general LSH methods under mLDP.

These results support our justification for the generalization of \textsc{mLDP-KDE} to other LSH kernels.

\begin{figure}[ht]
  \centering
  \includegraphics[width=0.48\textwidth]{figures/legend_angular_l1.pdf}
  \vspace{1mm}
  \\
  \includegraphics[width=0.2\textwidth]{figures/l1_MSE_epsilon.pdf}
  \hspace{1mm}
  \includegraphics[width=0.2\textwidth]{figures/angular_MSE_epsilon.pdf}
  \\
  \vspace{-1em}
  \caption{MSEs of different algorithms on the CovType data set for the $l_1$-LSH kernel and the Yelp data set for the angular kernel with varying privacy budgets $\varepsilon \in \{1, 2.5, 5, \cdots, 20\}$.}
  \label{fig-l1-ang-MSE}
\end{figure}

