\section{Our Algorithm}
\label{sec-alg}

This section introduces \textsc{mLDP-KDE}, an LSH-based framework for KDE under mLDP, and analyzes it theoretically.

\subsection{The \textsc{mLDP-KDE} Framework}

\paragraph{Overview}
\citet{ColemanS20} and \citet{LeiWL0ZGD21} have developed LSH-based sketches for approximate density estimation on LSH kernels.
They are adaptable to a local computing model, where users independently calculate hash values and send them to a central server, which then aggregates all of them into a comprehensive sketch for KDE.
However, this approach does not inherently provide local differential privacy, as the server can potentially infer individual user data from the transmitted hash values.

To address this and align LSH-based sketches with mLDP in Definition~\ref{def-mldp}, we show that it suffices to perturb hash values using GRR \citep{KairouzBR16} with a specific privacy parameter on the user side before sending them to the server.
Remarkably, the sketch composed of perturbed hash values can provide an unbiased KDE at any query point within a bounded additive error with high probability.

\paragraph{Sketch Construction}
Algorithm~\ref{alg-1} depicts the \textsc{mLDP-KDE} sketch construction procedure.
Initially, the server randomly selects $L$ LSH functions from the $2$-stable LSH scheme \citep{DatarIIM04} and sends the hash parameters to users.
Then a user with the data point $\bm{x} \in \mathcal{D}$ generates $L$ integers by hashing $\bm{x}$ with $L$ LSH functions, which are then rehashed into the range of $[1, R]$ using a scheme consistent across the server and all users.
The user independently runs the GRR mechanism on each hash value using the same parameter $\gamma$ determined by the input privacy budget $\varepsilon$, radius $r$, confidence parameter $\eta$, and height and width $L, R$ of the sketch (see Theorem~\ref{thm-mldp} and Corollary~\ref{col-mldp} in Section~\ref{sec-theory} for the determination of $\gamma$).
After obtaining the perturbed hash values $\widehat{H}(\bm{x})$, the user sends them back to the server.
This procedure is called the LSH+GRR mechanism since it applies the LSH computation \citep{ColemanS20} and the GRR mechanism \citep{KairouzBR16} in a sequential manner.

When receiving the perturbed hash values from all users, the server builds the sketch $\mathcal{S}_{\mathcal{D}}$ similarly to that of the \textsc{RACE} sketch \citep{ColemanS20}.
It initializes the sketch $\mathcal{S}_{\mathcal{D}}$ as an $L \times R$ array of all zeros.
For each sequence $\widehat{H}(\bm{x})$ of the hash values for $\bm{x} \in \mathcal{D}$, it increments the counter $\mathcal{S}_{\mathcal{D}}[i, \widehat{h}_i(\bm{x})]$ for each $i \in \{1, \cdots, L\}$.
After processing all data points in $\mathcal{D}$, it returns the sketch $\mathcal{S}_{\mathcal{D}}$.

\begin{algorithm}[t]
  \caption{\textsc{mLDP-KDE} Sketch Construction}
  \label{alg-1}
  \KwIn{Data set $\mathcal{D}$, bandwidth $\omega$, privacy budget $\varepsilon$, radius $r$, confidence parameter $\eta$, sketch height $L$ and width $R$}
  \KwOut{Sketch $\mathcal{S}_\mathcal{D}$}
  \BlankLine\Comment{Server side}
  \For{$i = 1$ \KwTo $L$}{
    Draw a vector of $m$ random variables from $\mathcal{N}(0, 1)$ as $\bm{a}_i$ and a random variable from $\mathcal{U}(0, \omega)$ as $b_i$\;
  }
  Send the LSH parameters $\bm{A} = [\bm{a}_1, \cdots, \bm{a}_L]$ and $\bm{B} = [b_1, \cdots, b_L]$ to each user\;
  
  \BlankLine\Comment{User side with a data point $\bm{x} \in \mathcal{D}$ on receiving $\bm{A}$ and $\bm{B}$}
  Set $\gamma \gets \varepsilon/(\frac{0.8 r L(R - 1)}{\omega R} + \sqrt{\frac{L \ln(1/\eta)}{2}})$ or by Corollary~\ref{col-mldp}\;
  \For{$i = 1$ \KwTo $L$}{
    $h_i(\bm{x}) \gets \mathrm{Rehash}\big( \lfloor \frac{\bm{a}_i \cdot \bm{x} + b_i}{\omega} \rfloor, R \big)$\;
    $\widehat{h}_i(\bm{x}) \gets \mathcal{M}_{\mathrm{GRR}}(h_i(\bm{x}))$ with parameter $\gamma$\;
  }
  Report $\widehat{H}(\bm{x}) = [\widehat{h}_1(\bm{x}), \cdots, \widehat{h}_L(\bm{x})]$ to the server\;
 
  \BlankLine\Comment{Server side}
  Initialize sketch $\mathcal{S}_\mathcal{D} \gets \bm{0}^{L \times R}$\;
  \ForEach{$\bm{x} \in \mathcal{D}$}{
    \For{$i = 1$ \KwTo $L$}{
      $\mathcal{S}_\mathcal{D}[i, \widehat{h}_i(\bm{x})] \gets \mathcal{S}_\mathcal{D}[i, \widehat{h}_i(\bm{x})] + 1$\;
    }
  }
  \Return $\mathcal{S}_\mathcal{D}$\;
\end{algorithm}

\paragraph{KDE Query Processing}
Algorithm~\ref{alg-2} presents how the server processes a KDE query using the sketch $\mathcal{S}_{\mathcal{D}}$.
Upon receiving a query point $\bm{q} \in \mathbb{R}^m$, the server first calculates a sequence of hash values $h_1(\bm{q}), \cdots, h_L(\bm{q})$ using the same sequence of $L$ hash functions and the same rehashing scheme as in Algorithm~\ref{alg-1}.
Then, we provide an approximation $\widehat{\mathrm{KDE}}_{\mathcal{D}}(\bm{q})$ of $\mathrm{KDE}_{\mathcal{D}}(\bm{q})$ through the $L$ corresponding counters $\mathcal{S}_\mathcal{D}[1, h_1(\bm{q})], \cdots, \mathcal{S}_\mathcal{D}[L, h_L(\bm{q})]$.
Subsequently, we analyze how the output distribution is affected by the rehashing scheme and the GRR mechanism to derive an unbiased estimator $\frac{\widehat{\mathcal{S}}_\mathcal{D}[i, h_i(\bm{q})]}{n}$ of $\mathrm{KDE}_{\mathcal{D}}(\bm{q})$ from $\mathcal{S}_\mathcal{D}[i, h_i(\bm{q})]$ for each $i \in \{1, \cdots, L\}$ (see Lemma~\ref{lm-unbiasedness} in Section~\ref{sec-theory} for how the estimator is attained).
This process produces $L$ unbiased estimators for $\mathrm{KDE}_{\mathcal{D}}(\bm{q})$ from $\mathcal{S}_\mathcal{D}$.
Finally, these estimators are divided into $L'$ groups, each containing $L/L'$ estimators.
For each group, we compute the mean value $\widehat{K}[l]$ for $l \in \{1, \cdots, L'\}$ and return the median value as an approximation $\widehat{\mathrm{KDE}}_{\mathcal{D}}(\bm{q})$ for $\mathrm{KDE}_{\mathcal{D}}(\bm{q})$.

\begin{algorithm}[t]
  \caption{\textsc{mLDP-KDE} Query Processing}
  \label{alg-2}
  \KwIn{Sketch $\mathcal{S}_\mathcal{D}$, query point $\bm{q}$, the same hash and privacy parameters as Algorithm~\ref{alg-1}, group parameter $L'$}
  \KwOut{Approximation $\widehat{\mathrm{KDE}}_{\mathcal{D}}(\bm{q})$ of $\mathrm{KDE}_{\mathcal{D}}(\bm{q})$}
  \BlankLine
  \For{$i = 1$ \KwTo $L$}{
    $h_i(\bm{q}) \gets \mathrm{Rehash}\big(\lfloor \frac{\bm{a}_i \cdot \bm{q} + b_i}{\omega} \rfloor, R \big)$\;
  }
  $\widehat{K} \gets \bm{0}^{L'}$\;
  \For{$l = 1$ \KwTo $L'$}{
    \For{$j = 1$ \KwTo $\frac{L}{L'}$}{
      $i \gets (l - 1) \cdot \frac{L}{L'} + j$\;
      $\widehat{\mathcal{S}}_\mathcal{D}[i, h_i(\bm{q})] \gets \frac{e^\gamma + R - 1}{(e^\gamma - 1)(R - 1)} \cdot$ $(\mathcal{S}_\mathcal{D}[i, h_i(\bm{q})] \cdot R - n)$\;
      $\widehat{K}[l] \gets \widehat{K}[l] + \frac{L'}{n L} \cdot \widehat{\mathcal{S}}_\mathcal{D}[i, h_i(\bm{q})]$\;
    }
  }
  \Return $\widehat{\mathrm{KDE}}_{\mathcal{D}}(\bm{q}) \gets \mathrm{Median}(\widehat{K}[1], \cdots, \widehat{K}[L'])$\;
\end{algorithm}
