
\section{Our Data Structures}\label{app:data}
In this section, we describe our data structures in detail. Starting with the initialize part in Section~\ref{app:data_structure_init}, we state the result of space storage and running time for Initialize in \textsc{DynamicKDE}. In Section~\ref{app:data_structure_update}, we demonstrate the running time for the update part in our data structure. Section~\ref{app:data_structure_query} presents the running time for the query procedure. Finally, we study the LSH data structure in Section~\ref{app:data_structure_lsh}. It is an important member in the \textsc{DynamicKDE} structure and fundamental to the implementation of all three procedures above.


\subsection{Initialize part of data structure}\label{app:data_structure_init}

In this section, we describe the space storage and running time of \textsc{Initialize} part of our data structure \textsc{DynamicKDE}.

We start by showing the space storage of \textsc{LSH} structure.

\begin{lemma}[Space storage of \textsc{LSH}, formal version of Lemma~\ref{lem:LSH_storage}]\label{lem:LSH_storage_formal}
Given data set $\{x_i\}_{i\in[n]}\subset\R^d$, parameter $L,k\in\mathbb{N}_+$, the \textsc{Initialize} (Algorithm~\ref{alg:LSH_public_app}) of the data-structure \textsc{LSH} uses space
\begin{align*}
    O(Lkdn^{o(1)}+Ln)
\end{align*}
\end{lemma}
\begin{proof}
The space storage comes from two parts: \textsc{ChooseHashFunc} and \textsc{ConstructHashTable}.

{\bf Part 1.} \textsc{ChooseHashFunc}(line~\ref{lin:choose_hash_func}) takes $L,k$ as input. 

It has a for loop with $L$ iterations.

In each iteration, it samples $k$ functions(line~\ref{lin:LSH_sample_k_functions}) from hash family $\mathcal{H}$ to create $\mathcal{H}_l$, which uses $O(kdn^{o(1)})$ space.

Thus the total space usage of \textsc{ChooseHashFunc} is $L\cdot O(kdn^{o(1)})=O(Lkdn^{o(1)})$.

{\bf Part 2.} \textsc{ConstructHashTable}(line~\ref{lin:construct_hash_table}) takes data set $\{x_i\}_{i\in[n]}$ and parameter $L$ as input.

It has two nested for loops.
\begin{itemize}
    \item The first for loop repeats $L$ iterations.
    \item The second for loop repeats $n$ iterations.
\end{itemize}

The space storage of the inner loop comes from line~\ref{lin:find_bucket_insert_element} and line~\ref{lin:aggregate_hash_table}, which is $O(1)$.

Thus the total space storage of \textsc{ConstructHashTable} is $L\cdot n\cdot O(1)=O(Ln)$.

The final space storage of \textsc{Initialize} is 
\begin{align*}
    &~{\bf Part 1}+{\bf Part 2}\\
    =&~O(Lkdn^{o(1)}+Ln)
\end{align*}

Thus, we complete the proof.
\end{proof} 

Using the above lemma, we state the space storage of our \textsc{DynamicKDE} structure.

\begin{lemma}[Space storage part of Theorem~\ref{thm:main_result}, formal version of Lemma~\ref{lem:init}]\label{lem:init_formal}
The \textsc{Initialize} of the data structure \textsc{DynamicKDE} (Algorithm~\ref{alg:dynamic_KDE_initialize}) uses space
\begin{align*}
    O(\epsilon^{-2}(\frac{1}{f_{\mathsf{KDE}}})^{o(1)}\cdot\log(1/f_{\mathsf{KDE}})\cdot \mathrm{cost}(K) \cdot\log^2 n\cdot(\frac{1}{f_{\mathsf{KDE}}}+n^{o(1)}\cdot\log^2 n))
\end{align*}
\end{lemma}

\begin{proof}
The space storage mainly comes from $K_1\cdot R$ copies of $\mathcal{H}$. 

Now let's consider the space storage of $\mathcal{H}$. By Lemma~\ref{lem:LSH_storage}, we replace $\{x_i\}_{i\in[n]},L,k$ with $P_r$ (line~\ref{lin:sample}), $K_{2,r}$ (line~\ref{lin:K_2}), $k_r$ (line~\ref{lin:k_j}) respectively. We have $|P_r|=O(\frac{1}{f_{\mathsf{KDE}}}),K_{2,r}=O(\mathrm{cost}(K)\cdot\log n)$ and $k_r=O(\log n)$. Thus the total space usage of $\mathcal{H}$ is
\begin{align}\label{eq:LSH_space_storage}
    & ~ O(Lkdn^{o(1)}+Ln)
    \\ 
    = & ~O(\mathrm{cost}(f)n^{o(1)}\cdot\log^3 n+ \mathrm{cost}(f)(\frac{1}{f_{\mathsf{KDE}}})\cdot \log n)
\end{align}

The total space storage of \textsc{Initialize} of the data structure \textsc{DynamicKDE} is
\begin{align*}
    &~K_1\cdot R\cdot O(Lkdn^{o(1)}+Ln)\\
    =&~O(K_1\cdot R\cdot \mathrm{cost}(K)\log n\cdot(\frac{1}{f_{\mathsf{KDE}}}+n^{o(1)}\cdot\log^2 n))\\
    =&~O(\epsilon^{-2}(\frac{1}{f_{\mathsf{KDE}}})^{o(1)}\cdot\log(1/f_{\mathsf{KDE}})\cdot \mathrm{cost}(K) \cdot\log^2 n\cdot(\frac{1}{f_{\mathsf{KDE}}}+n^{o(1)}\cdot\log^2 n))
\end{align*}
where the first step follows from Eq.~\eqref{eq:LSH_space_storage}, the last step follows from $K_1=O(\epsilon^{-2} (\frac{1}{f_{\mathsf{KDE}}})^{o(1)}\cdot\log n)$ and $R=O(\log(1/f_{\mathsf{KDE}}))$.

Thus, we complete the proof.
\end{proof}

Next, we show an upper bound on running time for \textsc{Initialize} in \textsc{LSH} data structure.

\begin{lemma}[Upper bound on running time of \textsc{Initialize} of the data-structure \textsc{LSH}, formal version of Lemma~\ref{lem:LSH_initialize_upper_bound} ]\label{lem:LSH_initialize_upper_bound_formal}

Given input data points $\{x_i\}_{i\in[n]}\subset \mathbb{R}^d$, parameters $k,L\in \mathbb{N}_+$, LSH parameters $p_{\mathrm{near}},p_{\mathrm{far}} \in [0,1],c\in[1,\infty), r\in \R_+$ and kernel $f$, the \textsc{Initialize} of the data-structure \textsc{LSH}(Algorithm~\ref{alg:LSH_public_app}) runs in time
\begin{align*}
    O(L\cdot(kdn^{o(1)}+dn^{1+o(1)}+n\log n))
\end{align*}
\end{lemma}
\begin{proof}
This procedure consists of two parts.
 

{\bf Part 1.} We invoke \textsc{ChooseHashFunc} procedure with parameters $k,L$ (line~\ref{lin:LSH_intialize_choose_hash_func}). The \textsc{ChooseHashFunc} procedure has one for loop with $L$ iterations.

Now let's consider the running time in line~\ref{lin:LSH_sample_k_functions}, which is the running time in each iteration. In line~\ref{lin:LSH_sample_k_functions}, we sample $k$ hash functions from hash family $\mathcal{H}$, which takes $O(k\cdot dn^{o(1)})$ time.

Thus the total running time for {\bf Part 1} is
\begin{align*}
    O(Lkdn^{o(1)})
\end{align*}


{\bf Part 2.} We invoke \textsc{ConstructHashTable} procedure with data set $\{x_i\}_{i\in[n]}$. This procedure has two nested for loops.
\begin{itemize}
    \item The first loop repeats $L$ iterations
    \item The second loop repeats $n$ iterations
\end{itemize}

Now let's consider the running time from line~\ref{lin:find_bucket_insert_element} to line~\ref{lin:aggregate_hash_table}, which is the time for each inner loop. 
\begin{itemize}
    \item line~\ref{lin:find_bucket_insert_element}: We first evaluate $\mathcal{H}_l(x_i)$, which takes $O(dn^{o(1)})$. Then we insert $x_i$ in the bucket $\mathcal{H}_l(x_i)$, which takes $O(\log n)$ time.
    \item line~\ref{lin:aggregate_hash_table} takes $O(1)$ time.
\end{itemize}

The running time from line~\ref{lin:find_bucket_insert_element} to line~\ref{lin:aggregate_hash_table} is $O(dn^{o(1)}+\log n)$

The total running time for {\bf Part 2} is
\begin{align*}
    O(Ln\cdot(dn^{o(1)}+\log n))
\end{align*}

{\bf Putting it all together.} We prove that the \textsc{Initialize} of the data-structure \textsc{LSH}(Algorithm~\ref{alg:LSH_public_app}) runs in time
\begin{align*}
    &~{\bf Part 1}+{\bf Part 2}\\
    =&~O(Lkdn^{o(1)})+O(Ln\cdot(dn^{o(1)}+\log n))\\
    =&~O(L\cdot(kdn^{o(1)}+dn^{1+o(1)}+n\log n))
\end{align*}

Thus, we complete the proof.

\end{proof}

Combining the results above, we can demonstrate the running time of \textsc{Initialize} in \textsc{DynamicKDE} in the following lemma.

\begin{lemma}[The initialize part of Theorem~\ref{thm:main_result}, formal version of Lemma~\ref{lem:dynamic_KDE_initialize}]\label{lem:dynamic_KDE_initialize_formal}
Given $(f:\mathbb{R}^d\times \mathbb{R}^d \rightarrow [0,1], P\subset\mathbb{R}^d, \epsilon \in (0,1),f_{\mathsf{KDE}}\in[0,1])$, the \textsc{Initialize} of the data-structure \textsc{DynamicKDE} (Algorithm~\ref{alg:dynamic_KDE_initialize}) runs in time
\begin{align*}
    O(\epsilon^{-2}n^{1+o(1)}\cost(f)\cdot (\frac{1}{f_{\mathsf{KDE}}})^{o(1)}\log(1/f_{\mathsf{KDE}}) \cdot \log^2 n)
\end{align*}
\end{lemma}

\begin{proof}
The \textsc{Initialize} procedure has two nested for loops.
\begin{itemize}
    \item The first loop repeats $K_1=O(\epsilon^{-2}\log (n)\cdot f_{\mathsf{KDE}}^{-o(1)})$ iterations
    \item The second loop repeats $R=O(\log \frac{1}{f_{\mathsf{KDE}}})$ iterations
\end{itemize}

Now let's consider the running time from line~\ref{lin:c_ij} to line~\ref{lin:LSH_initialize_in_KDE}, which is the running time of the inner loop.
\begin{itemize}
    \item line~\ref{lin:c_ij} to line~\ref{lin:sample_prob} takes $O(\log(1/f_{\mathsf{KDE}}))$ time.
    \item line~\ref{lin:sample} takes $O(n)$ time.
    \item line~\ref{lin:LSH_initialize_in_KDE}:
    By Lemma~\ref{lem:LSH_initialize_upper_bound}, we replace $L$ with $K_{2,r}=O(\mathrm{cost}(K)\cdot \log n)$ and $k$ with $k_r=O(\log n)$ .
    
    Thus the running time of this line is
    \begin{align*}
        &~O(L\cdot(kdn^{o(1)}+dn^{1+o(1)}+n\log n))\\
        =&~O(\mathrm{cost}(K)\cdot \log n\cdot(n^{o(1)}\cdot\log^2  n+n^{1+o(1)}\cdot\log  n+n\cdot\log n))\\
        =&~O(n^{1+o(1)}\mathrm{cost}(K)\cdot\log^2 n)
    \end{align*}
    
\end{itemize}
where the first step follows from $K_{2,r}=O(\mathrm{cost}(K)\cdot \log n)$, $d=O(\log n)$ and $k_r=O(\log n)$, the second step follows from $O(n\log n)=O(n^{1+o(1)})$.

The running time from line~\ref{lin:c_ij} to line~\ref{lin:LSH_initialize_in_KDE} is
\begin{align*}
    &~O(\log(1/f_{\mathsf{KDE}}))+O(n)+O(n^{1+o(1)}\mathrm{cost}(K)\log^2 n)\\
    =&~O(n^{1+o(1)}\mathrm{cost}(K)\log^2 n)
\end{align*}

The final running time for \textsc{Initialize} procedure is
\begin{align*}
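    &~K_1\cdot R \cdot O(n^{1+o(1)}\mathrm{cost}(K)\cdot\log^2 n)\\
    =&~O(\epsilon^{-2}n^{1+o(1)}\cost(f)\cdot (\frac{1}{f_{\mathsf{KDE}}})^{o(1)}\cdot\log(1/f_{\mathsf{KDE}}) \cdot \log^3 n)\\
    =&~O(\epsilon^{-2}n^{1+o(1)}\cost(f)\cdot (\frac{1}{f_{\mathsf{KDE}}})^{o(1)}\cdot\log(1/f_{\mathsf{KDE}}) \cdot \log^2 n)
\end{align*}
where the first step uses $K_1=O(\epsilon^{-2}\cdot f_{\mathsf{KDE}}^{-o(1)}\cdot\log n)$ and $R=O(\log(1/f_{\mathsf{KDE}}))$, and the last step absorbs one factor of $\log n$ into $n^{o(1)}$.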

Thus, we complete the proof.
\end{proof}


\begin{algorithm}[!ht]\caption{Dynamic KDE, members and initialize part}\label{alg:dynamic_KDE_initialize}
\begin{algorithmic}[1]
\State {\bf data structure} \textsc{DynamicKDE}      \Comment{Theorem~\ref{thm:main_result}}
\State {\bf members}
    \State \hspace{4mm} $d \in \mathbb{N}_+$ \Comment{Dimension of data point}
    \State \hspace{4mm} For ${i\in[n]}$,$x_i \in \mathbb{R}^d$ \Comment{dataset $X$}
    \State \hspace{4mm} $K_1 \in \mathbb{N}_+$ \Comment{Number of repetitions}
    \State \hspace{4mm} $R \in \mathbb{N}_+$
    \State \hspace{4mm} For $a \in [K_1]$, $\wt{P}_a \subset \mathbb{R}^d$ \Comment{Sampled data points}
    \State \hspace{4mm} $K_2 \in \mathbb{N}_+$
    \State \hspace{4mm} For $a \in [K_1]$, $r \in [R]$, $\mathcal{H}_{a,r}\in \textsc{LSH}$ \label{lin:instance_LSH_app} \Comment{Instances from \textsc{LSH} class}
\State {\bf end members}

\State

\Procedure{\textsc{Initialize}}{$X \subset \R^d,\epsilon \in (0,1),f_{\mathsf{KDE}} \in [0,1]$} \Comment{Lemma~\ref{lem:init}}
    \State \Comment{$f_{\mathsf{KDE}}$ is a known quantity satisfying $f_{\mathsf{KDE}}\geq f_{\mathsf{KDE}}^{*}$}
    \State \Comment{$\epsilon$ represents the precision of estimation}
    \State $K_1\leftarrow C\cdot \epsilon^{-2} \log{n}\cdot f_{\mathsf{KDE}}^{-o(1)}$\label{lin:K1}
    \State $R \leftarrow \left\lceil\log{{1}/{f_{\mathsf{KDE}}}}\right\rceil$\label{lin:J}
    \For{$a=1,2,\cdots,K_1$}
        \For{$r=1,2,\cdots,R$}
            \For{$i=r+1,\cdots,R+1$}
                \State $c_{i,r}\leftarrow\min\{\frac{z_{i-1}}{z_{r}},\log^\frac{1}{7}n\}$\label{lin:c_ij} \Comment{$z_r$ is defined in Definition~\ref{def:level}}
            \EndFor
            \State $k_r\leftarrow \max_{i \in \{r+1, \cdots, R+1\}}\frac{1}{\log \frac{1}{p}}\lceil\frac{i-r}{c_{i,r}(1-o(1))}\rceil$\label{lin:k_j}
            \State $p_{\mathrm{near},r}\leftarrow p(z_r)$
            \State $K_{2,r} \leftarrow 100\log{n}\cdot p_{\mathrm{near},r}^{-k_r} $\label{lin:K_2}  
            \Comment{$p_{\mathrm{near},r},p_{\mathrm{far},r}$ are defined in Lemma~\ref{lem:p}}
            \State $p_{ \mathrm{sampling} }\leftarrow \min\{\frac{1}{2^{r}nf_{\mathsf{KDE}}},1\}$ \label{lin:sample_prob}
            \State $P_r\leftarrow$ sample each element in $X$ with probability $p_{\mathrm{sampling}}$.\label{lin:sample}
            \State $\mathcal{H}_{a,r}.\textsc{Initialize}(P_r,k_r,K_{2,r})$ \label{lin:LSH_initialize_in_KDE}
        \EndFor
        \State $\tilde{P}_{a}\leftarrow$ sample each element in $X$ with probability $\frac{1}{n}$ \Comment{Store $\tilde{P}_a$}
    \EndFor
\EndProcedure
\State {\bf end data structure}
\end{algorithmic}
\end{algorithm}

\begin{algorithm}[!ht]\caption{Dynamic KDE, update part}\label{alg:dynamic_KDE_update}
\begin{algorithmic}[1]
\State {\bf data structure} \textsc{DynamicKDE} \Comment{Theorem~\ref{thm:main_result}}
\State

\Procedure{\textsc{Update}}{$v \in \R^d, f_{\mathsf{KDE}} \in [0,1], i \in [n]$} \Comment{Lemma~\ref{lem:dynamic_KDE_update}}
    \For{$a=1,2,\cdots,K_1$}
        \For{$r=1,2,\cdots,R$}
            \State $\mathcal{H}_{a,r}.\textsc{UpdateHashTable}(v,i)$\label{update_hashtable_in_dynamic_KDE}
        \EndFor
    \EndFor
    \State $x_i\leftarrow v$
\EndProcedure
\State {\bf end data structure}
\end{algorithmic}
\end{algorithm}

\subsection{Update part of data structure}\label{app:data_structure_update}
The goal of this section is to prove Lemma~\ref{lem:dynamic_KDE_update}. Our Lemma~\ref{lem:dynamic_KDE_update_formal} in this section is the formal version of Lemma~\ref{lem:dynamic_KDE_update}. We present an auxiliary Lemma~\ref{lem:hash_update_formal} and then show how to use this auxiliary lemma to prove Lemma~\ref{lem:dynamic_KDE_update_formal}.

\begin{lemma}[Update time of LSH, formal version of Lemma~\ref{lem:hash_update}]\label{lem:hash_update_formal}
Given a data point $z\in\R^d$ and index $i\in[n]$, the \textsc{UpdateHashTable} of the data-structure \textsc{LSH} runs in (expected) time
\begin{align*}
    O(n^{o(1)}\cost(f)\cdot \log^2 n).
\end{align*}

\end{lemma}

\begin{proof}


This procedure has one for loop which repeats $L=O(\log n)$ iterations.
Now let us consider the running time from line~\ref{lin:insert} to line~\ref{lin:delete}, which is the time for each iteration.

\begin{itemize}
    \item line~\ref{lin:insert} takes $O(dn^{o(1)}\cost(f))$
    \item line~\ref{lin:delete} takes the same time as line~\ref{lin:insert}
\end{itemize}

The final running time
\begin{align*}
    &~L\cdot O(dn^{o(1)}\cost(f))\\
    =&~O(n^{o(1)}\cost(f)\cdot \log^2 n).
\end{align*}
where we use $L=O(\log n)$ and $d=O(\log n)$.

Thus, we complete the proof.

\end{proof}

\begin{lemma}[The update part of Theorem~\ref{thm:main_result}, formal version of Lemma~\ref{lem:dynamic_KDE_update}]\label{lem:dynamic_KDE_update_formal}
Given an update $z\in\R^d$ and index $i\in[n]$, the \textsc{Update} of the data-structure \textsc{DynamicKDE} (Algorithm~\ref{alg:dynamic_KDE_update}) runs in (expected) time
\begin{align*}
    O(\epsilon^{-2}n^{o(1)}\cost(f)\cdot (\frac{1}{f_{\mathsf{KDE}}})^{o(1)}\log(1/f_{\mathsf{KDE}}) \cdot \log^3 n).
\end{align*}
\end{lemma}

 

\begin{proof}
 

This algorithm has two nested for loops
\begin{itemize}
    \item The first loop repeats $K_1=O(\epsilon^{-2}\log (n)\cdot f_{\mathsf{KDE}}^{-o(1)})$ iterations
    \item The second loop repeats $R=O(\log \frac{1}{f_{\mathsf{KDE}}})$ iterations
\end{itemize}

Now let's consider the running time in line~\ref{update_hashtable_in_dynamic_KDE}, which is the time for each inner loop.

By Lemma~\ref{lem:hash_update}, line~\ref{update_hashtable_in_dynamic_KDE} takes $O(n^{o(1)}\cost(f)\cdot \log^2 n)$ time.

The final running time
\begin{align*}
    &~K_1\cdot R\cdot O(n^{o(1)}\cost(f)\cdot \log^2 n)\\
    =&~O(\epsilon^{-2}n^{o(1)}\cost(f)\cdot (\frac{1}{f_{\mathsf{KDE}}})^{o(1)}\log(1/f_{\mathsf{KDE}}) \cdot \log^3 n)
\end{align*}
where we use $K_1=O(\epsilon^{-2}\log (n)\cdot f_{\mathsf{KDE}}^{-o(1)})$ and $R=O(\log \frac{1}{f_{\mathsf{KDE}}})$.

Thus, we complete the proof.
\end{proof}


\subsection{Query part of data structure}\label{app:data_structure_query}
The goal of this section is to prove Lemma~\ref{lem:dynamic_KDE_query_formal}.
Our Algorithm~\ref{alg:dynamic_KDE_query} is for querying the approximated kernel density at point $q$, and Lemma~\ref{lem:dynamic_KDE_query_formal} specifies the running time for the query operation.
In order to prove this lemma, we list and prove a few auxiliary lemmas.


\begin{algorithm}[!ht]\caption{Dynamic KDE, query part}\label{alg:dynamic_KDE_query}
\begin{algorithmic}[1]
\State {\bf data structure} \textsc{DynamicKDE} \Comment{Theorem~\ref{thm:main_result}}
\State
 
\Procedure{\textsc{Query}}{$q\in \mathbb{R}^d, \epsilon \in (0,1),f_{\mathsf{KDE}} \in [0,1]$} 
 
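    \For{$a=1,2,\cdots,K_1$}\label{lin:first_loop}
        \State $\mathcal{S}\leftarrow\emptyset$ \Comment{Reset the set of recovered points for each repetition}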
        \For{$r=1,2,\cdots,R$}\label{lin:second_loop}
            \State $\mathcal{H}_{a,r}.\textsc{Recover}(q)$\label{lin:evaluate_recover}
            \State $\mathcal{S} \leftarrow \mathcal{S} \cup (\mathcal{H}_{a,r}.\mathcal{R}\cap L_r) $\label{lin:choose_L_j_point}
        \EndFor\label{lin:end_second_loop}
        \State $\mathcal{R}_{R+1}\leftarrow$ recover points in $L_{R+1}\cap\tilde{P}_{a}$\label{lin:recover_point_J+1} \Comment{Recover by calculating $w$ directly.}
        \State $\mathcal{S} \leftarrow \mathcal{S}\cup\mathcal{R}_{R+1}$\label{lin:add_point_to_S}
        \For{$x_{i}\in \mathcal{S}$} \label{lin:third_loop} 
            \State $w_{i}\leftarrow f(x_{i},q)$
            \If{$x_{i}\in L_{r}$ for some $r\in[R]$}
                \State $p_{i}\leftarrow\min\{ \frac{1}{2^{r} n f_{\mathsf{KDE}} },1\}$
            \ElsIf{$x_{i} \in X \setminus \bigcup_{ r \in [R] } L_{r}$}
                \State $p_{i}\leftarrow\frac{1}{n}$
            \EndIf
        \EndFor\label{lin:end_third_loop}
        \State $T_{a}\leftarrow\sum_{x_{i}\in\mathcal{S}}\frac{w_i}{p_i}$ \label{lin:output_Z_a}
    \EndFor
    \State \Return $\mathrm{Median}\{T_{a}\}$
\EndProcedure
\State {\bf end data structure}
\end{algorithmic}
\end{algorithm}

We start by showing a lemma that states the expected number of points in each level set.

\begin{lemma}[expected number of points in level sets, formal version of Lemma~\ref{lem:expect_number_points_level_set}]\label{lem:expect_number_points_level_set_formal}
Given a query $q\in\R^d$ and a fixed $r\in[R]$, for any $i\in[R+1]$, the weight level $L_i$ contributes at most $1$ point in expectation to the hash bucket of query $q$.
\end{lemma}

\begin{proof}
We consider 2 cases:

{\bf Case 1.} $i\leq r$: By Lemma~\ref{lem:upper_bound_geometric}, we have $|L_i|\leq 2^i n f_{\mathsf{KDE}}$. In the $r$'th phase, we sample each point in the whole data set with probability $\min\{\frac{1}{2^r nf_{\mathsf{KDE}}},1\}$ to obtain a subset $P_r$ (Algorithm~\ref{alg:dynamic_KDE_initialize} line~\ref{lin:sample}). Then
\begin{align*}
    &~\E[|\{x:x\in L_i\cap P_r\}|]\\
    \leq&~|L_i|\cdot \frac{1}{2^r n f_{\mathsf{KDE}}}\\
    \leq&~2^i n f_{\mathsf{KDE}}\cdot \frac{1}{2^r n f_{\mathsf{KDE}}}\\
    =&~2^{i-r}\\
    \leq&~1
\end{align*}
where the first step follows from sampling probability $\min\{\frac{1}{2^r n f_{\mathsf{KDE}}},1\}$, the second step follows from $|L_i|\leq 2^i n f_{\mathsf{KDE}}$, the third step follows from canceling $n f_{\mathsf{KDE}}$, the last step follows from $i\leq r$.

Thus, there is at most $1$  
sampled point from $L_i$ in expectation.
 
Then $L_i$ contributes at most $1$ point in the bucket of query $q$ in expectation. 
 
{\bf Case 2.} $i=r+1,\cdots,R + 1$: By Lemma~\ref{lem:upper_bound_geometric}, we have $|L_i|\leq2^i n f_{\mathsf{KDE}}$. The sampling rate in $r$'th phase is $\min\{\frac{1}{2^r n f_{\mathsf{KDE}}},1\}$ (Algorithm~\ref{alg:dynamic_KDE_initialize} line~\ref{lin:sample}). Then there are at most $2^{i-r}$ sampled points from $L_i$ in expectation. We set up LSH function such that the near distance is $z_r$ (Definition~\ref{def:level}). Also, we use (Algorithm~\ref{alg:dynamic_KDE_initialize} line~\ref{lin:k_j})
\begin{align*}
    k:=k_r:=\frac{1}{\log \frac{1}{p}}\max\limits_{i=r+1,\cdots,R+1}\lceil\frac{i-r}{c_{i,r}(1-o(1))}\rceil
\end{align*}
as the number of concatenations. By Lemma~\ref{lem:upper_bound_recovered_point}, $L_i$ contributes at most $1$ point in the bucket of query $q$ in expectation.

The total number of points that $L_i$ contributes to hash bucket of $q$ is $\max\{${\bf Case 1},{\bf Case 2}$\}=1$ in expectation.

Thus, we complete the proof.
\end{proof}

Next, we present the running time of \textsc{Recover} procedure in the LSH data structure, which is an important part of \textsc{Query} procedure in \textsc{DynamicKDE}.

\begin{lemma}[running time for recover points given a query, formal version of Lemma~\ref{lem:recover_point_from_q}]\label{lem:recover_point_from_q_formal}
Given a query $q\in\R^d$ and $L,R,k\in \mathbb{N}_+$, the \textsc{Recover} of the data-structure \textsc{LSH} runs in (expected) time
\begin{align*}
    O(L k n^{o(1)}+LR)
\end{align*}
\end{lemma}
\begin{proof}
The procedure has one for loop with $L$ iterations.
In each iteration, the running time consists of two parts
\begin{itemize}
    \item The evaluation of $\mathcal{H}_l(q)$ takes $O(kn^{o(1)})$ time
    \item The \textsc{Retrieve} operation takes $O(|\mathcal{H}_l(q)|)$ time. By Lemma~\ref{lem:expect_number_points_level_set_formal}, $|\mathcal{H}_l(q)|=O(R)$
\end{itemize}

The running time of one iteration is $O(kn^{o(1)}+R)$.

The final running time of this procedure is $L\cdot O(kn^{o(1)}+R)=O(Lkn^{o(1)}+LR)$.

Thus, we complete the proof.
\end{proof}

Based on the running time of \textsc{Recover} in LSH above, we prove the running time of \textsc{Query} procedure in \textsc{DynamicKDE}. 

\begin{lemma}[Query part of Theorem~\ref{thm:main_result}, formal version of Lemma~\ref{lem:dynamic_KDE_query}]\label{lem:dynamic_KDE_query_formal}
Given a query $q\in\R^d$, the \textsc{Query} of the data-structure \textsc{DynamicKDE} (Algorithm~\ref{alg:dynamic_KDE_query}) runs in (expected) time 
\begin{align*}
    O(\epsilon^{-2}n^{o(1)}\log(1/f_{\mathsf{KDE}})\cdot f_{\mathsf{KDE}}^{-o(1)}\cdot\mathrm{cost}(K)\log^3 n).
\end{align*}
\end{lemma}
 

\begin{proof}[Proof]
First, the algorithm runs a for loop with $K_1=O(\epsilon^{-2}\log n\cdot f_{\mathsf{KDE}}^{-o(1)})$ iterations.

In each iteration, the running time consists of three parts. 

{\bf Part 1.} The running time from line~\ref{lin:second_loop} to line~\ref{lin:end_second_loop}, which is a for loop with $R$ iterations. In each iteration, the running time comes from
\begin{itemize}
    \item By Lemma~\ref{lem:recover_point_from_q}, we replace $L$ with $K_{2,r}=O(\mathrm{cost}(K)\log n)$, $R$ with $O(\log(1/f_{\mathsf{KDE}}))$ and $k$ with $k_r=O(\log n)$.
    Thus  line~\ref{lin:evaluate_recover} takes $O(n^{o(1)}\mathrm{cost}(K)\log^2 n)$ time.
    \item line~\ref{lin:choose_L_j_point} takes $O(|\mathcal{H}_{a,r}.\mathcal{R}|)$ time. By Lemma~\ref{lem:upper_bound_recovered_point}, $|\mathcal{H}_{a,r}.\mathcal{R}|=O(1)$. Thus the running time of line~\ref{lin:choose_L_j_point} is $O(1)$.
\end{itemize}

Thus the running time of this for loop is $R \cdot O(n^{o(1)}\mathrm{cost}(K)\log^2 n) = O(n^{o(1)}\log(1/f_{\mathsf{KDE}})\cdot\mathrm{cost}(K)\log^2 n)$, where we use $R = O(\log(1/f_{\mathsf{KDE}}))$.

{\bf Part 2.} The running time from line~\ref{lin:third_loop} to line~\ref{lin:end_third_loop}, which is a for loop with $|\mathcal{S}|$ iterations. In each iteration, the running time is $O(1)$. By Lemma~\ref{lem:upper_bound_recovered_point}, $|\mathcal{S}|=O(R)$. Thus the running time of this for loop is $O(\log(1/f_{\mathsf{KDE}}))$, where we use $R = O(\log(1/f_{\mathsf{KDE}}))$.

{\bf Part 3.} The running time of line~\ref{lin:recover_point_J+1},~\ref{lin:add_point_to_S} and~\ref{lin:output_Z_a} is $O(1)$

The final running time of \textsc{Query} is
\begin{align*}
    &~K_1\cdot ({\bf Part 1}+{\bf Part 2}+{\bf Part 3})\\
    =&~K_1\cdot O(n^{o(1)}\log(1/f_{\mathsf{KDE}})\cdot\mathrm{cost}(K)\log^2 n
    \\
    & ~ +O(\log(1/f_{\mathsf{KDE}}))+O(1))\\
    =&~O(\epsilon^{-2}n^{o(1)}\log(1/f_{\mathsf{KDE}})\cdot f_{\mathsf{KDE}}^{-o(1)}\cdot\mathrm{cost}(K)\log^3 n)
\end{align*}
where the first step follows directly from the running time of three parts, and the last step follows from $K_1=O(\epsilon^{-2}\log n\cdot f_{\mathsf{KDE}}^{-o(1)})$

Thus, we complete the proof.



 
 
 
 
\end{proof}

\subsection{LSH data structure}\label{app:data_structure_lsh}


In this section, we present the LSH data structures with the following procedures:

{\bf Initialize} Given a data set $\{x_1, \cdots, x_n\} \subset \R^d$ and integral parameters $k, L$, it first invokes private procedure \textsc{ChooseHashFunc}. The idea behind this is to amplify the ``sensitivity'' of hashing by concatenating $k$ basic hashing functions from the family $\mathcal{H}$ (Algorithm~\ref{alg:LSH_public_app} line~\ref{lin:basic_hash_family}) into a new function. Thus we obtain a family of ``augmented'' hash functions $\mathcal{H}_l, l \in [L]$ (Algorithm~\ref{alg:LSH_private_app} line~\ref{lin:LSH_sample_k_functions}). We follow by \textsc{ConstructHashTable} in which we hash each point $x_i$ using the hashing function $\mathcal{H}_{l}$. Then we obtain $L$ hash tables corresponding to $L$ hash functions which can be updated quickly.

{\bf Recover} Given a query $q \in \R^d$, it finds the bucket where $q$ is hashed by $\mathcal{H}_l$ and retrieves all the points in the bucket according to hashtable $\mathcal{T}_l$. This operation applies to all $L$ hashtables.

{\bf UpdateHashTable} Given a new data point $z \in \R^d$ and index $i \in [n]$, it repeats the following operations for all $l \in [L]$: find bucket $\mathcal{H}_l(z)$ and insert point $z$; find bucket $\mathcal{H}_l(x_i)$ and delete point $x_i$.


\begin{algorithm}[!ht]\caption{LSH, members and public procedures}\label{alg:LSH_public_app}
\begin{algorithmic}[1]
\State {\bf data structure} \textsc{LSH}
\State {\bf members}
    \State \hspace{4mm} $d,n \in \mathbb{N}_+$ \Comment{$d$ is dimension, $n$ is number of data points}
    \State \hspace{4mm} $K,L\in \mathbb{N}_+$ \Comment{$K$ is amplification factor, $L$ is number of repetition for hashing}
    \State \hspace{4mm} $p_{\mathrm{near}},p_{\mathrm{far}}\in (0,1)$ \Comment{Collision probability}
    \State \hspace{4mm} For $l \in [L]$, $\mathcal{T}_l:=[n]$ \Comment{Hashtable recording data points hashed by $\mathcal{H}_l$}
    \State \hspace{4mm} $\mathcal{R}:=[n]$ \Comment{retrieved points}
    \State \hspace{4mm} $\mathcal{H}:=\{h:\mathbb{R}^{d}\rightarrow[M]\}$ \Comment{$M$ is number of buckets for hashing family $\mathcal{H}$}
    \State \hspace{4mm} For $l \in [L]$, $\mathcal{H}_{l} \in \mathcal{H}^K$ \label{lin:basic_hash_family} \Comment{Family of amplified hash functions with at most $M^K$ non-empty buckets}
    \State \hspace{4mm} For $b \in [M^K]$, $\mathcal{S}_b:=$AVL tree \Comment{Use AVL tree to store points in bucket}
\State {\bf end members}


\State
\State {\bf public}
\Procedure{\textsc{Initialize}}{$\{x_i\}_{i\in[n]}\subset \mathbb{R}^d, k,L\in \mathbb{N}_+$}\label{lin:LSH_initialize}
 
    \State \textsc{ChooseHashFunc}($k,L$)\label{lin:LSH_intialize_choose_hash_func}
    \State \textsc{ConstructHashTable}($\{x_i\}_{i\in[n]}$)\label{lin:LSH_initialize_construct_hash_table}
\EndProcedure

\State
\Procedure{\textsc{Recover}}{$q\in\mathbb{R}^d$}\label{alg:LSH_recover}
    \State $\mathcal{R} \leftarrow \emptyset$
    \For{$l\in[L]$}
        \State $\mathcal{R}\leftarrow \mathcal{R} \cup \mathcal{T}_{l}$.\textsc{Retrieve}($\mathcal{H}_{l}(q)$) \Comment{Find the bucket $\mathcal{H}_{l}(q)$ in $\mathcal{T}_l$ and retrieve all points}\label{lin:LSH_retrieve}
    \EndFor
\EndProcedure

\State
\Procedure{\textsc{UpdateHashTable}}{$z\in\mathbb{R}^d, i\in[n]$}\label{lin:update_hashtable}
    \For{$l\in [L]$}
        \State $\mathcal{H}_{l}(z)$.\textsc{Insert}($z$)\label{lin:insert} \Comment{$\mathcal{H}_{l}(z)$ denotes the bucket that $z$ is mapped to}
        \State $\mathcal{H}_{l}(x_i)$.\textsc{Delete}($x_i$)\label{lin:delete}
    \EndFor
\EndProcedure
\State {\bf end data structure}
\end{algorithmic}
\end{algorithm}

Next, we provide a private procedure of LSH in Algorithm~\ref{alg:LSH_private_app}.




