% \documentclass{uai2022} % for initial submission
\documentclass[accepted]{uai2022} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2022} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2022} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams
\usepackage{xr}
\usepackage{multirow}
\usepackage{siunitx}
\usepackage{adjustbox}
\usepackage{multirow}
\usepackage{hyperref,xspace}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{amsthm}
\usepackage{color}
\usepackage{bbm}
\usepackage{mathtools}
\usepackage{framed}
\usepackage{graphicx}
\usepackage[boxed]{algorithm2e}
\usepackage{bbm}
\usepackage{balance}
\usepackage{verbatim}
\usepackage{paralist,enumitem}
\usepackage{thmtools, thm-restate}

\usepackage{float}
\newtheorem{thm}{Theorem}
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{prop}[theorem]{Proposition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{note}[theorem]{Note}
\newtheorem{obs}[theorem]{Observation}
\newtheorem{rem}[theorem]{Remark}
\newtheorem{cor}[theorem]{Corollary}
\newtheorem{innercustomthm}{Theorem}
\newtheorem{innercustomlemma}{Lemma}
\newenvironment{customthm}[1]
  {\renewcommand\theinnercustomthm{#1}\innercustomthm}
  {\endinnercustomthm}
\newenvironment{customlemma}[1]
  {\renewcommand\theinnercustomlemma{#1}\innercustomlemma}
  {\endinnercustomlemma}


%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example
\newcommand{\MSE}{\mathrm{MSE}}
\newcommand{\E}{\mathbb{E}}
\newcommand{\CBE}{\mathrm{CBE}}
\newcommand{\N}{\mathrm{N}}
\newcommand{\D}{\mathcal{D}}
\newcommand{\Var}{\mathrm{Var}}
\newcommand{\Cov}{\mathrm{Cov}}
\newcommand{\sign}{\mathrm{sign}}
\newcommand{\R}{\mathbb{R}}
\renewcommand{\O}{\tilde{O}}
\newcommand{\RMSE}{\mathrm{RMSE}}
\newcommand{\MLE}{\mathrm{MLE}}
\newcommand{\X}{\mathbf{X}}
\newcommand{\V}{\mathbf{V}}
\newcommand{\m}{\mathbf{m}}
\newcommand{\fa}{\lambda}
\newcommand{\balpha}{\boldsymbol{\alpha}}
\newcommand{\bbeta}{\boldsymbol{\beta}}
\newcommand{\Sign}{h}
\newcommand{\Signn}{h'}
\newcommand{\SRP}{\mathrm{SRP}}
\newcommand{\CSSRP}{\mathrm{CSSRP}}
\newcommand{\CSSRPL}{\mathrm{CSSRP-L}}
\newcommand{\SB}{\mathrm{SuperBit}}
\newcommand{\CS}{\textsc{Count-Sketch Sign-Random-Projection}}
\newcommand{\CSL}{\textsc{Count Sketch Signed Random Projection-L}}
\newcommand\numberthis{\addtocounter{equation}{1}\tag{\theequation}}

\newcommand{\thetaxe}{\theta_{\vec{x},\vec{e}}}
\newcommand{\thetaye}{\theta_{\vec{y},\vec{e}}}


\title{Improving Sign-Random-Projection via Count Sketch  \\(Supplementary Material)}

% The standard author block has changed for UAI 2022 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is authomatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:<s19021@students.iitmandi.ac.in>?Subject=Your UAI 2022 paper}{Punit Pankaj Dubey}{}}
\author[1]{Bhisham Dev Verma}
\author[1]{Rameshwar Pratap}
\author[2]{Keegan Kang}

% Add affiliations after the authors
\affil[1]{%
    % Computer Science Dept.\\
   Indian Institute of Technology Mandi, H.P., India
}
\affil[2]{%
    % Second Affiliation\\
  Bucknell University, Lewisburg, Pennsylvania, USA
}
% \affil[3]{%
%     Another Affiliation\\
%     Address\\
%   }

\begin{document}
\onecolumn
\maketitle


% \newpage
\section{Extended Experimental Results:}
This section presents extended experimental results comparing the baselines with the proposed methods~($\CSSRP$ and $\CSSRPL$) on two tasks: similarity search~(defined in Section~5.3 of the paper) and variance analysis via box plots~(defined in Section~5.4 of the paper). We summarize our findings for similarity search, measured by recall, in Figure~\ref{fig:Recall_appendix}, and for variance analysis via box plots in Figure~\ref{fig:boxplot_appendix}.

 \begin{figure*}[ht!]
   \centering
  \includegraphics[width=\linewidth]{Recall_supp.pdf}
  \caption{Comparison among the baselines on the task of top-$k$ similarity search. A higher value of recall indicates a  better performance.
 }
  \label{fig:Recall_appendix}
\end{figure*}

\begin{figure*}[ht!]
   \centering
  \includegraphics[width=\linewidth]{box_supp.pdf}
  \caption{Comparison among baselines on the task of variance analysis via box plot. The sampled  pairs are at angles $\ang{85}$ and $\ang{90}$,  respectively. A smaller interquartile range indicates lower variance. The dotted line represents the actual angle in degrees.}
  \label{fig:boxplot_appendix}
\end{figure*}



\section{Missing Proofs:}
In this section, we present the missing proofs from the main paper. For convenience, we also restate them here. \\


% \subsection*{Proof of Lemma $3$:}
%%%%% Commented proof of lemma 3

{\large \underline{\textbf{Proof of Lemma $3$:}}}\\
\begin{customlemma}{3}[Adapted from Lemma $4$ of~\cite{DBLP:conf/kdd/2006}]\label{lem:v_normal}
%\begin{lemma}\label{lem:v_normal}[Adapted from Lemma $4$ of~\cite{DBLP:conf/kdd/2006}]
Let $\vec{r} = (r_1, \ldots, r_{j}, \ldots, r_{D}) \in \mathbb R^D$ s.t.
\begin{align*}
r_j \sim \left\{ \begin{array}{r l}
1 & ~\text{with probability } \frac{1}{2K} \\
0 & ~\text{with probability } \frac{K-1}{K} \\
-1 & ~\text{with probability } \frac{1}{2K} \\
\end{array}\right.
\end{align*}
and $\vec{a}  \in \mathbb R^D$. Denote $\alpha = \sum_{j=1}^D r_ja_j = \langle \vec{r}, \vec{a} \rangle$. Then if $D\rightarrow \infty$ and $K=o(D)$, we have 
$
\alpha \xRightarrow[]{\mathcal{L}} \mathcal{N}\left(0,\frac{||\vec{a}||^2}{K} \right)
$
with the rate of convergence
\begin{align*}
|F_{\alpha}(y)-\Phi(y)|&\leq 0.8\sqrt{K}\frac{\sum_{i=1}^D|a_{i}|^3}{(\sum_{i=1}^D a_{i}^2)^{3/2}} = 0.8\sqrt{\frac{K}{D}}\frac{\mathbb E[|a_{i}|^3]}{(\mathbb E [a_{i}^2])^{3/2}}\rightarrow 0,
\end{align*}
where $\xRightarrow[]{\mathcal{L}}$ denotes ``convergence in distribution'', $F_{\alpha}(y)$ is the empirical cumulative distribution function of $\alpha$, and $\Phi(y)$ is the CDF of  $\mathcal{N}\left(0,\frac{||\vec{a}||^2}{K} \right)$.
%\end{lemma}
 \end{customlemma}

\begin{proof}
We know that 
\begin{align*}
\alpha &= \sum_{j=1}^{D} r_{j}a_{j},
\end{align*}
where 
\begin{align*}
r_j \sim \left\{ \begin{array}{r l}
1 & ~\text{with probability } \frac{1}{2K}, \\
0 & ~\text{with probability } \frac{K-1}{K}, \\
-1 & ~\text{with probability } \frac{1}{2K}. \\
\end{array}\right.
\end{align*}
Let
\begin{align*}
    z_{j} &= r_{j}a_{j}.
\end{align*}
Then 
\begin{align*}
    \E[z_{j}] &= \E[r_{j}a_{j}] =a_{j} \E[r_{j}] = 0.\\
    \Var[z_{j}] &= \E[(z_{j} - \E[z_{j}])^2] = \E[z_{j}^2] = \E[r_{j}^2 a_{j}^2] = \frac{a_{j}^2}{K}.\\
    \E[|z_{j}|^{2 + \delta}] &=  |a_{j}|^{2 +\delta} \E[ |r_{j}|^{2 +\delta} ] =  |a_{j}|^{2 +\delta} \left( 1 \times \frac{1}{K} + 0 \times \frac{K-1}{K} \right) = \frac{|a_{j}|^{2 +\delta}}{K}. 
\end{align*}
Let 
\begin{align*}
    S_{D}^2 &= \sum_{j=1}^{D} \Var[z_{j}] = \frac{\sum_{j=1}^{D}a_{j}^2}{K}.
\end{align*}

To prove that $\frac{\sum_{j=1}^{D} z_{j}}{S_{D}} \xRightarrow[]{\mathcal{L}} \mathcal{N}(0,1)$, we need to show that the following Lindeberg condition is satisfied:
 \begin{align}
    & \frac{1}{S_{D}^2} \sum_{j=1}^{D} \E \left[z_{j}^2; |z_{j}| > \epsilon S_{D} \right] \to 0 ~~ \text{ for any } \epsilon > 0.\label{eq:14}
\end{align}
Now, we compute the LHS of the Equation~\eqref{eq:14}:
\begin{align*}
    \frac{1}{S_{D}^2} \sum_{j=1}^{D} \E \left[ z_{j}^2; |z_{j}| > \epsilon S_{D} \right] &\leq \frac{1}{S_{D}^2} \sum_{j=1}^{D} \E \left[ \frac{|z_{j}|^{2+\delta}}{(\epsilon S_{D})^{\delta}} \right].\\
        &= \frac{1}{\frac{\sum_{j=1}^{D}a_{j}^2}{K}} \cdot \frac{1}{\epsilon^{\delta}} \cdot  \frac{\sum_{j=1}^{D} \frac{|a_{j}|^{2 +\delta}}{K}}{\left( \frac{\sum_{j=1}^{D}a_{j}^2}{K} \right)^{\frac{\delta}{2}}}.\\
    &= \frac{1}{\epsilon^{\delta}} \cdot  \frac{\sum_{j=1}^{D} \frac{|a_{j}|^{2 +\delta}}{K}}{\left( \frac{\sum_{j=1}^{D}a_{j}^2}{K} \right)^{\frac{2 +\delta}{2}}}.\\
    & = K^{\frac{\delta}{2}} \cdot \frac{1}{\epsilon^{\delta}} \cdot \frac{\sum_{j=1}^{D} \frac{|a_{j}|^{2 +\delta}}{D}}{\left( \frac{\sum_{j=1}^{D}a_{j}^2}{D} \right)^{\frac{2 +\delta}{2}}} \cdot \frac{1 }{D^{\frac{\delta}{2}}}.\\
    & = \left(\frac{K}{D} \right)^{\frac{\delta}{2}} \cdot \frac{1}{\epsilon^{\delta}} \cdot \frac{\E\left[|a_{j}|^{2 +\delta} \right]}{\left( \E[a_{j}^2] \right)^{\frac{2 +\delta}{2}}}.\\
    &  \to 0. \numberthis \label{eq:o(D)}
\end{align*}
Equation~\eqref{eq:o(D)} holds because $K = o(D)$. Therefore, for $K = o(D)$, by the Lindeberg central limit theorem~\citep{lindenberg_clt,feller-vol-2}, we have 
\begin{align*}
&\frac{\sum_{j=1}^{D} z_{j}}{S_{D}} = \frac{\sum_{j=1}^{D} r_{j} a_{j}}{\sqrt{\frac{\sum_{j=1}^{D} a_{j}^2}{K}}} = \frac{\alpha}{\sqrt{||\vec{a}||^2/K}} \xRightarrow[]{\mathcal{L}} \mathcal{N}(0,1). \\
& \alpha  \xRightarrow[]{\mathcal{L}} \mathcal{N} \left(0,\frac{||\vec{a}||^2}{K} \right). \numberthis \label{eq:o(D)_norm}
\end{align*}
It remains to find the rate of convergence. For this, we use the Berry--Esseen theorem~\citep{Berry_Esseen,feller-vol-2}.
Let us denote
\begin{align*}
   \rho_{D} &=  \sum_{j=1}^{D}\E\left[ |z_{j}|^3\right].\\
     &= \sum_{j=1}^{D}|a_{j}|^3 \E\left[|r_{j}|^3\right].\\
    &= \frac{\sum_{j=1}^{D}|a_{j}|^3}{K}
\end{align*}
Then, by the Berry--Esseen theorem, we have
\begin{align*}
   |F_{\alpha}(y)  - \Phi(y)|   &\leq 0.8 \frac{\rho_{D}}{S_{D}^3}.\\
    & = 0.8 \frac{\frac{\sum_{j=1}^{D}|a_{j}|^3}{K}}{\left( \frac{\sum_{j=1}^{D} a_{j}^2}{K}\right)^{\frac{3}{2}}}.\\
    &= 0.8 \times \sqrt{\frac{K}{D}} \frac{\sum_{j=1}^{D}|a_{j}|^3/D}{\left(\sum_{j=1}^{D} a_{j}^2/D\right)^{\frac{3}{2}}}. \\
    &= 0.8 \times\sqrt{\frac{K}{D}} \frac{\E[|a_{j}|^3]}{\left(\E[ a_{j}^2] \right)^{\frac{3}{2}}}. \\
    & \rightarrow 0  \qquad\text{ as } \quad D \rightarrow \infty. \numberthis \label{eq:o(D)_conv}
\end{align*}
Equation~\eqref{eq:o(D)_conv}  holds for $K = o(D)$. Equations~\eqref{eq:o(D)_norm} and~\eqref{eq:o(D)_conv} complete the proof of Lemma~\ref{lem:v_normal}.
 \end{proof}



%\subsection*{Proof of Theorem $6$:}

{\large \underline{\textbf{Proof of Theorem $6$:}}}\\
\begin{customthm}{6}\label{thm:prove_var_angle}
%\begin{theorem}\label{thm:prove_var_angle}
 Let $\vec{a}, \vec{b}\in \R^D$, and let $\Sign(\vec{a})$, $\Sign(\vec{b})$ be their $K$-dimensional binary vectors obtained via our proposal (Definition~2 in the paper). If $K=o(D)$, then as $D \rightarrow \infty$ we have the following:
  \begin{align*}
      &\Var\left[\frac{\pi}{K}||  h(\vec{a})-  h(\vec{b})||_1\right] \notag \\
      &=\frac{\pi^2}{K^2}\left(\frac{K\theta_{(\vec{a}, \vec{b})}}{\pi}+K(K-1)\frac{\theta_{(\vec{a}, \vec{b})}}{\pi}\times \eta \right)-\theta_{(\vec{a}, \vec{b})}^2. 
 \end{align*}
 where, $k_1 \neq k_2$, $k_1, k_2 \in[K]$, and \\
 $\eta=\Pr\left[ \left(\Sign^{(k_2)}(\vec{a}) \neq \Sign^{(k_2)}(\vec{b}) \right) | \left(\Sign^{(k_1)}(\vec{a}) \neq \Sign^{(k_1)}(\vec{b}) \right) \right]$.
\end{customthm}
\begin{proof}
 We know that 
  \begin{align*}
     \Var\left[\frac{\pi}{K}||  h(\vec{a})-  h(\vec{b})||_1\right]&=\frac{\pi^2}{K^2}\Var\left[||  h(\vec{a})-  h(\vec{b})||_1\right]\\
     &= \frac{\pi^2}{K^2}\Var\left[\sum_{k=1}^{K} Y_{k}\right], \numberthis\\
    &\text{where  }Y_{k} := \mathbbm{1}_{\{ h^{(k)}(\vec{a}) \neq  h^{(k)}(\vec{b})\}}.
 \end{align*}
 We focus on the term
 \begin{align*}
     \Var\left[\sum_{k=1}^{K} Y_{k}\right]&=\E\left[\left(\sum_{k=1}^{K} Y_{k}\right)^2\right]-\left(\E\left[\sum_{k=1}^{K} Y_{k}\right]\right)^2.\\
     &=\E\left[ \sum_{k=1}^{K} Y_{k}^2+\sum_{k_1\neq k_2, k_1,k_2 \in [K]} Y_{k_1}Y_{k_2}\right]-\left(\E\left[\sum_{k=1}^{K} Y_{k}\right]\right)^2.\\
     &=\sum_{k=1}^{K}\E\left[  Y_{k}\right]+\sum_{k_1\neq k_2, k_1,k_2 \in [K]}\E\left[ Y_{k_1}Y_{k_2}\right]-\left(\E\left[\sum_{k=1}^{K} Y_{k}\right]\right)^2. \numberthis
 \end{align*}
 We compute the value of each term one-by-one as follows:
 \begin{align*}
     \sum_{k=1}^{K}\E\left[  Y_{k}\right]&=\sum_{k=1}^{K} \Pr\left[ h^{(k)}(\vec{a}) \neq  h^{(k)}(\vec{b}) \right].\\
     &=\sum_{k=1}^{K} \frac{\theta_{(\vec{a}, \vec{b})}}{\pi}\\
     &=\frac{K\theta_{(\vec{a}, \vec{b})}}{\pi}. \numberthis \label{eq:eq5555212}
 \end{align*}
Now, we compute the following
 \begin{align*}
     &\sum_{k_1\neq k_2, k_1,k_2 \in [K]}\E\left[ Y_{k_1}Y_{k_2}\right]=\sum_{k_1\neq k_2} \Pr\left[ \left(h^{(k_2)}(\vec{a}) \neq h^{(k_2)}(\vec{b}) \right) \cap \left(h^{(k_1)}(\vec{a}) \neq h^{(k_1)}(\vec{b}) \right) \right].\\
     &=\sum_{k_1\neq k_2, k_1,k_2 \in [K]} \Pr\left[ \left(h^{(k_2)}(\vec{a}) \neq h^{(k_2)}(\vec{b}) \right) | \left(h^{(k_1)}(\vec{a}) \neq h^{(k_1)}(\vec{b}) \right) \right]  \times\Pr\left[  \left(h^{(k_1)}(\vec{a}) \neq h^{(k_1)}(\vec{b}) \right) \right].\\
     &=\sum_{k_1\neq k_2, k_1,k_2 \in [K]} \frac{\theta_{(\vec{a}, \vec{b})}}{\pi}\cdot\Pr\left[ \left(h^{(k_2)}(\vec{a}) \neq h^{(k_2)}(\vec{b}) \right) | \left(h^{(k_1)}(\vec{a}) \neq h^{(k_1)}(\vec{b}) \right) \right]. \numberthis \label{eq:eqalpha1}
 \end{align*}
From linearity of expectation and Equation~\eqref{eq:eq5555212}, we have
 \begin{align*}
    \left(\E\left[\sum_{k=1}^{K} Y_{k}\right]\right)^2&=\left(\frac{K\theta_{(\vec{a}, \vec{b})}}{\pi} \right)^2.
\end{align*}
We denote $\eta := \Pr\left[ \left(\Sign^{(k_2)}(\vec{a}) \neq \Sign^{(k_2)}(\vec{b}) \right) | \left(\Sign^{(k_1)}(\vec{a}) \neq \Sign^{(k_1)}(\vec{b}) \right) \right]$ in Equation~\eqref{eq:eqalpha1}, where $\eta \in \left[0, \frac{\theta_{(\vec{a}, \vec{b})}}{\pi}\right]$.
 Therefore
 \begin{align*}
   \Var\left[\sum_{k=1}^{K} Y_k \right]&=\frac{K\theta_{(\vec{a}, \vec{b})}}{\pi}+K(K-1)\frac{\theta_{(\vec{a}, \vec{b})}}{\pi}\times \eta-\left(\frac{K\theta_{(\vec{a}, \vec{b})}}{\pi} \right)^2
\end{align*}
Hence, the variance of our estimate is 
\begin{align*}
    \Var\left[\frac{\pi}{K}|| h(\vec{a})- h(\vec{b})||_1\right]&=\frac{\pi^2}{K^2}\left(\frac{K\theta_{(\vec{a},\vec{b})}}{\pi}+K(K-1)\frac{\theta_{(\vec{a}, \vec{b})}}{\pi}\times \eta - \left(\frac{K\theta_{(\vec{a}, \vec{b})}}{\pi} \right)^2\right).\\
    &=\frac{\pi^2}{K^2}\left(\frac{K\theta_{(\vec{a}, \vec{b})}}{\pi}+K(K-1)\frac{\theta_{(\vec{a}, \vec{b})}}{\pi}\times \eta \right)-\theta_{(\vec{a}, \vec{b})}^2.\numberthis\label{eq:var_our_estimator}
\end{align*}
% The variance of $\SRP$ estimator is given as 
% \begin{align}
% \Var[X]&=\frac{\pi^2}{K^2}\times\left({ \frac{K\theta_{(\vec{a},\vec{b})}}{\pi}}+K(K-1)\times\left(\frac{\theta_{(\vec{a},\vec{b})}}{\pi}\right)^2\right)-{\theta_{(\vec{a},\vec{b})}}^2.
% \end{align}
Equation~\eqref{eq:var_our_estimator} completes a proof of the theorem.
 \end{proof}

{\large \underline{\textbf{Proof of Theorem $9$:}}}\\

In order to prove Theorem~\ref{thm:cssrpl_esitmation}, we require the following lemma which is similar to Lemma~\ref{lem:v_normal}. We first complete its proof and then conclude with the proof of Theorem~\ref{thm:cssrpl_esitmation}.


\begin{customlemma}{12}[Adapted from Lemma $4$ of~\cite{DBLP:conf/kdd/2006}]\label{lem:v_normal_l}
Let $\vec{r}' = (r_1', \ldots, r_{j}', \ldots, r_{D}') \in \mathbb R^D$ s.t.
\begin{align*}
r_j' \sim \left\{ \begin{array}{r l}
1 & ~\text{with probability } \frac{l}{2K} \\
0 & ~\text{with probability } \frac{K-l}{K} \\
-1 & ~\text{with probability } \frac{l}{2K} \\
\end{array}\right.
\end{align*}
and $\vec{a}  \in \mathbb R^D$. Denote $\alpha' = \sum_{j=1}^D r_j'a_j = \langle \vec{r}', \vec{a} \rangle$. Then if $D\rightarrow \infty$ and $K=o(lD)$, we have 
$
\alpha' \xRightarrow[]{\mathcal{L}} \mathcal{N}\left(0,\frac{l||\vec{a}||^2}{K} \right)
$
with the rate of convergence
{
\begin{align*}
|F_{\alpha'}(y)-\Phi(y)|\leq 0.8\sqrt{\frac{K}{l}}\frac{\sum_{i=1}^D|a_{i}|^3}{(\sum_{i=1}^D a_{i}^2)^{3/2}}= 0.8\sqrt{\frac{K}{lD}}\frac{\mathbb E[|a_{i}|^3]}{(\mathbb E [a_{i}^2])^{3/2}}\rightarrow 0,
\end{align*}
}
where $\xRightarrow[]{\mathcal{L}}$ denotes ``convergence in distribution'', $F_{\alpha'}(y)$ is the empirical cumulative distribution function of $\alpha'$, and $\Phi(y)$ is the CDF of  $\mathcal{N}\left(0,\frac{l||\vec{a}||^2}{K} \right)$.
\end{customlemma}
\begin{proof}
We know that 
\begin{align*}
\alpha' &= \sum_{j=1}^{D} r_{j}'a_{j},
\end{align*}
where 
\begin{align*}
r_j' \sim \left\{ \begin{array}{r l}
1 & ~\text{with probability } \frac{l}{2K}, \\
0 & ~\text{with probability } \frac{K-l}{K}, \\
-1 & ~\text{with probability } \frac{l}{2K}. \\
\end{array}\right.
\end{align*}
Let
\begin{align*}
    z_{j}' &= r_{j}'a_{j}.
\end{align*}
Then 
\begin{align*}
    \E[z_{j}'] &= \E[r_{j}'a_{j}] =a_{j} \E[r_{j}'] = 0.\\
    \Var[z_{j}'] &= \E[(z_{j}' - \E[z_{j}'])^2] = \E[z_{j}'^2] = \E[r_{j}'^2 a_{j}^2] =\frac{l}{K} a_{j}^2. \\
    \E[|z_{j}'|^{2 + \delta}] &=  |a_{j}|^{2 +\delta} \E[ |r_{j}'|^{2 +\delta} ] =  |a_{j}|^{2 +\delta} \left( 1 \times \frac{l}{K} + 0 \times \frac{K-l}{K} \right) =  \frac{l}{K}|a_{j}|^{2 +\delta}. 
\end{align*}
Let 
\begin{align*}
    S_{D}'^2 &= \sum_{j=1}^{D} \Var[z_{j}'] = \frac{l}{K} \sum_{j=1}^{D}a_{j}^2.
\end{align*}

To prove that $\frac{\sum_{j=1}^{D} z_{j}'}{S_{D}'} \xRightarrow[]{\mathcal{L}} \mathcal{N}(0,1)$, we need to show that the following Lindeberg condition~\citep{lindenberg_clt,feller-vol-2} is satisfied:
 \begin{align}
     &\frac{1}{S_{D}'^2} \sum_{j=1}^{D} \E \left[z_{j}'^2; |z_{j}'| > \epsilon S_{D}' \right] \to 0 ~~ \text{ for any } \epsilon > 0. \numberthis\label{eq:46}
\end{align}
Now, we compute the LHS of the Equation~\eqref{eq:46}:
\begin{align*}
     \frac{1}{S_{D}'^2} \sum_{j=1}^{D} \E \left[ z_{j}'^2; |z_{j}'| > \epsilon S_{D}' \right] & \leq \frac{1}{S_{D}'^2} \sum_{j=1}^{D} \E \left[ \frac{|z_{j}'|^{2+\delta}}{(\epsilon S_{D}')^{\delta}} \right]\\
    &= \frac{1}{\frac{l}{K}\sum_{j=1}^{D}a_{j}^2} \cdot \frac{1}{\epsilon^{\delta}} \cdot  \frac{ \frac{l}{K} \sum_{j=1}^{D}|a_{j}|^{2 +\delta}}{\left( \frac{l}{K}\sum_{j=1}^{D}a_{j}^2 \right)^{\frac{\delta}{2}}}.\\
    &= \frac{1}{\epsilon^{\delta}} \cdot  \frac{\frac{l}{K} \sum_{j=1}^{D} |a_{j}|^{2 +\delta}}{\left( \frac{l}{K}  \sum_{j=1}^{D}a_{j}^2 \right)^{\frac{2 +\delta}{2}}}.\\
    & = \left(\frac{K}{l} \right)^{\frac{\delta}{2}} \cdot \frac{1}{\epsilon^{\delta}} \cdot \frac{\sum_{j=1}^{D} \frac{|a_{j}|^{2 +\delta}}{D}}{\left( \frac{\sum_{j=1}^{D}a_{j}^2}{D} \right)^{\frac{2 +\delta}{2}}} \cdot \frac{1 }{D^{\frac{\delta}{2}}}.\\
    & = \left(\frac{K}{lD} \right)^{\frac{\delta}{2}} \cdot \frac{1}{\epsilon^{\delta}} \cdot \frac{\E\left[|a_{j}|^{2 +\delta} \right]}{\left( \E[a_{j}^2] \right)^{\frac{2 +\delta}{2}}}.\\
    &  \to 0. \numberthis \label{eq:o(D)_l}
\end{align*}
Equation \eqref{eq:o(D)_l} holds when $K = o(lD)$. Therefore, for $K = o(lD)$, due to Lindeberg Central Limit theorem~\citep{lindenberg_clt,feller-vol-2}, we have 
\begin{align*}
&\frac{\sum_{j=1}^{D} z_{j}'}{S_{D}'} = \frac{\sum_{j=1}^{D} r_{j}' a_{j}}{\sqrt{\frac{l}{K}\sum_{j=1}^{D} a_{j}^2}} = \frac{\alpha'}{\sqrt{l||\vec{a}||^2/K}} \xRightarrow[]{\mathcal{L}} \mathcal{N}(0,1). \\
& \alpha'  \xRightarrow[]{\mathcal{L}} \mathcal{N} \left(0,\frac{l||\vec{a}||^2}{K} \right). \numberthis \label{eq:o(D)_l_nor}
\end{align*}
It remains to find the rate of convergence. For this, we use the Berry--Esseen theorem~\citep{Berry_Esseen,feller-vol-2}.
Let us denote
\begin{align*}
   \rho_{D}' &=  \sum_{j=1}^{D}\E\left[ |z_{j}'|^3\right].\\
     &= \sum_{j=1}^{D}|a_{j}|^3 \E\left[|r_{j}'|^3\right].\\
    &= \frac{l}{K}\sum_{j=1}^{D}|a_{j}|^3.
\end{align*}
From Berry Esseen theorem~\citep{Berry_Esseen,feller-vol-2}, we have
\begin{align*}
    &|F_{\alpha'}(y)  - \Phi(y)|  \leq 0.8 \frac{\rho_{D}'}{S_{D}'^3}.\\
    & = 0.8 \frac{\frac{l}{K}\sum_{j=1}^{D}|a_{j}|^3}{\left( \frac{l}{K} \sum_{j=1}^{D} a_{j}^2 \right)^{\frac{3}{2}}}.\\
    &= 0.8 \times \sqrt{\frac{K}{lD}} \frac{\sum_{j=1}^{D}|a_{j}|^3/D}{\left(\sum_{j=1}^{D} a_{j}^2/D\right)^{\frac{3}{2}}}. \\
    &= 0.8 \times\sqrt{\frac{K}{lD}} \frac{\E[|a_{j}|^3]}{\left(\E[ a_{j}^2] \right)^{\frac{3}{2}}}. \\
    & \rightarrow 0 \qquad \text{as} \quad D \rightarrow \infty. \numberthis \label{eq:o(D)_l_conv}
\end{align*}
Equation~\eqref{eq:o(D)_l_conv} holds for $K = o(lD)$. Equations~\eqref{eq:o(D)_l_nor} and~\eqref{eq:o(D)_l_conv} complete the proof of Lemma~\ref{lem:v_normal_l}.
 \end{proof}


We now complete a proof of Theorem~\ref{thm:cssrpl_esitmation}.
\begin{customthm}{9}\label{thm:cssrpl_esitmation}
 Let $\vec{a}, \vec{b}\in \R^D$, and $\Signn(\vec{a})$, $\Signn(\vec{b})$ be their $K$-dimensional binary vector obtained via our improved estimator proposal (stated in Definition~8 in the paper). If $K=o(lD)$, then as $D \rightarrow \infty$ we have the following
  \begin{align*}
     \E\left[\frac{\pi}{K}|| \Signn(\vec{a})- \Signn(\vec{b})||_1\right]=\theta_{(\vec{a}, \vec{b})}.
 \end{align*}
 \end{customthm}


\begin{proof}
Let $R'$ be a $K \times D$ projection matrix~(defined in Definition~8 in the paper) such that each column of $R'$ has exactly $l$ non-zero entries. These $l$ positions are sampled uniformly at random, and each of them takes a value in $\{\pm 1\}$ with probability $1/2$ each. We write $R'$ in terms of its rows as
 \begin{align}
     R' &= \begin{bmatrix}
     \vec{r}'_{1} \\
     \vdots\\
     \vec{r}'_{k}\\
     \vdots\\
     \vec{r}'_{K}
     \end{bmatrix}_{K\times D}.\label{eq:rrr}
 \end{align}


We first consider each row $\vec{r}'_k, 1 \leq k \leq K$ of the random matrix in  Equation~\eqref{eq:rrr}. The goal is to find the distribution of each $\vec{r}'_k$, and hence compute
\begin{align*}
\mathbb E\left[ \sum_{k=1}^K | \Signn^{(k)}(\vec{a}) - \Signn^{(k)}(\vec{b}) | \right] &  = \sum_{k=1}^K \mathbb E\left[ | \Signn^{(k)}(\vec{a}) - \Signn^{(k)}(\vec{b}) |  \right].
\end{align*}
Let us denote $Z'_k := | \Signn^{(k)}(\vec{a}) - \Signn^{(k)}(\vec{b})|$. Although the $Z'_k$ are not independent due to our construction of $R'$, by linearity of expectation it suffices to consider how each individual row $\vec{r}'_k$ is distributed.

When $k = 1$, we have that each entry in $\vec{r}'_1$ comes from a sparse Bernoulli distribution with 
\begin{align} \label{eq:SBD_l}
r'_{1j} \sim \left\{ \begin{array}{r l}
1 & \text{with probability } \frac{l}{2K} \\
0 & \text{with probability } \frac{K-l}{K} \\
-1 & \text{with probability } \frac{l}{2K}.
\end{array}\right.
\end{align} 
where $\mathbb E[r'_{1j}] = 0$, with $\text{Var}[r'_{1j}] = \frac{l}{K}$. Here, we note that each entry in $\vec{r}'_1$ is i.i.d.

We can also compute the moment generating function of each $r'_{1j}$ and get
\begin{align} \label{eq:SBD_l_mgf}
\mathbb E\left[e^{sr'_{1j}}\right] = \frac{K-l}{K} + \frac{l\cdot(\exp\{s\} + \exp\{-s\})}{2K}.
\end{align}

Now let us consider the case $k =2$, and compute the moment generating function for each $r'_{2j}$. By using the Law of Total Expectation, we have that
\begin{align}
&\mathbb E\left[e^{sr'_{2j}}\right]  = \mathbb E\left[e^{sr'_{2j}}~|~r'_{1j} = 0\right] \mathbb P\left[r'_{1j} = 0\right]  + \mathbb E\left[e^{sr'_{2j}}~|~r'_{1j} = 1\right] \mathbb P\left[r'_{1j} = 1\right]  + \mathbb E\left[e^{sr'_{2j}}~|~r'_{1j} = -1\right] \mathbb P\left[r'_{1j} = -1\right]. \\
& = \left( \frac{l(\exp\{s\} + \exp\{-s\}) }{2(K-1)} + \frac{K-l-1}{K-1}\right) \frac{K-l}{K}   +\left( \frac{(l-1)(\exp\{s\} + \exp\{-s\}) }{2(K-1)} + \frac{K-l}{K-1}\right)\frac{l}{2K }  \notag \\
& \qquad+ \left( \frac{(l-1)(\exp\{s\} + \exp\{-s\}) }{2(K-1)} + \frac{K-l}{K-1}\right)\frac{l}{2K}. \\
\ & = l(K-1)\times\frac{(\exp\{s\} + \exp\{-s\} )}{2K(K-1)} + \frac{(K-l)(K-1)}{(K-1)K}.\\
\ & = \frac{l \cdot(\exp\{s\} + \exp\{-s\} )}{2K} + \frac{(K-l)}{K}.
\end{align}

which is the same moment generating function as the Sparse Bernoulli distribution mentioned in Equation~\eqref{eq:SBD_l}. 

Now consider $\vec{r}'_k, 2 < k \leq K-l$, and suppose we have seen $l'$ non-zero entries so far. Denote $\lambda = \exp\{s\} + \exp\{-s\}$. 

If we have seen $l'$ non-zero entries so far, then we have $\max(l-l',0)$ non-zero entries left to place among the remaining $K-k+1$ positions. For ease of notation, we denote the probability of drawing a non-zero value for our $k^\text{th}$ entry by $p^{(l-l')}_{(K-k + 1)} = \frac{\max(l-l',0)}{(K-k + 1)}$; each of the signs $\pm 1$ then occurs with probability $\frac{1}{2}p^{(l-l')}_{(K-k + 1)}$.

Then we can write
\begin{align*}
\mathbb E\left[e^{sr'_{kj}}\right] 
& = \sum_{i=0}^{l'} \Big(\mathbb E\left[e^{sr'_{kj}}~|~ \text{$i$ non-zeroes for $r'_{k'j}, k' < k$} \right]  \times \mathbb P\left[  \text{$i$ non-zeroes for $r'_{k'j}, k' < k$}  \right]  \Big). \\
\ & = \sum_{i=0}^{l'} \bigg[ \left(~\frac{1}{2}\lambda p^{(l-i)}_{(K-k + 1)} + \left(1-p^{(l-i)}_{(K-k + 1)} \right)  ~\right) \times \mathbb P\left[  \text{$i$ non-zeroes for $r'_{k'j}, k' < k$}  \right] \bigg].
\end{align*}
Consider each term in the above summation. For the right term, we have
\begin{align*}
 &\sum_{i=0}^{l'} (1-p^{(l-i)}_{(K-k + 1)})\mathbb P\left[  \text{$i$ non-zeroes for $r'_{k'j}, k' < k$}  \right] \\
\ & = \sum_{i=0}^{l'} p^{(K-l - (k - 1 - i))}_{(K-k + 1)}\mathbb P\left[  \text{$k-1-i$ zeroes for $r'_{k'j}, k' < k$}  \right] \\
\ & = p^{K-l}_{K} \\
\ & = \frac{K-l}{K}. \numberthis
\end{align*}
The left term is more straightforward, and we have
\begin{align*}
\frac{1}{2} \lambda \sum_{i=0}^{l'} p^{(l-i)}_{(K-k + 1)} \mathbb P\left[  \text{$i$ non-zeroes for $r'_{k'j}, k' < k$}  \right]  = \frac{1}{2} \frac{l}{K} \lambda.
\end{align*}
Adding both terms together, we get the MGF of the Sparse Bernoulli distribution mentioned in Equation~\eqref{eq:SBD_l}. Using Lemma~\ref{lem:v_normal_l}, we can show that ${\alpha'}_{k} = \langle \vec{r'}_k, \vec{a} \rangle$  and ${\beta'}_{k} = \langle \vec{r'}_{k}, \vec{b} \rangle$ converge in distribution to zero-mean normal distributions with variances proportional to $||\vec{a}||^2$ and $||\vec{b}||^2$, respectively, as $D$ grows large. This fact, together with
 Lemma~4 (stated in the paper), concludes the proof of the theorem. 
\end{proof}


\section{Other details:}
 \subsection*{Guarantee of \textsc{Count-Sketch} algorithm:}
 The following theorem states that, for a pair of real-valued vectors, the inner product of their sketches obtained via \textsc{Count-Sketch} closely approximates the original pairwise inner product.
\begin{customthm}{13}[\cite{WeinbergerDLSA09,count_sketch}]\label{thm:FH_results}
Suppose the vectors $\vec{a}=(a_1,\ldots, a_D)$ and $\vec{b}=(b_1,\ldots, b_D)$ are
 compressed into the vectors 
  $\vec{\alpha}=(\alpha_1, \ldots, \alpha_k, \ldots, \alpha_{K})$  and $\vec{\beta}=(\beta_1, \ldots, \beta_k, \ldots, \beta_{K})$, respectively,   using the \textsc{Count-Sketch} algorithm, where $ k\in[K]$. Then, we have the following:
  \begin{align*}
      \E[\vec{\alpha}]&=\E[\vec{\beta}]=\vec{0}. \numberthis\label{eq:exp_alpha} \\
      \E[\langle \vec{\alpha}, \vec{\beta} \rangle]&=\langle\vec{a}, \vec{b}\rangle. \numberthis\label{eq:exp_alpha_beta}\\
      \Var[\langle\vec{\alpha}, \vec{\beta} \rangle] &=\frac{1}{K}\sum_{i\neq j, i,j=1}^D  \left({a_i^2b_j^2}+{a_ib_ia_jb_j}\right).
      \numberthis\label{eq:var_alpha_beta}
\end{align*}
\end{customthm}
\bibliography{dubey_233-supp}
\end{document}
