\documentclass[accepted]{uai2023} % for initial submission
% \documentclass[accepted]{uai2023} % after acceptance, for a revised
% version; also before submission to
% see how the non-anonymous paper
% would look like

%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2023} % ptmx math instead of Computer
% Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2023} % newtx fonts (improves upon
 % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams
\usepackage{amssymb}
\usepackage{bm}
\usetikzlibrary{quantikz}
\usepackage[ruled,linesnumbered]{algorithm2e}
\usepackage{algorithmic}
\usepackage[switch]{lineno}
\usepackage{accents}
\usepackage{amsthm}
%\usepackage{tikz}
%\usetikzlibrary{calc,positioning}

%% Self-defined macros
%\newcommand{\swap}[3][-]{#3#1#2} % just an example
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem{techlemma}{Technical Lemma}
\newtheorem{corollary}{Corollary}
\newtheorem{proposition}{Proposition}
\newtheorem{example}{Example}

% for cross referencing the main text
% PLEASE ONLY USE xr IN THE SUPPLEMENTARY MATERIAL. 
% In the main paper, hard code any cross-reference to the supplementary material. 
%\usepackage{xr} 
%\externaldocument{uai2023-template}

\newcommand{\zero}{|0\rangle}
\newcommand{\one}{|1\rangle}
\newcommand{\x}{|x\rangle}
\newcommand{\hn}{\accentset{\circ}{n}}
\newcommand{\hist}{\text{\rm \textbf{hist}}}
\newcommand{\hhist}{\widehat{\hist}}
\newcommand{\thist}{\overline{\hist}}
\newcommand{\calV}{\mathcal{V}}
\newcommand{\calG}{\mathcal{G}}
\newcommand{\calH}{\mathcal{H}}
\newcommand{\MOV}{\text{\rm MoV}}
\newcommand{\NMOV}{\text{\rm NMoV}_{\text{Q}}}
\newcommand{\NMOVC}{\text{\rm NMoV}_{\text{C}}}
\newcommand{\llceil}{\left\lceil}
\newcommand{\rrceil}{\right\rceil}
\newcommand{\lnorm}{\left|\left|}
\newcommand{\rnorm}{\right|\right|}
\newcommand{\qps}{\rm QPS}
\newcommand{\tone}{s}
\newcommand{\score}{\text{Score}}
\newcommand{\vv}{\boldsymbol{V}}
\newcommand{\SM}{\rm SM}
\newcommand{\NSM}{\rm NSM}
\newcommand{\prc}{\Pr\left[\text{\rm correct}\right]}
\newcommand{\nwi}{n_{\text{\rm win}}}
\newcommand{\nlo}{n_{\text{\rm lose}}}
\newcommand{\cp}{{\text{\rm Cap}}}

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
%\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{Accelerating Voting by Quantum Computation\\(Supplementary Material)}

% The standard author block has changed for UAI 2023 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is authomatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:<aoliu.cs@gmail.com>?Subject=Your UAI 2023 paper}{Ao Liu}{}}
\author[1]{Qishen Han}
\author[1]{Lirong Xia}
\author[2]{Nengkun Yu}
% Add affiliations after the authors
\affil[1]{%
    Department of Computer Science\\
    Rensselaer Polytechnic Institute\\
    Troy, NY, USA
}
\affil[2]{%
    Department of Computer Science\\
    Stony Brook University\\
    Stony Brook, NY, USA
}
  
\begin{document}
\maketitle
\begin{center}
    \textbf{\large The Appendix of UAI-23 Accepted Paper}\\ 
    \textbf{\large Accelerating Voting by Quantum Computation}
\end{center}
\appendix
\section{Implementation of Quantum Counting Algorithm.}
\label{apx:quantum}
In this section, we aim to introduce the implementation of the quantum part of Algorithm~1 from a more technical perspective. We will first introduce the basics of quantum computing. Then we will specify the implementation of circuits of quantum counting in Algorithm~1, and why they accelerate the voting process. 

\subsection{Quantum Basics.}
\label{apx:basic}
\noindent\textbf{Basic quantum computation.}
A quantum bit (or \emph{qubit} for short) is the counterpart of the classical \emph{bit}, which takes a deterministic binary value from $\{0,1\}$. A qubit, on the other hand, is represented by a linear combination of $\{\zero,\one\}$, which are the counterparts of $\{0,1\}$, respectively. That is, every qubit $|\psi\rangle$ is written as 
\begin{equation}\nonumber
|\psi\rangle = \alpha\zero + \beta\one,    
\end{equation}
where $\alpha$ and $\beta$ are complex numbers and are usually called amplitudes. If we measure the qubit, there is $|\alpha|^2$ probability to get $0$ and $|\beta|^2$ probability to get $1$. Naturally, we always have $|\alpha|^2+|\beta|^2 = 1$ because the probabilities should sum to $1$. Qubits sometimes are written as vectors to simplify notations. Formally, 
\begin{equation}\nonumber
\begin{bmatrix}
\alpha\\
\beta
\end{bmatrix} \triangleq \alpha\zero + \beta\one.   
\end{equation}
A system of $t > 1$ qubits is represented as a $2^t$-dimensional vector, where the $j$-th component of the vector (denoted as $\alpha_j$) represents the amplitude of $|j_1\cdots j_t\rangle$ (or $|j\rangle$), where $j_1\cdots j_t$ is the binary representation of $j$. Similar to the 1-qubit case, the probability of observing $j_1,\cdots,j_t$ from those $t$ qubits equals $|\alpha_j|^2$. %The phase of $\alpha_t$ (in Hilbert space) is called the phase of state $j_1\cdots j_t$. 

A quantum operation (quantum gate) $Q$ on $t$ qubits is denoted by a $2^t\times 2^t$ unitary matrix, which means the matrix's inverse is its Hermitian conjugate. Applying a quantum operation $Q$ on quantum state $|\psi\rangle$ is denoted by
\begin{equation}\nonumber
Q|\psi\rangle \triangleq \boldsymbol{Q}_{(2^t\times 2^t)} \; \vec{\psi}_{(2^t)},
\end{equation}
where the quantum operator $\boldsymbol{Q}_{(2^t\times 2^t)}$ is a $2^t\times 2^t$ unitary matrix and the quantum state $\vec{\psi}_{(2^t)}$ is a $2^t$-dimensional column vector.\\

\noindent\textbf{Quantum circuit of some useful quantum operators.\footnote{All quantum circuits of this paper are drawn using the Quantikz package~\citep{kay2018tutorial} for \LaTeX.}  } Quantum circuits run from the left-hand side to the right-hand side. For example, the following circuit means applying Hadamard gate $H$ on a quantum state $|\psi\rangle$.
\begin{equation}\nonumber
\begin{quantikz} 
& \ket{\psi}\; & \gate{H} & \qw 
\end{quantikz} \qquad \text{ where } \boldsymbol{H} = \frac{1}{\sqrt 2} \begin{bmatrix}
1 & 1\\
1 & -1
\end{bmatrix}.
\end{equation}

The quantum circuit notation
\begin{equation}\nonumber
\begin{quantikz} 
\ket{\psi}\; \qw & \meter{0/1} &  \measuretab{b}
\end{quantikz}
\end{equation}
denotes measuring the quantum state $|\psi\rangle$ in the $0/1$ basis ($b$ denotes the result of the measurement). Naturally, the complexities of quantum measurement and the Hadamard gate are both $\Theta(1)$.

Quantum oracle~\citep{berthiaume1994oracle,van1998quantum,kashefi2002comparison} is a widely-used operator to encode binary functions or binary information. Given $t$ qubits and a binary function $f:\{0,\cdots,2^{t}-1\}\mapsto \{0,1\}$, quantum oracle (based on function $f(\cdot)$) applies a phase shift of $-1 = e^{\pi i}$ if $f(x) = 1$ and does nothing otherwise. We can query oracle many times and regard the number of queries as the cost \citep{10.1145/237814.237866}. Formally,
\begin{equation}\nonumber
\left\{
\begin{array}{ll}
O_f\x = -\x & \text{if } f(x) = 1 \\
O_f\x = \x  & \text{otherwise}
\end{array}
\right..
\end{equation}

Suppose we have a quantum gate $G$ on $t$ qubits. The following operation is called \emph{controlled-$G$}.
\begin{equation}\nonumber
\begin{quantikz} 
\qw & \ctrl{1} & \qw \\
\qw & \gate{G} & \qw 
\end{quantikz} = \begin{bmatrix}
\boldsymbol{I}_{(2^t\times 2^t)} & \boldsymbol{0}_{(2^t\times 2^t)}\\
\boldsymbol{0}_{(2^t\times 2^t)} & \boldsymbol{G}_{(2^t\times 2^t)}
\end{bmatrix},
\end{equation}
where $\boldsymbol{I}$ denotes the identity matrix, and $\boldsymbol{0}$ denotes the zero matrix. %The controlled-$G$ gate can be implemented within the same complexity as the $G$ gate, which is $\Theta(1)$. We assume that the controlled-$G$ gate, once constructed, can be used for arbitrary times. 
To simplify notations, we also write
\begin{equation}\nonumber
\begin{quantikz} 
& \ctrl{1} & \qw \\
& \gate{G^a} & \qw 
\end{quantikz} = \begin{quantikz} 
& \ctrl{1} & \qw \ \ldots \ \qw & \ctrl{1} & \qw\\
& \gate{G} & \qw \ \ldots \ \qw & \gate{G} & \qw
\end{quantikz}\;{\text{(repeat } a \text{ times)} }.
\end{equation}

\subsection{Implementation of Quantum Counting Circuit.}
\label{apx:circuit}
Figure~\ref{fig:qc2} shows the quantum counting circuit, which is a combination of the Grover search algorithm~\citep{10.1145/237814.237866} and the inverse quantum
Fourier transform (the $QFT^{\dagger}$ operator). In the following, we focus on introducing the Grover algorithm and why it accelerates the computation. 

\begin{figure*}[htp]
\begin{quantikz}
\lstick[wires=4]{Register 1\\$\tone$ qubits} & \ket{0}\;\, &  \gate{H} & \qw & \ctrl{4}& \qw & \qw \  \ldots \ & \qw & \gate[4]{QFT^{\dagger}} & \meter{0/1} & \measuretab{b_{1}}\\
& \ket{0}\;\, & \gate{H} & \qw & \qw  & \ctrl{3} & \qw  \ \ldots \ & \qw & \qw & \meter{0/1} &  \measuretab{b_{2}}\\
& \vdots & \vdots & & &   & \vdots &  & \qwbundle[alternate]{} & \qwbundle[alternate]{} & \vdots \\
& \ket{0}\;\, & \gate{H} &  \qw & \qw & \qw  & \qw  \ \ldots \ & \ctrl{1} & \qw &  \meter{0/1} &  \measuretab{b_{\tone}}\\
\lstick[wires=3]{Register 2\\$t$ qubits} & \ket{0}\;\, & \gate{H} & \qw & \gate[3,bundle={2}]{G^{2^{0}}}  & \gate[3,bundle={2}]{G^{2^{1}}}  & \qw \ \ldots \ & \gate[3,bundle={2}]{G^{2^{\tone-1}}}  & \qw &  \rstick[wires=3]{trash}\\
& \vdots & \vdots & & &   & \qwbundle[alternate]{} \,\ldots\, & & \qwbundle[alternate]{} \\
& \ket{0}\;\, & \gate{H} & \qw &   &  & \qw \ \ldots \ &   & \qw &  \\
\end{quantikz}
\caption{The circuit for quantum counting algorithm.}\label{fig:qc2}
\end{figure*}

\begin{figure}[htp]
\begin{quantikz}
\lstick[wires=3]{$t$ qubits}   & \gate[3,bundle={2}]{O_{f_j}} & \gate{H} & \gate[3,bundle={2}]{\qps} & \gate{H}& \qw\\ %2\ket{0^{t}}\bra{0^{t}}-I_{t}
 &  & \qwbundle[alternate]{} \;\; \vdots\;\;  & & \qwbundle[alternate]{} \;\;\vdots \;\; & \qwbundle[alternate]{} \\
  & & \gate{H} & & \gate{H} & \qw\\
\end{quantikz}
\caption{The circuit for Grover operator.}\label{fig:qc1}
\end{figure}

\noindent\textbf{Grover operator.} The Grover algorithm is an efficient search algorithm. Given a binary function $f: \{0,1,\cdots, 2^t - 1\} \to \{0, 1\}$, the Grover algorithm returns an $x$ with $f(x) = 1$ with high probability. The Grover operation in Algorithm~1 is constructed by the quantum circuit in Figure~\ref{fig:qc1}, where $t = \lceil\log n\rceil$ denotes the minimum number of quantum bits to encode $n$.
The quantum operator $\qps$ is called quantum phase shifting, which provides a phase shift of $-1$ on every state except $|0\rangle$. Mathematically,
\begin{equation}\nonumber
\begin{split}
& \zero \stackrel{\qps}{\longrightarrow} \zero\qquad\text{ and }\\
& \x \stackrel{\qps}{\longrightarrow} -\x\; \text{ for any } x \in\{1,\cdots,2^{t}-1\}.
\end{split}
\end{equation}
Here, $\x$ represents the $x$-th base state of the $t$ qubits. The high-level idea of the Grover operator's functionality is shown in Figure~\ref{fig:grover}, where $|\psi\rangle$ is the input of the Grover operators in quantum counting, and $\{|\alpha\rangle, |\beta\rangle\}$ is a pair of orthogonal bases. The formal definitions of $|\psi\rangle$, $|\alpha\rangle$, and $|\beta\rangle$ can be found in Appendix~\ref{app:add}. Under the $\{|\alpha\rangle, |\beta\rangle\}$ basis, the quantum oracle $O_{f_j}$ reflects $|\psi\rangle$ over $|\alpha\rangle$, while the remaining part of $G$ reflects $O_{f_j}|\psi\rangle$ over $|\psi\rangle$. The angle between the output state $G|\psi\rangle$ and the initial state $|\psi\rangle$ is
\begin{equation}\nonumber
\theta = 2\arcsin\left(\sqrt{\hist_j\cdot 2^{-t}}\right),
\end{equation}
which includes the information about $\hist_j$. Since the function $\arcsin(\sqrt{x})$ grows quadratically faster than linear functions when $x$ is small, we expect that an estimate of $\arcsin(\sqrt{x})$ could be quadratically more accurate than directly estimating $x$. \\ 


\begin{figure}[htp]
    \centering
    \includegraphics[width = 0.378\textwidth]{fig_grover.pdf}
    \caption{An illustration of Grover operator's functionality (Figure 6.3 in~\citet{nielsen2002quantum}).}
    \label{fig:grover}
\end{figure}

\subsection{Functionality for Grover algorithm}\label{app:add}
According to (6.4) in~\citet{nielsen2002quantum}, Hadamard gate changes $t$ qubits of $\zero$ to an equal superposition state (equal probability of observing any outcome under quantum measurements).
\begin{equation}\nonumber
|\psi\rangle = \frac{1}{2^{t/2}}\cdot \sum_{x=0}^{2^t-1} |x\rangle.
\end{equation}
Let $f:\{0,\cdots,2^{t}-1\}\mapsto \{0,1\}$ be the binary function used to construct the quantum oracle, and let $\hn_1$ be the number of $x$ such that $f(x) =1$. The orthogonal bases $|\alpha\rangle$ and $|\beta\rangle$ are defined as
\begin{equation}\nonumber
\begin{split}
|\alpha\rangle &\triangleq \frac{1}{\sqrt{2^t-\hn_1}}\cdot \sum_{x:f(x) = 0} |x\rangle\qquad\text{and}\\
|\beta\rangle &\triangleq \frac{1}{\sqrt{\hn_1}}\cdot \sum_{x:f(x) = 1} |x\rangle.
\end{split}    
\end{equation}
Under the $|\alpha\rangle$ $|\beta\rangle$ base,  the equal superposition state
\begin{equation}\nonumber
|\psi\rangle = \sqrt{\frac{2^t-\hn_1}{2^t}} 
\;|\alpha\rangle + \sqrt{\frac{\hn_1}{2^t}}\;|\beta\rangle.
\end{equation}
Since 
\begin{equation}\nonumber
\theta = 2\arcsin\left(\sqrt{\hn_1\cdot 2^{-t}}\right),
\end{equation}
we have
\begin{equation}\nonumber
\begin{split}
|\psi\rangle &= \cos\left(\frac{\theta}{2}\right)
\,|\alpha\rangle + \sin\left(\frac{\theta}{2}\right)\,|\beta\rangle, \\
O_f|\psi\rangle &= \cos\left(\frac{\theta}{2}\right)
\,|\alpha\rangle + \sin\left(-\frac{\theta}{2}\right)\,|\beta\rangle, \text{ and}\\
G|\psi\rangle &= \cos\left(\frac{3\theta}{2}\right)
\,|\alpha\rangle + \sin\left(\frac{3\theta}{2}\right)\,|\beta\rangle.
\end{split}    
\end{equation}

% \section{Proof for Proposition~\ref{prop:hypergeo}}
% \label{app:hypergeo}
% \textit{Let $X$ be a random variable that follows hypergeometric distribution $H(N, p\cdot N, T)$, and $Y$ be a random variable that follows binomial distribution $\mathcal{B}(T, p)$. Suppose $T$ and $p \in (0, 1)$ are fixed. Then for all $k = 0,1,\cdots, T$, $\lim_{n\to\infty} P(X = k) = P(Y = k)$.}

% \begin{proof}
%     Let $q =1-p$. The formula of $P(X = k)$ is 
%     \begin{equation}\nonumber
%     \begin{split}
%      &\ P(X = k)
%         = \frac{\binom{p N}{k}\binom{q N}{T-k}}{\binom{N}{T}}\\
%         = &\ \frac{(p N)!}{k!(p N - k)!}\cdot\frac{(q N)!}{(T-k)!(q N - T+k)!}\cdot\frac{T!(N-T)!}{N!}\\
%         = &\ \binom{T}{k}\cdot \frac{(pN)!}{(pN-k)!}\cdot \frac{(qN)!}{(qN-T+k)!}\cdot \frac{(N-T)!}{N!}.
%     \end{split}
%     \end{equation}
%     When $T$ and $p$ (therefore, $ q$) are fixed, we have 
% \end{proof}

\section{Missing proofs and discussions}
\subsection{Missing Proof for Lemma~3}\label{app:proof}

\textbf{Lemma~3.}
\emph{Given $\varepsilon\in(0,0.5]$, any fast (2-candidate) majority voting algorithm based on sampling with replacement requires at least $\Omega\left(\frac{n^2\cdot\left(\frac{1}{2}-\varepsilon\right)^2}{\MOV^2}\right)$ runtime and at least $\Omega\left(\log\big(\frac{n^2\cdot\left(\frac{1}{2}-\varepsilon\right)^2}{\MOV^2}\big)\right)$ space to achieve $\prc\geq 1-\varepsilon$.}
\begin{proof}%[Proof~of~Lemma~3]
For majority voting (when $m=2$), the corresponding profile with margin of victory $\MOV$ is 
\begin{equation}\label{equ:profile_2}
\left\{
\begin{array}{l}
\nwi = (\lfloor n/2\rfloor+\MOV) \text{ votes for the winner}\\
\\
\nlo = (\lceil n/2\rceil-\MOV) \text{ votes for the loser}
\end{array}
\right..
\end{equation}
Figure~\ref{fig:channel} interprets the sampling (with replacement) process as a communication problem. We (the receiver) get a noisy data point about the winner from the sampling process. According to the above profile, we get the correct winner with $\frac{\nwi}{n}$ probability and get the incorrect winner with $\frac{\nlo}{n} = 1-\frac{\nwi}{n}$ probability. This sampling process is equivalent to the noisy communication channel in Figure~\ref{fig:channel}, which gives the correct binary message with $\frac{\nwi}{n}$ probability.

\begin{figure}[ht]
    \centering
    \includegraphics[width = 0.48\textwidth]{channel.pdf}
    \caption{The communication channel representation of sampling with replacement.}
    \label{fig:channel}
\end{figure}

According to Equation (1.35) in \citet{mackay2003information}, the capacity of the above communication channel $\cp = 1-H(\nwi/n)$, where $H:(0,1)\to(0,1]$ denotes the binary entropy function. Mathematically,
\begin{equation}\nonumber
    H(p) \triangleq -p\log(p)-(1-p)\log(1-p).
\end{equation}
\begin{proposition}
[$H(p)$'s Bounds, Theorem 1.2 in \citep{topsoe2001bounds}]\label{prop:H}
    Given any $p\in(0,1)$,
    \begin{equation}\nonumber
       4p(1-p) < H(p) < \big(4p(1-p)\big)^{1/\ln4}.
    \end{equation}
\end{proposition}
With the lower bound in Proposition~\ref{prop:H}, we know the communication channel's capacity
\begin{equation}\nonumber
\begin{split}
\cp &= 1-H(\nwi/n) \leq 1-H(1/2+\MOV/n) \\
&< 1-4\cdot(1/2-\MOV/n)\cdot(1/2+\MOV/n)\\
&= 4\MOV^2/n^2.
\end{split}
\end{equation}
The ``$\leq$'' follows by the monotonicity of $H(p)$. %Since the sampling process is without replacement, \emph{i.e.}, each sample is $i.i.d.$, getting $T$ samples is equivalent to receiving samples from $T$ independent channels. 
The next proposition (the well-known Shannon's theorem) connects the channel capacity with the error probability of binary information.
\begin{proposition}[\citep{shannon1948mathematical}]\label{prop:shannon}
Given a communication channel with capacity $\cp$, reconstructing each single-bit message with error probability $\varepsilon\in(0,0.5]$ requires receiving at least $\frac{1-H(\varepsilon)}{\cp}$ bits (in expectation) from the channel.
\end{proposition}
%According to Chapter 7 of~\citet{cover1999elements}, the total capacity of independent channels is the summation of each channel's capacity.
By Proposition~\ref{prop:shannon} and the upper bound in Proposition~\ref{prop:H}, the required number of bits from the channel (the required number of samples)
\begin{equation}\nonumber%\label{equ:bound}
\begin{split}
T &\geq  \frac{1-H(\varepsilon)}{\cp} > \frac{n^2\cdot\left(1-\big(4\varepsilon(1-\varepsilon)\big)^{1/\ln4}\right)}{4\MOV^2}\\
&= \Omega\left(\frac{n^2\cdot\left(\frac{1}{2}-\varepsilon\right)^2}{\MOV^2}\right).
\end{split}
\end{equation}
Lemma~3 follows by the observation that the time-complexity and the space-complexity of getting $T$ samples are $\Omega(T)$ and $\Omega(\log T)$ respectively.
\end{proof}

\subsection{Compare sampling with and without replacement}\label{app:with_witout}
Although Theorem~3 holds only for algorithms based on sampling with replacement, we believe that when the algorithm only uses the histogram of the sampled votes to calculate the winner, and the sample size $T$ is small compared to $n$, there is no major difference for algorithms based on sampling without replacement, because the two sampling schemes converge to the same distribution when $n$ goes to infinity. 
 
Let $\hist$ be the histogram for a profile $P$, and let $\hist_j$ be the number of votes for the $j$-th ranking in the profile. In sampling with replacement, the number of votes for the $j$-th ranking in the sample follows the binomial distribution $\mathcal{B}(T, \hist_j / n)$.
For sampling without replacement, the number of votes for the $j$-th ranking follows the hypergeometric distribution $\mathcal{H}(n, \hist_j, T)$. (A hypergeometric distribution $\mathcal{H}(n, \hist_j, T)$ considers drawing $T$ samples from $n$ items, among which exactly $\hist_j$ items have a specific feature, and characterizes the probability that a certain number of featured items is sampled.) The following theorem tells us that the hypergeometric distribution $\mathcal{H}(n, \hist_j, T)$ converges to the binomial distribution $\mathcal{B}(T, \hist_j / n)$ when $n\to \infty$. 

% \ao{Can we change the notation of hypergeometric distribution $H$ to $\calH$? $H$ is used an entropy in my proof. I think $\calH$ is not used. Can Qishen double-check?}

\begin{theorem}[Corollary 4.1 in~\citep{Teerapabolarn2011:pointwise}.]\label{thm:with_without}
    Let $X$ be a random variable that follows hypergeometric distribution $\mathcal{H}(n, \hist_j, T)$, and $Y$ be a random variable that follows binomial distribution $\mathcal{B}(T, \frac{\hist_j}{n})$. For any $t\in\{0,\cdots, T\}$, fixed $p = \frac{\hist_j}{n}$, and $T = o(\sqrt{n})$, $\lim_{n\to\infty} |P(X = t) - P(Y = t)| = 0$. 
\end{theorem}

% \begin{proposition}
%     \label{prop:hypergeo}
%     Let $X$ be a random variable that follows hypergeometric distribution $H(N, p\cdot N, T)$, and $Y$ be a random variable that follows binomial distribution $\mathcal{B}(T, p)$. Suppose $T$ and $p \in (0, 1)$ are fixed. Then for all $k = 0,1,\cdots, T$, $\lim_{n\to\infty} P(X = k) = P(Y = k)$. 
% \end{proposition}
Therefore, when $n$ is large and sampling size $T$ is small compared to $n$, the sample histograms will be close to each other between sampling with and without replacement. 


\section{Additional Experiments}
\subsection{Implementation Details}\label{app:detail_exp}
For the classical algorithm, we use MATLAB's built-in function  \texttt{mnrnd} to draw samples for $\hhist$ (which follows a multinomial distribution). For the quantum algorithm, we first calculate the distribution of quantum counting according to (5.26) in \citet{nielsen2002quantum} and then draw samples from the calculated distribution. For all experiments of this paper, we use $10^5$ independent trials to estimate $\prc$. All experiments of this paper are implemented through MATLAB 2022b and run on a Windows 11 desktop with AMD Ryzen 9 5900X CPU and 32GB RAM. 

\subsection{Additional experimental results}\label{app:exp}
\textbf{Plurality. } For plurality, we use the following profile $P$, \begin{equation}\nonumber\left\{
\begin{array}{l}
\frac{n+2(m!-1)\MOV}{m!} \text{ votes for } c_1\succ \cdots \succ c_m\\
\\
\frac{n-2\MOV}{m!} \text{ votes for each other type of votes}
\end{array}
\right..
\end{equation}
It's easy to check that the margin of victory of the above profile is $\MOV$ under plurality. Figure~\ref{fig:plu_m_4} plots the comparison between quantum-accelerated voting and classical voting for $m=4$. Similar acceleration as $m=2$ can be observed for $m=4$.


We also observe that $\Pr[\text{correct}]$ may not monotonically increase with the increase of $\log_2(K\cdot 2^{\tone})$. For example, in Figure~\ref{fig:plu_m_4} with $K=1$ and $\MOV=256$, the $\Pr[\text{correct}]$ for $s=15$ is smaller than that for $s=14$. The non-monotonicity is not an uncommon phenomenon in quantum algorithms (\emph{e.g.,} \citealp{kerenidis2019q,chen2020low,bausch2020recurrent}). This phenomenon comes from the discrete manner of quantum noises, which differs from the noise in classical sampling. We also note that our theoretical analysis bounds the asymptotic manner of $\Pr[\text{correct}]$, instead of the monotonicity. To be slightly more technical, this decrease comes from the noise (\emph{i.e.,} tail probability) of the quantum counting, which is different from the classical counting noise. The tail probability of quantum counting also depends on the relative distance between the ground truth and its best $s$-bit estimation. The closer it is, the smaller the tail probability is. The relative distance may not monotonically decrease with the increase of $s$. For example, assume the ground truth of $\phi$ is $0.0001$ (in binary decimal). If using 1-bit estimation, the relative distance is $(0.0001-0.0)/0.1 = 1/8$. However, if using 3-bit estimation, the relative distance becomes $(0.0001-0.0)/0.001 = 1/2$, which is much larger than $1/8$. 


\textbf{Borda.} For Borda, we let $d = \frac{4\MOV}{(m-2)!\cdot m}$ and set the profile as
\begin{equation}\nonumber
\left\{
\begin{array}{ll}
\frac{n+(m-1)d}{m!} \begin{array}{ll}\text{ votes for each type such that }\\
 ~c_1 \text{ is top-ranked}
 \end{array}\\
\\
\frac{n-d}{m!}  \text{ votes for each other type of votes}
\end{array}
\right..
\end{equation}
It's easy to check that the margin of victory of the above profile is $\MOV$ under Borda. Figure~\ref{fig:bor_m_4} plots the comparison between quantum-accelerated voting and classical voting for $m=4$. Similar behavior as plurality can be observed for Borda. 
\begin{figure*}[htp]
    \centering
    \includegraphics[width = 0.99\textwidth]{plurality_m_4.pdf}
    \caption{Compare quantum-accelerated voting (blue circles) with classical fast voting (red squares) for plurality when $m=4$. The horizontal axis can be seen as the logarithm of the algorithms' runtime.}
    \label{fig:plu_m_4}
\end{figure*}

\begin{figure*}[htp]
    \centering
    \includegraphics[width = 0.99\textwidth]{borda_m_4.pdf}
    \caption{Compare quantum-accelerated voting (blue circles) with classical fast voting (red squares) for Borda when $m=4$. The horizontal axis can be seen as the logarithm of the algorithms' runtime.}
    \label{fig:bor_m_4}
\end{figure*}

\textbf{Copeland. }
For Copeland, we set the profile as
\begin{equation}\label{equ:cope}
\left\{
\begin{array}{ll}
\frac{n-2\MOV}{m!}+\frac{2\MOV}{(m-2)!} \begin{array}{ll}\text{votes for each type in the }\\
\text{form of } c_1\succ c_2\succ \text{others}
 \end{array}\\
\\
\frac{n-2\MOV}{m!}  \text{ votes for each other type of votes}
\end{array}
\right..
\end{equation}
It's easy to check that the margin of victory of the above profile is $\MOV$ under Copeland. Figure~\ref{fig:copeland} plots the comparison between quantum-accelerated voting and classical fast voting for $m=4$. Similar behavior as plurality can be observed for Copeland. 

\begin{figure*}[htp]
    \centering
    \includegraphics[width = 0.99\textwidth]{copeland_m_4.pdf}
    \caption{Compare quantum-accelerated voting (blue circles) with classical fast voting (red squares) for Copeland when $m=4$. The horizontal axis can be seen as the logarithm of the algorithms' runtime.}
    \label{fig:copeland}
\end{figure*}

\textbf{Single transferable vote (STV).}
For STV, the same profile as Copeland (see Equation (\ref{equ:cope})) is used. It's easy to check that the margin of victory of the profile is $\MOV$ under STV. Figure~\ref{fig:stv} plots the comparison between quantum-accelerated voting and classical voting for $m=4$. Similar behavior as plurality can be observed for STV. 

\begin{figure*}[htp]
    \centering
    \includegraphics[width = 0.99\textwidth]{stv_m_4.pdf}
    \caption{Compare quantum-accelerated voting (blue circles) with classical fast voting (red squares) for STV when $m=4$. The horizontal axis can be seen as the logarithm of the algorithms' runtime.}
    \label{fig:stv}
\end{figure*}

\textbf{Additional notes. } Since Copeland and STV share the same profile, Figure~\ref{fig:copeland} and Figure~\ref{fig:stv} look similar. However, they are not the same and some small differences can be observed between the two figures. We also note that all four voting rules (plurality, Borda, Copeland, and STV) reduce to the majority voting when $m=2$. %The profiles of all four rules also become the same when $m=2$. We plot an additional figure for Borda because of its unique settings on $\MOV$.

%\subsection{Additional discussions about the experiments}\label{app:discuss}


\bibliography{sample}

\end{document}
