\documentclass[accepted]{uai2022} 

\usepackage{csquotes}

\usepackage[utf8]{inputenc} % allow utf-8 input
\usepackage[T1]{fontenc}    % use 8-bit T1 fonts
\usepackage[american]{babel}
\usepackage{hyperref}       % hyperlinks
\usepackage{url}            % simple URL typesetting
\usepackage{booktabs}       % professional-quality tables
\usepackage{amsfonts}       % blackboard math symbols
\usepackage{microtype}      % microtypography
\usepackage{xcolor}         % colors
\usepackage{braket}         % dirac notation
\usepackage{amsmath}        % mathematical typesetting
\usepackage{graphicx}       % tools for inserting graphics
\usepackage[ruled,vlined]{algorithm2e}  % pseudocode insertion

\usepackage{enumitem}       % allow to modify the layout for list environments
\usepackage{amsthm}         % theorem setup
\usepackage{xspace}         % smart spaces after a command
\usepackage{subfig}         % several subfigures in one figure
\usepackage{bbold}          % for the characteristic function


%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams




%%%%%%%

% Theorem style `cited' (amsthm): italic body, bold head, and a custom head
% spec in which the optional note (#3) is printed in \normalfont right after
% the theorem number.
% NOTE(review): presumably meant for statements quoted from the literature,
% with the citation passed as the note -- confirm with the authors.
\newtheoremstyle{cited}%
  {3pt}% (space above)
  {3pt}% (space below)
  {\itshape}% (body font)
  {}% (indent amount)
  {\bfseries}% {theorem head font}
  {.}% {punctuation after theorem head}
  {.5em}% {space after theorem head}
  {\thmname{#1} \thmnumber{#2} \thmnote{\normalfont#3}}% {theorem head spec}

% Environment `citedthm' is printed as "Theorem" in the style above and has
% its own counter. It is not used in the visible part of this file.
\theoremstyle{cited}
\newtheorem{citedthm}{Theorem}


%%%%%%

% Statement environments. Each \newtheorem below is declared without a shared
% counter argument, so lemmas, theorems and definitions are numbered
% independently of one another.
\theoremstyle{plain}
\newtheorem{lemma}{Lemma}
\newtheorem{theorem}{Theorem}
% Definitions use amsthm's `definition' style.
\theoremstyle{definition}
\newtheorem{definition}{Definition}

% \TODO{<text>}: forwards its argument to \todo[inline]{...}.
% NOTE(review): \todo is not provided by any package loaded in the visible
% preamble (it normally comes from todonotes) -- confirm before using \TODO.
% The macro is not used in the visible part of the document.
\newcommand{\TODO}[1]{\todo[inline]{#1}}

% Blackboard-bold shorthands for math mode.
% \P is already defined by LaTeX, hence \renewcommand for that one.
\renewcommand{\P}{\mathbb{P}}
\newcommand{\N}{\mathbb{N}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\C}{\mathbb{C}}
\newcommand{\E}{\mathbb{E}}
\newcommand{\F}{\mathbb{F}}
% Indicator symbol, used in the leave-one-out error definition.
\newcommand{\1}{\mathbb{1}} % TODO

% Calligraphic shorthands for math mode: \cX expands to \mathcal{X}.
% Of these, only \cD is used in the visible part of the document.
\newcommand{\cF}{\mathcal{F}}
\newcommand{\cP}{\mathcal{P}}
\newcommand{\cB}{\mathcal{B}}
\newcommand{\cA}{\mathcal{A}}
\newcommand{\cX}{\mathcal{X}}
\newcommand{\cY}{\mathcal{Y}}
\newcommand{\cH}{\mathcal{H}}
\newcommand{\cE}{\mathcal{E}}
\newcommand{\cC}{\mathcal{C}}
\newcommand{\cD}{\mathcal{D}}
\newcommand{\cN}{\mathcal{N}}
\newcommand{\cU}{\mathcal{U}}
\newcommand{\cM}{\mathcal{M}}

% Greek shorthands: \W expands to \Omega and \w to \omega.
\newcommand{\W}{\Omega}
\newcommand{\w}{\omega}

% \mat{<entries>}: a `matrix' body (#1) wrapped in auto-sized parentheses.
\newcommand{\mat}[1]{\left(\begin{matrix}#1\end{matrix}\right)}

% Upright operator names. The starred declaration places subscripts
% underneath the operator in display style, like \min and \max.
\DeclareMathOperator*{\argmin}{arg\,min}
\DeclareMathOperator*{\argmax}{arg\,max}


% \new{<text>}: typesets its argument in red; the inner braces keep the
% colour change local to the argument.
\newcommand{\new}[1]{{\color{red} #1}}

% Algorithm names, set in small caps. Each ends with \xspace, which adds the
% inter-word space after the macro when needed, so no trailing {} or "\ " is
% required at the call site.
\newcommand{\PercepOnline}{\textsc{Classical online perceptron}\xspace}
\newcommand{\PercepSpace}{\textsc{Classical version space perceptron}\xspace}
\newcommand{\QPercepOnline}{\textsc{Online quantum perceptron}\xspace}
\newcommand{\QPercepSpace}{\textsc{Version space quantum perceptron}\xspace}
\newcommand{\QPercep}{\textsc{Hybrid quantum perceptron}\xspace}



% \NoAppendix{<content>}: expands to its argument unchanged.
% NOTE(review): presumably a switch for content shared with the main paper --
% confirm; it is not used in the visible part of this file.
\newcommand{\NoAppendix}[1]{#1}
% algorithm2e: define \Comment, opened by a right-pointing triangle and with
% an empty closing marker.
\SetKwComment{Comment}{$\vartriangleright$}{}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Font/colour wrapper for pseudocode comments (script-size, typewriter,
% blue), registered with algorithm2e via \SetCommentSty.
\newcommand\mycommfont[1]{\scriptsize\ttfamily\textcolor{blue}{#1}}
\SetCommentSty{mycommfont}

% Make the \KwData keyword print the label "Input".
\SetKwInput{KwData}{Input}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\title{Supplementary Material for \enquote{Quantum Perceptron Revisited: Computational-Statistical Tradeoffs}}

% The standard author block has changed for UAI 2022 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1,2]{\href{mailto:<mathieu.roget@ens-lyon.org>?Subject=Your UAI 2022 paper}{Mathieu Roget}{}}
\author[1]{\href{mailto:<giuseppe.dimolfetta@lis-lab.fr>?Subject=Your UAI 2022 paper}{Giuseppe Di Molfetta}{}}
\author[1]{\href{mailto:<hachem.kadri@lis-lab.fr>?Subject=Your UAI 2022 paper}{Hachem Kadri}{}}
% Add affiliations after the authors
\affil[1]{%
    Aix-Marseille University, CNRS, LIS, Marseille, France
}
\affil[2]{%
    École Normale Supérieure de Lyon, Lyon, France
  }
  

\begin{document}
\onecolumn
\maketitle


\setcounter{theorem}{3}

\section{Proofs}\label{sec:proofs}
In this appendix, we present the proofs of Theorems~\ref{proof:hybrid_quantum_perceptron} and~\ref{proof:generalization_quantum_perceptron}.
%

\subsection{Proof of Theorem \ref{proof:hybrid_quantum_perceptron}}
\label{sec:proof1}

After proving a few useful lemmas, we provide here the proof of the complexity of our \QPercep.

\begin{lemma}\label{lemma:K}
Let
$K = \left\lceil\frac{\ln(\epsilon/2)}{\ln(1-\sqrt{2}\gamma/\sqrt{\pi})}\right\rceil$. Then, as $\gamma \to 0$ and $\epsilon \to 0$, it holds that
\[ K \sim \sqrt{\frac{\pi}{2}} \frac{\ln(1/\epsilon)}{\gamma}. \]
\end{lemma}
\begin{proof}
Using a Taylor expansion of $\ln(1-x)$ at $0$, we get
\begin{align*}
    \sqrt{\pi/2}\frac{\ln(1/\epsilon)}{K\gamma} &= \sqrt{\pi/2}\frac{\ln(1/\epsilon)\ln(1-\sqrt{2}\gamma/\sqrt{\pi})}{\gamma\ln(\epsilon/2)}\\
    &=  \sqrt{\pi/2}\frac{\ln(1/\epsilon)\left[-\sqrt{2}\gamma/\sqrt{\pi} + \underset{\gamma \to 0}{o}(\gamma)\right]}{\gamma\ln(\epsilon/2)}\\
    &\underset{\gamma \to 0}{\to} \frac{\ln(1/\epsilon)}{\ln(1/\epsilon) + \ln(2)}\\ &\underset{\epsilon \to 0}{\to} 1.
\end{align*}



Thus $K \sim \sqrt{\pi/2}\frac{\ln(1/\epsilon)}{\gamma}$.
\end{proof}

\begin{lemma}\label{lemma:K2}
Let
$K_2 = \left\lceil\log_{3/4}\left(1-\left(1-\frac{\epsilon}{2}\right)^{\frac{1}{K-1}}\right)\right\rceil$, where $K$ is defined as in Lemma~\ref{lemma:K}. Then it holds that
\[ K_2 \sim \log_{3/4}(\epsilon\gamma). \]
\end{lemma}
\begin{proof}
Using Taylor expansions of $\ln(1-\epsilon/2)$ and $\ln(1-\sqrt{\frac{2}{\pi}}\gamma)$ at $0$, we get

$$(1-\epsilon/2)^{\frac{1}{K-1}} = \exp\left(\frac{\ln(1-\epsilon/2)\ln(1-\sqrt{\frac{2}{\pi}}\gamma)}{\ln(\epsilon/2)-\ln(1-\sqrt{\frac{2}{\pi}}\gamma)}\right)
= \exp\left(-\alpha\right) $$

where 

\[ \alpha = \frac{1}{\sqrt{2\pi}}\frac{\epsilon\gamma}{\ln(2/\epsilon)+\ln(1-\sqrt{\frac{2}{\pi}}\gamma)} + o(\epsilon\gamma) \sim  \frac{1}{\sqrt{2\pi}}\frac{\epsilon\gamma}{\ln(2/\epsilon)+\ln(1-\sqrt{\frac{2}{\pi}}\gamma)}. \]

Using $\ln(1-e^{-x}) \underset{x\to 0}{\sim} \ln(x)$, it holds that

$$ K_2 =\log_{3/4}\left(1-e^{-\alpha}\right) \sim \log_{3/4}(\alpha) \sim \log_{3/4}(\epsilon\gamma). $$
\end{proof}


\begin{theorem}
\label{proof:hybrid_quantum_perceptron}
Let $S$ be a linearly separable sample of $N$ points of margin $\gamma$. Algorithm \QPercep finds a perfect separator with probability at least $1-\epsilon$ and has a complexity of
\begin{equation*}
     O\left(\frac{\sqrt{N}}{\gamma}\ln(1/\epsilon)\ln\left(\frac{1}{\gamma\epsilon}\right)\right)\; .
\end{equation*}
\end{theorem}
\begin{proof}
The algorithm can fail for two reasons: it is possible that none of the hyperplanes $w_i$, $i=1,\ldots,K$, separates the classes, and it is also
possible that the quantum search gives a wrong result.

The exact value of $K$ we take is $K = \left\lceil\frac{\ln(\epsilon/2)}{\ln(1-\sqrt{2}\gamma/\sqrt{\pi})}\right\rceil  = O\left(\frac{\ln(1/\epsilon)}{\gamma}\right)$ by Lemma~\ref{lemma:K}. The probability that a randomly drawn hyperplane separates the data is $\sqrt{2/\pi}\gamma$ (from \citealp[Proof of Theorem 2]{wiebe2016quantum}). Thus, the probability that at least one hyperplane separates the classes is 
\begin{align*}
    \P(\text{separating $w$ exists}) &= 1- \left(1-\sqrt{\frac{2}{\pi}}\gamma\right)^K
    \geq 1 - \left(1-\sqrt{\frac{2}{\pi}}\gamma\right)^{\frac{\ln(\epsilon/2)}{\ln(1-\sqrt{2}\gamma/\sqrt{\pi})}} = 1-\frac{\epsilon}{2}\; .
\end{align*}
Next we will assume that one of the $K$ hyperplanes separates the classes. The algorithm will still return a wrong answer if it identifies a non-separating hyperplane as a separating one. The worst case is when the separating hyperplane is the $K^{\text{th}}$ one. The probability that $K-1$ non-separating hyperplanes are all correctly identified is 
\begin{equation*}
    \left( 1 - \left(\frac{3}{4}\right)^{K_2}\right)^{K-1} \geq 1-\frac{\epsilon}{2}\; ,
\end{equation*}

where 
\begin{equation*}
    K_2 = \left\lceil\log_{3/4}\left(1-\left(1-\frac{\epsilon}{2}\right)^{\frac{1}{K-1}}\right)\right\rceil  = O\left(\ln(1/(\gamma\epsilon))\right)\; \text{(from lemma \ref{lemma:K2})}.
\end{equation*}

The probability of failure is then bounded by 
\begin{equation*}
    \P(\text{failure}) \leq \underbrace{\frac{\epsilon}{2}}_{\text{separating $w$ doesn't exist}} + \underbrace{\frac{\epsilon}{2}}_{\text{one non-separating hyperplane misidentified}} = \epsilon
\end{equation*}
and the complexity is 
\begin{equation*}
    O\left(K K_2 \sqrt{N}\right) = O\left(\frac{\sqrt{N}}{\gamma}\ln(1/\epsilon)\ln\left(\frac{1}{\gamma\epsilon}\right)\right)
\end{equation*}
which concludes the proof.
\end{proof}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Proof of Theorem \ref{proof:generalization_quantum_perceptron}}
\label{sec:proof2}

For proving Theorem \ref{proof:generalization_quantum_perceptron}, the following definition and lemma are useful.


\begin{definition}
We define the Leave-one-out (LOO) error on a dataset $S$ by %
\begin{equation}
    \label{eq:rloo}
    \hat{R}_{LOO}(S) = \frac{1}{N}\sum_{i=1}^N{\1\{h_{S-\{x_i\}}(x_i)\neq y_i\}}\; ,
\end{equation}
where $h_{S-\{x_i\}}$ is the hypothesis returned by \QPercep on $S-\{x_i\}$, which is the same
as $S$ except that $x_i$ has been deleted.
\end{definition}

The lemma below shows the link between the expected risk and the Leave-one-out error.
\begin{lemma}[{\citealp[Lemma 5.3]{mohri2018foundations}}]\label{lemma:equal_risk}
For any $N\geq 1$,
\begin{equation*}
    \label{eq:equal_risks}
    \underset{S\sim \cD^N}{\E} \left[R(h_S)\right] = \underset{S'\sim \cD^{N+1}}{\E} [\hat{R}_{LOO}(S')]\; .
\end{equation*}
\end{lemma}

\begin{theorem}
\label{proof:generalization_quantum_perceptron}
Assume that the data is linearly separable. Let $h_S$ be the hypothesis returned by the
\QPercep algorithm after training over a sample $S$ of size $N$ drawn according to some distribution $\cD$. Then, the expected error of $h_S$ is bounded as follows:
\begin{equation*}
    \E_{S\sim \cD^N}\left(R(h_S)\right) \leq \sqrt{\frac{\pi}{2}}\frac{\log 1/\epsilon}{N+1}\E_{S\sim \cD^{N+1}}\left(\frac{1}{\gamma_S}\right)\; .
\end{equation*}
\end{theorem}
\begin{proof}
%
The proof is based on computing an upper bound of the Leave-one-out error. 
%
Since the hyperplanes are drawn beforehand, they are the same for all instances $S-\{x_i\}$, $i = 1,\ldots,N$. We also assume that there is at least one hyperplane that separates the training set $S$ of size $N$ (which holds with probability at least $1-\epsilon$). If $N\leq K$, then the number of errors in $\hat{R}_{LOO}$ is naturally bounded by $N\leq K$, so it holds that $\hat{R}_{LOO}  \leq K/N$. Thus we can restrict ourselves to the nontrivial case where $K<N$.
%


We know that there is a hyperplane that separates the training set $S$ correctly. Apart from this hyperplane, denoted $w_K$, the worst scenario is when each of the other ones classifies all the data correctly except for one point.
%
Without loss of generality, we consider that each $w_k$ misclassifies only $x_k$, for all $1\leq k <K$.
%
So we will have one error for each of the $K-1$ first predictions. 
%
Now, when \QPercep is trained on $S-\{x_i\}$ for $i=K,\ldots,N$, the algorithm will choose the hyperplane $w_K$ because it is the only one that correctly separates $S-\{x_i\}$.
%
Since $w_K$ is the hyperplane returned by \QPercep on the whole sample $S$, it will also correctly classify the points $x_i$ for all $K\leq i \leq N$. Hence
it holds that 
\begin{equation*}
    \hat{R}_{LOO} \leq \frac{K}{N}\; .
\end{equation*}
%
Using Lemma \ref{lemma:equal_risk} and $K \sim \sqrt{\frac{\pi}{2}} \frac{\ln(1/\epsilon)}{\gamma}$ (lemma \ref{lemma:K}), we obtain
\begin{equation*}
    \E_{S\sim \cD^N}\left(R(h_S)\right) \leq \sqrt{\frac{\pi}{2}}\frac{\log 1/\epsilon}{N+1}\E_{S\sim \cD^{N+1}}\left(\frac{1}{\gamma_S}\right)\; .
\end{equation*}
\end{proof}
%%%%



\bibliography{biblio}


\end{document}

