\section{Smoothing preliminaries}\label{sec:smoothing}

In this section we look at some smoothing functions and their properties. The proofs of these properties can be found in \Cref{sec:prelims}.

Let $B_{\eta}(x)=\{y \in \R^n: \norm{y-x}\leq \eta\}$ be the ball of radius $\eta$ around $x \in \R^n$.

\begin{definition}[Randomized smoothing]
    For any function $f:\R^n \rightarrow \R$ and real-valued $\eta > 0$, the randomized smoothing operator $S_\eta$ produces a new function $S_{\eta}[f]:\R^n \rightarrow \R$ from $f$ with the same domain and range, defined as 
    \begin{equation}
        S_{\eta}[f](x) = \E_{y \in B_{\eta}(x)}[f(y)].    
    \end{equation}      
\end{definition}

This smoothing turns non-smooth functions into smooth functions. If we start with a function $f$ that is Lipschitz, then after randomized smoothing, the resulting function's first derivative will be defined and Lipschitz~\cite{agarwal2018lower}. Since we want to construct functions with $p$ derivatives, we define a $p$-fold version of randomized smoothing.
Recall that $p$ is the same $p$ as in the introduction (i.e., we are proving lower bounds on the $p$th order optimization problem). This operation also depends on a parameter $\beta$ that we will fix later.

\begin{restatable}[Smoothing]{definition}{smoothing}
    The smoothing operator $\S$ on input $f:\R^n \to \R$ outputs the function 
    \begin{equation}
        \S[f] = S_{\beta/2^{p}}[S_{\beta/2^{p-1}}[\cdots S_{\beta/2^{2}}[S_{\beta/2^{1}}[f]]\cdots]].
    \end{equation}
\end{restatable}

The main properties we require from this smoothing are as follows.

\begin{restatable}{lemma}{smoothingproperties}
    \label{lem:smoothing}
    For the smoothing operator $\S$ defined above, the following statements hold true.
    \begin{enumerate}
        \item For any functions $f,g : \R^n \rightarrow \R$ for which $\S[f], \S[g]$ are well-defined, $\S[f+g] = \S[f] + \S[g]$.
        \item The value $\S[f](x)$ only depends on the values of $f$ within a $(1-2^{-p})\beta$ radius of $x$.
        \item The gradient and higher order derivatives of $\S[f]$ at $x$ depend only on the values of $f$ within $B_{\beta}(x)$.
        \item If $\nabla^p f$ is $L$-Lipschitz in a ball of radius $\beta$ around $x$, then $\nabla^p \S[f]$ is also $L$-Lipschitz at $x$.
        \item Let $f$ be $G$-Lipschitz in a ball of radius $\beta$ around $x$. Then $\S[f]$ is $p$-times differentiable, and for any $i \leq p$, $\nabla^i \S[f]$ is $L$-Lipschitz in a $\beta/2^p$-ball around $x$ with $L \leq \frac{n^i 2^{i(i+1)/2}}{\beta^i} G$.
        \item Let $f$ be $G$-Lipschitz in a ball of radius $\beta$ around $x$. Then $\abs{\S[f](x) - f(x)} \leq \beta G$.
        \item If $f$ is a convex function, then $\S[f]$ is also a convex function.
    \end{enumerate}
\end{restatable}

We also use the softmax function introduced earlier.

\begin{restatable}[Softmax]{definition}{softmax}
    For a real number $\rho$, the softmax function $\smax_{\rho}: \R^n \rightarrow \R$ is defined as \begin{equation}
        \smax_{\rho}(x) = \rho \ln\Bigl(\sum_{i \in [n]} \exp(x_i/\rho)\Bigr).  
    \end{equation}         
    Let us also define, for $m \leq n$, $\smax^{\leq m}_{\rho}: \R^n \rightarrow \R$ as 
    \begin{equation}
        \smax^{\leq m}_{\rho}(x) = \smax_{\rho}(x_{\leq m}) \text{, or equivalently, } \smax^{\leq m}_{\rho}(x) = \rho \ln\Bigl(\sum_{i \in [m]} \exp(x_i/\rho)\Bigr). 
    \end{equation}
\end{restatable}

We note the following smoothness properties of softmax.

\begin{restatable}{lemma}{softmaxproperties}
    \label{lem:smaxsmoothness}
    The following are true of the function $\smax_{\rho}$ for any $\rho>0$.
    \begin{enumerate}
        \item The first derivative of $\smax_{\rho}$ can be computed as \begin{equation} \frac{\partial \smax_{\rho}(x)}{\partial x_i} = \frac{\exp(x_i/\rho)}{\sum_{j \in [n]} \exp(x_j/\rho)}. \end{equation}
        \item $\smax_{\rho}$ is $1$-Lipschitz and convex.
        \item The higher-order derivatives of $\smax$ satisfy \begin{equation} \| \nabla^p \smax_{\rho}(x) - \nabla^p \smax_{\rho}(y) \| \leq \frac{\left(\frac{p+1}{\ln(p+2)}\right)^{p+1} p!}{\rho^{p}} \|x-y\|. \end{equation}
    \end{enumerate}
\end{restatable}

We will also need the following lemma which roughly states that if $\smax(x)$ and $\smax^{\leq m}(x)$ are nearly the same, then their gradients are also nearly the same at $x$.

\begin{restatable}{lemma}{softmaxgradients}
    \label{lem:nearlysamesmax}
    Let $x \in \R^n$ and $m<n$. If \begin{equation} \frac{\smax_{\rho}(x) - \smax^{\leq m}_{\rho}(x)}{\rho} = \delta < 1, \end{equation} then
    \begin{equation} \| \nabla \smax_{\rho}(x) - \nabla \smax^{\leq m}_{\rho}(x) \| \leq 4\delta. \end{equation} 
\end{restatable}

\section{Function construction and properties}\label{sec:func-props}

In this section we define the class of functions used in our randomized (and quantum) lower bound, and state the properties of the function that will be exploited in the lower bound. Some proofs have been moved to \Cref{sec:hardclass} due to space constraints.

Let $k \in \N, \gamma, \beta, \rho, \alpha \in \R$ be parameters to be defined shortly. $k$ and $\gamma$ are parameters as used in the high level overview~(\Cref{eqn:nonsmooth}), $\beta$ is the parameter required to define $\S$ and $\rho$ is the parameter used in the definition of $\smax$.

\textbf{Function construction.}
Given a list of orthonormal vectors $v_1, \dots, v_k \in \R^n$, which we collectively call $V$, we recall  that $\aff_V(x) \in \R^k$ denotes the vector 
\begin{equation}
    \aff_V(x) = (\dotp{v_1}{x} + (k-1)\gamma,\dotp{v_2}{x} + (k-2)\gamma, \dots, \dotp{v_k}{x}).
\end{equation}
We can now define our hard function class as follows.

\begin{definition}
    Let $V = (v_1, \dots, v_k)$ be a list of orthonormal vectors in $\R^n$. The functions $f_1$, \dots, $f_k$, $h$ and $g$ depend on $V$ as follows.
    Define, for each $i \in [k]$, the function $f_i: \R^n \rightarrow \R$ as \begin{equation} f_{i}(x) = \smax^{\leq i}_{\rho}(\aff_V(x)) + \rho(k-i)n^{-\alpha}. \end{equation}
    Define $h(x) = \max_{i \in [k]} f_i(x)$, and $g(x) = \S[h](x)$.
\end{definition}

Note that in the above definition we apply the $\smax$ functions on $\aff_V(x)$ and not on $x$. However, since $\aff_V(x)$ is obtained by applying a unitary transform on $x$ and then translating it, the observations about $\smax(x)$ in~\Cref{lem:smaxsmoothness,lem:nearlysamesmax} also hold for $\smax(\aff_V(x))$.

We set $\gamma = 40\sqrt{\frac{\ln n}{n}}$, $k = \floor{(0.1/\gamma)^{2/3}}$ (or $\gamma \approx 0.1/(k\sqrt{k})$), $\rho = \gamma/(100 \alpha\ln n)$, $\beta = \gamma/\ln n$, and $\alpha = p+1$.

\textbf{Function properties.} We now state some properties of the function that will be used to show the lower bounds.

\begin{restatable}{lemma}{smoothnesslemma}
    \label{lem:gsmoothness}
    For any choice of $V$, the function $g$ is convex, $p$-times differentiable and satisfies
    \begin{equation}
        \| \nabla^p g(x) - \nabla^p g(y) \| \leq L_p \|x-y\|
    \end{equation}
    where $L_p \leq O_p(k^{3p/2} (\ln k)^{p})$.
\end{restatable}

The proof relies on the fact that $\smax$ is smooth and hence each $f_i$ is smooth. If $h = f_i$ for a particular $i$ in a $\beta$-neighborhood of $x$, then $g = \S[h]$ would also be smooth (by~\Cref{lem:smoothing}, item 4). If $h$ depends on multiple functions $f_i$ in a $\beta$-neighborhood of $x$, then we know that at least two of the softmaxes involved in the definitions of these $f_i$ have nearly the same value in the neighborhood of $x$, and by~\Cref{lem:nearlysamesmax} they have nearly the same gradient. This makes $h$ \emph{nearly} smooth, which will allow us to say that $g$ is smooth at $x$ (by~\Cref{lem:smoothing}, item 5).

\begin{proof}
    Each $f_i$ is an instance of softmax applied to $\aff_V(x)$ plus a constant. Since $\aff_V(x)$ is the vector $x$ transformed by a unitary and then translated, the smoothness and convexity properties of $\smax_{\rho}$ also apply to $f_i$. Hence each $f_i$ is convex, $p$-times differentiable and its $p$th derivatives are $O_p(\rho^{-p})$-Lipschitz (see \Cref{lem:smaxsmoothness}). The function $h$, being a maximum over convex functions, is also convex. By the properties of the smoothing operator $\S$ (\Cref{lem:smoothing}), the function $g= \S[h]$ is also convex.

    Let $x \in \R^n$. Let $j \in [k]$ be the minimum number such that there is a point $y \in B_{\beta}(x)$ for which $h(y) = f_j(y)$. By the minimality of $j$, for every $z \in B_{\beta}(x)$ we can rewrite $h$ as follows: $h(z) = f_j(z) + \max_{i \geq j} (f_i(z) - f_j(z))$. We call $f_j(z)$ the smooth term and $\max_{i \geq j} (f_i(z) - f_j(z))$ the non-smooth term. We know that $f_j$ has an $O_p(\rho^{-p})$ upper bound on the Lipschitzness of its $p$-th order derivatives. If all points $y \in B_{\beta}(x)$ satisfy $h(y) = f_j(y)$, then the non-smooth term is $0$ and so it does not change the smoothness of $h$. $g = \S[h]$ will maintain this smoothness (see item 4 of \Cref{lem:smoothing}).

    If the non-smooth term is non-zero at some point in $B_{\beta}(x)$, then we wish to show that the non-smooth term has a small Lipschitz constant in $B_{\beta}(x)$. This would imply, via item 5 of \Cref{lem:smoothing}, that the $p$-th order derivative of the smoothing of the non-smooth term with $\S$ would have a small Lipschitz constant. Towards this let $x'$ be any point in $B_{\beta}(x)$. Let $I_{x'}$ be the set $\{i \in [k] \mid h(x') = f_{i}(x')\}$. The set of subgradients of the non-smooth term at $x'$ is the convex hull of $\{\nabla (f_i - f_j)(x')\}_{i \in I_{x'}}$. So if we show that for an arbitrary $i \in I_{x'}$, $\| \nabla (f_i - f_j)(x') \| \leq L$, then we know that the non-smooth part is $L$-Lipschitz at $x'$. If $i=j$, then the gradient is zero. Let us take an $i \neq j$ (since $j$ is the smallest, in fact $i>j$).
    By convexity of the ball and the continuity of $f_i$ and $f_j$, there must be a point $y$ in $B_{\beta}(x)$ for which $h(y) = f_i(y) = f_j(y)$. Note that $x' \in B_{2\beta}(y)$. 
    
    The statement $f_i(y) = f_j(y)$ translates to
    \begin{align}
        \frac{\smax_{\rho}^{\leq i}(\aff_V(y)) - \smax_{\rho}^{\leq j}(\aff_V(y))}{\rho} = (i-j)n^{-\alpha} \ll 1.
    \end{align}

    Expanding the expression for $\smax_{\rho}^{\leq i}$ and $\smax_{\rho}^{\leq j}$ we get
    \begin{align}
    \frac{\smax_{\rho}^{\leq i}(\aff_V(y)) - \smax_{\rho}^{\leq j}(\aff_V(y))}{\rho} = &\ln\left(\frac{
        \sum_{\ell =1}^{i} \exp\left(
            \frac{\dotp{y}{v_{\ell}} + (k-\ell)\gamma}{\rho}
        \right)
    }{
        \sum_{\ell =1}^{j} \exp\left(
            \frac{\dotp{y}{v_{\ell}} + (k-\ell)\gamma}{\rho}
        \right)
    }\right)\\
        = &
    \ln\left(1 + \frac{
        \sum_{\ell = j+1}^{i} \exp\left(
            \frac{\dotp{y}{v_{\ell}} + (k-\ell)\gamma}{\rho}
        \right)
    }{
        \sum_{\ell =1}^{j} \exp\left(
            \frac{\dotp{y}{v_{\ell}} + (k-\ell)\gamma}{\rho}
        \right)
    }\right)
    \end{align}
    
    Since $\|x'-y\| \leq 2\beta$, we have that $\abs{\dotp{x'}{v} - \dotp{y}{v}} \leq 2\beta$ for any unit vector $v$. Hence
    \begin{equation}
        \frac{\smax_{\rho}^{\leq i}(\aff_V(x')) - \smax_{\rho}^{\leq j}(\aff_V(x'))}{\rho} \leq
        \ln\left(1 + \frac{
            e^{2\beta/\rho} \sum_{\ell = j+1}^{i} \exp\left(
                \frac{\dotp{y}{v_{\ell}} + (k-\ell)\gamma}{\rho}
            \right)
        }{
            e^{-2\beta/\rho} \sum_{\ell =1}^{j} \exp\left(
                \frac{\dotp{y}{v_{\ell}} + (k-\ell)\gamma}{\rho}
            \right)
        }\right).
    \end{equation}

    For all $c>0$, $\ln(1+e^{4\beta/\rho}c) \leq e^{4\beta/\rho} \ln(1+c)$. So we can conclude from the above that
    \begin{equation}
        \frac{\smax_{\rho}^{\leq i}(\aff_V(x')) - \smax_{\rho}^{\leq j}(\aff_V(x'))}{\rho} \leq (i-j)n^{-\alpha} e^{4\beta/\rho}.
    \end{equation}

    Now by~\Cref{lem:nearlysamesmax}, $\|\nabla (f_i - f_j)(x') \| = \| \nabla \smax^{\leq i}_{\rho}(\aff_V(x')) - \nabla \smax^{\leq j}_{\rho}(\aff_V(x')) \| \leq 4(i-j)n^{-\alpha} e^{4\beta/\rho}$.
    
    Hence the non-smooth part of $h$ is $4kn^{-\alpha}\exp(4\beta/\rho)$-Lipschitz in $B_{\beta}(x)$. The $p$th derivatives of $g = \S[h] = \S[f_j] + \S[\max_{i \geq j} (f_i - f_j)]$ are thus, by \Cref{lem:smoothing}, $O_p(\rho^{-p} + n^p\beta^{-p}kn^{-\alpha}\exp(4\beta/\rho))$-Lipschitz. We know that $\alpha = p+1$, $\beta = \gamma/\ln n$ and $\rho = \gamma/(100 \alpha \ln n)$, simplifying our bound to $O_p((\ln n/\gamma)^p)$. Furthermore, $k = \floor{(0.1/\gamma)^{2/3}}$ and $\ln n = O(\ln k)$. Hence we can rewrite our upper bound as $O_p\left(k^{3p/2} (\ln k)^{p}\right)$.
\end{proof}

We now see how to prove the query lower bound on optimizing this function class. In order to do so, we need to introduce some intermediate functions. Let $h_i(x) = \max_{j \in [i]} f_j(x)$ and $g_i(x) = \S[h_i](x)$. Let an oracle call to a function $f$ at a point $x$ be denoted by $\Oracle_f(x) = (f(x),\nabla f(x), \nabla^2 f(x), \dots, \nabla^p f(x))$. 
The following results will hold when the set $V$ of orthonormal vectors is chosen uniformly at random (or Haar randomly).
The next two lemmas about these intermediate functions form the backbone of our lower bound.

\begin{restatable}{lemma}{singlesteplemma}
    \label{lem:singlestep}
    Fix any $t \in \{0,\dots,k-1\}$. Let $V$ be distributed Haar randomly. Conditioned on any fixing of $\{v_i\}_{i \leq t}$, any query $x$ in the unit ball will satisfy $\Oracle_{g}(x) = \Oracle_{g_{t+1}}(x)$ with probability at least $1-1/n^{10}$.
\end{restatable}
% \begin{lemma}\label{lem:singlestep}
%     Fix any $t \in [0,\dots,k-1]$. Let $V$ be distributed Haar randomly. Conditioned on any fixing of $\{v_i\}_{i \leq t}$, any query $x$ in the unit ball will satisfy $\Oracle_{g}(x) = \Oracle_{g_{t+1}}(x)$ with probability at least $1-1/n^{10}$.
% \end{lemma}

\begin{restatable}{lemma}{finaloutputlemma}
    \label{lem:finaloutput}
    Let $V$ be distributed Haar randomly. Conditioned on any fixing of $\{v_i\}_{i \leq k-1}$, any point $x$ in the unit ball will be $\epsilon$-optimal for $g$ with probability at most $1/n^{10}$.
\end{restatable}
% \begin{lemma}\label{lem:finaloutput}
%     Let $V$ be distributed Haar randomly. Conditioned on any fixing of $\{v_i\}_{i \leq k-1}$, any point $x$ in the unit ball will be $\epsilon$-optimal for $g$ with probability at most $1/n^{10}$.
% \end{lemma}

To see how the lemmas above lead us to our lower bound, fix any $(k-1)$-query algorithm and consider the following experiment. For each $i$ from $1$ to $k-1$, when the algorithm makes its $i$th query do the following.
\begin{itemize}
	\item Sample $v_i$ from the space orthogonal to the vectors $v_1$ to $v_{i-1}$.
	\item Provide the algorithm the value that $\Oracle_{g_{i}}$ returns on the query. Note that the function $g_i$ depends only on the sampled vectors $v_1$ through $v_i$.
\end{itemize}
It follows that the output of the algorithm is independent of the vector $v_k$ (conditioned on the vectors $v_1$ through $v_{k-1}$). Now we use \Cref{lem:finaloutput} to say that with high probability the output of the algorithm is not $\epsilon$-optimal for $g$. We can now use \Cref{lem:singlestep} along with the hybrid argument to conclude that with high probability the transcript of this query algorithm is the same as the actual transcript (i.e. the transcript had $v_1$ to $v_k$ all been sampled at the beginning and all the queries been to $\Oracle_{g}$). Since the transcripts are the same with high probability, the outputs of the algorithms are also the same with high probability. Hence even when all the queries are to $\Oracle_{g}$, with high probability the output is not $\epsilon$-optimal for $g$. This proof is made formal as the proof of~\Cref{thm:randlb}.

\section{Lower bounds}

We can now establish the randomized lower bound using \Cref{lem:gsmoothness}, \Cref{lem:singlestep}, and \Cref{lem:finaloutput}.

\begin{theorem}\label{thm:randlb}
    Let $\mathcal{A}$ be a randomized query algorithm making at most $k-1$ queries to $\Oracle_{g}$. When $V$ is distributed Haar randomly, the probability that the output of $\mathcal{A}$ is $\epsilon$-optimal is $o(1)$.
\end{theorem}

\begin{proof}
    Let the success probability of $\mathcal{A}$ be $p_\mathrm{succ}$ when $V$ is distributed Haar randomly. We can fix the randomness of $\mathcal{A}$ to get a deterministic algorithm $\mathcal{B}$ with success probability at least $p_\mathrm{succ}$ on the same distribution.

    Let us denote the transcript of $\mathcal{B}$ as $\overline{x} = (x_1, x_2, \dots, x_{k-1}, x_\mathrm{out})$ where $x_i$ is the $i$th query made and $x_\mathrm{out}$ is the output of the algorithm. Note that these are random variables that depend only on $V$. We now create hybrid transcripts $\overline{x}^{(i)}$ for $0 \leq i \leq k-1$. The hybrid transcript $\overline{x}^{(i)} = (x_1^{(i)},\cdots,x_{k-1}^{(i)},x_{\mathrm{out}}^{(i)})$ is defined as the transcript of $\mathcal{B}$ when, for all $j \leq i$, its $j$th oracle call (which is supposed to be to $\Oracle_{g}$) is replaced with an oracle call to $\Oracle_{g_j}$. Note that
    \begin{itemize}
        \item For any $V$, $\overline{x} = \overline{x}^{(0)}$.
        \item $\overline{x}^{(k-1)}$ is a function of $\{v_i\}_{i \leq k-1}$.
        \item For any $V$, if $\Oracle_{g}(x_{i}^{(i-1)}) = \Oracle_{g_i}(x_{i}^{(i)})$ then $\overline{x}^{(i-1)} = \overline{x}^{(i)}$. This is because they have queried the same oracles in their first $i-1$ calls, given the same input in the $i$th call and gotten the same output, and have been querying the same oracles thereafter.
    \end{itemize}

    We start with the observation that
    \begin{align}
        \Pr_V[x_{\mathrm{out}}^{(k-1)} \text{ is }\epsilon\text{-optimal}] &= \E_{v_1,\dots,v_{k-1}} \left[ \Pr_{v_k | v_{<k}}[x_{\mathrm{out}}^{(k-1)} \text{ is }\epsilon\text{-optimal}] \right] \\
        &\leq n^{-10}. \tag{by \Cref{lem:finaloutput}}
    \end{align}

    Next we show that $\Pr_V[x_{\mathrm{out}}^{(k-1)} = x_{\mathrm{out}}] \geq 1-o(1)$, which will complete the proof.
    \begin{align}
        \Pr_V[x_{\mathrm{out}} \neq x_{\mathrm{out}}^{(k-1)}] &\leq \sum_{i \in [k-1]} \Pr_V[x_{\mathrm{out}}^{(i-1)} \neq x_{\mathrm{out}}^{(i)}]\\
        &\leq \sum_{i \in [k-1]} \Pr_V[\overline{x}^{(i-1)} \neq \overline{x}^{(i)}]\\
        &\leq \sum_{i \in [k-1]} \Pr_V[\Oracle_{g}(x_{i}^{(i-1)}) \neq \Oracle_{g_i}(x_{i}^{(i)})]\\
        &\leq \sum_{i \in [k-1]} \E_{v_1,\dots,v_{i-1}}\left[\Pr_{v_i,\dots,v_k | v_{<i}}[\Oracle_{g}(x_{i}^{(i-1)}) \neq \Oracle_{g_i}(x_{i}^{(i)})] \right]\\
        &\leq k n^{-10},
    \end{align}
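    since $x_{i}^{(i-1)} = x_{i}^{(i)}$, and using \Cref{lem:singlestep}. Combining the two bounds, $\Pr_V[x_{\mathrm{out}} \text{ is }\epsilon\text{-optimal}] \leq \Pr_V[x_{\mathrm{out}}^{(k-1)} \text{ is }\epsilon\text{-optimal}] + \Pr_V[x_{\mathrm{out}} \neq x_{\mathrm{out}}^{(k-1)}] \leq (k+1)n^{-10} = o(1)$, and hence $p_\mathrm{succ} = o(1)$.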
\end{proof}

We now translate the above lower bound to the optimization setting and establish the randomized lower bound in \Cref{thm:main}.

% \begin{theorem}
%     Fix any $p \in \N$. There exists $\epsilon>0,R>0,L>0$, and a class of functions $\mF$ with $p$th-order Lipschitz constant $L$ such that any randomized algorithm that $\epsilon$-optimizes $\mF$ must make at least $\Omega_p\left(\left(\frac{LR^{p+1}}{\epsilon}\right)^{\frac{2}{3p+1}} \left(\ln\frac{LR^{p+1}}{\epsilon}\right)^{-2/3}\right)$ queries.
% \end{theorem}

\begin{proof}[Proof of randomized lower bound in \Cref{thm:main}]\ \\
    Our hard function class had $L_p = O_p(k^{3p/2} (\ln k)^{p})$, $R=1$, $\epsilon = 0.1/\sqrt{k}$. Given these parameters, $L_pR^{p+1}/\epsilon = O_p(k^{(3p+1)/2}(\ln k)^p)$ and $\ln(L_pR^{p+1}/\epsilon) = O_p(\ln k)$. Hence
    \begin{align}
        \left(\frac{L_pR^{p+1}}{\epsilon}\right)^{\frac{2}{3p+1}} \left(\ln\frac{L_pR^{p+1}}{\epsilon}\right)^{-2/3} &\leq O_p\left(k (\ln k)^{\frac{2p}{3p+1}} (\ln k)^{-2/3}\right) \leq O_p(k).
    \end{align}
    Since we have a lower bound of $k$, the theorem statement follows.
\end{proof}

We mention here that~\Cref{lem:singlestep,lem:finaloutput} have even more far-reaching consequences. In~\Cref{sec:infhiding}, we note that these lemmas also prove a lower bound of $k$ in (a) the parallel randomized setting where polynomially many non-adaptive queries are allowed in each round and we want to bound the number of rounds and (b) the quantum setting. Hence the best known deterministic algorithm that was recently discovered is also nearly optimal amongst parallel randomized and quantum algorithms.

