
% \section{Proof of Theorem~\ref{HP2} in the main paper}
\section{Proof of Theorem~\ref{HP2}}
\label{SecTheory1}
\begin{proof}
${\rm E}\left[R_n|H_0, \{S_i\}_{i=1}^n\right]$ and ${\rm Var}\left[R_n|H_0, \{S_i\}_{i=1}^n\right]$ have analytical expressions stated in~\citep{friedman1979multivariate} as follows:
\begin{align}
    {\rm E}\left[R_n\mid H_0, \{S_i\}_{i=1}^n\right] &= \frac{2n_0n_1}{n}\label{CutEdgeExp}\\
  \sqrt{{\rm Var}\left[R_n\mid H_0, \{S_i\}_{i=1}^n\right]}&=\sqrt{\frac{2n_0n_1}{n(n-1)}\left\{\frac{2n_0n_1 - n}{n} + \frac{C_n - n+2}{(n-2)(n-3)}\left[n(n-1) -4n_0n_1+2\right]\right\}}\label{CutEdgeVar}
\end{align}
where $C_n$ denotes the number of edge pairs sharing a common node in the MST. Inserting the analytical expressions of ${\rm E}\left[R_n\mid H_0, \{S_i\}_{i=1}^n\right]$~\eqref{CutEdgeExp} and $\sqrt{{\rm Var}\left[R_n\mid H_0, \{S_i\}_{i=1}^n\right]}$~\eqref{CutEdgeVar} to FR statistic $W_n$~\eqref{FRstat}, we have  
\begin{align}
    \frac{W_n}{n} =\frac{\frac{R_n}{n} - \frac{2n_0n_1}{n^2}}{\sqrt{\frac{2n_0n_1}{n(n-1)}\left\{\frac{2n_0n_1 - n}{n} + \frac{C_n - n+2}{(n-2)(n-3)}\left[n(n-1) -4n_0n_1+2\right]\right\}}}
\label{FRfullAppendix}
\end{align}

As stated in~\citep{henze1999multivariate}, under the usual limiting regime, there exists a constant $u\in (0,1)$ such that $n_0$ and $n_1$ tend to infinity, and $n_0/n\to u$. We write $v$ to denote $1-u$; as with $u$,  $n_1/n \to v$ under the usual limiting regime. The variables, $u$ and $v$ can be thought of as class prior probabilities for $Z=0$ and $Z=1$. We have the following under the usual limiting regime:
\begin{align}
    \lim_{n\to\infty}\frac{n_0n_1}{n^2}= uv
    \label{uv}
\end{align}
Theorem 2 in~\citep{henze1999multivariate} gives an almost sure result regarding the convergence of $\frac{R_n}{n}$ under the usual limiting regime:
\begin{align}
    \lim_{n\to\infty}\frac{R_n}{n}&= 2uv\int\frac{p(s\mid Z=0)p(s\mid Z=1)}{up(s\mid Z=0) + vp(s\mid Z=1)}ds\nonumber\\
    &=2uv\int\frac{\frac{p(s)p(Z=0\mid s)}{u}\frac{p(s)p(Z=1\mid s)}{v}}{p(s)}ds\nonumber\\
    &=2\int p(Z=0\mid s)p(Z=1\mid s)p(s)ds
    \label{NNrisk}
\end{align}
The graph-dependent variable $C_n$ in $\sqrt{{\rm Var}[R_n\mid H_0,\{S_i\}_{i=1}^n]}$~\eqref{CutEdgeVar}, formally defined after Eq. (13) in~\citep{friedman1979multivariate}, is the number of edge pairs that share a common node of a Euclidean minimum spanning tree (MST) generated from the data. While $C_n=n-2$ for the one-dimensional case, in~\citep{friedman1979multivariate}, $C_n$ remained unsolved for dimension $d\geq 2$.  
In fact, as stated in Eq. (1.6) in~\citep{steele1987number}, $C_n$ can be expressed as 
\begin{align}
    C_n=1-n+\frac{1}{2}\sum_k k^2V_{n,k},
    \label{Cexpression}
\end{align}
where $V_{n,k}$ stands for the number of nodes with degree $k$ in a MST constructed from $n$ data points. From Theorem 3 in~\citep{steele1987number}, we get $\lim_{n\to\infty}\frac{V_{n,k}}{n}=\alpha_{k,d}$ for all $k\geq 1, d\geq 2$ where $\alpha_{k,d}$'s are constants dependent on dimension $d$. This leads to the following:
\begin{align}
    \lim_{n\to\infty}\frac{C_n}{n}&=\lim_{n\to\infty}\left[\frac{1}{n}+\frac{1}{2}\sum_kk^2\frac{V_{n,k}}{n} - 1\right]\nonumber\\
    &=\frac{1}{2}\sum_kk^2\alpha_{k,d}-1
    \label{CNconstant}
\end{align}
Herein, we use $A_d=\lim_{n\to\infty}\frac{C_n}{n}$ to denote the dimension-dependent constant to which $\frac{C_n}{n}$ converges. 

We reorganize the denominator of $\frac{W_n}{n}$ and rewrite $\frac{W_n}{n}$ as follows
\begin{align}
    \frac{W_n}{n} &= \frac{\frac{R_n}{n} - \frac{2n_0n_1}{n^2}}{\sqrt{\frac{2n_0n_1}{n(n-1)}\left\{\frac{2n_0n_1 - n}{n} + \frac{C_n - n+2}{(n-2)(n-3)}\left[n(n-1) -4n_0n_1+2\right]\right\}}}\nonumber\\
    &=\frac{\frac{R_n}{n} - \frac{2n_0n_1}{n^2}}{\sqrt{\frac{2n_0n_1}{n^2}\left\{\frac{2n_0n_1 - n}{n(n-1)} + \frac{C_n - n+2}{(n-1)(n-2)(n-3)}\left[n(n-1) -4n_0n_1+2\right]\right\}}}\nonumber\\
    &=\frac{\frac{R_n}{n} - \frac{2n_0n_1}{n^2}}{\sqrt{\begin{aligned}&\frac{2n_0n_1}{n^2}\left\{\frac{2n_0n_1}{n(n-1)} - \frac{1}{n-1}\right.+
    \frac{nC_n}{(n-2)(n-3)} - \frac{n^2}{(n-2)(n-3)} +\\ &\frac{2n}{(n-2)(n-3)} - \frac{4n_0n_1C_n}{(n-1)(n-2)(n-3)} + \frac{4nn_0n_1}{(n-1)(n-2)(n-3)} -\\ &\frac{8n_0n_1}{(n-1)(n-2)(n-3)} +
    \frac{2C_n}{(n-1)(n-2)(n-3)} - \frac{2n}{(n-1)(n-2)(n-3)} +\\ &\left.\frac{4}{(n-1)(n-2)(n-3)}
    \right\}\end{aligned}}}\nonumber\\
    &=\frac{\frac{R_n}{n} - \frac{2n_0n_1}{n^2}}{\sqrt{\begin{aligned}&\frac{2n_0n_1}{n^2}\left\{\frac{\frac{2n_0n_1}{n^2}}{1-\frac{1}{n}} - \frac{1}{n-1}\right.+
    \frac{\frac{C_n}{n}}{{(1-\frac{2}{n})(1 - \frac{3}{n})}} - \frac{1}{(1-\frac{2}{n})(1-\frac{3}{n})} +\\ &\frac{2}{(1-\frac{2}{n})(n-3)} - \frac{\frac{4n_0n_1}{n^2}\frac{C_n}{n}}{(1-\frac{1}{n})(1-\frac{2}{n})(1-\frac{3}{n})} + \frac{\frac{4n_0n_1}{n^2}}{(1-\frac{1}{n})(1-\frac{2}{n})(1-\frac{3}{n})} -\\ &\frac{\frac{8n_0n_1}{n^2}}{(1-\frac{1}{n})(1-\frac{2}{n})(n-3)} +
    \frac{\frac{2C_n}{n}}{(1-\frac{1}{n})(n-2)(n-3)} - \frac{2}{(1-\frac{1}{n})(n-2)(n-3)} +\\ &\left.\frac{4}{(n-1)(n-2)(n-3)}
    \right\}\end{aligned}}}
\label{Wmn}
\end{align}

Combining eqns.~\eqref{uv},~\eqref{NNrisk},~\eqref{CNconstant}, and~\eqref{Wmn} yields the asymptotic convergence of $\frac{W_n}{n}$ under the usual limiting regime 
\begin{align}
    \lim_{n\to\infty}\frac{W_n}{n}&=\frac{\int 2 p(Z=0\mid s)p(Z=1\mid s)p(s)ds - 2uv}{\sqrt{2uv[2uv+A_d-1 - 4uvA_d+4uv]}}\nonumber\\
     &=\frac{\int 2 p(Z=0\mid s)p(Z=1\mid s)p(s)ds - 2uv}{\sqrt{2uv[2uv+(A_d-1)(1-4uv)]}}
\label{WRappendix}
\end{align}
\end{proof}

% \section{Proof of the closed-form solution to the linear programming in Section~\ref{SecLinear}}
\section{Proof of Theorem~\ref{theolinopt}}
\begin{figure}[t]%
    \centering
    % \hspace{-0.1cm}
    % \hfill
  \stackunder[1pt]{{\includegraphics[width=0.32\linewidth]{{Optimization/optimization0.10}.png}}}{(a) $u=0.2$}\hspace{-0.1cm}
  \stackunder[1pt]{{\includegraphics[width=0.32\linewidth]{{Optimization/optimization0.20}.png} }}{(b) $u=0.4$}\hspace{-0.2cm}
\stackunder[1pt]{{\includegraphics[width=0.32\linewidth]{{Optimization/optimization0.30}.png} }}{(c) $u=0.6$}
% \vspace{-2pt}
\caption{Simulated optimal solutions $p_{q^*}(s)$ to the LP~\eqref{linOptFRub} with different $p(Z=0\mid s)$.}%
\label{Optimization}
%  \vspace{-0.4cm}
\end{figure}
\label{SecLinearAppendix}

\begin{proof}
The solutions~\eqref{linOptFRsol} of the LP~\eqref{linOptFRub} are represented as follows:
\begin{align}
    &p_{q^*}(s_{q_0})=\frac{u-p(Z=0\mid s_{q_1})}{p(Z=0\mid s_{q_0}) - p(Z=0\mid s_{q_1})},\label{sq0}\\ &p_{q^*}(s_{q_1})=\frac{p(Z=0\mid s_{q_0})-u}{p(Z=0\mid s_{q_0}) - p(Z=0\mid s_{q_1})},\label{sq1}\\
    &p_{q^*}(s_i)=0\quad\forall i\notin\{q_0,q_1\}\label{szero}\\
    &\textit{where } {q_0}=\arg\min_{i}[p(Z=0\mid s_i)]=\arg\max_{i}[p(Z=1\mid s_i)],\quad {q_1}=\arg\max_{i}[p(Z=0\mid s_i)].
    \label{LinearSol}
\end{align}
The following proof presents the derivation of the closed-form solution~\eqref{sq0} to~\eqref{LinearSol} for the LP \eqref{linOptFRub}. In fact, $p(Z=0\mid s_i)$'s are constant coefficients and $p_q(s_i)$'s are variables in the LP. Herein, we use $H$ to denote the number of variables $p_q(s_i)$ such that $i=1,\ldots,H$. 


The feasible solutions to the LP~\eqref{linOptFRub} form a feasible region. This is a bounded region, since the variables $p_q(s_i)$'s are upper and lower bounded.  Furthermore, the constraints $\sum_ip(Z=0|s_i)p_q(s_i)=u$ and $\sum_ip_q(s_i)=1$ form an $H-2$ dimensional polytope and the constraints $p_q(s_i)\geq 0$ restrict the polytope to the $H-2$ dimensional positive orthant. The optimal solution of the LP occurs at one of the vertices of the corresponding polytope~\citep{korte2011combinatorial}. In what follows, we identify the vertices of the polytope, locate the optimal feasible solution from the vertices, and  present results from a simulation to empirically validate the derived closed-form solution.

\textbf{Identifying the vertices of the polytope:} Given the LP~\eqref{linOptFRub}, the intersection between the $H-2$ dimensional polytope (feasible region) and an $H-1$ dimensional hyper-plane $p_q(s_i)=0$ produces an $H-3$ dimensional facet. Therefore, the intersection between the $H-2$ dimensional polytope and any $H-2$ hyper-planes $p_q(s_i)=0$ produces a zero-dimensional facet. In fact, a vertex is a zero-dimensional facet. Therefore, with the above intersection operations, a vertex of the polytope is a vector of length $H$ with $H-2$ zero components, and this reduces the constraints in~\eqref{linOptFRub} to linear equations in two unknowns, $p_q(s_{q_0})$ and $p_q(s_{q_1})$.   
$p_q(s_{q_0})$ and $p_q(s_{q_1})$ are two non-zero components of the vertex and they are specified in~\eqref{sq0} and~\eqref{sq1}.    
% the following form:
% \begin{align}
%     &P_q(s_{q_0})=\frac{u-P(Z=0|s_{q_1})}{P(Z=0|s_{q_0}) - P(Z=0|s_{q_1})},\label{sq0appendix}\\ &P_q(s_{q_1})=\frac{P(Z=0|s_{q_0})-u}{P(Z=0|s_{q_0}) - P(Z=0|s_{q_1})},\label{sq1appendix}\\
%     &P_q(s_i)=0\quad\forall i\notin\{q_0,q_1\}\label{szeroappendix}\\
%     &q_0\neq q_1 \quad q_0=1,...,H \quad q_1=1,...,H\label{szero2appendix}
% \end{align}

\textbf{Locating the optimal solution among the vertices:}
Substituting~\eqref{sq0} and~\eqref{sq1} into the objective in~\eqref{linOptFRub} yields following:
\begin{align}
    &\max_{p(Z=0\mid s_{q_0}), p(Z=0\mid s_{q_1})} p(Z=0\mid s_{q_0})^2p_q(s_{q_0}) + p(Z=0\mid s_{q_1})^2p_q(s_{q_1})\nonumber\\
    =&\max_{p(Z=0\mid s_{q_0}), p(Z=0\mid s_{q_1})} \frac{p(Z=0\mid s_{q_0})^2\left[u-p(Z=0\mid s_{q_1})\right]}{p(Z=0\mid s_{q_0}) - p(Z=0\mid s_{q_1})} + \frac{p(Z=0\mid s_{q_1})^2\left[p(Z=0\mid s_{q_0})-u\right]}{p(Z=0\mid s_{q_0}) - p(Z=0\mid s_{q_1})}\nonumber\\
    =&\max_{p(Z=0\mid s_{q_0}), p(Z=0\mid s_{q_1})}u[p(Z=0\mid s_{q_0}) + p(Z=0\mid s_{q_1})] - p(Z=0\mid s_{q_0})p(Z=0\mid s_{q_1})    
\end{align}
We write $\mathcal{L}[p(Z=0\mid s_{q_0}), p(Z=0\mid s_{q_1})]= p(Z=0\mid s_{q_0})^2p_q(s_{q_0}) + p(Z=0\mid s_{q_1})^2p_q(s_{q_1})$ and compute the partial derivatives of $\mathcal{L}$ w.r.t. the posterior probabilities, $\nabla \mathcal{L}[p(Z=0\mid s_{q_0}), p(Z=0\mid s_{q_1})]$, to yield 
\begin{align}
    \nabla \mathcal{L}[p(Z=0\mid s_{q_0}), p(Z=0\mid s_{q_1})]
    &=\left(\frac{\partial \mathcal{L}}{\partial p(Z=0\mid s_{q_0})}, \frac{\partial \mathcal{L}}{\partial p(Z=0\mid s_{q_1})}\right)\nonumber\\
    &=\left(u-p(Z=0\mid s_{q_1}), u-p(Z=0\mid s_{q_0})\right)\label{Partial}
\end{align}

As observed in \eqref{sq0} and \eqref{sq1}, there are two considerations for $p_q(s_{q_0})$ and $p_q(s_{q_1})$: (1) $p(Z=0\mid s_{q_1})\leq u, p(Z=0\mid s_{q_0}) \geq u$ and (2) $p(Z=0\mid s_{q_1}) \geq u, p(Z=0\mid s_{q_0}) \leq u$. For the first consideration,~\eqref{Partial} yields a non-negative derivative for $\frac{\partial \mathcal{L}}{\partial p(Z=0\mid s_{q_0})}$ and a non-positive derivative for $\frac{\partial\mathcal{L}}{\partial p(Z=0\mid s_{q_1})}$. Therefore, given the convexities of 
$\mathcal{L}[p(Z=0\mid s_{q_0}), p(Z=0\mid s_{q_1})]$ with respect to $p(Z=0\mid s_{q_0})$ and $p(Z=0\mid s_{q_1})$,  we have  $q_0=\arg\max_i[p(Z=0\mid s_i)]$ and $q_1=\arg\min_i[p(Z=0\mid s_i)]$. On the other hand, for the second consideration,~\eqref{Partial} yields a non-positive derivative for $\frac{\partial\mathcal{L}}{\partial p(Z=0\mid s_{q_0})}$ and a non-negative derivative for $\frac{\partial \mathcal{L}}{\partial p(Z=0\mid s_{q_1})}$. Thus $q_0=\arg\min_i[p(Z=0\mid s_i)]$ and $q_1=\arg\max_i[p(Z=0\mid s_i)]$. Both cases have identical solutions, but with the order of $q_0$ and $q_1$ swapped. The summarized closed-form solution is presented in eqns.~\eqref{sq0} to~\eqref{LinearSol}.  

\textbf{Simulation results:} We simulate the LP~\eqref{linOptFRub} with randomly generated $p(z|s_i)$, and set $u=0.2$, $u=0.4$ and $u=0.6$.  We solve the LP~\eqref{linOptFRub} using the Python optimization package. The bimodal delta functions associated with the optimal $p_{q^*}(s_i)$ are observed in Figure~\ref{Optimization}. The two modes of the bimodal delta functions are generated at the points of highest $p(z|s_i)$ for both classes which agrees with the derived closed-form solutions~\eqref{sq0} to~\eqref{LinearSol}.
\end{proof}

\section{Proof of Theorem~\ref{FiniteVariantThy}}
\label{FiniteVariantProof}
\begin{proof}
For $n$ pairs of random variables $(S_i, Z_i)$ i.i.d. generated from $p(s, z)$~\eqref{FiniteVariantSol}, we use $n_0$ and $n_1$ to denote the numbers of feature samples generated with membership $Z_i=0$ and $Z_i=1$, respectively. It is easy to see $n=n_0 + n_1$. From~\eqref{CutEdgeExp} we know that ${\rm E}[R_n\mid H_0,\{S_i\}_{i=1}^n]=\frac{2n_0n_1}{n}$, which says that ${\rm E}[R_n\mid H_0,\{S_i\}_{i=1}^n]$ is determined only by the numbers of features generated from classes zero and one. Given $p(Z=0)=u$ and $p(Z=1)=v$, we have 
\begin{align}
  {\rm E}\left[\left.\frac{R_n}{n}\right\rvert H_0\right]={\rm E}\left[\frac{2n_0n_1}{n^2}\right]=2uv+\mathcal{O}(n^{-1})
\end{align}
Considering that we change the original marginal distribution $p(s)$ to $p_{q^*}(s)$ and $n_q$ samples are i.i.d. generated from $p_{q^*}(s)$, and given that $p_{q^*}(s)$ is derived subject to $p(Z=0)=u$ and $p(Z=1)=v$ (see~\eqref{linOptFRub}), we also have ${\rm E}\left[\left.\frac{R_{n_q}}{n_q}\right\rvert H_0\right]=2uv$. Now we turn to evaluate ${\rm E}\left[\frac{R_{n_q}}{n_q}\right]$ obtained from $n_q$ samples $S_i$ i.i.d. generated from $p_{q^*}(s)$. Following the notation used in~\eqref{linOptFRsol}, we use $s_{q_0}$ and $s_{q_1}$ to denote the only two points for which $p_{q^*}(s_{q_0})>0$ and $p_{q^*}(s_{q_1})>0$, and $p_{q^*}(s)=0$ for any other $s$. Three cases can possibly happen under $p_{q^*}(s)$: (1) all $n_q$ samples are generated at $s_{q_0}$; (2) all $n_q$ samples are generated at $s_{q_1}$; and (3) at least one sample is generated at each of $s_{q_0}$ and $s_{q_1}$. This leads to the expansion of ${\rm E}\left[\frac{R_{n_q}}{n_q}\right]$ in the following:
\begin{align}
    {\rm E}\left[\frac{R_{n_q}}{n_q}\right]&= [p_{q^*}(s_{q_0})]^{n_q}{\rm E}\left[\left.\frac{R_{n_q}}{n_q}\right\rvert \text{case 1}\right] + [p_{q^*}(s_{q_1})]^{n_q}{\rm E}\left[\left.\frac{R_{n_q}}{n_q}\right\rvert \text{case 2}\right]\nonumber\\ 
    &+ \{1- [p_{q^*}(s_{q_0})]^{n_q} - [p_{q^*}(s_{q_1})]^{n_q}\}{\rm E}\left[\left.\frac{R_{n_q}}{n_q}\right\rvert\text{case 3}\right]
    \label{threecasecutedge}
\end{align}
A minimum spanning tree (MST) constructed over $n_q$ samples i.i.d. generated from $p_{q^*}(s)$ contains $n_q -1$ edges, and we write $I_i$ to denote the indicator random variable of whether the $i$-th edge in the MST is a cut-edge ($I_i=1$) or not ($I_i=0$). Therefore we have ${\rm E}\left[\frac{R_{n_q}}{n_q}\right]=\frac{1}{n_q}\sum_{i=1}^{n_q-1}{\rm E}[I_i]$. Under both case 1 and case 2, $I_i$ simply indicates whether the two endpoints of the $i$-th edge have different labels, and therefore we have ${\rm E}[I_i\mid \text{case 1}]=2p(Z=0|s_{q_0})p(Z=1|s_{q_0})$ and ${\rm E}[I_i\mid \text{case 2}]=2p(Z=0|s_{q_1})p(Z=1|s_{q_1})$. Under case 3, $I_i$ can be further categorized into an edge variable $I_i^a$ that connects $s_{q_0}$ and $s_{q_1}$ and other edge variables $I_i^b$ at either $s_{q_0}$ or $s_{q_1}$. There is only one edge connecting $s_{q_0}$ and $s_{q_1}$; thus we simply write $I^a$ for $I_i^a$ and ${\rm E}[I^a\mid 
\text{case 3}] = p(Z=0\mid s_{q_0})p(Z=1\mid s_{q_1}) +p(Z=1\mid s_{q_0})p(Z=0\mid s_{q_1})$. Each $I_i^b$ can be viewed as an edge variable that connects a random variable $S\sim p_{q^*}(s)$ and a point at $s_{q_0}$ or $s_{q_1}$ (under the case 3, two points already exist at $s_{q_0}$ and $s_{q_1}$), and therefore $E\left[I_i^b\mid\text{case 3} \right]=\int 2p(Z=0|s)p(Z=1|s)p_{q^*}(s)ds$. Inserting ${\rm E}\left[\frac{R_{n_q}}{n_q}\right]=\frac{1}{n_q}\sum_{i=1}^{n_q-1}{\rm E}[I_i]$, ${\rm E}[I_i\mid \text{case 1}]=2p(Z=0|s_{q_0})p(Z=1|s_{q_0})$, ${\rm E}[I_i\mid \text{case 2}]=2p(Z=0|s_{q_1})p(Z=1|s_{q_1})$, ${\rm E}[I^a\mid 
\text{case 3}] = p(Z=0\mid s_{q_0})p(Z=1\mid s_{q_1}) +p(Z=1\mid s_{q_0})p(Z=0\mid s_{q_1})$ and $E\left[I_i^b\mid\text{case 3} \right]=\int 2p(Z=0|s)p(Z=1|s)p_{q^*}(s)ds$ to~\eqref{threecasecutedge} we have
\begin{align}
    E\left[\frac{R_{n_q}}{n_q}\right]&=[p_{q^*}(s_{q_0})]^{n_q}\frac{(n_q-1)[2p(Z=0|s_{q_0})p(Z=1|s_{q_0})]}{n_q} + [p_{q^*}(s_{q_1})]^{n_q}\frac{(n_q-1)[2p(Z=0|s_{q_1})p(Z=1|s_{q_1})]}{n_q}\nonumber\\
    &+ \{1- [p_{q^*}(s_{q_0})]^{n_q} - [p_{q^*}(s_{q_1})]^{n_q}\}\frac{\left[p(Z=0\mid s_{q_0})p(Z=1\mid s_{q_1}) +p(Z=1\mid s_{q_0})p(Z=0\mid s_{q_1})\right]}{n_q}\nonumber\\
    &+\{1- [p_{q^*}(s_{q_0})]^{n_q} - [p_{q^*}(s_{q_1})]^{n_q}\}\frac{(n_q-2)\int 2p(Z=0|s)p(Z=1|s)p_{q^*}(s)ds}{n_q}\nonumber\\
    &=\int 2p(Z=0|s)p(Z=1|s)p_{q^*}(s)ds + \mathcal{O}(n_q^{-1})
\end{align}
Inserting the results of ${\rm E}\left[\left. \frac{R_{n_q}}{n_q}\right\rvert H_0\right]$ and ${\rm E}\left[\frac{R_{n_q}}{n_q}\right]$ to ${\rm E}\left[\frac{\overline{W}_{n_q}}{n_q}\right]={\rm E}\left[\left. \frac{R_{n_q}}{n_q}\right\rvert H_0\right] - {\rm E}\left[\frac{R_{n_q}}{n_q}\right]$ completes the proof. 
\end{proof}
\section{Proof of Theorem~\ref{TypeITheory}}
\label{SecTypeI}
\begin{proof}
%As $H_n$ being rejected with the computed $p$-value smaller than pre-defined significance level $\alpha$,  
We write $\mathcal{S}=\{S_1,\ldots,S_n\}$ and $\mathcal{Z}=\{Z_1,\ldots, Z_n\}$ to denote pairs of $(S_i, Z_i)$ i.i.d generated from $p(s, z)$. We write $\bar{\mathcal{S}}=\{\bar{S}_1,\ldots,\bar{S}_{n_q}\}\subseteq\mathcal{S}$ and $\bar{\mathcal{Z}}=\{\bar{Z}_1,\ldots,\bar{Z}_{n_q}\}\subseteq\mathcal{Z}$ to denote sets of feature random variables and corresponding label random variables obtained in the end of the second stage of the proposed framework. Note that $\bar{S}_i$'s are not necessarily to be i.i.d random variables. Furthermore, we divide $\bar{\mathcal{S}}$ into $\bar{\mathcal{X}}$ for feature random variables with membership $\bar{Z}_i=0$ and $\bar{\mathcal{Y}}$ for feature random variables with membership $\bar{Z}_i=1$. It is easy to see $\mathcal{\bar{S}}= \bar{\mathcal{X}}\bigcup\bar{\mathcal{Y}}$. 

\textbf{Under the null hypothesis $H_0$ ($S_i\perp Z_i$), $\bar{Z}_i$ and $\bar{S}_i$ are independent:} We split the initial unlabelled sample feature set $\mathcal{S}$ ($\mathcal{Z}$ is unknown) to a training feature set $\mathcal{S}_t$ and a hold-out feature set $\mathcal{S}_h$. $\mathcal{S}_t$ corresponds to a collection of sample features uniformly labeled in the first stage of the framework, and $\mathcal{S}_h$ corresponds to the unlabelled sample feature set at the beginning of the second stage. Furthermore, we write $\mathcal{Z}_h$ and 
$\mathcal{Z}_t$ to indicate label sets of $\mathcal{S}_h$ and $\mathcal{S}_t$ respectively. We have $\mathcal{S}_t\subseteq\bar{\mathcal{S}}$ and $\mathcal{Z}_t\subseteq\bar{\mathcal{Z}}$.
In the proposed framework, we train a classifier with $\mathcal{S}_t$ and $\mathcal{Z}_t$, and herein, we write $\theta$ to denote a parameter random variable of the classifier.  First of all, it is easy to see that $p(\mathcal{S}_t, \mathcal{Z}_t)=p(\mathcal{S}_t)p(\mathcal{Z}_t)$, since $\mathcal{S}_t$ is uniformly sampled from $\mathcal{S}$ under $H_0$.  $\theta$ is dependent on $\mathcal{S}_t$ and $\mathcal{Z}_t$ since we use
$\mathcal{S}_t$ and $\mathcal{Z}_t$ to train the classifier. Also,  we have $p(\mathcal{S}_h, \mathcal{Z}_h, \theta)= p(\mathcal{S}_h, \mathcal{Z}_h)p(\theta)$ since the classifier training process is independent of the hold-out set $(\mathcal{S}_h, \mathcal{Z}_h)$. Lastly, we write $q=q(\theta)$ to denote a query scheme to query labels based on the output probabilities of the classifier parameterized by $\theta$. In fact, $\bar{\mathcal{S}}/\mathcal{S}_t$ and $\mathcal{\bar{Z}}/\mathcal{Z}_t$ are features and labels returned by the query $q$, hence
     $\bar{\mathcal{S}}/\mathcal{S}_t\subseteq \mathcal{S}_h$ and $\bar{\mathcal{Z}}/\mathcal{Z}_t\subseteq \mathcal{Z}_h$. Given $\theta$ is independent of $\mathcal{S}_h$ and $\mathcal{Z}_h$, and $\mathcal{S}_h$ is independent of $\mathcal{Z}_h$, we have $P(\bar{\mathcal{S}}/\mathcal{S}_t,\bar{\mathcal{Z}}/\mathcal{Z}_t)=P(\bar{\mathcal{S}}/\mathcal{S}_t)P(\bar{\mathcal{Z}}/\mathcal{Z}_t)$. Combining  $P(\mathcal{S}_t, \mathcal{Z}_t)=P(\mathcal{S}_t)P(\mathcal{Z}_t)$ together with $P(\bar{\mathcal{S}}/\mathcal{S}_t,\bar{\mathcal{Z}}/\mathcal{Z}_t)=P(\bar{\mathcal{S}}/\mathcal{S}_t)P(\bar{\mathcal{Z}}/\mathcal{Z}_t)$, we have $P(\bar{\mathcal{S}}, \bar{\mathcal{Z}})=P(\bar{\mathcal{S}})P(\bar{\mathcal{Z}})$.
% A graphical model visualization for $\mathcal{S}_h$, $\mathcal{Z}_h$, $\bar{\mathcal{S}}$, $\bar{\mathcal{Z}}$ and $\theta$ is shown in Figure~\ref{Graph}.
% \begin{figure}[htp]
%  \centering
%  \includegraphics[width=0.3\columnwidth]{Proof/Picture1.png}
%  \caption{Graphical models for all random variables}
%  \label{Graph}
% \end{figure}

We further empirically demonstrate that the independence between $\bar{\mathcal{S}}$ and $\bar{\mathcal{Z}}$ exists by testing the error rate of the classifier used in the framework. Specifically, we consider two possible ways of classifier training that could be used in the $p(z|s)$ modelling: one is one-time training where a classifier is only trained one time with uniformly sampled points; this training fashion is used in the proposed framework and it is stated to be able to maintain the independence between $\bar{\mathcal{S}}$ and $\bar{\mathcal{Z}}$ under the null; and the other way is online training where a classifier is initialized with uniformly sampled points and then it is updated by the queried samples. We use the unqueried samples and their labels as a test set and generate classifier error rates for the above two training fashions. Logistic regression is used to output a logistic classifier. It is easy to see and also stated in~\citep{lopez2016revisiting} that a classifier should have around 0.5 error rate if the testing $\bar{\mathcal{S}}$ and $\bar{\mathcal{Z}}$ are independent. The results are shown in Figure~\ref{IndErr}.  It is observed that the one-time training classifier tested with the unqueried samples and their labels for the passive query, certainty query and the bimodal query all have error rates around 0.5 at different label query proportions of the whole dataset, whereas the error rates generated from the online training classifier are biasedly lower than 0.5. 
\begin{figure}[htp]
 \centering
 \includegraphics[width=0.3\columnwidth]{Proof/ClsErrOneTimeTrain_uncertainty_samplinglogistic.png}
  \includegraphics[width=0.3\columnwidth]{Proof/ClsErrOneTimeTrain_EnhanceUncertaintylogistic.png}
   \includegraphics[width=0.3\columnwidth]{Proof/ClsErrOneTimeTrain_EnhanceUncertainty2logistic.png}
 \caption{Classification error for one-time training and sequential training classifier.}
 \label{IndErr}
\end{figure}

\textbf{The proposed framework with permutation test used upper-bounds the Type I error with significance level $\alpha$:} A permutation test rearranges features in $\bar{\mathcal{S}}$ to obtain the null distribution of $\mathcal{T}_{n_q}$ conditional on $\bar{\mathcal{X}}$ and $\bar{\mathcal{Y}}$ under $H_0$ ($\bar{S}\perp\bar{Z}$), computes the $p$-value with an observed statistic $\mathcal{T}_{n_q}$ obtained from $\bar{\mathcal{X}}$ and $\bar{\mathcal{Y}}$, and rejects $H_0$ when the $p$-value is no larger than the significance level $\alpha$. This implies $ P(p\leq\alpha\mid H_0, \bar{\mathcal{X}},\bar{\mathcal{Y}})\leq \alpha$. Therefore, we have the Type I error with permutation test used in our framework in the following:
\begin{align}
    P(p\leq \alpha\mid H_0)&= \int P(p\leq\alpha\mid H_0, \bar{\mathcal{X}},\bar{\mathcal{Y}})p(\bar{\mathcal{X}},\bar{\mathcal{Y}})d\bar{\mathcal{X}}d\bar{\mathcal{Y}}\nonumber\\
    &\leq\alpha\int p(\bar{\mathcal{X}},\bar{\mathcal{Y}})d\bar{\mathcal{X}}d\bar{\mathcal{Y}}\nonumber\\
    &=\alpha
\end{align}
% The existing conventional FR test~\citep{friedman1979multivariate} goes as follows: collect sample features $S_1\dots S_n\in\mathbb{R}^d$ and sample labels $Z_1\dots Z_n\in\{0,1\}$ where each pair of $(S_i, Z_i)$ is i.i.d generated from $p(s,z)$, calculate the FR statistic and $p$-value using the collected samples, and lastly compare the $p$-value to a significance level $\alpha$ to decide rejection or acceptance of a null hypothesis. This is a standard two-sample procedure stated in~\citep{johnson2011elementary}, and the Type I error of a conventional FR test is upper-bounded by $\alpha$. 
\end{proof}

\section{Other experimental results}
\label{SecExp}
% \subsection{Other experimental results with synthetic data}
\subsection{Complete results for using different classification algorithms in the first stage and different two-sample tests in the third stage of the proposed framework}
\label{AppendClassifier}
A classification algorithm $\mathcal{A}$ is used to output a classifier with $f:\mathbb{R}^d\to [0,1]$ to model $p(Z=1|s)$. In this section, we present results of our framework using different classification algorithms $\mathcal{A}$. We select a classification algorithm based on two aspects: (1) a large class of the universal learning
machines (e.g. neural networks, and support vector machines based on the appropriate kernels with a large number of training examples) outputs probability $f(s)$ as a monotone function of $p(Z=1|s)$~\citep{friedman2004multivariate}; (2) the classifier calibration
process~\citep{platt1999probabilistic} adjusts $f(s)$ to generate more accurate $p(z|s)$. We will see in the following that, even in the case of small sample size training, the bimodal query produces superior results relative to passive query. Besides, in order to examine the extensibility of the proposed framework to using other two-sample tests, we replace the FR test in the third stage with the Chen test~\citep{chen2017new} and the cross-match test~\citep{rosenbaum2005exact}.

\textbf{Synthetic datasets}\\
Figure~\ref{ApLogSynTypeII} shows the logistic regression, and Figure~\ref{ApSVMSynTypeII} shows the SVM results of Type II errors of the proposed framework and its parallel implementations with the bimodal query or the FR test replaced. From Figure~\ref{ApLogSynTypeII}(a) and Figure~\ref{ApSVMSynTypeII}(a) we observe that the proposed framework (FR test + bimodal query) has lower Type II error than the FR test combined with other query schemes with a small number of label queries. Figure~\ref{ApLogSynTypeII}(b)(c) and Figure~\ref{ApSVMSynTypeII}(b)(c) show the extensions of the framework to using the Chen test and the cross-match test.  It is observed that our framework is well extended to the Chen and the cross-match tests with the logistic regression. 

Figure~\ref{ApLogSynTypeI} shows the logistic regression, and Figure~\ref{ApSVMSynTypeI} shows the SVM results of Type I errors of the proposed framework and its parallel implementations with the FR test replaced. $\alpha=0.05$ either overlaps with or upper-bounds the $95\%$ confidence interval of the Type I error in all cases, which shows that the Type I error is controlled. 

\begin{figure*}[h!]
% \vspace{-0.5cm}
 \centering
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (a)}}}}
 \stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{Syn/Syn0/TypeII/FR/OneTimeTrain_RejectlogisticInitSize50}.png}}}{\footnotesize $H_1^1$}
\stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{Syn/Syn1/TypeII/FR/OneTimeTrain_RejectlogisticInitSize50}.png} }}{\footnotesize$H_1^2$}
\vspace{-0.2cm}
\caption{FR test}
\end{subfigure}
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (b)}}}}
 \stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{Syn/Syn0/TypeII/Chen/OneTimeTrain_RejectlogisticInitSize50}.png}}}{\footnotesize$H_1^1$}
\stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{Syn/Syn1/TypeII/Chen/OneTimeTrain_RejectlogisticInitSize50}.png} }}{\footnotesize$H_1^2$}
\vspace{-0.2cm}
\caption{Chen test}
\end{subfigure}
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (c)}}}}
 \stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{Syn/Syn0/TypeII/Hotelling/OneTimeTrain_Rejectlogistic}.png}}}{\footnotesize$H_1^1$}
\stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{Syn/Syn1/TypeII/Hotelling/OneTimeTrain_RejectlogisticInitSize50}.png} }}{\footnotesize$H_1^2$}
\vspace{-0.2cm}
\caption{Cross-match test}
\end{subfigure}
\vspace{-0.2cm}
 \caption{
 Type II error of the proposed framework (Bimodal query based FR test) and its parallel implementations either with FR test replaced by Chen and cross-match tests, or with bimodal query replaced with three baseline queries under the two synthetic dataset alternative hypotheses $H_1^1$ and $H_1^2$. \textbf{Logistic regression} is used. Type II error is on the Y-axis and label query proportions of the whole dataset size is on the X-axis. }
 \label{ApLogSynTypeII}
% \end{wrapfigure}
\end{figure*}

\begin{figure}[h!]
% \vspace{-0.5cm}
 \centering
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (a)}}}}
 \stackunder[1pt]{{\includegraphics[width=\linewidth]{{Syn/Syn0/TypeI/FR/TypeIErrCIOneTimeTrain_EnhanceUncertainty2logistic}.png}}}{}
 \vspace{-0.4cm}
\caption{FR test}
\end{subfigure}
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (b)}}}}
 \stackunder[1pt]{{\includegraphics[width=\linewidth]{{Syn/Syn0/TypeI/Chen/TypeIErrCIOneTimeTrain_EnhanceUncertainty2logistic}.png}}}{}
 \vspace{-0.4cm}
\caption{Chen test}
\end{subfigure}
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (c)}}}}
 \stackunder[1pt]{{\includegraphics[width=\linewidth]{{Syn/Syn0/TypeI/cross-match/TypeIErrCIOneTimeTrain_EnhanceUncertainty2logistic}.png}}}{}
 \vspace{-0.4cm}
\caption{Cross-match test}
\end{subfigure}
 \caption{Type I error ($95\%$ confidence interval) of the proposed framework (Bimodal query based FR test) and its parallel implementations with FR test replaced by Chen and cross-match tests under the synthetic dataset null hypothesis $H_0$. \textbf{Logistic regression} is used. Type I error is on the Y-axis and label query proportion of the whole dataset size is on the X-axis.}
 \label{ApLogSynTypeI}
% \end{wrapfigure}
\vspace{-0.6cm}
\end{figure}

\begin{figure*}[h!]
% \vspace{-0.5cm}
 \centering
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (a)}}}}
 \stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{Syn/Syn0/TypeII/FR/OneTimeTrain_RejectCaliSVCInitSize50}.png}}}{\footnotesize $H_1^1$}
\stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{Syn/Syn1/TypeII/FR/OneTimeTrain_RejectCaliSVCInitSize50}.png} }}{\footnotesize$H_1^2$}
\vspace{-0.2cm}
\caption{FR test}
\end{subfigure}
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (b)}}}}
 \stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{Syn/Syn0/TypeII/Chen/OneTimeTrain_RejectCaliSVCInitSize50}.png}}}{\footnotesize$H_1^1$}
\stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{Syn/Syn1/TypeII/Chen/OneTimeTrain_RejectCaliSVCInitSize50}.png} }}{\footnotesize$H_1^2$}
\vspace{-0.2cm}
\caption{Chen test}
\end{subfigure}
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (c)}}}}
 \stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{Syn/Syn0/TypeII/cross-match/OneTimeTrain_RejectCaliSVCInitSize50}.png}}}{\footnotesize$H_1^1$}
\stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{Syn/Syn1/TypeII/cross-match/OneTimeTrain_RejectCaliSVCInitSize50}.png} }}{\footnotesize$H_1^2$}
\vspace{-0.2cm}
\caption{Cross-match test}
\end{subfigure}
\vspace{-0.2cm}
 \caption{
 Type II error of the proposed framework (Bimodal query based FR test) and its parallel implementations either with FR test replaced by Chen and cross-match tests, or with bimodal query replaced with three baseline queries under the two synthetic dataset alternative hypotheses $H_1^1$ and $H_1^2$. \textbf{SVM} is used. Type II error is on the Y-axis and label query proportions of the whole dataset size is on the X-axis. }
 \label{ApSVMSynTypeII}
% \end{wrapfigure}
\end{figure*}
 
 \begin{figure}[h!]
% \vspace{-0.5cm}
 \centering
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (a)}}}}
 \stackunder[1pt]{{\includegraphics[width=\linewidth]{{Syn/Syn0/TypeI/FR/TypeIErrCIOneTimeTrain_EnhanceUncertainty2CaliSVC}.png}}}{}
 \vspace{-0.4cm}
\caption{FR test}
\end{subfigure}
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (b)}}}}
 \stackunder[1pt]{{\includegraphics[width=\linewidth]{{Syn/Syn0/TypeI/Chen/TypeIErrCIOneTimeTrain_EnhanceUncertainty2CaliSVC}.png}}}{}
 \vspace{-0.4cm}
\caption{Chen test}
\end{subfigure}
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (c)}}}}
 \stackunder[1pt]{{\includegraphics[width=\linewidth]{{Syn/Syn0/TypeI/cross-match/TypeIErrCIOneTimeTrain_EnhanceUncertainty2CaliSVC}.png}}}{}
 \vspace{-0.4cm}
\caption{Cross-match test}
\end{subfigure}
\vspace{-0.2cm}
 \caption{Type I error ($95\%$ confidence interval) of the proposed framework (Bimodal query based FR test) and its parallel implementations with FR test replaced by Chen and cross-match tests under the synthetic dataset null hypothesis $H_0$. \textbf{SVM} is used. Type I error is on the Y-axis and label query proportion of the whole dataset size is on the X-axis.}
 \label{ApSVMSynTypeI}
% \end{wrapfigure}
\end{figure}
\textbf{MNIST and ADNI}\\
Figure~\ref{ApLogRealTypeII}, Figure~\ref{ApSVMRealTypeII} and Figure~\ref{ApNNRealTypeII} show the logistic regression, SVM and neural network results of Type II errors for the proposed framework and its parallel implementations with the bimodal query or the FR test replaced. We observed that not only does the proposed framework have lower errors than its parallel implementations with the bimodal query replaced, but the framework extended to the Chen and the cross-match tests also generates lower Type II errors in all three classifier cases.

Figure~\ref{ApLogSynTypeI} shows the logistic regression, and Figure~\ref{ApSVMSynTypeI} shows the SVM results of Type I errors for the proposed framework and its parallel implementations with the FR test replaced. $\alpha=0.05$ either overlaps with or upper-bounds the $95\%$ confidence interval of the Type I error in most cases, which shows that the Type I error is controlled.

Figure~\ref{ApLogRealTypeI}, Figure~\ref{ApSVMRealTypeI} and Figure~\ref{ApNNRealTypeI} show the logistic regression, SVM and neural network results of Type I errors for the proposed framework and its parallel implementations with the FR test replaced. It is observed that the proposed framework with the FR test always has $\alpha=0.05$ either overlapping with or upper-bounding the $95\%$ confidence interval of the Type I error in all classifier cases, which shows that the Type I error is controlled.
\begin{figure*}[h!]
% \vspace{-0.5cm}
 \centering
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (a)}}}}
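% NOTE(review): the MNIST panels of this logistic-regression figure load the `...RejectCaliNNInitSize100` plots, the same file used in the neural-network figure; confirm whether the logistic-regression MNIST plots were intended here.
     \stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{MNIST/TypeII/FR/OneTimeTrain_RejectCaliNNInitSize100}.png}}}{\footnotesize $H_1^{\rm M}$}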
\stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{ADNI/TypeII/FR/OneTimeTrain_RejectlogisticInitSize50}.png} }}{\footnotesize$H_1^{\rm AD}$}
\vspace{-0.2cm}
\caption{FR test}
\end{subfigure}
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (b)}}}}
 \stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{MNIST/TypeII/Chen/OneTimeTrain_RejectCaliNNInitSize100}.png}}}{\footnotesize$H_1^{\rm M}$}
\stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{ADNI/TypeII/Chen/OneTimeTrain_RejectlogisticInitSize50}.png} }}{\footnotesize$H_1^{\rm AD}$}
\vspace{-0.2cm}
\caption{Chen test}
\end{subfigure}
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (c)}}}}
 \stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{MNIST/TypeII/cross-match/OneTimeTrain_RejectCaliNNInitSize100}.png}}}{\footnotesize$H_1^{\rm M}$}
\stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{ADNI/TypeII/cross-match/OneTimeTrain_RejectlogisticInitSize50}.png} }}{\footnotesize$H_1^{\rm AD}$}
\vspace{-0.2cm}
\caption{Cross-match test}
\end{subfigure}
\vspace{-0.2cm}
 \caption{Type II error of the proposed framework (Bimodal query based FR test) and its parallel implementations either with FR test replaced by Chen and cross-match tests, or with bimodal query replaced with three baseline queries under the MNIST alternative hypothesis $H_1^{\rm M}$ and the ADNI hypothesis $H_1^{\rm AD}$. \textbf{Logistic regression} is used. Type II error is on the Y-axis and label query proportion of the whole dataset size is on the X-axis.}
 \label{ApLogRealTypeII}
% \end{wrapfigure}
\end{figure*}

\begin{figure}[h!]
% \vspace{-0.5cm}
 \centering
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (a)}}}}
 \stackunder[1pt]{{\includegraphics[width=\linewidth]{{MNIST/TypeI/FR/TypeIErrCIOneTimeTrain_EnhanceUncertainty2logistic}.png}}}{}
 \vspace{-0.4cm}
\caption{FR test}
\end{subfigure}
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (b)}}}}
 \stackunder[1pt]{{\includegraphics[width=\linewidth]{{MNIST/TypeI/Chen/TypeIErrCIOneTimeTrain_EnhanceUncertainty2logistic}.png}}}{}
 \vspace{-0.4cm}
\caption{Chen test}
\end{subfigure}
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (c)}}}}
 \stackunder[1pt]{{\includegraphics[width=\linewidth]{{MNIST/TypeI/cross-match/TypeIErrCIOneTimeTrain_EnhanceUncertainty2logistic}.png}}}{}
 \vspace{-0.4cm}
\caption{Cross-match test}
\end{subfigure}
\vspace{-0.2cm}
 \caption{Type I error ($95\%$ confidence interval) of the proposed framework (Bimodal query based FR test) and its parallel implementations with FR test replaced by Chen and cross-match tests under the MNIST null hypothesis $H_0^{\rm M}$. \textbf{Logistic regression} is used. Type I error is on the Y-axis and label query proportion of the whole dataset size is on the X-axis.}
 \label{ApLogRealTypeI}
\end{figure}

\begin{figure*}[h!]
% \vspace{-0.5cm}
 \centering
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (a)}}}}
 \stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{MNIST/TypeII/FR/OneTimeTrain_RejectCaliSVCInitSize100}.png}}}{\footnotesize $H_1^{\rm M}$}
\stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{ADNI/TypeII/FR/OneTimeTrain_RejectCaliSVCInitSize50}.png} }}{\footnotesize$H_1^{\rm AD}$}
\vspace{-0.2cm}
\caption{FR test}
\end{subfigure}
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (b)}}}}
 \stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{MNIST/TypeII/Chen/OneTimeTrain_RejectCaliSVCInitSize100}.png}}}{\footnotesize$H_1^{\rm M}$}
\stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{ADNI/TypeII/Chen/OneTimeTrain_RejectCaliSVCInitSize50}.png} }}{\footnotesize$H_1^{\rm AD}$}
\vspace{-0.2cm}
\caption{Chen test}
\end{subfigure}
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (c)}}}}
 \stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{MNIST/TypeII/cross-match/OneTimeTrain_RejectCaliSVCInitSize100}.png}}}{\footnotesize$H_1^{\rm M}$}
\stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{ADNI/TypeII/cross-match/OneTimeTrain_RejectCaliSVCInitSize50}.png} }}{\footnotesize$H_1^{\rm AD}$}
\vspace{-0.2cm}
\caption{Cross-match test}
\end{subfigure}
\vspace{-0.2cm}
 \caption{Type II error of the proposed framework (Bimodal query based FR test) and its parallel implementations either with FR test replaced by Chen and cross-match tests, or with bimodal query replaced with three baseline queries under the MNIST alternative hypothesis $H_1^{\rm M}$ and the ADNI hypothesis $H_1^{\rm AD}$. \textbf{SVM} is used. Type II error is on the Y-axis and label query proportion of the whole dataset size is on the X-axis.}
 \label{ApSVMRealTypeII}
% \end{wrapfigure}
\end{figure*}

\begin{figure}[h!]
% \vspace{-0.5cm}
 \centering
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (a)}}}}
 \stackunder[1pt]{{\includegraphics[width=\linewidth]{{MNIST/TypeI/FR/TypeIErrCIOneTimeTrain_EnhanceUncertainty2SVC}.png}}}{}
 \vspace{-0.4cm}
\caption{FR test}
\end{subfigure}
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (b)}}}}
 \stackunder[1pt]{{\includegraphics[width=\linewidth]{{MNIST/TypeI/Chen/TypeIErrCIOneTimeTrain_EnhanceUncertainty2SVC}.png}}}{}
 \vspace{-0.4cm}
\caption{Chen test}
\end{subfigure}
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (c)}}}}
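% NOTE(review): this panel loads `...EnhanceUncertainty2CaliSVC` while the FR and Chen panels of the same figure load `...EnhanceUncertainty2SVC`; confirm which file name is intended.
 \stackunder[1pt]{{\includegraphics[width=\linewidth]{{MNIST/TypeI/cross-match/TypeIErrCIOneTimeTrain_EnhanceUncertainty2CaliSVC}.png}}}{}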
 \vspace{-0.4cm}
\caption{Cross-match test}
\end{subfigure}
\vspace{-0.2cm}
 \caption{Type I error ($95\%$ confidence interval) of the proposed framework (Bimodal query based FR test) and its parallel implementations with FR test replaced by Chen and cross-match tests under the MNIST null hypothesis $H_0^{\rm M}$. \textbf{SVM} is used. Type I error is on the Y-axis and label query proportion of the whole dataset size is on the X-axis.}
 \label{ApSVMRealTypeI}
\vspace{-0.5cm}
\end{figure}

\begin{figure*}[h!]
% \vspace{-0.5cm}
 \centering
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (a)}}}}
 \stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{MNIST/TypeII/FR/OneTimeTrain_RejectCaliNNInitSize100}.png}}}{\footnotesize $H_1^{\rm M}$}
\stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{ADNI/TypeII/FR/OneTimeTrain_RejectCaliNNInitSize50}.png} }}{\footnotesize$H_1^{\rm AD}$}
\vspace{-0.2cm}
\caption{FR test}
\end{subfigure}
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (b)}}}}
 \stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{MNIST/TypeII/Chen/OneTimeTrain_RejectCaliNNInitSize100}.png}}}{\footnotesize$H_1^{\rm M}$}
\stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{ADNI/TypeII/Chen/OneTimeTrain_RejectCaliNNInitSize50}.png} }}{\footnotesize$H_1^{\rm AD}$}
\vspace{-0.2cm}
\caption{Chen test}
\end{subfigure}
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (c)}}}}
 \stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{MNIST/TypeII/cross-match/OneTimeTrain_RejectCaliNNInitSize100}.png}}}{\footnotesize$H_1^{\rm M}$}
\stackunder[1pt]{{\includegraphics[width=0.48\linewidth]{{ADNI/TypeII/cross-match/OneTimeTrain_RejectCaliNNInitSize50}.png} }}{\footnotesize$H_1^{\rm AD}$}
\vspace{-0.2cm}
\caption{Cross-match test}
\end{subfigure}
\vspace{-0.2cm}
 \caption{Type II error of the proposed framework (Bimodal query based FR test) and its parallel implementations either with FR test replaced by Chen and cross-match tests, or with bimodal query replaced with three baseline queries under the MNIST alternative hypothesis $H_1^{\rm M}$ and the ADNI hypothesis $H_1^{\rm AD}$. \textbf{Neural network} is used. Type II error is on the Y-axis and label query proportion of the whole dataset size is on the X-axis.}
 \label{ApNNRealTypeII}
% \end{wrapfigure}
\end{figure*}

\begin{figure}[h!]
% \vspace{-0.5cm}
 \centering
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (a)}}}}
 \stackunder[1pt]{{\includegraphics[width=\linewidth]{{MNIST/TypeI/FR/TypeIErrCIOneTimeTrain_EnhanceUncertainty2CaliNN}.png}}}{}
 \vspace{-0.4cm}
\caption{FR test}
\end{subfigure}
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (b)}}}}
 \stackunder[1pt]{{\includegraphics[width=\linewidth]{{MNIST/TypeI/Chen/TypeIErrCIOneTimeTrain_EnhanceUncertainty2CaliNN}.png}}}{}
 \vspace{-0.4cm}
\caption{Chen test}
\end{subfigure}
 \begin{subfigure}[b]{0.32\linewidth}
%  \makebox[10pt]{\raisebox{43pt}{\rotatebox[origin=c]{0}{{\footnotesize (c)}}}}
 \stackunder[1pt]{{\includegraphics[width=\linewidth]{{MNIST/TypeI/cross-match/TypeIErrCIOneTimeTrain_EnhanceUncertainty2CaliNN}.png}}}{}
 \vspace{-0.4cm}
\caption{Cross-match test}
\end{subfigure}
\vspace{-0.2cm}
 \caption{Type I error ($95\%$ confidence interval) of the proposed framework (Bimodal query based FR test) and its parallel implementations with FR test replaced by Chen and cross-match tests under the MNIST null hypothesis $H_0^{\rm M}$. \textbf{Neural network} is used. Type I error is on the Y-axis and label query proportion of the whole dataset size is on the X-axis.}
 \label{ApNNRealTypeI}
\vspace{-0.5cm}
\end{figure}

