% \documentclass{uai2022} % for initial submission
\documentclass[accepted]{uai2022} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2021} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2021} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
%\usepackage[british]{babel}
\usepackage{amssymb,amsmath,bm}
\usepackage{float}
% Theorem-like environments. No shared-counter argument is given, so each
% environment is numbered by its own independent counter.
% Note: `proof` is declared here as an ordinary numbered theorem-like
% environment, so each proof is typeset with its own running number.
\newtheorem{prop}{Proposition}
\newtheorem{corollary}{Corollary}
\newtheorem{proof}{Proof}
\newtheorem{remark}{Remark}
\newtheorem{definition}{Definition}
\usepackage{bbm}
\usepackage{subfig}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{references}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
% \swap[<sep>]{<a>}{<b>} typesets <b><sep><a>: the two mandatory arguments in
% reverse order, joined by the optional separator (default "-").
\newcommand{\swap}[3][-]{#3#1#2} % just an example

% Upright "arg max" / "arg min" operators with a thin space (\,) between the
% two words. The starred \DeclareMathOperator* form is the one used for
% operators whose subscripts should be set as limits below the operator.
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}

% Section numbers are printed as "S.<n>" (S.1, S.2, ...); subsection numbers
% append the subsection counter to that, giving "S.<n>.<m>".
\renewcommand{\thesection}{S.\arabic{section}}
\renewcommand{\thesubsection}{\thesection.\arabic{subsection}}

% Redefine the internal equation-tag formatter \tagform@ (per the "With ams"
% note below) so that every equation tag is typeset as "(S<tag>)" instead of
% "(<tag>)". \makeatletter/\makeatother are needed because the macro names
% contain "@".
\makeatletter %% With ams
\def\tagform@#1{\maketag@@@{(S\ignorespaces#1\unskip\@@italiccorr)}}
\makeatother

% \symbolfootnote[<n>]{<text>}: a footnote whose mark is printed with
% \fnsymbol (a footnote symbol) rather than the default numbering, using <n>
% as the explicit footnote number. \thefootnote is redefined only inside the
% \begingroup...\endgroup pair, so the change does not leak to other footnotes.
% \long allows <text> to contain paragraph breaks.
\long\def\symbolfootnote[#1]#2{\begingroup%
\def\thefootnote{\fnsymbol{footnote}}\footnote[#1]{#2}\endgroup}

% Supplement-style numbering: figures and tables are labelled S1, S2, ...
% The equation counter itself stays a plain arabic number; equation tags get
% their "S" prefix from the \tagform@ redefinition earlier in the preamble.
% \bibnumfmt (natbib) formats numbered bibliography labels as [S<n>].
\renewcommand{\thefigure}{S\arabic{figure}}
\renewcommand{\theequation}{\arabic{equation}}
\renewcommand{\thetable}{S\arabic{table}}
\renewcommand{\bibnumfmt}[1]{[S#1]}

\title{Dimension Reduction for High-dimensional Small Counts with KL Divergence: Supplementary Material}

\author[1]{\href{mailto:Yurong Ling <yurong.ling.16@ucl.ac.uk>?Subject=Your UAI 2022 paper}{Yurong Ling}{}} % Lead author
\author[1]{Jing-Hao Xue}
% Add affiliations after the authors
\affil[1]{%
    Department of Statistical Science\\
    University College London\\
    London, UK
}


\begin{document}
\onecolumn
\maketitle
\section{Proofs}\label{secs:proofs}
In this section, we provide the proofs of Proposition 1 and Corollary 2 presented in the main paper.
\subsection{Proof of Proposition 1}
\begin{proof}
First note that 
$R_{D}\left(F_x, F_y\right)$ converges to $ \frac{\mathbb{E}[D^2(x, y)]}{\mathbb{E}[D^2(x, \tilde{x})] + \mathbb{E}[D^2(y, \tilde{y})]}$ in probability, as $p \rightarrow \infty$, 
based on the weak law of large numbers and Slutsky's theorem.
Now we have
\begin{equation}\label{eq:1}
\begin{split}  
    &\mathbb{E}[D^2(x, y)] = 
    \mathrm{Cov}\left (f(x) - f(y), g(x)-g(y)
    \right ) 
    + \left [\mathrm{E}f(x) - \mathrm{E}f(y) \right ] \left [ \mathrm{E}g(x) - \mathrm{E}g(y) \right ] \\
    &= \mathrm{Cov}\left (f(x), g(x) \right ) 
    +\mathrm{Cov}\left (f(y), g(y) \right ) 
    + \left [\mathrm{E}f(x) - \mathrm{E}f(y) \right ] \left [ \mathrm{E}g(x) - \mathrm{E}g(y) \right ],
\end{split}
\end{equation}
and 
\begin{equation}\label{eq:2}
\begin{split}  
    &\mathbb{E}[D^2(x, \tilde{x})] + \mathbb{E}[D^2(y, \tilde{y})] 
    = 
    2\mathrm{E}\left [ f(x)g(x) \right ]
    - 2\mathrm{E}f(x)\mathrm{E}g(x) 
    + 2\mathrm{E}\left [ f(y)g(y) \right ]
    - 2\mathrm{E}f(y)\mathrm{E}g(y) \\
    &
    =2\mathrm{Cov}\left (f(x), g(x) \right ) 
    +2\mathrm{Cov}\left (f(y), g(y) \right ).
\end{split}
\end{equation}
Combining results from Equation~\eqref{eq:1} and Equation~\eqref{eq:2}, we obtain 
\begin{align*}
\begin{split}
    &\frac{\mathbb{E}[D^2(x, y)]}{\mathbb{E}[D^2(x, \tilde{x})] + \mathbb{E}[D^2(y, \tilde{y})]}
    = \frac{1}{2} 
    + \frac{1}{2}\frac{\left [\mathrm{E}f(x) - \mathrm{E}f(y) \right ] \left [ \mathrm{E}g(x) - \mathrm{E}g(y) \right ]}
    {\mathrm{Cov}\left (f(x), g(x) \right ) 
    +\mathrm{Cov}\left (f(y), g(y) \right )},
\end{split}
\end{align*}
which completes the proof.
\end{proof}

\subsection{Proof of Corollary 2}
\begin{proof}
First note that $\lim\limits_{\mu_x \rightarrow 0}\mathrm{PMF}(x=0)=1$ for a non-negative random variable $x$ and $\lim\limits_{\mu_x \rightarrow 0}\mathrm{Var}\left (x \right ) = 0$. Further, we obtain $\lim\limits_{\mu_x \rightarrow 0}\mathrm{E}\left [g(x) \right ] = g(0)$ and $\lim\limits_{\mu_x \rightarrow 0}\mathrm{Var}\left [g(x) \right ] = 0$. Now consider the limiting difference between $c_g$ and $c_E$:
\begin{align*}
    \begin{split}
        &\lim_{\mu_x \rightarrow 0} \left( c_g - c_E \right)
        =\lim_{\mu_x \rightarrow 0}
        \left[ \frac{1}{2}
        \frac{\left [\mathrm{E}g(x) - \mathrm{E}g(y) \right ]^2}
        {\mathrm{Var}\left [g(x) \right ] 
        +\mathrm{Var}\left [g(y) \right ]} \right.
         -\left. \frac{1}{2}
        \frac{\left [\mathrm{E}(x) - \mathrm{E}(y) \right ]^2}
        {\mathrm{Var}\left (x \right ) 
        +\mathrm{Var}\left (y \right )} \right]\\
        &=\frac{1}{2}
        \left[\frac{\left [g(0) - \mathrm{E}g(y) \right ]^2}{\mathrm{Var}\left[ g(y)\right]}
       -\frac{\mathrm{E}^2\left (y \right )}
       {\mathrm{Var}\left (y \right )} \right].
    \end{split}
\end{align*}
\end{proof}

\begin{figure}[htbp]
    \centering
    \includegraphics[width=0.5\linewidth]{figures/Poisson.pdf}
    \caption{PMFs of two Poisson distributions.}
    \label{fig:Poisson}
\end{figure}

\section{An example showing the Euclidean distance performs badly on small counts}\label{secs:example}
Suppose we have two Poisson distributions with mean values $0.01$ and $0.96$, respectively. Although both distributions produce small counts, the PMFs of these two distributions, provided in Figure~\ref{fig:Poisson}, display disparate patterns: the mass of one distribution concentrates at $0$, while the frequent values of the other one lie in $\left\{0,1,2\right\}$.
It is highly likely that $R_E\left(F_x, F_y\right)<1$ in a high-dimensional space according to Corollary~1 due to the mean-variance dependency, indicating that $D_E$ cannot distinguish these two distributions well.


\begin{table*}[t]
\centering
\caption{Parameter spaces for $F_y$.}
\label{tb:space}
\resizebox{1\textwidth}{!}
{\begin{tabular}{lll}
\hline
Distributions & Parameter $1$  & Parameter $2$           \\
\hline
Poisson       & means: $[0.05,1]$; \#samples: 50 
& variances: the same as the space of means \\
NB            & $r$ (\# failures): $(\frac{3}{4},5]$; \#samples: 50    & $p$ (probability of success): $[0.05,0.2]$; \#samples: 10    \\
\hline
\end{tabular}}
\end{table*}

\section{Simulation details}\label{secs:sim_details}
To obtain $\hat{R}(F_x,F_y)$, we first generate high-dimensional samples in $S_X$ and $S_Y$ with each coordinate i.i.d., and then calculate $\hat{R}(F_x,F_y)$ with different measures. 
As we compared $D_E$ with the Euclidean distances of the transformed data, we let $F_x$ be a count distribution that frequently generates zeros. Specifically, $F_x$ is either $\text{Pois}(0.05)$ or $\text{NB}(r,0.05)$ that has the same $r$ as that of $F_y$. Note that the probability of getting a zero from $\text{Pois}(0.05)$ is around $0.951$ and that from $\text{NB}(r,0.05)$ is $0.95^r$.
The parameter spaces for $F_y$ are provided in Table~\ref{tb:space}. Note that the parameters for simulating NB and Poisson distributions are selected in an evenly spaced way from the intervals provided in Table~\ref{tb:space}.

With the mean value increasing, the proportion of zeros produced from a Poisson distribution decreases to $0$. We hence bound the mean value from above by $1$ to generate small counts and the expected fractions of zeros in the Poisson-distributed count data lie in the range $[0.368,0.905]$. 
When $r$ approaches infinity, the NB distribution approximates an equi-dispersed Poisson distribution. Thus, the upper bound of $r$ for simulating NB distributions should be small in order to generate overdispersed distributions.
When simulating data from NB distributions, the values of $r$ for both $F_x$ and $F_y$ are the same, and we use the true value of $r$ for calculating $D_{NB}$ and $D_{\mathrm{asin}}$. Since $r<\frac{3}{4}$ causes numerical problems when calculating $D_{\mathrm{asin}}$, we only take values of $r$ that are larger than $\frac{3}{4}$.
Again, we restrict the range of $p$ in NB distributions with the aim of simulating small counts. The proportion of zeros in the simulated NB data ranges from $0.363$ to $0.963$.
For Poisson distributions, we set $r$ to $1000$ in $D_{\mathrm{asin}}$ due to the approximation of NB distributions to Poisson distributions when $r$ is large.
The dimension for each data point is set to be $5000$, which is high enough to obtain a reliable estimate of the constant that $R(F_x,F_y)$ converges to. Although the convergence of $R(F_x,F_y)$ holds regardless of the number of samples in each distribution, small values of $n_x$ and $n_y$ may result in meaningless coordinates, in the form of vectors of zeros.
Therefore, we set $n_x$ and $n_y$ to $1000$ to avoid such cases. 


\section{Evaluation details}\label{secs:evaluation}
When applying tSNE with the proposed measures to the datasets, we replace the Euclidean distance with the proposed ones to characterise the dissimilarities between data points for the construction of conditional probabilities.
When performing PCA with the proposed measures, we first get a pseudo Gram matrix by double-centring the corresponding pairwise dissimilarity matrix and then get the low-dimensional components by eigen-decomposing the pseudo Gram matrix.
We empirically found that making the pseudo Gram matrix positive semi-definite would produce better results when combined with GPLVM. Thus, we first eigen-decompose the pseudo Gram matrix and keep all the eigenvectors with non-negative eigenvalues. We then feed the modified pseudo Gram matrix $Q\Lambda Q^{T}$ into the GPLVM, where $Q$ is the matrix whose columns are the kept eigenvectors and $\Lambda$ is the diagonal matrix whose diagonal elements are the corresponding eigenvalues. 

\clearpage
\section{More visualization results}
\subsection{Visualization of GPLVM results}
\begin{figure}[H]
    \centering
     {\includegraphics[width=0.9\linewidth]{figures/GPLVM_VIS/sc_Celseq2_5cl_p1_GPLVM_vis_VSTs.pdf}} 
    \caption{Visualization of the sc-CEL-seq2-5cl-p1 dataset obtained by GPLVMs with different measures.}
    \label{fig:gplvm_vis_sc-CEL-seq2-5cl-p1}
\end{figure}

\clearpage
\begin{figure}[htbp]
    \centering
     {\includegraphics[width=0.9\linewidth]{figures/GPLVM_VIS/sc_Celseq2_5cl_p2_GPLVM_vis_VSTs.pdf}} 
    \caption{Visualization of the sc-CEL-seq2-5cl-p2 dataset obtained by GPLVMs with different measures.}
    \label{fig:gplvm_vis_sc-CEL-seq2-5cl-p2}
\end{figure}

\begin{figure}[htbp]
    \centering
     {\includegraphics[width=0.9\linewidth]{figures/GPLVM_VIS/sc_Celseq2_5cl_p3_GPLVM_vis_VSTs.pdf}} 
    \caption{Visualization of the sc-CEL-seq2-5cl-p3 dataset obtained by GPLVMs with different measures.}
    \label{fig:gplvm_vis_sc-CEL-seq2-5cl-p3}
\end{figure}

\begin{figure}[htbp]
    \centering
     {\includegraphics[width=0.9\linewidth]{figures/GPLVM_VIS/sce_sc_CELseq2_qc_GPLVM_vis_VSTs.pdf}} 
    \caption{Visualization of the sc-CEL-seq2 dataset obtained by GPLVMs with different measures.}
    \label{fig:gplvm_vis_sc-CEL-seq2}
\end{figure}

\begin{figure}[htbp]
    \centering
     {\includegraphics[width=0.9\linewidth]{figures/GPLVM_VIS/full_Zhengmix8eq_GPLVM_vis_VSTs.pdf}} 
    \caption{Visualization of the Zheng8eq dataset obtained by GPLVMs with different measures.}
    \label{fig:gplvm_vis_Zheng8eq}
\end{figure}

\begin{figure}[t]
    \centering
    {\includegraphics[width=0.9\linewidth]{figures/GPLVM_VIS/sim_Zheng8eq_GPLVM_vis_VSTs.pdf}} 
    \caption{Visualization of the sim-Zheng8eq dataset obtained by GPLVMs with different measures.}
    \label{fig:gplvm_vis_sim-Zheng8eq}
\end{figure}

\begin{figure}[htbp]
    \centering
    {\includegraphics[width=0.9\linewidth]{figures/GPLVM_VIS/sim_manno_GPLVM_vis_VSTs.pdf}} 
    \caption{Visualization of the sim-manno-ESCs dataset obtained by GPLVMs with different measures.}
    \label{fig:gplvm_vis_sim-manno-ESCs}
\end{figure}

\clearpage

\subsection{Visualization of tSNE results}
\begin{figure}[htbp]
    \centering
     {\includegraphics[width=0.9\linewidth]{figures/tSNE_VIS/sc_Celseq2_5cl_p1_tSNE_vis_VSTs.pdf}} 
    \caption{Visualization of the sc-CEL-seq2-5cl-p1 dataset obtained by tSNE with different measures.}
    \label{fig:tsne_vis_sc-CEL-seq2-5cl-p1}
\end{figure}

\clearpage

\begin{figure}[htbp]
    \centering
     {\includegraphics[width=0.9\linewidth]{figures/tSNE_VIS/sc_Celseq2_5cl_p2_tSNE_vis_VSTs.pdf}} 
    \caption{Visualization of the sc-CEL-seq2-5cl-p2 dataset obtained by tSNE with different measures.}
    \label{fig:tsne_vis_sc-Celseq2-5cl-p2}
\end{figure}

\begin{figure}[htbp]
    \centering
    {\includegraphics[width=0.9\linewidth]{figures/tSNE_VIS/sc_Celseq2_5cl_p3_tSNE_vis_VSTs.pdf}} 
    \caption{Visualization of the sc-CEL-seq2-5cl-p3 dataset obtained by tSNE with different measures.}
    \label{fig:tsne_vis_sc-CEL-seq2-5cl-p3}
\end{figure}

\begin{figure}[htbp]
    \centering
    {\includegraphics[width=0.9\linewidth]{figures/tSNE_VIS/sce_sc_CELseq2_qc_tSNE_vis_VSTs.pdf}} 
    \caption{Visualization of the sc-CEL-seq2 dataset obtained by tSNE with different measures.}
    \label{fig:tsne_vis_sc-CEL-seq2}
\end{figure}


\begin{figure}[htbp]
    \centering
     {\includegraphics[width=0.9\linewidth]{figures/tSNE_VIS/full_Zhengmix8eq_tSNE_vis_VSTs.pdf}} 
    \caption{Visualization of the Zheng8eq dataset obtained by tSNE with different measures.}
    \label{fig:tsne_vis_Zheng8eq}
\end{figure}

\begin{figure}[htbp]
    \centering
     {\includegraphics[width=0.9\linewidth]{figures/tSNE_VIS/sim_Zheng8eq_tSNE_vis_VSTs.pdf}} 
    \caption{Visualization of the sim-Zheng8eq dataset obtained by tSNE with different measures.}
    \label{fig:tsne_vis_sim-Zheng8eq}
\end{figure}

\begin{figure}[htbp]
     \centering
     {\includegraphics[width=0.9\linewidth]{figures/tSNE_VIS/sim_manno_tSNE_vis_VSTs.pdf}} 
    \caption{Visualization of the sim-manno-ESCs dataset obtained by tSNE with different measures.}
    \label{fig:tsne_vis_sim-manno-ESCs}
\end{figure}

\begin{figure}[htbp]
    \centering
     {\includegraphics[width=0.9\linewidth]{figures/tSNE_VIS/sim_manno_vm_tSNE_vis_VSTs.pdf}} 
    \caption{Visualization of the sim-manno-vm dataset obtained by tSNE with different measures.}
    \label{fig:tsne_vis_sim-manno-vm}
\end{figure}

\clearpage
\subsection{Visualization of PCA results}

\begin{figure}[H]
    \centering
     {\includegraphics[width=0.9\linewidth]{figures/PCA_VIS/sc_Celseq2_5cl_p2_pca_vis_VSTs.pdf}} 
    \caption{Visualization of the sc-CEL-seq2-5cl-p2 dataset obtained by PCA with different measures.}
    \label{fig:pca_vis_sc-CEL-seq2-5cl-p2}
\end{figure}
\clearpage

\begin{figure}[htbp]
    \centering
     {\includegraphics[width=0.9\linewidth]{figures/PCA_VIS/sc_Celseq2_5cl_p3_pca_vis_VSTs.pdf}} 
    \caption{Visualization of the sc-CEL-seq2-5cl-p3 dataset obtained by PCA with different measures.}
    \label{fig:pca_vis_sc-CEL-seq2-5cl-p3}
\end{figure}

\begin{figure}[htbp]
    \centering
     {\includegraphics[width=0.9\linewidth]{figures/PCA_VIS/sce_sc_CELseq2_qc_pca_vis_VSTs.pdf}} 
    \caption{Visualization of the sc-CEL-seq2 dataset obtained by PCA with different measures.}
    \label{fig:pca_vis_sc-CEL-seq2}
\end{figure}

\begin{figure}[htbp]
    \centering
    {\includegraphics[width=0.9\linewidth]{figures/PCA_VIS/full_Zhengmix8eq_pca_vis_VSTs.pdf}} 
    \caption{Visualization of the Zheng8eq dataset obtained by PCA with different measures.}
    \label{fig:pca_vis_Zheng8eq}
\end{figure}

\begin{figure}[htbp]
    \centering
    {\includegraphics[width=0.9\linewidth]{figures/PCA_VIS/sim_Zheng8eq_pca_vis_VSTs.pdf}} 
    \caption{Visualization of the sim-Zheng8eq dataset obtained by PCA with different measures.}
    \label{fig:pca_vis_sim-Zheng8eq}
\end{figure}

\begin{figure}[htbp]
    \centering
     {\includegraphics[width=0.9\linewidth]{figures/PCA_VIS/sim_manno_pca_vis_VSTs.pdf}} 
    \caption{Visualization of the sim-manno-ESCs dataset obtained by PCA with different measures.}
    \label{fig:pca_vis_sim-manno-ESCs}
\end{figure}

\begin{figure}[htbp]
    \centering
     {\includegraphics[width=0.9\linewidth]{figures/PCA_VIS/sim_manno_vm_pca_vis_VSTs.pdf}} 
    \caption{Visualization of the sim-manno-vm dataset obtained by PCA with different measures.}
    \label{fig:pca_vis_sim-manno-vm}
\end{figure}



\clearpage
\section{Experimental results of GPCA and NMF}\label{secs:gpcas_nmf}
\subsection{Visualization}
\begin{figure}[H]
    \centering
     {\includegraphics[width=1\linewidth]{figures/PCA_VIS/sc_Celseq2_5cl_p1_gpca_vis_VSTs.pdf}} 
    \caption{Visualization of the sc-CEL-seq2-5cl-p1 dataset obtained by GPCA and NMF.}
    \label{fig:gpca_vis_sc-CEL-seq2-5cl-p1}
\end{figure}


\begin{figure}[htbp]
    \centering
     {\includegraphics[width=1\linewidth]{figures/PCA_VIS/sc_Celseq2_5cl_p2_gpca_vis_VSTs.pdf}} 
    \caption{Visualization of the sc-CEL-seq2-5cl-p2 dataset obtained by GPCA and NMF.}
    \label{fig:gpca_vis_sc-CEL-seq2-5cl-p2}
\end{figure}

\begin{figure}[htbp]
    \centering
    {\includegraphics[width=1\linewidth]{figures/PCA_VIS/sc_Celseq2_5cl_p3_gpca_vis_VSTs.pdf}} 
    \caption{Visualization of the sc-CEL-seq2-5cl-p3 dataset obtained by GPCA and NMF.}
    \label{fig:gpca_vis_sc-CEL-seq2-5cl-p3}
\end{figure}

\begin{figure}[htbp]
    \centering
     {\includegraphics[width=1\linewidth]{figures/PCA_VIS/sce_sc_CELseq2_qc_gpca_vis_VSTs.pdf}} 
    \caption{Visualization of the sc-CEL-seq2 dataset obtained by GPCA and NMF.}
    \label{fig:gpca_vis_sc-CEL-seq2}
\end{figure}

\begin{figure}[htbp]
    \centering
     {\includegraphics[width=1\linewidth]{figures/PCA_VIS/full_Zhengmix8eq_gpca_vis_VSTs.pdf}} 
    \caption{Visualization of the Zheng8eq dataset obtained by GPCA and NMF.}
    \label{fig:gpca_vis_Zheng8eq}
\end{figure}


\begin{figure}[htbp]
    \centering
     {\includegraphics[width=1\linewidth]{figures/PCA_VIS/sim_Zheng8eq_gpca_vis_VSTs.pdf}} 
    \caption{Visualization of the sim-Zheng8eq dataset obtained by GPCA and NMF.}
    \label{fig:gpca_vis_sim-Zheng8eq}
\end{figure}

\begin{figure}[htbp]
    \centering
     {\includegraphics[width=1\linewidth]{figures/PCA_VIS/sim_manno_gpca_vis_VSTs.pdf}} 
    \caption{Visualization of the sim-manno-ESCs dataset obtained by GPCA and NMF.}
    \label{fig:gpca_vis_sim-manno-ESCs}
\end{figure}

\begin{figure}[htbp]
    \centering
      {\includegraphics[width=1\linewidth]{figures/PCA_VIS/sim_manno_vm_gpca_vis_VSTs.pdf}} 
    \caption{Visualization of the sim-manno-vm dataset obtained by GPCA and NMF.}
    \label{fig:gpca_vis_sim-manno-vm}
\end{figure}

\clearpage
\subsection{Clustering results}
\begin{figure}[H]
    \centering
     \subfloat[]{\includegraphics[width=0.24\linewidth]{figures/PCA_KMEANS/sc_Celseq2_5cl_p1_gpca_kmeans.pdf}}
     \subfloat[]{\includegraphics[width=0.24\linewidth]{figures/PCA_KMEANS/sc_Celseq2_5cl_p2_gpca_kmeans.pdf}} 
     \subfloat[]{\includegraphics[width=0.24\linewidth]{figures/PCA_KMEANS/sc_Celseq2_5cl_p3_gpca_kmeans.pdf}} 
     \subfloat[]{\includegraphics[width=0.24\linewidth]{figures/PCA_KMEANS/sce_sc_CELseq2_qc_gpca_kmeans.pdf}} \\
     \subfloat[]{\includegraphics[width=0.24\linewidth]{figures/PCA_KMEANS/full_Zhengmix8eq_gpca_kmeans.pdf}} 
     \subfloat[]{\includegraphics[width=0.24\linewidth]{figures/PCA_KMEANS/sim_Zheng8eq_gpca_kmeans.pdf}} 
     \subfloat[]{\includegraphics[width=0.24\linewidth]{figures/PCA_KMEANS/sim_manno_gpca_kmeans.pdf}} 
     \subfloat[]{\includegraphics[width=0.24\linewidth]{figures/PCA_KMEANS/sim_manno_vm_gpca_kmeans.pdf}} 
    \caption{ARI of $k$-means with GPCA-P and GPCA-NB in comparison with that with PCA and the proposed measures on the following datasets: (a) sc-CEL-seq2-5cl-p1, (b) sc-CEL-seq2-5cl-p2, 
    (c) sc-CEL-seq2-5cl-p3, (d) sc-CEL-seq2, 
    (e) Zheng8eq, (f) sim-Zheng8eq, 
    (g) sim-manno-ESCs, and (h) sim-manno-vm.}
    \label{fig:Kmeans_gpca}
\end{figure}
In this section, GPCA and NMF, which handle original non-negative data, are compared with the PCAs with $D_{NB}$. GPCA assumes observed data follow either Poisson or NB distributions, denoted GPCA-P and GPCA-NB, respectively.
First, we assess the visualizations produced by GPCA-P, GPCA-NB and NMF (Figures~\ref{fig:gpca_vis_sc-CEL-seq2-5cl-p1}--\ref{fig:gpca_vis_sim-manno-vm}) and compare them with those obtained by the PCA with $D_{NB}$. It is observed that the PCA with $D_{NB}$ displays more well-grouped data than GPCA-P, GPCA-NB and NMF on most datasets except the Zheng8eq dataset (Figure~\ref{fig:gpca_vis_Zheng8eq}) and sim-Zheng8eq dataset (Figure~\ref{fig:gpca_vis_sim-Zheng8eq}). For these two datasets, GPCA-P and GPCA-NB distinguish the groups existing in the data more clearly compared with the PCA with $D_{NB}$.

Secondly, we compare the proposed measures, which are input into PCA, with GPCA-P, GPCA-NB and NMF, according to the clustering results provided in Figure~\ref{fig:Kmeans_gpca}. On the sc-CEL-seq2 dataset and the sim-manno-vm dataset, the PCA with $D_{NB}$ not only outperforms GPCAs and NMF by a large margin when the dimension is $2$ but also achieves the highest ARI value with the lowest dimension. The value of ARI obtained by $D_{NB}$ is much higher than those by GPCA-P, GPCA-NB and NMF when the dimension equals $2$ on the sc-CEL-seq2-5cl-p1 dataset. For the sim-manno-ESCs dataset, the ARI values of GPCAs dramatically decrease as the dimension increases from $6$ to $7$, while those obtained by the PCA with $D_{NB}$ and $D_{P}$ are higher and more stable. Although the ARI values of $D_{NB}$ do not rise as the dimension grows from $2$ to $4$ on the sc-CEL-seq2-5cl-p1 dataset and the sc-CEL-seq2-5cl-p3 dataset, the PCA with $D_{NB}$ outperforms GPCAs and NMF by a wide margin in the 2D space. For the Zheng8eq dataset and the sim-Zheng8eq dataset, GPCA-P accomplishes the highest value with the lowest dimension. It is found that the results of GPCA-P and GPCA-NB could be highly variable. 

To sum up, the application of $D_{NB}$ often results in better visualization and clustering results when integrated into PCA compared with GPCA-P, GPCA-NB and NMF.



\end{document}

