\begin{figure*}
    \centering
    \includegraphics[width=\textwidth]{figs/diffusion_smoothing.png}
    \caption{(a) Left: A graph of two densely connected clusters (orange and blue) with sparse edges across clusters. Right: Visualization of the learned node representations using the strawman approach, in which clusters are not perfectly separated. (b) Left: The connection strengths given by the heat kernel proximity measure. Right: Visualization of the learned node representations using \method, in which clusters are linearly separable.}
    % , in which nodes have high-dimensional feature vectors randomly sampled from the same uniform distribution
    % \caption{A synthetic graph with two densely connected clusters and sparse edges across clusters ($10\times$ more edges within cluster); the high-dimensional features of each node is randomly sampled from the same uniform distribution. (a) shows the original graph structure and the t-SNE plot of the learned node representations using the strawman approach. Each edge is considered as equally strong, and as a result, the encoder cannot learn to distinguish nodes from different clusters well. (b) shows the strength of connection measured by the diffusion matrix, and the t-SNE plot of the learned node representations using the diffusion matrix as the learning target. The diffusion matrix measures the strength of connections by taking the global graph structure into account; it emphasizes connection strengths within cluster while weakening those across clusters. A weaker connection is shown by a lighter, thinner line. The encoder learns higher quality representations of nodes that are linearly separable.}
    \label{fig:diffusion}
\end{figure*}