%\clearpage
\onecolumn
%\pagestyle{empty} 
%\setcounter{figure}{0}
%\setcounter{section}{0}
\section{Additional Visualized Examples}
\mai{Ignore this section; it is just the page that we submitted in the rebuttal.}
% To be added as its own page under an anonymous link
In an attempt to address the feedback from some reviewers concerning the lack of accompanying molecular visualizations in our main paper, we will present some results from the experiment described next. 

We use the QM9 dataset described in the main paper and create splits using VV with the average degree as the split property, $k = 4$ splits, $\sharpness = 10$, and $\epsilon = 0.1$. With these common parameters, we create two scenarios. In \textbf{scenario A}, the held-out portion of the data corresponds to the first split ($j = 1$), while the training portion corresponds to the last three splits ($j = 2, 3 \text{ and } 4$). In \textbf{scenario B}, the held-out portion of the data corresponds to the last split ($j = 4$), while the training portion corresponds to the first three splits ($j = 1, 2 \text{ and } 3$).
For both scenarios, we trained DiGress and GDSS (two generative models described in the main paper) on the corresponding training portion for each scenario, sampled from these trained models, and weighted the samples according to the approach described in the paper (which will depend on the held-out portion of each scenario).

\textbf{First:} We visualize the 4 highest-weighted molecules that are valid and novel (i.e., after filtering for validity and novelty), according to the weights assigned by our approach, when sampling from DiGress and GDSS for both scenarios in~\autoref{fig:compare}.

\textbf{Second:}
We plot the average degree distribution for scenarios A and B in~\autoref{fig:dist_vis}, and we showcase where each of the 4 highest-weighted molecules that are valid and novel lies with respect to the distribution of the average degree property.

\textbf{Third:}
For each train/held-out scenario, we compared the top 100 molecules---according to the weights assigned by our approach---to the corresponding held-out data for that scenario. We took care to remove any existing intersection between the samples of the training portion and the held-out portion (which could have arisen if multiple copies of the same sample existed in the original data). We found that the models were able to generate exact matches present in the held-out portion, i.e., the molecules generated were both novel (since the models did not see the held-out portion) and viable (since they matched existing molecules in the held-out portion), where we define an exact match as having the same SMILES string. The exact number of matches for both models and both scenarios is presented in~\autoref{fig:present_in_held}.

%For Scenario A: GDSS's top 100 molecules contained 9 molecules present in the held unseen portion from that scenario, while DiGress's top 100 molecules contained 26 such molecules. For Scenario B: The corresponding numbers were 7 for GDSS and 15 for DiGress. 


\begin{figure}[ht]
%\resizebox{0.5\textwidth}{!}{
\begin{subfigure}{.5\textwidth}
  \centering
  % include first image
  \includegraphics[width=.7\linewidth]{figures/GDSS_split0.png}  
  \caption{Top 4 valid and novel molecules from GDSS when trained\\  with scenario A}
  \label{fig:sub-first}
\end{subfigure}
\begin{subfigure}{.5\textwidth}
  \centering
  % include second image
  \includegraphics[width=.7\linewidth]{figures/DiGress_split0.png}  
  \caption{Top 4 valid and novel molecules from DiGress when trained\\  with scenario A}
  \label{fig:sub-second}
\end{subfigure}
\hspace{0.2em}
\newline
\begin{subfigure}{.5\textwidth}
  \centering
  % include third image
  \includegraphics[width=.7\linewidth]{figures/GDSS_split3.png}  
 % \includegraphics[page=5,width=\textwidth, trim=0cm 0.5cm 1cm 0.5cm, clip]
  \caption{Top 4 valid and novel molecules from GDSS when trained\\  with scenario B}
  \label{fig:sub-third}
\end{subfigure}
\begin{subfigure}{.5\textwidth}
  \centering
  % include fourth image
  \includegraphics[width=.7\linewidth]{figures/DiGress_split3.png}  
  \caption{Top 4 valid and novel molecules from DiGress when trained\\  with scenario B}
  \label{fig:sub-fourth}
\end{subfigure}
\caption{Visualization of the top 4 valid and novel molecules sampled from different models under different training scenarios}
\label{fig:compare}
%}
\end{figure}
% if we want to fit in 1 page then 8 molecules is all we probably can do





\iffalse
\begin{figure}[ht]
%\vspace{-0.8em}
\centering
  \begin{subfigure}{0.18\textwidth}
    \includegraphics[width=\linewidth]{figures/d_av3_molecule_0.png}
  \end{subfigure}%
  \hspace*{\fill}   % maximize separation between the subfigures
  \begin{subfigure}{0.18\textwidth}
    \includegraphics[width=\linewidth]{figures/d_av3_molecule_1.png}  
  \end{subfigure}%
  \hspace*{\fill}   % maximizeseparation between the subfigures
  \begin{subfigure}{0.18\textwidth}
    \includegraphics[width=\linewidth]{figures/d_av3_molecule_2.png}
  \end{subfigure}
\hspace*{\fill}
   \begin{subfigure}{0.18\textwidth}
    \includegraphics[width=\linewidth]{figures/d_av3_molecule_3.png}
  \end{subfigure}
  \caption{ Top 4 molecules corresponding to highest weights when sampling from DiGress trained on splits 1-3 while holding out split 4 }
  \label{fig:digress_av3}
  \end{figure}

\begin{figure}[ht]
%\vspace{-0.8em}
\centering
  \begin{subfigure}{0.18\textwidth}
    \includegraphics[width=\linewidth]{figures/g_av3_molecule_0.png}
  \end{subfigure}%
  \hspace*{\fill}   % maximize separation between the subfigures
  \begin{subfigure}{0.18\textwidth}
    \includegraphics[width=\linewidth]{figures/g_av3_molecule_1.png}  
  \end{subfigure}%
  \hspace*{\fill}   % maximizeseparation between the subfigures
  \begin{subfigure}{0.18\textwidth}
    \includegraphics[width=\linewidth]{figures/g_av3_molecule_2.png}
  \end{subfigure}
\hspace*{\fill}
   \begin{subfigure}{0.18\textwidth}
    \includegraphics[width=\linewidth]{figures/g_av3_molecule_3.png}
  \end{subfigure}
  \caption{ Top 4 molecules corresponding to highest weights when sampling from GDSS trained on splits 1-3 while holding out split 4 }
  \label{fig:gdss_av3}
  \end{figure}


\begin{figure}[ht]
%\vspace{-0.8em}
\centering
  \begin{subfigure}{0.18\textwidth}
    \includegraphics[width=\linewidth]{figures/d_av0_molecule_0.png}
  \end{subfigure}%
  \hspace*{\fill}   % maximize separation between the subfigures
  \begin{subfigure}{0.18\textwidth}
    \includegraphics[width=\linewidth]{figures/d_av0_molecule_1.png}  
  \end{subfigure}%
  \hspace*{\fill}   % maximizeseparation between the subfigures
  \begin{subfigure}{0.18\textwidth}
    \includegraphics[width=\linewidth]{figures/d_av0_molecule_2.png}
  \end{subfigure}
\hspace*{\fill}
   \begin{subfigure}{0.18\textwidth}
    \includegraphics[width=\linewidth]{figures/d_av0_molecule_3.png}
  \end{subfigure}
  \caption{ Top 4 molecules corresponding to highest weights when sampling from DiGress trained on splits 2-4 while holding out split 1 }
  \label{fig:digress_av0}
  \end{figure}

\begin{figure}[ht]
%\vspace{-0.8em}
\centering
  \begin{subfigure}{0.18\textwidth}
    \includegraphics[width=\linewidth]{figures/g_av0_molecule_0.png}
  \end{subfigure}%
  \hspace*{\fill}   % maximize separation between the subfigures
  \begin{subfigure}{0.18\textwidth}
    \includegraphics[width=\linewidth]{figures/g_av0_molecule_1.png}  
  \end{subfigure}%
  \hspace*{\fill}   % maximizeseparation between the subfigures
  \begin{subfigure}{0.18\textwidth}
    \includegraphics[width=\linewidth]{figures/g_av0_molecule_2.png}
  \end{subfigure}
\hspace*{\fill}
   \begin{subfigure}{0.18\textwidth}
    \includegraphics[width=\linewidth]{figures/g_av0_molecule_3.png}
  \end{subfigure}
  \caption{ Top 4 molecules corresponding to highest weights when sampling from GDSS trained on splits 2-4 while holding out split 1 }
  \label{fig:gdss_av0}
  \end{figure}
\fi

  \begin{figure}[ht]
%\vspace{-0.8em}
\centering
  \begin{subfigure}{0.5\textwidth}
    \includegraphics[width=\linewidth]{figures/split_0.png}
  \end{subfigure}%
  \hspace*{\fill}   % maximize separation between the subfigures
  \begin{subfigure}{0.5\textwidth}
    \includegraphics[width=\linewidth]{figures/split_3.png}  
  \end{subfigure} 
  \caption{Visualization of the data distributions of the average degree when the held-out data corresponds to split 1 (scenario A) on the left and split 4 (scenario B) on the right. In both cases, the distribution of the training portion is in blue and is overlaid with the distribution of the held-out portion in yellow. We plot where our top molecules lie with respect to their average degree (the value on the y-axis is not meaningful for the molecules; we varied it across the molecules for ease of visualization). The first letter of each label signifies which model generated the sample: D for \textcolor{red}{DiGress} and G for \textcolor{blue}{GDSS}.}
  \label{fig:dist_vis}
  \end{figure}


  \begin{figure}[htbp]
    \centering
    \includegraphics[width=0.5\textwidth]{figures/present_in_held.png} 
    \caption{Number of exact matches between the top 100 highest weighted molecules sampled from GDSS and DiGress (for the previously described training scenarios) and the held-out unseen portion of the data (under the same training scenario)}
    \label{fig:present_in_held}
\end{figure}

