\begin{table}[H]
\centering
\begin{tabular}{lllll}
\toprule
Method                    & ROCAUC    & Precision & Recall    & Time    \\
\midrule
Abs-LiNGAM (Bootstrap=0)  &0.965$\pm$0.066&0.957$\pm$0.123&0.940$\pm$0.074&42$\pm$13 \\
Abs-LiNGAM (Bootstrap=1)  &0.977$\pm$0.012&0.980$\pm$0.015&0.953$\pm$0.026&42$\pm$14 \\
Abs-LiNGAM (Bootstrap=2)  &0.977$\pm$0.012&0.980$\pm$0.015&0.952$\pm$0.027&43$\pm$11 \\
Abs-LiNGAM (Bootstrap=5)  &0.977$\pm$0.012&0.980$\pm$0.015&0.952$\pm$0.027&45$\pm$13 \\
Abs-LiNGAM (Bootstrap=10) &0.977$\pm$0.012&0.980$\pm$0.015&0.952$\pm$0.027&46$\pm$12 \\
Abs-LiNGAM-GT             &0.977$\pm$0.011&0.982$\pm$0.013&0.953$\pm$0.026&45$\pm$15 \\
DirectLiNGAM              &0.977$\pm$0.011&0.980$\pm$0.013&0.953$\pm$0.026&61$\pm$12 \\
\bottomrule
\end{tabular}
\caption{%
    Results of Abs-LiNGAM
    over pairs of abstract ($b=5$ nodes)
    and concrete ($d\in[25,50]$ nodes)
    linear SCMs.
    Abs-LiNGAM-GT denotes
    a ground truth oracle
    where the abstraction function
    and the abstract model
    are given.
    All results are averaged over 30 independent runs
    with $|\mathcal{D}_{\mathcal{L}}| = 15000$
    concrete
    and
    $|\mathcal{D}_J| = 150$
    paired samples.
  }\label{table:small_aucprerec}
\end{table}

\begin{table}[H]
\centering
\begin{tabular}{lllll}
\toprule
Method                    & ROCAUC    & Precision & Recall    & Time    \\
\midrule
Abs-LiNGAM (Bootstrap=0)  &0.963$\pm$0.043&0.939$\pm$0.119&0.926$\pm$0.067&179$\pm$53\\
Abs-LiNGAM (Bootstrap=1)  &0.952$\pm$0.066&0.914$\pm$0.169&0.914$\pm$0.085&181$\pm$53\\
Abs-LiNGAM (Bootstrap=2)  &0.968$\pm$0.027&0.956$\pm$0.041&0.930$\pm$0.061&182$\pm$50\\
Abs-LiNGAM (Bootstrap=5)  &0.968$\pm$0.027&0.955$\pm$0.041&0.930$\pm$0.061&189$\pm$51\\
Abs-LiNGAM (Bootstrap=10) &0.968$\pm$0.027&0.954$\pm$0.040&0.930$\pm$0.061&194$\pm$51\\
Abs-LiNGAM-GT             &0.969$\pm$0.026&0.965$\pm$0.022&0.931$\pm$0.060&186$\pm$54\\
DirectLiNGAM              &0.968$\pm$0.025&0.958$\pm$0.020&0.930$\pm$0.061&394$\pm$94\\
\bottomrule
\end{tabular}
\caption{%
    Results of Abs-LiNGAM
    over pairs of abstract ($b=10$ nodes)
    and concrete ($d\in[50,100]$ nodes)
    linear SCMs.
    Abs-LiNGAM-GT denotes
    a ground truth oracle
    where the abstraction function
    and the abstract model
    are given.
    All results are averaged over 30 independent runs
    with $|\mathcal{D}_{\mathcal{L}}| = 15000$
    concrete
    and
    $|\mathcal{D}_J| = 270$
    paired samples.
}\label{table:medium_aucprerec}
\end{table}

\begin{table}[H]
\centering
\begin{tabular}{lllll}
\toprule
Method                    & ROCAUC    & Precision & Recall    & Time    \\
\midrule
Abs-LiNGAM (Bootstrap=0)  &0.927$\pm$0.070&0.919$\pm$0.119&0.845$\pm$0.132&748$\pm$121  \\
Abs-LiNGAM (Bootstrap=1)  &0.913$\pm$0.083&0.877$\pm$0.187&0.834$\pm$0.136&731$\pm$116  \\
Abs-LiNGAM (Bootstrap=2)  &0.925$\pm$0.072&0.912$\pm$0.130&0.844$\pm$0.132&738$\pm$123  \\
Abs-LiNGAM (Bootstrap=5)  &0.926$\pm$0.067&0.913$\pm$0.109&0.844$\pm$0.130&755$\pm$140  \\
Abs-LiNGAM (Bootstrap=10) &0.927$\pm$0.065&0.918$\pm$0.090&0.844$\pm$0.130&775$\pm$183  \\
Abs-LiNGAM-GT             &0.927$\pm$0.069&0.920$\pm$0.117&0.845$\pm$0.131&763$\pm$116  \\
DirectLiNGAM              &0.928$\pm$0.061&0.925$\pm$0.047&0.844$\pm$0.128&1608$\pm$212 \\
\bottomrule
\end{tabular}
\caption{%
    Results of Abs-LiNGAM
    over pairs of abstract ($b=10$ nodes)
    and concrete ($d\in[100,150]$ nodes)
    linear SCMs.
    Abs-LiNGAM-GT denotes
    a ground truth oracle
    where the abstraction function
    and the abstract model
    are given.
    All results are averaged over 30 independent runs
    with $|\mathcal{D}_{\mathcal{L}}| = 15000$
    concrete
    and
    $|\mathcal{D}_J| = 270$
    paired samples.
}\label{table:large_aucprerec}
\end{table}

\newpage

\begin{figure}[H]
\centering
\begin{subfigure}[b]{0.32\textwidth}
         \centering
         \includegraphics[width=\textwidth]{rebuttalplots/exp9_tau_shd_small.pdf}
         \caption{NHD}
     \end{subfigure}
     \begin{subfigure}[b]{0.32\textwidth}
         \centering
         \includegraphics[width=\textwidth]{rebuttalplots/exp9_tau_f1_small.pdf}
         \caption{F1}
     \end{subfigure}
     \begin{subfigure}[b]{0.32\textwidth}
         \centering
         \includegraphics[width=\textwidth]{rebuttalplots/exp9_tau_entries_small.pdf}
         \caption{Abstract per Concrete}
     \end{subfigure}
\caption{%
    Reconstruction metrics
    of the linear abstraction function $\mathbf{T}$
    over pairs of abstract ($b=5$ nodes)
    and concrete ($d\in[25,50]$ nodes)
    linear SCMs
    for an increasing number
    of paired samples $|\mathcal{D}_J|$.
    For different thresholds,
    we report the normalized Hamming distance (NHD; \emph{left}),
    the F1 score (\emph{center}),
    and the average number of abstract variables
    assigned to each concrete variable (\emph{right}).
    All results are averaged over 30 independent runs.
  }\label{fig:rec_t_small}
\end{figure}

\begin{figure}[H]
\centering
\begin{subfigure}[b]{0.32\textwidth}
         \centering
         \includegraphics[width=\textwidth]{rebuttalplots/exp9_tau_shd_medium.pdf}
         \caption{NHD}
     \end{subfigure}
     \begin{subfigure}[b]{0.32\textwidth}
         \centering
         \includegraphics[width=\textwidth]{rebuttalplots/exp9_tau_f1_medium.pdf}
         \caption{F1}
     \end{subfigure}
     \begin{subfigure}[b]{0.32\textwidth}
         \centering
         \includegraphics[width=\textwidth]{rebuttalplots/exp9_tau_entries_medium.pdf}
         \caption{Abstract per Concrete}
     \end{subfigure}
\caption{%
    Reconstruction metrics
    of the linear abstraction function $\mathbf{T}$
    over pairs of abstract ($b=10$ nodes)
    and concrete ($d\in[50,100]$ nodes)
    linear SCMs
    for an increasing number
    of paired samples $|\mathcal{D}_J|$.
    For different thresholds,
    we report the normalized Hamming distance (NHD; \emph{left}),
    the F1 score (\emph{center}),
    and the average number of abstract variables
    assigned to each concrete variable (\emph{right}).
    All results are averaged over 30 independent runs.
  }\label{fig:rec_t_medium}
\end{figure}

\begin{figure}[H]
\centering
\begin{subfigure}[b]{0.32\textwidth}
         \centering
         \includegraphics[width=\textwidth]{rebuttalplots/exp9_tau_shd_large.pdf}
         \caption{NHD}
     \end{subfigure}
     \begin{subfigure}[b]{0.32\textwidth}
         \centering
         \includegraphics[width=\textwidth]{rebuttalplots/exp9_tau_f1_large.pdf}
         \caption{F1}
     \end{subfigure}
     \begin{subfigure}[b]{0.32\textwidth}
         \centering
         \includegraphics[width=\textwidth]{rebuttalplots/exp9_tau_entries_large.pdf}
         \caption{Abstract per Concrete}
     \end{subfigure}
\caption{%
    Reconstruction metrics
    of the linear abstraction function $\mathbf{T}$
    over pairs of abstract ($b=10$ nodes)
    and concrete ($d\in[100,150]$ nodes)
    linear SCMs
    for an increasing number
    of paired samples $|\mathcal{D}_J|$.
    For different thresholds,
    we report the normalized Hamming distance (NHD; \emph{left}),
    the F1 score (\emph{center}),
    and the average number of abstract variables
    assigned to each concrete variable (\emph{right}).
    All results are averaged over 30 independent runs.
  }\label{fig:rec_t_large}
\end{figure}
