% Results table for the DAGs containing an M-structure.
% Layout: one row per sample size n; columns 2-4 hold the
% "Bernoulli | Linear" setting and columns 5-7 the
% "Hypergeometric | Quadratic" setting. Every cell is written as
% "estimate (lower-upper)", where the bracket is the 95% confidence interval.
% Rows that start with a percent sign are commented out and are not typeset.
\begin{table}[!h]
    \centering
    \begin{adjustbox}{max width=\textwidth}
    \begin{tabular}{l ccc ccc}
    \toprule
    %%%%%%%%%%%%%%%%%%%%%
    %% M
    %%%%%%%%%%%%%%%%%%%%%
    % NOTE(review): the sibling table names its structure in this heading
    % ("DAG with Butterfly Structure"); confirm whether this heading should
    % name the M-structure instead of "All variables observed".
    & \multicolumn{6}{c}{\fontfamily{cmr}\textsc{\textbf{All variables observed}}} \\
    \cmidrule(lr){2-7}
    %%% Data-generating settings (one group of three metric columns each)
    & \multicolumn{3}{c}{\textsc{Bernoulli $|$ Linear}} & \multicolumn{3}{c}{\textsc{Hypergeometric $|$ Quadratic}} \\
    \cmidrule(lr){2-4} \cmidrule(lr){5-7}
    %%% Metric headers
    $n$ & \textsc{$\z$ Acc} & \textsc{$\z_1$ Prec} & \textsc{$\z_1$ Rec} & \textsc{$\z$ Acc} & \textsc{$\z_1$ Prec} & \textsc{$\z_1$ Rec} \\
    % booktabs rule (not \hline) so spacing matches \toprule/\bottomrule
    \midrule
$100$ & 40.5 (38.8-42.1) & 3.0 (0-6.4) & 42.9 (41.3-44.4) & 38.3 (36.8-39.8) & 0.0 (0.0-0.0) & 0.0 (0.0-0.0) \\
$500$ & 80.4 (77.7-83.1) & 81.0 (73.3-88.7) & 89.0 (81.2-96.8) & 50.6 (49.1-52.2) & 7.0 (2.0-12.0) & 50.0 (47.5-52.5) \\
$1k$ & 98.9 (98.3-99.5) & 100 (100-100) & 100 (100-100) & 88.4 (86.5-90.3) & 78.0 (69.8-86.2) & 77.2 (69.2-85.2) \\
%$2.5k$ & 100 (100-100) & 100 (100-100) & 100 (100-100) & 99.4 (98.9-99.8) & 100 (100-100) & 100 (100-100) \\
$5k$ & 100 (100-100) & 100 (100-100) & 100 (100-100) & 100 (100-100) & 100 (100-100) & 100 (100-100) \\
%$7.5k$ & 100 (100-100) & 100 (100-100) & 100 (100-100) & 100 (100-100) & 100 (100-100) & 100 (100-100) \\
%$10k$ & 100 (100-100) & 100 (100-100) & 100 (100-100) & 100 (100-100) & 100 (100-100) & 100 (100-100) \\
    %%%%%%%%%%%%%%%%%%%
    \bottomrule
    \end{tabular}
    \end{adjustbox}
    % Caption abbreviations (Acc / Prec / Rec) match the column headers above.
    \caption{Performance of Algorithm~\ref{alg:method} on thirteen-node DAGs containing an M-structure (Figure~\ref{fig:m_butterfly}) across noise distributions, causal mechanisms, and sample sizes ($n$). In all DAGs, exposure $X$ is a direct cause of outcome $Y$. Metrics reported are accuracy of all labels ($\z$ \textsc{Acc}), precision for partition $\z_1$ ($\z_1$ \textsc{Prec}), and recall for partition $\z_1$ ($\z_1$ \textsc{Rec}). The 95\% confidence interval over the 100 individual DAG metrics is reported in parentheses. A heuristic independence threshold of 0.05 was used for all data generating processes.}
    % NOTE(review): leftover author note below lists thresholds that differ
    % from the 0.05 stated in the caption; confirm which is correct.
    %{\color{red} Butterfly manual threshold = 0.01, 0.0075 for linear-hypergeometric}}
    \label{tab:results_m_butterfly}
\end{table}


% Results table for the DAGs containing a butterfly structure.
% Layout: one row per sample size n; columns 2-4 hold the
% "Bernoulli | Linear" setting and columns 5-7 the
% "Hypergeometric | Quadratic" setting. Every cell is written as
% "estimate (lower-upper)", where the bracket is the 95% confidence interval.
% Rows that start with a percent sign are commented out and are not typeset.
\begin{table}[!h]
    \centering
    \begin{adjustbox}{max width=\textwidth}
    \begin{tabular}{l ccc ccc}
    \toprule
    %%%%%%%%%%%%%%%%%%%%%
    %% BUTTERFLY
    %%%%%%%%%%%%%%%%%%%%%
    & \multicolumn{6}{c}{\fontfamily{cmr}\textsc{\textbf{DAG with Butterfly Structure}}} \\
    \cmidrule(lr){2-7}
    %%% Data-generating settings (one group of three metric columns each)
    & \multicolumn{3}{c}{\textsc{Bernoulli $|$ Linear}} & \multicolumn{3}{c}{\textsc{Hypergeometric $|$ Quadratic}} \\
    \cmidrule(lr){2-4} \cmidrule(lr){5-7}
    %%% Metric headers
    $n$ & \textsc{$\z$ Acc} & \textsc{$\z_1$ Prec} & \textsc{$\z_1$ Rec} & \textsc{$\z$ Acc} & \textsc{$\z_1$ Prec} & \textsc{$\z_1$ Rec} \\
    % booktabs rule (not \hline) so spacing matches \toprule/\bottomrule
    \midrule
$1k$ & 42.0 (41.1-42.9) & 24.8 (24.3-25.2) & 50.0 (49.0-51.0) & 27.5 (27.2-27.7) & 0.0 (0.0-0.0) & 0.0 (0.0-0.0) \\
$1.5k$ & 48.7 (47.8-49.6) & 25.0 (25.0-25.0) & 50.0 (50.0-50.0) & 46.1 (43.0-49.2) & 10.5 (6.5-14.5) & 66.7 (61.3-72.0) \\
$2k$ & 71.7 (67.6-75.8) & 51.2 (44.2-58.3) & 75.9 (71.2-80.6) & 67.1 (65.3-68.9) & 34.5 (29.7-39.3) & 67.3 (61.1-73.5) \\
$2.5k$ & 94.5 (92.3-96.6) & 90.0 (86.1-93.9) & 94.7 (92.1-97.4) & 76.1 (73.4-78.8) & 53.5 (47.8-59.2) & 77.3 (71.4-83.1) \\
$5k$ & 100 (100-100) & 100 (100-100) & 100 (100-100) & 97.3 (96.1-98.5) & 93.5 (90.9-96.1)  & 98.9 (97.7-100) \\
%$7.5k$ & 100 (100-100) & 100 (100-100) & 100 (100-100) & 98.8 (98.2-99.4) & 96.8 (95.1-98.4)  & 100 (100-100) \\
$10k$ & 100 (100-100) & 100 (100-100) & 100 (100-100) & 99.2 (98.7-99.7) & 97.8 (96.3-99.2) & 100 (100-100) \\
    %%%%%%%%%%%%%%%%%%%
    \bottomrule
    \end{tabular}
    \end{adjustbox}
    % Caption abbreviations (Acc / Prec / Rec) match the column headers above.
    \caption{Performance of Algorithm~\ref{alg:method} on thirteen-node DAGs containing a butterfly structure (Figure~\ref{fig:m_butterfly}) across noise distributions, causal mechanisms, and sample sizes ($n$). In all DAGs, exposure $X$ is a direct cause of outcome $Y$. Metrics reported are accuracy of all labels ($\z$ \textsc{Acc}), precision for partition $\z_1$ ($\z_1$ \textsc{Prec}), and recall for partition $\z_1$ ($\z_1$ \textsc{Rec}). The 95\% confidence interval over the 100 individual DAG metrics is reported in parentheses. A heuristic independence threshold of 0.05 was used for all data generating processes.}
    % NOTE(review): leftover author note below lists thresholds that differ
    % from the 0.05 stated in the caption; confirm which is correct.
    %{\color{red} Butterfly manual threshold = 0.01, 0.0075 for linear-hypergeometric}}
    \label{tab:results_butterfly}
\end{table}