% Benchmark accuracy table (label: tab:benchmarks).
% Layout: two stacked panels; each panel shows two datasets side by side.
% Per dataset there are 3 thresholding methods x 3 metrics = 9 columns,
% so the tabular has 1 label column (n) + 2 x 9 = 19 columns in total.
\begin{table*}[!htbp]
    \centering
    \begin{adjustbox}{max width=\textwidth}
    \begin{tabular}{l | ccc ccc ccc | ccc ccc ccc}
    \toprule[1pt]
        %%% Panel 1 -- dataset headers (columns 2-10 and 11-19)
        & \multicolumn{9}{c}{\fontfamily{cmr}\textsc{\textbf{Lung}} ($d = 8$, $d' = 8$) $|$ \textsc{either} $\to$ \textsc{dysp}} &  \multicolumn{9}{|c}{\fontfamily{cmr}\textsc{\textbf{Barley}} ($d = 48$, $d' = 7$) $|$ \textsc{jordinf} $\to$ \textsc{markgm}}  \\
        %%%
        \cmidrule(lr){2-10} \cmidrule(lr){11-19}
        %%% Method headers -- each method must span exactly 3 metric columns
        %%% so that the headings line up with the \cmidrule groups below.
         & \multicolumn{3}{c}{\textsc{Heuristic}} & \multicolumn{3}{c}{\textsc{Random}} & \multicolumn{3}{c}{\textsc{Restricted}} & \multicolumn{3}{|c}{\textsc{Heuristic}} & \multicolumn{3}{c}{\textsc{Random}} & \multicolumn{3}{c}{\textsc{Restricted}} \\
        \cmidrule(lr){2-4} \cmidrule(lr){5-7} \cmidrule(lr){8-10} \cmidrule(lr){11-13} \cmidrule(lr){14-16} \cmidrule(lr){17-19}
        %%% Metric headers
        $n$ & $\z$ & $\z_1$ & \cmark  & $\z$ & $\z_1$ & \cmark  & $\z$ & $\z_1$ & \cmark  & $\z$ & $\z_1$ & \cmark  & $\z$ & $\z_1$ & \cmark  & $\z$ & $\z_1$ & \cmark  \\
        %%%
        \midrule
        % NOTE(review): the 50k row has no values -- presumably results are pending; confirm before submission.
        $50k$ & & & &  &  &  \\
        $100k$ & 100.0 & 100.0 & \cmark & 83.33 & 83.33 & \cmark & 66.67 & 66.67 & \xmark & 100.0 & 100.0 & \cmark & 85.71 & 100.0 & \cmark & 100.0 & 100.0 & \cmark  \\
        %%%%%%%%
        %%%%%%%%
        %%%%%%%%
        % Heavy interior rule separating the two panels.
        \midrule[1pt]
        %%% Panel 2 -- dataset headers (underscores in variable names must be escaped in text mode)
        & \multicolumn{9}{c}{\fontfamily{cmr}\textsc{\textbf{Mildew}} ($d = 35$, $d' = 10$) $|$ \textsc{mikro\_1} $\to$ \textsc{meldug\_2}} & \multicolumn{9}{|c}{\fontfamily{cmr}\textsc{\textbf{Barley}} ($d = 48$, $d' = 10$) $|$ \textsc{aks\_m2} $\to$ \textsc{keraks} }  \\
        %%%
        \cmidrule(lr){2-10} \cmidrule(lr){11-19}
        %%% Method headers -- each method spans exactly 3 metric columns
         & \multicolumn{3}{c}{\textsc{Heuristic}} & \multicolumn{3}{c}{\textsc{Random}} & \multicolumn{3}{c}{\textsc{Restricted}} & \multicolumn{3}{|c}{\textsc{Heuristic}} & \multicolumn{3}{c}{\textsc{Random}} & \multicolumn{3}{c}{\textsc{Restricted}} \\
        %%%
        \cmidrule(lr){2-4} \cmidrule(lr){5-7} \cmidrule(lr){8-10} \cmidrule(lr){11-13} \cmidrule(lr){14-16} \cmidrule(lr){17-19}
        %%% Metric headers
        $n$ & $\z$ & $\z_1$ & \cmark & $\z$ & $\z_1$ & \cmark  & $\z$ & $\z_1$ & \cmark  & $\z$ & $\z_1$ & \cmark  & $\z$ & $\z_1$ & \cmark  & $\z$ & $\z_1$ & \cmark  \\
        %%%
        \midrule
        % NOTE(review): the 50k row is empty and the 100k row below fills only 8 of the
        % 18 result cells; it also uses a different cell format (std in parentheses,
        % k/5 fractions) than panel 1 (\cmark/\xmark) -- presumably work in progress; confirm.
        $50k$ &  &  &  &  &  &  \\
        $100k$ & 90.0 (0) & 100.0 (0) & 5/5 & 88.0 (4.0) & 90.0 & 3/5 & 80.0 & 90.0 &    \\
        %%%%%%%%
    \bottomrule[1pt]
    \end{tabular}
    \end{adjustbox}
    \caption{Accuracy of Algorithm~\ref{alg:method} on discrete benchmark data from \texttt{bnlearn}. Sample size is denoted by $n$, total variables in the ground truth DAG by $d$, and total variables tested using Algorithm~\ref{alg:method} by $d'$. Exposure $X$ and outcome $Y$ are denoted by $X \to Y$. All values are averaged over five independent data samples from the underlying DAG. Accuracies are reported as percent of total labels that are correct ($\z$), percent of correct binary labels \textit{confounder} vs \textit{not confounder} ($\z_1$), and fraction of replicates that admit a sufficient adjustment set (\cmark). Standard deviations are in parentheses. Three thresholding methods were used for determining statistical independence: manual selection (\textsc{Heuristic}), permutation testing with random shuffling of 10 surrogates ($\alpha = 0.1$) (\textsc{Random}), and permutation with restricted shuffling, as described by \citet{runge_conditional_2018} (\textsc{Restricted}).}
    \label{tab:benchmarks}
\end{table*}