% Results table for the Barley benchmark (exposure aks_m2, outcome keraks).
% Layout: 13 columns -- the sample size n, followed by three groups of four
% metric columns (Algorithm, PC, FCI). The tabular is wrapped in adjustbox
% with max width=\textwidth so that it is capped at the text width.
% NOTE(review): cells containing a bare `x` look like placeholders for
% results that have not been filled in yet -- confirm before submission.
\begin{table}[!htbp]
    \centering
    \begin{adjustbox}{max width=\textwidth}
    \begin{tabular}{l cccc cccc cccc}
        \toprule[1pt]
        % Title row: benchmark name, exposure -> outcome, and set sizes.
        &  \multicolumn{12}{c}{\fontfamily{cmr}\textsc{\textbf{Barley}}  \textsc{aks\_m2} $\to$ \textsc{keraks} ($|\z| = 46$, $|\z_1| = 12$)} \\
        \cmidrule(lr){2-13}
        % Method row: one four-column group per method.
         &  \multicolumn{4}{c}{\textsc{Algorithm~\ref{alg:method}}} &  \multicolumn{4}{c}{\textsc{PC}} & \multicolumn{4}{c}{\textsc{FCI}}  \\
         % One partial rule under each method group.
         \cmidrule(lr){2-5} \cmidrule(lr){6-9} \cmidrule(lr){10-13}
         % Metric row: the same four metrics repeated for each method.
         $n$ & $\z$ \textsc{Acc} & $\z_1$ \textsc{Pre} & $\z_1$ \textsc{Rec} & \cmark & $\z$ \textsc{Acc} & $\z_1$ \textsc{Pre} & $\z_1$ \textsc{Rec} & \cmark & $\z$ \textsc{Acc} & $\z_1$ \textsc{Pre} & $\z_1$ \textsc{Rec} & \cmark \\
         % booktabs rule (instead of \hline) so spacing and weight match
         % the \toprule/\cmidrule/\bottomrule used elsewhere in this table.
         \midrule
         % Data rows: one per sample size; entries are "mean (CI low-CI high)".
         $10k$ & 60.0 (48.6-71.4) & 93.5 (88.1-98.8) & 58.3 (42.0-74.7) & x & x & x & x & x & x & x  & x & x \\
         $25k$ & 84.3 (76.1-92.4) & 94.8 (90.6-99.0) & 86.7 (76.9-96.5) & x & x & x & x & x & x & x  & x & x \\
         $50k$ & 85.7 (85.7-85.7) & 93.5 (90.2-96.7) & 91.7 (86.5-96.8) & x & x & x & x & x & x & x  & x & x \\
         % Row for n = 100k is currently disabled.
         %$100k$ & x & x & x & x & x & x & x & x & x & x  & x & x \\
         %%%
         \bottomrule[1pt]
    \end{tabular}
    \end{adjustbox}
    \caption{Performance of Algorithm~\ref{alg:method} on the \textsc{Barley} benchmark from \texttt{bnlearn} \citep{scutari_learning_2010}, with \textsc{aks\_m2} as exposure and \textsc{keraks} as outcome. All values are averaged over five independent data samples from the ground truth DAG, with 95\% confidence intervals in parentheses. Sample size is denoted by $n$. All 48 variables in the original DAG were submitted to Algorithm~\ref{alg:method} with a chi-square independence test ($\alpha = 0.005$). An arbitrary 14-node subset meeting the assumptions of Algorithm~\ref{alg:method} was assigned manual labels as ground truth, which were used to assess predicted label quality. Metrics reported are mean accuracy of all labels ($\z$ \textsc{Acc}), mean precision for partition $\z_1$ ($\z_1$ \textsc{Pre}), mean recall for partition $\z_1$ ($\z_1$ \textsc{Rec}), and fraction of replicates that admit a sufficient adjustment set (\cmark).}
    \label{tab:barley}
\end{table}