% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% % Appendix: Motivating examples
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\let\cleardoublepage\clearpage

\section{Motivating Examples: Comparison to Global Causal Discovery} \label{sec:global}

\input{figure_tex/figure_pc_fci_ldp}

\input{figure_tex/figure_fci_finite}

%\clearpage

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Appendix: Partitions
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%\section{Causal Partitions: Definitions and Proofs} 
%\label{append:partition_defs_proofs}

%\input{appendix_partitions_proofs}

%\clearpage

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Appendix: Partition definitions
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% \subsection{Partition Induced Subgraphs}
% \input{figure_tex/figure_primary_structures}

\input{appendix_partitions_def}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Appendix: covariate selection criteria
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\input{appendix_preliminaries}
%\subsection{Examples of Valid Adjustment Sets}
%\input{figure_tex/figure_backdoor}

%\clearpage

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Appendix: Proofs
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\clearpage

\section{Proofs} \label{sec:proofs}

%In this section, we prove the three main theorems presented in this work: Theorem \ref{theorem:partitions} (Section \ref{sec:partitions}), Theorem \ref{theorem:correctness} (Section \ref{sec:partition_correctness}), and Theorem \ref{theorem:valid_adjustment} (Section \ref{sec:adjustment_correctness}). We assume access to an independence oracle for all proofs. 

In the following proofs, we assume that all assumptions and sufficient conditions defined in Section~\ref{sec:identifiability} are met unless it is explicitly stated that they can be weakened or dropped.

%%%%%%%%%%%%%%%%%%%%%%%%%%
% PROOFS OF CORRECTNESS
%%%%%%%%%%%%%%%%%%%%%%%%%%

\input{appendix_correctness}

%%%%%%%%%%%%%%%%%%%%%%%%%%
% VAS
%%%%%%%%%%%%%%%%%%%%%%%%%%

\input{appendix_valid}

%\clearpage

%%%%%%%%%%%%%%%%%%%%%%%%%%
% CAUSAL SUFFICIENCY
%%%%%%%%%%%%%%%%%%%%%%%%%%

%\input{appendix_causal_sufficiency}

%%%%%%%%%%%%%%%%%%%%%%%%%%
% SUPPLEMENTAL FIGURES
%%%%%%%%%%%%%%%%%%%%%%%%%%

\clearpage

\section{Graphs for Experimental Validation} 
\label{sec:custom_dags}

\vspace{10mm}
\input{figure_tex/figure_m_butterfly}
\vspace{20mm}
\input{figure_tex/figure_17_nodes}
\input{figure_tex/figure_bnlearn_full}
\input{figure_tex/figure_complex_backdoor}
\input{figure_tex/figure_latent_experiment}

\clearpage

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Appendix: EXP DESIGN
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{Experimental Design} 

\label{sec:experimental_design_appendix}

\input{appendix_experimental_design}

\clearpage

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Appendix: RESULTS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{Experimental Results} 

\input{tables_tex/table_time_tests}
\input{tables_tex/table_sem}
\input{tables_tex/table_ten_node_dag}
\input{tables_tex/table_m_butterfly}
\input{tables_tex/table_17_node}
\input{tables_tex/table_bnlearn_mildew}
\input{tables_tex/table_ate}

\clearpage

\begin{figure}[!h]
    \centering
    \includegraphics[width=0.5\textwidth]{figures/ldp_mildew_accuracy.jpg}
    \caption{LDP partition accuracy on the \textsc{Mildew} benchmark. Mean accuracy was computed for 10 replicate samples from the ground truth DAG using \texttt{bnlearn} \citep{scutari_learning_2010}. We measure partition accuracy as the percentage of partition labels that are consistent with ground truth. Independence was determined by chi-square tests ($\alpha = 0.005$). Shaded regions represent the 95\% confidence interval. All experiments were run on a 2017 MacBook with 2.9~GHz Quad-Core Intel Core i7.}
    \label{fig:ldp_mildew_acc}
\end{figure}

\begin{figure}[!h]
    \centering
    \includegraphics[width=0.5\textwidth]{figures/vas_cardinality.jpg}
    \caption{Adjustment set cardinality for the linear-Gaussian DAG described in Figure~\ref{fig:baselines}. The true adjustment set cardinality is one under the common cause criterion and three under the disjunctive cause criterion.}
    \label{fig:vas_card}
\end{figure}

\clearpage

\input{tables_tex/table_latent_experiments}

\subsection{Impacts of Conditioning Set Size} \label{sec:cond_set_size}

Local baselines faced challenges with chi-square independence tests on \textsc{Mildew} for $n \geq 75k$. LDECC errored out on 2/10 and 10/10 replicates at $n = 75k$ and $n = 100k$, respectively, while MB-by-MB could not return results for 3/10 and 9/10. Independence test failures persisted even with resampling from the ground truth DAG, and are likely due to large conditioning sets resulting in few or no samples for some groups during binning. While the maximum conditioning set size for LDP on \textsc{Mildew} was 4, it was 17 for LDECC and 19 for MB-by-MB. Similar sample complexity challenges likely explain our empirical observation that LDP returns VAS for simple discrete DAGs with significantly fewer samples ($n = 1k$) than FCI ($n = 10k$) and PC ($n > 10k$), as the latter methods require many more higher-order independence tests (Figure~\ref{fig:pc_fci}) \citep{spirtes_causation_2000}.
