\documentclass[accepted]{uai2024} 

\usepackage[american]{babel}


\usepackage{natbib} 
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} 
\usepackage{booktabs} 
\usepackage{tikz} 
\usepackage{algorithm}
\usepackage[noend]{algpseudocode}
\algrenewcommand\algorithmicindent{1.2em}\renewcommand\algorithmicthen{}
\renewcommand\algorithmicdo{}


\usepackage{sanghack}
% Inline author note: prints "[IH: <text>]" in orange.
\newcommand{\inwoo}[1]{\textcolor{orange}{[IH: #1]}}


\usepackage{amsmath,amsfonts,bm}

% Emphasised position / sub-panel tags for use inside captions,
% e.g. "\figleft{} Training curve."  \emph replaces the obsolete {\em ...}
% switch and adds the italic correction automatically.
\newcommand{\figleft}{\emph{(Left)}}
\newcommand{\figcenter}{\emph{(Center)}}
\newcommand{\figright}{\emph{(Right)}}
\newcommand{\figtop}{\emph{(Top)}}
\newcommand{\figbottom}{\emph{(Bottom)}}
\newcommand{\captiona}{\emph{(a)}}
\newcommand{\captionb}{\emph{(b)}}
\newcommand{\captionc}{\emph{(c)}}
\newcommand{\captiond}{\emph{(d)}}

% Typeset a newly introduced term in bold (\textbf instead of obsolete {\bf ...}).
\newcommand{\newterm}[1]{\textbf{#1}}


% Cross-reference helpers: each prints a noun (lower-case, or capitalised for
% the variants starting with an upper-case letter) followed by the number(s)
% of the given label(s).  A tie (~) keeps every number attached to the word
% before it so that a line break cannot separate them.
\def\figref#1{figure~\ref{#1}}
\def\Figref#1{Figure~\ref{#1}}
\def\twofigref#1#2{figures~\ref{#1} and~\ref{#2}}
\def\quadfigref#1#2#3#4{figures~\ref{#1}, \ref{#2}, \ref{#3} and~\ref{#4}}
\def\secref#1{section~\ref{#1}}
\def\Secref#1{Section~\ref{#1}}
\def\twosecrefs#1#2{sections~\ref{#1} and~\ref{#2}}
\def\secrefs#1#2#3{sections~\ref{#1}, \ref{#2} and~\ref{#3}}
% NOTE: \def silently replaces any earlier \eqref (e.g. the one provided by
% amsmath, which prints the number in parentheses) with "equation N".
\def\eqref#1{equation~\ref{#1}}
\def\Eqref#1{Equation~\ref{#1}}
% Bare equation number, without any noun.
\def\plaineqref#1{\ref{#1}}
\def\chapref#1{chapter~\ref{#1}}
\def\Chapref#1{Chapter~\ref{#1}}
% Chapter range "chapters M--N" (the tie after "chapters" was previously
% missing, which produced "chaptersM--N").
\def\rangechapref#1#2{chapters~\ref{#1}--\ref{#2}}
\def\algref#1{algorithm~\ref{#1}}
\def\Algref#1{Algorithm~\ref{#1}}
\def\twoalgref#1#2{algorithms~\ref{#1} and~\ref{#2}}
\def\Twoalgref#1#2{Algorithms~\ref{#1} and~\ref{#2}}
\def\partref#1{part~\ref{#1}}
\def\Partref#1{Part~\ref{#1}}
\def\twopartref#1#2{parts~\ref{#1} and~\ref{#2}}

% Ceiling and floor brackets around the argument (fixed size, no \left/\right).
\def\ceil#1{\lceil #1 \rceil}
\def\floor#1{\lfloor #1 \rfloor}
% Bold digit one.
\def\1{\bm{1}}
% Calligraphic D, plain and with an upright "valid" / "test" subscript.
% The subscript is written inside the \mathcal argument; \mathrm switches
% the subscript text back to the roman math alphabet.
\newcommand{\train}{\mathcal{D}}
\newcommand{\valid}{\mathcal{D_{\mathrm{valid}}}}
\newcommand{\test}{\mathcal{D_{\mathrm{test}}}}

% Short alias for \epsilon.
\def\eps{{\epsilon}}


% Letters set in the surrounding text font via \textnormal, one macro per
% letter, named \r<letter> (plus \reta for eta).
% There is no entry for "m": the list goes from \rl straight to \rn, so the
% LaTeX font switch \rm is not overwritten here.
\def\reta{{\textnormal{$\eta$}}}
\def\ra{{\textnormal{a}}}
\def\rb{{\textnormal{b}}}
\def\rc{{\textnormal{c}}}
\def\rd{{\textnormal{d}}}
\def\re{{\textnormal{e}}}
\def\rf{{\textnormal{f}}}
\def\rg{{\textnormal{g}}}
\def\rh{{\textnormal{h}}}
\def\ri{{\textnormal{i}}}
\def\rj{{\textnormal{j}}}
\def\rk{{\textnormal{k}}}
\def\rl{{\textnormal{l}}}
\def\rn{{\textnormal{n}}}
\def\ro{{\textnormal{o}}}
\def\rp{{\textnormal{p}}}
\def\rq{{\textnormal{q}}}
\def\rr{{\textnormal{r}}}
\def\rs{{\textnormal{s}}}
\def\rt{{\textnormal{t}}}
\def\ru{{\textnormal{u}}}
\def\rv{{\textnormal{v}}}
\def\rw{{\textnormal{w}}}
\def\rx{{\textnormal{x}}}
\def\ry{{\textnormal{y}}}
\def\rz{{\textnormal{z}}}

% Upright bold letters via \mathbf, one macro per letter, named \rv<letter>.
% NOTE(review): \mathbf normally does not embolden lowercase Greek, so
% \rvepsilon and \rvtheta may come out in regular weight -- confirm with the
% fonts actually used.
\def\rvepsilon{{\mathbf{\epsilon}}}
\def\rvtheta{{\mathbf{\theta}}}
\def\rva{{\mathbf{a}}}
\def\rvb{{\mathbf{b}}}
\def\rvc{{\mathbf{c}}}
\def\rvd{{\mathbf{d}}}
\def\rve{{\mathbf{e}}}
\def\rvf{{\mathbf{f}}}
\def\rvg{{\mathbf{g}}}
\def\rvh{{\mathbf{h}}}
% Bold "i".  This entry was previously named \rvu, which left \rvi undefined
% and was overwritten by the real \rvu definition further down.
\def\rvi{{\mathbf{i}}}
\def\rvj{{\mathbf{j}}}
\def\rvk{{\mathbf{k}}}
\def\rvl{{\mathbf{l}}}
\def\rvm{{\mathbf{m}}}
\def\rvn{{\mathbf{n}}}
\def\rvo{{\mathbf{o}}}
\def\rvp{{\mathbf{p}}}
\def\rvq{{\mathbf{q}}}
\def\rvr{{\mathbf{r}}}
\def\rvs{{\mathbf{s}}}
\def\rvt{{\mathbf{t}}}
\def\rvu{{\mathbf{u}}}
\def\rvv{{\mathbf{v}}}
\def\rvw{{\mathbf{w}}}
\def\rvx{{\mathbf{x}}}
\def\rvy{{\mathbf{y}}}
\def\rvz{{\mathbf{z}}}

% Letters a-z set in the surrounding text font via \textnormal, named
% \erv<letter>.  The replacement text is the same as for the \r<letter>
% macros; only the macro names differ.
\def\erva{{\textnormal{a}}}
\def\ervb{{\textnormal{b}}}
\def\ervc{{\textnormal{c}}}
\def\ervd{{\textnormal{d}}}
\def\erve{{\textnormal{e}}}
\def\ervf{{\textnormal{f}}}
\def\ervg{{\textnormal{g}}}
\def\ervh{{\textnormal{h}}}
\def\ervi{{\textnormal{i}}}
\def\ervj{{\textnormal{j}}}
\def\ervk{{\textnormal{k}}}
\def\ervl{{\textnormal{l}}}
\def\ervm{{\textnormal{m}}}
\def\ervn{{\textnormal{n}}}
\def\ervo{{\textnormal{o}}}
\def\ervp{{\textnormal{p}}}
\def\ervq{{\textnormal{q}}}
\def\ervr{{\textnormal{r}}}
\def\ervs{{\textnormal{s}}}
\def\ervt{{\textnormal{t}}}
\def\ervu{{\textnormal{u}}}
\def\ervv{{\textnormal{v}}}
\def\ervw{{\textnormal{w}}}
\def\ervx{{\textnormal{x}}}
\def\ervy{{\textnormal{y}}}
\def\ervz{{\textnormal{z}}}

% Upright bold capital letters A-Z via \mathbf, named \rm<Letter>.
\def\rmA{{\mathbf{A}}}
\def\rmB{{\mathbf{B}}}
\def\rmC{{\mathbf{C}}}
\def\rmD{{\mathbf{D}}}
\def\rmE{{\mathbf{E}}}
\def\rmF{{\mathbf{F}}}
\def\rmG{{\mathbf{G}}}
\def\rmH{{\mathbf{H}}}
\def\rmI{{\mathbf{I}}}
\def\rmJ{{\mathbf{J}}}
\def\rmK{{\mathbf{K}}}
\def\rmL{{\mathbf{L}}}
\def\rmM{{\mathbf{M}}}
\def\rmN{{\mathbf{N}}}
\def\rmO{{\mathbf{O}}}
\def\rmP{{\mathbf{P}}}
\def\rmQ{{\mathbf{Q}}}
\def\rmR{{\mathbf{R}}}
\def\rmS{{\mathbf{S}}}
\def\rmT{{\mathbf{T}}}
\def\rmU{{\mathbf{U}}}
\def\rmV{{\mathbf{V}}}
\def\rmW{{\mathbf{W}}}
\def\rmX{{\mathbf{X}}}
\def\rmY{{\mathbf{Y}}}
\def\rmZ{{\mathbf{Z}}}

% Capital letters A-Z set in the surrounding text font via \textnormal,
% named \erm<Letter>.
\def\ermA{{\textnormal{A}}}
\def\ermB{{\textnormal{B}}}
\def\ermC{{\textnormal{C}}}
\def\ermD{{\textnormal{D}}}
\def\ermE{{\textnormal{E}}}
\def\ermF{{\textnormal{F}}}
\def\ermG{{\textnormal{G}}}
\def\ermH{{\textnormal{H}}}
\def\ermI{{\textnormal{I}}}
\def\ermJ{{\textnormal{J}}}
\def\ermK{{\textnormal{K}}}
\def\ermL{{\textnormal{L}}}
\def\ermM{{\textnormal{M}}}
\def\ermN{{\textnormal{N}}}
\def\ermO{{\textnormal{O}}}
\def\ermP{{\textnormal{P}}}
\def\ermQ{{\textnormal{Q}}}
\def\ermR{{\textnormal{R}}}
\def\ermS{{\textnormal{S}}}
\def\ermT{{\textnormal{T}}}
\def\ermU{{\textnormal{U}}}
\def\ermV{{\textnormal{V}}}
\def\ermW{{\textnormal{W}}}
\def\ermX{{\textnormal{X}}}
\def\ermY{{\textnormal{Y}}}
\def\ermZ{{\textnormal{Z}}}

% Bold math symbols via \bm (bm package), named \v<name>: the digits 0 and 1,
% the Greek letters mu and theta, and the lowercase letters a-z.
\def\vzero{{\bm{0}}}
\def\vone{{\bm{1}}}
\def\vmu{{\bm{\mu}}}
\def\vtheta{{\bm{\theta}}}
\def\va{{\bm{a}}}
\def\vb{{\bm{b}}}
\def\vc{{\bm{c}}}
\def\vd{{\bm{d}}}
\def\ve{{\bm{e}}}
\def\vf{{\bm{f}}}
\def\vg{{\bm{g}}}
\def\vh{{\bm{h}}}
\def\vi{{\bm{i}}}
\def\vj{{\bm{j}}}
\def\vk{{\bm{k}}}
\def\vl{{\bm{l}}}
\def\vm{{\bm{m}}}
\def\vn{{\bm{n}}}
\def\vo{{\bm{o}}}
\def\vp{{\bm{p}}}
\def\vq{{\bm{q}}}
\def\vr{{\bm{r}}}
\def\vs{{\bm{s}}}
\def\vt{{\bm{t}}}
\def\vu{{\bm{u}}}
\def\vv{{\bm{v}}}
\def\vw{{\bm{w}}}
\def\vx{{\bm{x}}}
\def\vy{{\bm{y}}}
\def\vz{{\bm{z}}}

% Plain (non-bold) math symbols, named \ev<name>: a selection of Greek
% letters followed by the lowercase letters a-z.  Each macro expands to the
% ordinary symbol wrapped in a group.
\def\evalpha{{\alpha}}
\def\evbeta{{\beta}}
\def\evepsilon{{\epsilon}}
\def\evlambda{{\lambda}}
\def\evomega{{\omega}}
\def\evmu{{\mu}}
\def\evpsi{{\psi}}
\def\evsigma{{\sigma}}
\def\evtheta{{\theta}}
\def\eva{{a}}
\def\evb{{b}}
\def\evc{{c}}
\def\evd{{d}}
\def\eve{{e}}
\def\evf{{f}}
\def\evg{{g}}
\def\evh{{h}}
\def\evi{{i}}
\def\evj{{j}}
\def\evk{{k}}
\def\evl{{l}}
\def\evm{{m}}
\def\evn{{n}}
\def\evo{{o}}
\def\evp{{p}}
\def\evq{{q}}
\def\evr{{r}}
\def\evs{{s}}
\def\evt{{t}}
\def\evu{{u}}
\def\evv{{v}}
\def\evw{{w}}
\def\evx{{x}}
\def\evy{{y}}
\def\evz{{z}}

% Bold capital letters A-Z via \bm, named \m<Letter>, followed by bold
% beta, Phi, Lambda and Sigma.
\def\mA{{\bm{A}}}
\def\mB{{\bm{B}}}
\def\mC{{\bm{C}}}
\def\mD{{\bm{D}}}
\def\mE{{\bm{E}}}
\def\mF{{\bm{F}}}
\def\mG{{\bm{G}}}
\def\mH{{\bm{H}}}
\def\mI{{\bm{I}}}
\def\mJ{{\bm{J}}}
\def\mK{{\bm{K}}}
\def\mL{{\bm{L}}}
\def\mM{{\bm{M}}}
\def\mN{{\bm{N}}}
\def\mO{{\bm{O}}}
\def\mP{{\bm{P}}}
\def\mQ{{\bm{Q}}}
\def\mR{{\bm{R}}}
\def\mS{{\bm{S}}}
\def\mT{{\bm{T}}}
\def\mU{{\bm{U}}}
\def\mV{{\bm{V}}}
\def\mW{{\bm{W}}}
\def\mX{{\bm{X}}}
\def\mY{{\bm{Y}}}
\def\mZ{{\bm{Z}}}
\def\mBeta{{\bm{\beta}}}
\def\mPhi{{\bm{\Phi}}}
\def\mLambda{{\bm{\Lambda}}}
\def\mSigma{{\bm{\Sigma}}}

% \mathsfit: a math alphabet using the default sans-serif family in medium
% weight, slanted shape.  In the "bold" math version it maps to bold-extended
% upright sans-serif instead.
\DeclareMathAlphabet{\mathsfit}{\encodingdefault}{\sfdefault}{m}{sl}
\SetMathAlphabet{\mathsfit}{bold}{\encodingdefault}{\sfdefault}{bx}{n}
% \tens{X}: the argument set in \mathsfit and wrapped in \bm.
\newcommand{\tens}[1]{\bm{\mathsfit{#1}}}
% Shorthands \t<Letter> for \tens applied to each capital letter A-Z.
\def\tA{{\tens{A}}}
\def\tB{{\tens{B}}}
\def\tC{{\tens{C}}}
\def\tD{{\tens{D}}}
\def\tE{{\tens{E}}}
\def\tF{{\tens{F}}}
\def\tG{{\tens{G}}}
\def\tH{{\tens{H}}}
\def\tI{{\tens{I}}}
\def\tJ{{\tens{J}}}
\def\tK{{\tens{K}}}
\def\tL{{\tens{L}}}
\def\tM{{\tens{M}}}
\def\tN{{\tens{N}}}
\def\tO{{\tens{O}}}
\def\tP{{\tens{P}}}
\def\tQ{{\tens{Q}}}
\def\tR{{\tens{R}}}
\def\tS{{\tens{S}}}
\def\tT{{\tens{T}}}
\def\tU{{\tens{U}}}
\def\tV{{\tens{V}}}
\def\tW{{\tens{W}}}
\def\tX{{\tens{X}}}
\def\tY{{\tens{Y}}}
\def\tZ{{\tens{Z}}}


% Calligraphic capital letters A-Z via \mathcal, named \g<Letter>.
\def\gA{{\mathcal{A}}}
\def\gB{{\mathcal{B}}}
\def\gC{{\mathcal{C}}}
\def\gD{{\mathcal{D}}}
\def\gE{{\mathcal{E}}}
\def\gF{{\mathcal{F}}}
\def\gG{{\mathcal{G}}}
\def\gH{{\mathcal{H}}}
\def\gI{{\mathcal{I}}}
\def\gJ{{\mathcal{J}}}
\def\gK{{\mathcal{K}}}
\def\gL{{\mathcal{L}}}
\def\gM{{\mathcal{M}}}
\def\gN{{\mathcal{N}}}
\def\gO{{\mathcal{O}}}
\def\gP{{\mathcal{P}}}
\def\gQ{{\mathcal{Q}}}
\def\gR{{\mathcal{R}}}
\def\gS{{\mathcal{S}}}
\def\gT{{\mathcal{T}}}
\def\gU{{\mathcal{U}}}
\def\gV{{\mathcal{V}}}
\def\gW{{\mathcal{W}}}
\def\gX{{\mathcal{X}}}
\def\gY{{\mathcal{Y}}}
\def\gZ{{\mathcal{Z}}}

% Blackboard-bold capital letters via \mathbb, named \s<Letter>.
% The list has no \sE entry (it goes from \sD straight to \sF).
% NOTE(review): presumably omitted to avoid confusion with the expectation
% symbol, which is also a blackboard-bold E -- confirm.
\def\sA{{\mathbb{A}}}
\def\sB{{\mathbb{B}}}
\def\sC{{\mathbb{C}}}
\def\sD{{\mathbb{D}}}
\def\sF{{\mathbb{F}}}
\def\sG{{\mathbb{G}}}
\def\sH{{\mathbb{H}}}
\def\sI{{\mathbb{I}}}
\def\sJ{{\mathbb{J}}}
\def\sK{{\mathbb{K}}}
\def\sL{{\mathbb{L}}}
\def\sM{{\mathbb{M}}}
\def\sN{{\mathbb{N}}}
\def\sO{{\mathbb{O}}}
\def\sP{{\mathbb{P}}}
\def\sQ{{\mathbb{Q}}}
\def\sR{{\mathbb{R}}}
\def\sS{{\mathbb{S}}}
\def\sT{{\mathbb{T}}}
\def\sU{{\mathbb{U}}}
\def\sV{{\mathbb{V}}}
\def\sW{{\mathbb{W}}}
\def\sX{{\mathbb{X}}}
\def\sY{{\mathbb{Y}}}
\def\sZ{{\mathbb{Z}}}

% Plain (non-bold) capital symbols, named \em<Name>: Lambda, the letters
% A-Z, and Sigma.  Each macro expands to the ordinary symbol in a group.
\def\emLambda{{\Lambda}}
\def\emA{{A}}
\def\emB{{B}}
\def\emC{{C}}
\def\emD{{D}}
\def\emE{{E}}
\def\emF{{F}}
\def\emG{{G}}
\def\emH{{H}}
\def\emI{{I}}
\def\emJ{{J}}
\def\emK{{K}}
\def\emL{{L}}
\def\emM{{M}}
\def\emN{{N}}
\def\emO{{O}}
\def\emP{{P}}
\def\emQ{{Q}}
\def\emR{{R}}
\def\emS{{S}}
\def\emT{{T}}
\def\emU{{U}}
\def\emV{{V}}
\def\emW{{W}}
\def\emX{{X}}
\def\emY{{Y}}
\def\emZ{{Z}}
\def\emSigma{{\Sigma}}

% \etens{X}: the argument set in the \mathsfit alphabet, without the \bm
% wrapper that \tens adds.
\newcommand{\etens}[1]{\mathsfit{#1}}
% Shorthands \et<Name> for \etens applied to Lambda and to each capital A-Z.
\def\etLambda{{\etens{\Lambda}}}
\def\etA{{\etens{A}}}
\def\etB{{\etens{B}}}
\def\etC{{\etens{C}}}
\def\etD{{\etens{D}}}
\def\etE{{\etens{E}}}
\def\etF{{\etens{F}}}
\def\etG{{\etens{G}}}
\def\etH{{\etens{H}}}
\def\etI{{\etens{I}}}
\def\etJ{{\etens{J}}}
\def\etK{{\etens{K}}}
\def\etL{{\etens{L}}}
\def\etM{{\etens{M}}}
\def\etN{{\etens{N}}}
\def\etO{{\etens{O}}}
\def\etP{{\etens{P}}}
\def\etQ{{\etens{Q}}}
\def\etR{{\etens{R}}}
\def\etS{{\etens{S}}}
\def\etT{{\etens{T}}}
\def\etU{{\etens{U}}}
\def\etV{{\etens{V}}}
\def\etW{{\etens{W}}}
\def\etX{{\etens{X}}}
\def\etY{{\etens{Y}}}
\def\etZ{{\etens{Z}}}

% Distribution symbols: a base letter (p or P, optionally with a hat or
% tilde) carrying an upright word as subscript.  \mathrm{...} replaces the
% obsolete \rm switch, which takes no argument and only appeared to work
% because the subscript braces limited its scope.
\newcommand{\pdata}{p_{\mathrm{data}}}
\newcommand{\ptrain}{\hat{p}_{\mathrm{data}}}
\newcommand{\Ptrain}{\hat{P}_{\mathrm{data}}}
\newcommand{\pmodel}{p_{\mathrm{model}}}
\newcommand{\Pmodel}{P_{\mathrm{model}}}
\newcommand{\ptildemodel}{\tilde{p}_{\mathrm{model}}}
\newcommand{\pencode}{p_{\mathrm{encoder}}}
\newcommand{\pdecode}{p_{\mathrm{decoder}}}
\newcommand{\precons}{p_{\mathrm{reconstruct}}}

% Upright name "Laplace".
\newcommand{\laplace}{\mathrm{Laplace}} 

% Assorted math shorthands.  Names set with \mathrm are ordinary symbols,
% not operators, so they get no automatic operator spacing.
\newcommand{\E}{\mathbb{E}}
\newcommand{\Ls}{\mathcal{L}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\emp}{\tilde{p}}
\newcommand{\reg}{\lambda}
\newcommand{\rect}{\mathrm{rectifier}}
\newcommand{\softmax}{\mathrm{softmax}}
\newcommand{\sigmoid}{\sigma}
\newcommand{\softplus}{\zeta}
\newcommand{\KL}{D_{\mathrm{KL}}}
\newcommand{\Var}{\mathrm{Var}}
\newcommand{\standarderror}{\mathrm{SE}}
\newcommand{\Cov}{\mathrm{Cov}}
% L with superscript 0, 1, 2, p and infinity.
\newcommand{\normlzero}{L^0}
\newcommand{\normlone}{L^1}
\newcommand{\normltwo}{L^2}
\newcommand{\normlp}{L^p}
\newcommand{\normmax}{L^\infty}

% "Pa" in math italics (typeset as the product-like letters P a).
\newcommand{\parents}{Pa} 



% Upright operator names with operator spacing (amsmath).
\DeclareMathOperator{\sign}{sign}
\DeclareMathOperator{\Tr}{Tr}
% \ab: short alias for \allowbreak (permits a line break at that point).
\let\ab\allowbreak


% Run-in heading: unindented bold text.
\newcommand{\paragrapht}[1]{\noindent\textbf{#1}}
 \usepackage{adjustbox}
\usepackage{multirow}
\usepackage{makecell}
\usepackage{color, colortbl}
% Custom colours for table shading and links.
\definecolor{Gray}{gray}{0.9}
\definecolor{lightgray}{rgb}{0.83, 0.83, 0.83}
\definecolor{navyblue}{rgb}{0.0, 0.0, 0.5}
% Coloured (unboxed) links, navy blue for citations and URLs.
% NOTE(review): \hypersetup needs hyperref, which is not loaded in this
% preamble -- presumably provided by the class or by sanghack; confirm.
\hypersetup{colorlinks,citecolor=navyblue,urlcolor=navyblue}

% "+/- <value>" in \scriptsize.  The size switch is not grouped, so it
% stays active until the enclosing group (e.g. a table cell) ends.
\newcommand{\stdv}[1]{\scriptsize$\pm$#1}

\usepackage{multicol}
\usepackage{sansmath}


% \swap[sep]{a}{b} prints "b sep a"; the separator defaults to "-".
\newcommand{\swap}[3][-]{#3#1#2}

\title{Causal Discovery with Deductive Reasoning: One Less Problem}


\author[1]{Jonghwan~Kim}

\author[2]{Inwoo~Hwang}

\author[* 1,2]{Sanghack~Lee}
\affil[1]{Graduate School of Data Science, 
    Seoul National University, South Korea
}
\affil[2]{Artificial Intelligence Institute, Seoul National University, South Korea
}
\affil[*]{Correspondence to: \url{sanghack@snu.ac.kr}
}


% Footnote without a mark: inside a group, \thefootnote is made empty, the
% footnote is issued, and the footnote counter is stepped back by one so the
% numbering of later footnotes is unaffected.
\newcommand\blfootnote[1]{\begingroup
  \renewcommand\thefootnote{}\footnote{#1}\addtocounter{footnote}{-1}\endgroup
}
  
\begin{document}
\maketitle

\begin{abstract}
Constraint-based causal discovery algorithms aim to extract causal relationships between variables of interest by using conditional independence tests (CITs). However, CITs with large conditioning sets often lead to unreliable results due to their low statistical power, propagating errors throughout the course of causal discovery. As the reliability of CITs is crucial for their practical applicability, recent approaches rely on either tricky heuristics or complicated routines with high computational costs to tackle inconsistent test results. Against this background, we propose a principled, simple, yet effective method, coined \textsc{deduce-dep}, which corrects unreliable conditional independence statements by replacing them with deductively reasoned results from lower-order CITs. An appealing property of \textsc{deduce-dep} is that it can be seamlessly plugged into existing constraint-based methods and serves as a modular subroutine. In particular, we showcase the integration of \textsc{deduce-dep} into representative algorithms such as HITON-PC and PC, illustrating its practicality. Empirical evaluation demonstrates that our method properly corrects unreliable CITs, leading to improved performance in causal structure learning. 
 \end{abstract}

\section{Introduction}\label{sec:intro}
One of the fundamental tasks in the realm of scientific inquiry is to extract cause-and-effect relationships among diverse variables of interest. Typically, causal relationships are elucidated through randomized experiments. However, there are instances where extracting causal relationships through experiments is not feasible due to cost or ethical considerations. In such cases, causal discovery from observational data is crucial \citep{pearl2009causality,spirtes:etal00}. Causal discovery is the process of learning the underlying causal relationships in the form of a directed acyclic graph (DAG), typically from observational data. So far, several methodologies related to this task have been proposed.

Among the well-known causal discovery methodologies is the constraint-based method, represented by the PC algorithm \citep{spirtes:etal00}. It involves sequentially performing conditional independence tests (CITs) in a principled manner and synthesizing the results to induce the equivalence class of underlying causal structure \citep{meek:95,dor:tar92}. 
Therefore, the reliability of CITs is crucial in this methodology. Similarly, the algorithmic correctness of many constraint-based methods is contingent on the critical assumption that all CITs performed on the data are correct.
However, an oracle CIT rarely, if ever, exists in the real world.


In practice, CITs are prone to errors, particularly when dealing with large conditioning sets. As the size of the conditioning set increases, the number of data instances required grows exponentially, and thus, the statistical power of the corresponding CIT decreases. Such high-order CITs frequently lead to unreliable results, propagating errors throughout the process of structure learning. This contributes to the instability and performance degradation of the algorithm \citep{spirtes:etal00,JMLR:v11:aliferis10b, Armen2014EstimationAC}.


Recent approaches to addressing the reliability concerns of CITs utilize rules derived from graphoid axioms \citep{geiger:90,pearl:paz87} 
to build a causal structure that is as consistent as possible given the inconsistent CIT results \citep{bromberg09a, ma2023enabling}. The intuition is that graphoid axioms can be used to constrain conditional independence (CI) statements by other CI statements. However, they lack a principled way of determining the preference of CI statements and their practical applicability is often limited due to the high computational cost of searching possible combinations of rules from graphoid axioms.



Against this background, this paper presents a straightforward, practical approach to causal discovery based on deductive reasoning over CI statements and graphoid axioms.
Our method, coined \textsc{deduce-dep}, offers a simple and sound condition for correcting CITs, in contrast to previous approaches relying on complex routines. Specifically, our method replaces the results of unreliable high-order CITs with outcomes derived from deductive reasoning over lower-order CITs and rules from graphoid axioms. Notably, our method serves as a modular subroutine that can be seamlessly integrated into various constraint-based methods, highlighting its practicality. 


\begin{figure}\centering
\begin{tikzpicture}[x=1.5cm, y=10mm]
    \node[RR] (X) {$X$};
    \node[RR] (Y) at (-1,1) {$Y$};
    \node[RR] (Z1) at (1,1) {$Z'$};
    \node[RR] (Z2) at (0,2) {$Z''$};
    \draw[->] (Y) -- (X);
    \draw[->] (Z1) -- (X);
    \draw[->] (Z2) -- (X);
    \draw[->] (Z1) -- (Z2);
\end{tikzpicture}
\caption{A subgraph of a true causal graph.}
\label{fig:explain-deduce-rule}
\end{figure}


\paragrapht{Motivating example.}
Consider the following scenario where a constraint-based method tries to discover the structure illustrated in \Cref{fig:explain-deduce-rule}. Due to the behavior of the algorithm, it tries to examine the adjacency between $X$ and $Y$ where the following CI statements are accurately obtained  
\begin{align*}
    (X \not\Perp Y \mid Z') \text{ and } (X \not\Perp Y \mid Z'').
\end{align*}
Unfortunately, the relationship between $X$ and $Y$ is relatively weak, and we wrongly obtained
\[
    (X \Perp Y \mid Z', Z''),
\]
which usually warrants the removal of the true edge between $X$ and $Y$. We may doubt this CI result, suspecting that the test has low power. We then examine the CI between $Y$ and $Z''$ given $Z'$, where the CIT correctly yields $(Y\Perp Z'' \mid Z')$. In such a case, we can indeed deduce $(X \not\Perp Y \mid Z', Z'')$ from the previous CIT results and $(Y\Perp Z'' \mid Z')$ by applying rules derived from graphoid axioms, which falsifies the suspicious result, $(X \Perp Y \mid Z', Z'')$. Here, we prefer the deductively reasoned $(X \not\Perp Y \mid Z', Z'')$ to the CIT result $(X \Perp Y \mid Z', Z'')$ given that tests with smaller conditioning sets are \textit{likely} more reliable.

\paragraph{Contributions.}
We propose a practical correction method for unreliable CITs using deductive reasoning. Our contributions are as follows. 1) We devised conditions and rules for deducing high-order CI statements from low-order CI statements. We developed \textsc{deduce-dep}, a sound algorithm that implements these rules in a recursive manner. The algorithm corrects unreliable independence statements with the (in)dependence statements from deductive reasoning. 2) Our correction algorithm can be \textit{effortlessly} adapted to any constraint-based causal discovery algorithm as a subroutine. In particular, we presented HITON-PC \citep{JMLR:v11:aliferis10a} and PC \citep{spirtes:etal00} equipped with \textsc{deduce-dep}. 3) We examined our method with comprehensive experiments using both synthetic and semi-synthetic datasets. Empirical evaluation reveals that our method properly rectifies the consequences of unreliable CITs, improving the performance of causal structure learning. 
\section{Preliminaries}\label{sec:prelim}
We denote a variable by a capital letter $X$ and its realized value by the lowercase $x$ in the domain $D_{X}$. A set of variables will be expressed by a bold letter $\*X$. We denote by $\*X \Perp \*Y \mid \*Z$ the CI statement that the random variables $\*X$ and $\*Y$ are conditionally independent given a set of variables $\*Z$. Similarly, we use $\*X \not\Perp \*Y \mid \*Z$ to express conditional dependence.
We denote a CI query by $(\*X; \*Y \mid \*Z)$, which can be either true (independent) or false (dependent).

A Bayesian network (BN) \citep{neapolitan:90,pearl:00a} is a tuple $(\GG, P)$ where the joint distribution $P$ over a set of variables $\*V$ is represented by the directed acyclic graph (DAG) $\GG$. We assume that the underlying causal mechanisms can be encoded using a directed graphical model. A DAG $\GG$ is defined as a tuple ($\*V$, $\*E$) where $\*V$ and $\*E$ are the set of all vertices and edges between pairs of vertices, respectively. We employ graphical kinship terminology $Pa(\cdot)$ and $Ch(\cdot)$ to represent parents and children in $\GG$, respectively. Further, $Ne(\cdot)$ represents neighbors, i.e., nodes adjacent to the argument in an (un)directed graph.

\subsection{Constraint-based Causal Discovery} 
Constraint-based causal discovery methods utilize multiple CITs to uncover causal relations from data. These methods are flexible since they do not impose any functional assumptions on the underlying causal relations. Prominent algorithms in this category include the PC (Peter-Clark) algorithm \citep{spirtes:etal00}, which is sound and complete for discovering the completed partially directed acyclic graph (CPDAG). FCI \citep{spirtes:etal00} also belongs to this group but relaxes causal sufficiency, i.e., the assumption that there are no unmeasured confounders. 

Besides, there are local structure learning algorithms that learn the Markov blanket or parent-children set of a target variable from data \citep{yu2020causality}. In practice, such algorithms have shown remarkable scalability with thousands of variables \citep{JMLR:v11:aliferis10a}. IAMB \citep{pmlr-vR4-tsamardinos03a} and PCMB \citep{pena2007towards} are some of the renowned examples of Markov blanket learning algorithms, whereas MMPC \citep{JMLR:v11:aliferis10a} and HITON-PC\footnote{HITON comes from $\chi \imath \tau \acute{\omega} \nu$ (chiton), a form of fabric (blanket). Here, PC stands for Parents and Children, not Peter-Clark.} \citep{JMLR:v11:aliferis10a} are for parents-children learning.

Constraint-based methods vary in multiple aspects, e.g., their underlying assumptions, types of output, strategies, and inductive biases employed to construct causal structures. For instance, the PC algorithm initiates learning from a complete undirected graph, while the MMPC and HITON-PC algorithms start from an empty graph. Despite these differences, these algorithms commonly encounter issues with unreliable CITs and the subsequent negative impacts on the learning process, posing a significant challenge in the task of causal discovery \citep{claassen2012bayesian}. 




\subsection{Reliability criterion} 
So far, several constraint-based methods have introduced techniques to properly tackle the reliability issue of CITs. The majority of these techniques are based on heuristics. Some prominent examples are the \textit{heuristic power rule} and the \textit{degree of freedom adjustment heuristic} \citep{tsamardinos2010permutation}, which are adopted by the PC and MMHC algorithms \citep{tsamardinos:06a}. 

The heuristic power rule prescribes that a statistical test can be considered reliable and should be conducted only if a \textit{sufficient} number of data instances are available per parameter, i.e., the number of cells in the contingency tables required for the CIT. 
If not, the statistical test does not proceed.
In this way, the heuristic power rule implicitly considers the decision of the latest CIT performed as more reliable than that of the current test. As a result, when tests with higher-order conditioning sets are omitted, algorithms like HITON-PC assume dependence between the variables, delegating their decisions to the latest CIT performed. The threshold for the heuristic power rule varies across different causal discovery algorithms (e.g., 5 in \citet{tsamardinos:06a}, or 10 in \citet{spirtes:etal00}).  However, an issue arises from the uncertainty surrounding how many instances are truly sufficient for accurate statistical inference.

The degree of freedom adjustment heuristic calibrates the degree of freedom (DoF) with respect to \textit{zero} counts that appear in the contingency table when performing CITs.
These zero counts may come from either random events or structural constraints of the data-generating process, where
the latter are called \textit{structural zeros}. 
Since structural zeros are not free to vary, the DoF should be recalculated in a way that subtracts one for each structural zero. A reduced DoF is expected to increase the power of the CIT. However, the problem is that we may not know which zero counts are structural zeros. 


In addition to the previously discussed heuristics, there is another heuristic that involves limiting the size of the conditioning set when performing CITs \citep{tsamardinos:06a, JMLR:v11:aliferis10a}. This prevents certain CITs from being performed if their conditioning set size exceeds a user-specified threshold. The behavior of this heuristic is akin to that of the heuristic power rule, since it skips some CITs to avoid potential false negatives. Several implementations of constraint-based algorithms (e.g., causal-learn Python package \citep{zheng2024causal}) have implicitly adopted this heuristic to prematurely terminate the structure learning process. Despite its frequent adoption, a comprehensive understanding of the outcomes generated by these implementations has been lacking so far. Only recently has the understanding of this heuristic become a subject of ongoing research \citep{kocaoglu2024characterization}.


\subsection{Rules from Graphoid axioms} We introduce rules derived from \textit{graphoid axioms}, which govern the relationships between CI statements \citep{pearl:paz87,lauritzen:96,paz:etal97}. 
The rules play a crucial role as integrity constraints, aiding in the resolution of conflicts arising from inconsistent CI statements \citep{bromberg09a}. Among several rules (see \Cref{appendix:graphoid}), we will make use of the following rules:  


1. Symmetry:\\
$(\*X {\Perp} \*Y | \*Z) \Leftrightarrow(\*Y {\Perp} \*X | \*Z)$

2. Contraction:\\
$(\*X {\Perp} \*Y | \*Z) \wedge(\*X {\Perp} \*W | \*Z, \*Y) \Rightarrow(\*X {\Perp} \*Y, \*W | \*Z)$

3. Decomposition:\\
$(\*X {\Perp} \*Y, \*W | \*Z)\Rightarrow(\*X {\Perp} \*Y | \*Z) \wedge(\*X {\Perp} \*W | \*Z)$

4. Weak transitivity:\\
\mbox{$(\*X {\Perp} \*Y | \*Z) \wedge(\*X {\Perp} \*Y | \*Z, W) \Rightarrow(\*X {\Perp} W | \*Z) \vee(W {\Perp} \*Y | \*Z)$}

Unlike others, weak transitivity holds only under the assumption of a faithful Bayesian network $(\GG, P)$, in which the graph $\GG$ encodes all the CI information from the probability distribution $P$. Also note that $W$ in weak transitivity is a single variable. In the sequel, these rules will be useful for our approach to causal discovery based on deductive reasoning over CI statements. 
\section{Method}\label{sec:method}
In this work, we make two assumptions widely adopted in the literature: the causal Markov assumption and faithfulness, which play crucial roles in connecting a probability distribution with a graph structure. The causal Markov assumption states that if d-separation holds in a causal graph, then the corresponding CI holds in the associated probability distribution. The faithfulness assumption ensures that all the observed CI in the probability distribution can be read off the corresponding causal graph. For the demonstration, we utilize constraint-based methods which assume causal sufficiency.



\begin{figure}[t]
    \centering\footnotesize
    \begin{tikzpicture}[y=10mm]\sansmath
        \draw (0,0) rectangle (2,1) node[pos=.5,align=left,text width=1.8cm] {$(X\not\Perp Y\mid \*Z)$\\ succeeds \textcolor{bettergreen}{\cmark{}}\par};
        \draw (2,0) rectangle (4,1) node[pos=.5,align=left,text width=1.8cm] {$(X\not\Perp Y\mid \*Z)$\\ fails \textcolor{betterred}{\xmark{}}\par}; 
        \draw[-] (4,1) -- (6.5,1);
        \node[align=left,text width=1.8cm] at (5,0.5) {$(X\not\Perp Y\mid \*Z)$\\ succeeds \textcolor{bettergreen}{\cmark{}} \par};
        \draw[->] (0,0) -- (7,0) node[pos=1.0,below,xshift=-8mm] {\scriptsize \# of available samples};
        \node[align=justify,text width=3.75cm] (T1) at (1.5,1.9) {\scriptsize test cannot be performed due to the \textcolor{betterblue}{reliability criterion} on the amount of data instances (regarded as dep)\par};
        
        \node[align=justify,text width=3.5cm] (T2) at (5.65,1.9) {\scriptsize test is well-performed with \textcolor{bettergreen}{sufficient power}, resulting in TP (regarded as dep)\par};
        
        \node[align=justify,text width=4cm] (T3) at (2.65,-.8) {\scriptsize test is poorly-performed due to \textcolor{betterred}{low-power}, resulting in FN (regarded as ind) \par};
        
        \draw[->,shorten <= -2pt] (T1) -- (1,1);
        \draw[->,shorten <= -2pt] (T2) -- (5.25,1);
        \draw[->,shorten <= -2pt] (T3) -- (3,0);
    \end{tikzpicture}
    \caption{Behavior of HITON-PC and PC with heuristic power rule (adapted from \citet{tsamardinos:06a}).}
    \label{fig:behavior-pc-low}
\end{figure}



As discussed earlier, low-power CITs produce unreliable CI statements, leading to errors and instability in the structure learning. Although several heuristics have been proposed to tackle this issue, they either lack a theoretical basis or fall short of properly correcting unreliable CITs. For instance, the heuristic power rule considers only the amount of data available for the test to determine its reliability. However, as illustrated in \Cref{fig:behavior-pc-low}, it leaves quite a few CI statements from CITs untouched, even though they are suspected of being unreliable. This phenomenon is problematic because even with the number of data instances above the user-specified threshold, we can expect that CITs falsely declare independence due to their lack of statistical power. In this respect, our approach should aim to examine possibly all the CI statements obtained during the learning process. Putting pre-existing heuristics aside, \textit{how can we properly tell whether CI statements from CITs are true?}

Inspiration for our method comes from the fact that the outcome of CIT can be constrained by the outcomes of other CITs through graphoid axioms \citep{pearl:paz87}. This insight opens the door for CI statement correction through deductive reasoning with the rules derived from graphoid axioms. It provides a sound way to reason about the CI query of interest from multiple other CI statements that we judge to be reliable. In this regard, we sketch our approach as follows. For every CI statement obtained, we conduct deductive reasoning to figure out whether a dependence statement can be logically induced from other low-order CITs. If it can be, we adopt the result from deductive reasoning and, if otherwise, the one from a statistical test.

In our approach, we restrict the ingredients of deductive reasoning to \textit{lower-order} CITs, whose conditioning sets are proper subsets of that of the CI query. Clearly, they are expected to be more reliable than the CI statement of interest since the amount of data needed for sound statistical inference is smaller. Therefore, we opt for the result from deductive reasoning whenever dependence is logically attainable from these lower-order tests. If needed, we perform new lower-order CITs for our reasoning. As long as a CIT is lower-order, it should be performed, and its result is eligible to be an ingredient of our deductive reasoning. Allowing additional CITs to be conducted, rather than only harnessing those already executed, broadens the scope to which our deductive reasoning can be applied. 

\subsection{Rules for deductive reasoning} 
In the following proposition, we present the rules for our reasoning method.
Omitted proof is provided in \Cref{appendix:proofs}. 


    


\begin{restatable}[]{proposition}{propdeducerule}
\label{prop:deduce-rule}
Under the faithful Bayesian network $(\GG, P)$, let $\*X$, $\*Y$, and $\*Z$ be disjoint subsets of $\*V$ where $\*Z$ is partitioned into $\*Z'$ and $\*Z''$ such that $\*Z = \*Z'\sqcup \*Z'', |\*Z''|=1$. Then, $(\*X \not\Perp \*Y \mid \*Z)$ if one of the following holds:
\begin{enumerate}
\item $(\*X \not\Perp \*Y \mid \*Z')\wedge (\*X \Perp \*Z'' \mid \*Z')$
\item $(\*X\Perp \*Y \mid \*Z')\wedge (\*X \not\Perp \*Z''\mid \*Z')\wedge (\*Y \not\Perp \*Z'' \mid \*Z')$
\end{enumerate}
\end{restatable}


These rules provide a theoretical background for our deductive reasoning-based approach. 
They stipulate several conditions where we can deduce high-order dependence statements from strictly low-order CI statements.  
Note that the first condition in \Cref{prop:deduce-rule} can be modified to
\[
(\*X \not\Perp \*Y \mid \*Z')\wedge (\*Y \Perp \*Z'' \mid \*Z')
\]
by symmetry, which we will make use of in the sequel.
We now proceed to incorporate these rules into an algorithm. 



\subsection{Incorporating deductive reasoning rules into algorithm}

We propose \textsc{deduce-dep} (\Cref{alg:deduce-dep}), a sound algorithm for deducing dependence statements from strictly low-order CI statements. 
In particular, it can be viewed as a special case of \Cref{prop:deduce-rule} where $\*X=\set{X}$ and $\*Y=\set{Y}$. 

\begin{algorithm}[t]\footnotesize
    \begin{algorithmic}[1]
    \State \textbf{Input}: $\set{X},\set{Y},\*Z$ disjoint subsets of $\*V$, {reliability threshold $K$} (default 1) 

    \State \textbf{Output}: Whether $(X\not\Perp Y\mid \*Z)$ is deducible or not. \smallskip
    \State {\textbf{if} $|\*Z|\leq K$ \textbf{return} \textsc{false}}
    \For{$Z \in \*Z$}
        \State $\*Z' \gets \*Z \setminus \set{Z }$
        \For{$(\*A,\*B,\*C)$ in $\{(X,Y,\*Z'),(X,Z ,\*Z'),(Y,Z ,\*Z')\}$}
            \If{$(\*A \Perp \*B \mid\*C)$ \textbf{and} \textbf{not} \textsc{deduce-dep}$(\*A,\*B,\*C)$}
                \State mark $(\*A ; \*B \mid\*C)$ as $\Perp$
            \Else{} mark $(\*A ; \*B \mid\*C)$ as $\not\Perp$ 
            \EndIf
        \EndFor\smallskip

        \If{$(X\not\Perp Y\mid \*Z') \oplus \parens[\Big]{(X\not\Perp Z \mid\*Z')\wedge(Y\not\Perp Z \mid\*Z')}$
        }
        \State \textbf{return} \textsc{true}\smallskip
        \EndIf
    \EndFor
    \State \textbf{return} \textsc{false}
    \end{algorithmic}
\caption{\textsc{deduce-dep}}
\label{alg:deduce-dep}
\end{algorithm}

As \textsc{deduce-dep} is designed to examine CI statements, arguments of the algorithm are $X$, $Y$, and $\*Z$ which constitute the statement $(X \Perp Y \mid \*Z)$. 
We treat the size of the minimal conditioning set as a hyperparameter $K$, which specifies the base case for recursive calls. We assume a marker (a cache keeping CIT results) is globally defined. As an output, \textsc{deduce-dep} returns whether a dependence statement $(X \not\Perp Y \mid \*Z)$ is attainable from strictly low-order CITs. 

\textsc{deduce-dep} starts by checking the size of the conditioning set $\*Z$ (line 3 in \Cref{alg:deduce-dep}). If it is at most the reliability threshold $K$, then \textsc{deduce-dep} returns \textsc{false}, affirming the corresponding CIT result as reliable. If not, \textsc{deduce-dep} proceeds to partition $\*Z$ into a singleton $\set{Z}$ and the complementary $\*Z'$ (lines 4--5). Based on \Cref{prop:deduce-rule}, it performs three CITs, $(X; Y \mid \*Z'), (X; Z \mid \*Z'), (Y; Z \mid \*Z')$, to infer whether $(X \not\Perp Y \mid \*Z',Z )$ is deducible or not (line 6), where the results are marked in the marker. If the marked result is independence, then it recursively calls \textsc{deduce-dep} (lines 7--8) and marks the results either from CITs or deductive reasoning (lines 8--9). Here, marks can be updated from independence to dependence. If \Cref{prop:deduce-rule} can be applied based on the marks of three CI statements, it returns \textsc{true} (lines 10--11). It repeats this procedure for all $Z \in \*Z$ until it returns \textsc{true}; otherwise, it returns \textsc{false} (line 12).

In theory, our algorithm examines three CI statements to determine whether a dependence statement of our interest is deducible. However, in some cases, it can do so with just two CI statements. If we obtain $(X \not\Perp Y \mid \*Z')$ and subsequently obtain $(X \Perp Z  \mid \*Z')$, then with these two CI statements, we can satisfy the conditions to deduce the dependence statement in Line 10. Similarly, if we obtain $(X \not\Perp Y \mid \*Z')$ and then obtain $(Y \Perp Z  \mid \*Z')$, the same applies. This early stopping mechanism could be quite efficient in practical implementation as it reduces unnecessary recursive calls. We now proceed to show the correctness of \textsc{deduce-dep}.

\begin{corollary}\label{cor:xor-deduce-dep}
Under the faithful Bayesian network $(\GG, P)$, let $\set{X}\sqcup\set{Y}\sqcup\*Z\subseteq \*V$, $Z\in \*Z$, and $\*Z'=\*Z\setminus \set{Z}$. 
If
\[
    (X\not\Perp Y\mid \*Z') \oplus \parens[\big]{(X\not\Perp Z \mid\*Z')\wedge(Y\not\Perp Z \mid\*Z')},
\]
then $(X\not\Perp Y \mid \*Z)$ holds.
\end{corollary}


\begin{figure}
\begin{subfigure}{.23\columnwidth}\centering
\begin{tikzpicture}[x=6.5mm,y=8mm,RR/.style={draw,circle,inner sep=0mm, minimum size=4.5mm,font=\sffamily\scriptsize}]
\node[RR] (X) at (-1,-.2) {$X$};
\node[RR] (Y) at (0,-1) {$Y$};
\node[RR] (Z) at (1,0.2) {$Z$};
\node[RR] (Zp) at (0,1) {$\*Z'$};
\draw[->] (Z) -- (X);
\draw[->] (Z) -- (Y);
\draw[->] (Zp) -- (X);
\draw[->] (Zp) -- (Y);
\end{tikzpicture}
\caption{}
\label{prop:compl-ddd}
\end{subfigure}\hfill
\begin{subfigure}{.23\columnwidth}\centering
\begin{tikzpicture}[x=6.5mm,y=8mm,RR/.style={draw,circle,inner sep=0mm, minimum size=4.5mm,font=\sffamily\scriptsize}]
\node[RR] (X) at (-1,-.2) {$X$};
\node[RR] (Y) at (0,-1) {$Y$};
\node[RR] (Z) at (1,0.2) {$Z$};
\node[RR] (Zp) at (0,1) {$\*Z'$};
\draw[->] (Zp) -- (Z);
\draw[->] (Zp) -- (X);
\draw[->] (Zp) -- (Y);
\end{tikzpicture}
\caption{}
\label{prop:compl-iii}
\end{subfigure}\hfill
\begin{subfigure}{.23\columnwidth}\centering
\begin{tikzpicture}[x=6.5mm,y=8mm,RR/.style={draw,circle,inner sep=0mm, minimum size=4.5mm,font=\sffamily\scriptsize}]
\node[RR] (X) at (-1,-.2) {$X$};
\node[RR] (Y) at (0,-1) {$Y$};
\node[RR] (Z) at (1,0.2) {$Z$};
\node[RR] (Zp) at (0,1) {$\*Z'$};
\draw[->] (Zp) -- (Z);
\draw[->] (Zp) -- (X);
\draw[->] (Zp) -- (Y);
\draw[->] (Z) -- (Y);
\end{tikzpicture}
\caption{}
\label{prop:compl-iid}
\end{subfigure}\hfill
\begin{subfigure}{.23\columnwidth}\centering
\begin{tikzpicture}[x=6.5mm,y=8mm,RR/.style={draw,circle,inner sep=0mm, minimum size=4.5mm,font=\sffamily\scriptsize}]
\node[RR] (X) at (-1,-.2) {$X$};
\node[RR] (Y) at (0,-1) {$Y$};
\node[RR] (Z) at (1,0.2) {$Z$};
\node[RR] (Zp) at (0,1) {$\*Z'$};
\draw[->] (Zp) -- (Z);
\draw[->] (Zp) -- (X);
\draw[->] (Zp) -- (Y);
\draw[->] (Z) -- (X);
\end{tikzpicture}
\caption{}
\label{prop:compl-idi}
\end{subfigure}
\caption{Examples for \Cref{prop:completeness}.}
\end{figure}


\Cref{cor:xor-deduce-dep} provides a sound condition for \textit{deducing} the dependence given the three statements and claims nothing for the cases where the condition is false. We now show that $(X \Perp Y \mid \*Z)$ remains possible in every case where the condition in \Cref{cor:xor-deduce-dep} is not satisfied.


\begin{restatable}[]{proposition}{propcompleteness}
\label{prop:completeness}
The condition in \Cref{cor:xor-deduce-dep} is complete for deducing $(X\not\Perp Y \mid \*Z)$ with respect to the three CI statements $(X;Y\mid \*Z')$, $(X;Z\mid \*Z')$, and $(Y;Z\mid \*Z')$.
\end{restatable}
\begin{proof}
We prove by constructing a counterexample for each tuple $((X;Y\mid \*Z'),(X;Z\mid \*Z'),(Y;Z\mid \*Z'))$ where the condition is \textit{not} satisfied:
($\not\Perp$, $\not\Perp$,$\not\Perp$) in \Cref{prop:compl-ddd}, ($\Perp$, $\Perp$,$\Perp$) in \Cref{prop:compl-iii}, 
($\Perp$, $\Perp$,$\not\Perp$) in  \Cref{prop:compl-iid}, and ($\Perp$, $\not\Perp$,$\Perp$) in \Cref{prop:compl-idi} where
$\*Z'\to W$ implies $Z'\to W$ for every  $Z'\in \*Z'$.
For all cases, we have $(X \Perp Y \mid \*Z)$.
\end{proof}




\section{Application of deductive reasoning on causal discovery}

An appealing property of \textsc{deduce-dep} is that it can be seamlessly integrated into any constraint-based method and serve as a modular subroutine. 
This highlights the practicality of \textsc{deduce-dep} since it aims to correct statistical errors from unreliable CITs, which could further prevent error propagation throughout the learning process.
To elucidate how our method works inside the constraint-based methods, we take HITON-PC (\Cref{sec:method-application-hiton}) and PC algorithms (\Cref{sec:method-application-pc}), representative local and global structure learning algorithms, respectively, as illustrative examples.  


\subsection{HITON-PC with \textsc{deduce-dep}}
\label{sec:method-application-hiton}

\begin{algorithm}[t]
    \begin{algorithmic}[1]\footnotesize
    \State \textbf{Input}: a set of variables $\*V$, the target variable $T$, dependency measure, CI tester
    \State \textbf{Output}: a tentative parents-children set \textsc{tpc} for $T$ \smallskip
    \State Initialize $\textsc{tpc}$ with an empty set.
    \State Let $\textsc{open}$ be $\set{X\in\*V \mid X\not\Perp T}$
    \State Sort $\textsc{open}$ in descending order w.r.t.\ correlation with $T$
    \While{\textsc{open} is not empty}
        \State Let $X$ be an element popped from \textsc{open}.
        \State Update $\textsc{tpc}$ with $X$. 
        \Repeat
            \For{$Y\in \textsc{tpc}$}
                \If {$(Y \Perp T \mid \*S)$ for some  $\*S \subseteq \textsc{tpc} \setminus \{Y\}$}
                    \If{not \textsc{deduce-dep}$(\set{Y},\set{T},\*S)$}
                        \State Remove $Y$ from \textsc{tpc}.
                    \EndIf
                \EndIf
            \EndFor
        \Until{No change in \textsc{tpc}}
    \EndWhile
    \State \textbf{return} \textsc{tpc}
    \end{algorithmic}
\caption{HITON-PC-Nonsym with \textsc{deduce-dep}}\label{alg:hiton-pc-nonsym-dd}
\end{algorithm}


We augment a local structure learning algorithm, HITON-PC \citep{JMLR:v11:aliferis10a}, with \textsc{deduce-dep} as a subroutine. HITON-PC is an algorithmic instance from the Generalized Local Learning Parents and Children framework \citep{JMLR:v11:aliferis10a}, which outputs parents and children of a target variable. It consists of two subroutines, namely, HITON-PC-Nonsym and symmetry correction. 
The former outputs the superset of the parents-children set of a target variable, which is then pruned by the latter.
Although pruning ensures the outcome of HITON-PC-Nonsym is correct, empirical evaluations have shown that applying symmetry correction adds little to performance improvement, sometimes leading to performance degradation. 
This is due to the frequent occurrence of low-power CITs and their subsequent influences on the algorithm \citep{JMLR:v11:aliferis10b}. 

In \Cref{alg:hiton-pc-nonsym-dd}, we augment HITON-PC-Nonsym with \textsc{deduce-dep} and illustrate how our method serves as a subroutine. Specifically, \textsc{deduce-dep} is called whenever a CIT outputs independence (lines 11--12). If \textsc{deduce-dep} confirms the CI statement, a subsequent removal operation is performed accordingly (line 13). It is worth noting that \textsc{deduce-dep} reuses previous low-order CIT results, enhancing the efficiency of correcting unreliable CITs.


\subsection{PC with \textsc{deduce-dep}}
\label{sec:method-application-pc}


The PC algorithm \citep{spirtes:etal00} is a representative, sound and complete constraint-based method. It begins with a complete graph and proceeds to identify the skeleton of the underlying Bayesian network, subsequently orienting the edges. For skeleton identification, it conducts multiple CITs, removing an edge between variables $X$ and $Y$ if there exists a conditioning set $\*S$ such that $(X\Perp Y \mid \*S)$. Since previous CIT results are used to orient other edges, even a single error from an unreliable CIT may incur other errors accordingly \citep{dash2003robust, tsamardinos:06a}. Despite several attempts to mitigate error propagation \citep{colombo2014order}, this issue is still prevalent.

In \Cref{alg:pc-dd}, we integrate the \textsc{deduce-dep} into the PC algorithm. 
Analogous to the previous example, \textsc{deduce-dep} comes into play in the PC algorithm after performing a CIT to confirm its result (line 8). If \textsc{deduce-dep} ultimately confirms the result of the CIT, edges are removed (line 9). Considering that PC performs CITs by increasing the order of tests, our method will efficiently reuse many of the previous CIT results generated as the algorithm runs. In \Cref{sec:experiments-results-performance}, we demonstrate that \textsc{deduce-dep} effectively controls the negative influence of \textit{low-power} CITs, leading to improved performance for the PC algorithm.\footnote{For the demonstration, we implemented PC-stable \citep{colombo2014order}, which is order-independent, making it more suitable for comparing the performance.}







\begin{algorithm}[t]
    \begin{algorithmic}[1]\footnotesize
    \State \textbf{Input}: a set of variables $\*V$, CI tester
    \State \textbf{Output}: a CPDAG \smallskip
    \State Initialize $\GG$ with a complete undirected graph
    \For{$k = 0,1,2,\dotsc$}
        \For{an ordered pair of adjacent vertices $(X,Y)\in\GG$ s.t. $|Ne(\set{X})_\GG\setminus\set{Y}|\geq k$}
            \For{$\*S\subseteq Ne(\set{X})_\GG\setminus\set{Y}$ s.t. $|\*S|=k$}
                \If{$(X\Perp Y \mid \*S)$}
                    \If{\textbf{not} \textsc{deduce-dep}($\set{X},\set{Y},\*S $)}
                        \State Remove $X$-$Y$ from $\GG$
                    \EndIf
                \EndIf
            \EndFor
        \EndFor
        \State \textbf{else} \textbf{break}
    \EndFor\smallskip
    \State Orient $\GG$ for unshielded colliders
    \State Complete orientation of $\GG$ with Meek's rules
    \State \textbf{return} $\GG$
    \end{algorithmic}
\caption{PC with \textsc{deduce-dep}}\label{alg:pc-dd}
\end{algorithm}


 
\section{Empirical Evaluations}\label{sec:experiments}
We empirically investigate whether replacing the high-order CI statement with the result from deductive reasoning leads to a better performance in structure learning. Specifically, we first examine the effectiveness of our approach for correcting false negatives (FN) from CITs (\Cref{sec:experiments-results-correction}). This allows us to assess the inherent capabilities and limitations of our method under controlled conditions. We then assess the performance improvement our method brings to constraint-based causal discovery algorithms (\Cref{sec:experiments-results-performance}). This experiment enables us to assess the complementary nature of our method within the context of structure learning. We now describe the experimental setup.





\begin{table*}[t!]
\centering
\caption{$|\*V|=20$ with 95\% confidence interval. $N$ and $e$ denote the number of instances and edges, respectively.}\label{table:correction-exp-v20-conf}
\begin{adjustbox}{width=\linewidth}

\begin{tabular}{@{}clccccccccc@{}}
\toprule
& Method & \multicolumn{3}{c}{\makecell{$e=24$}} & \multicolumn{3}{c}{\makecell{$e=30$}} & \multicolumn{3}{c}{\makecell{$e=40$}} \\
\cmidrule(r){3-5}
\cmidrule(l){6-8}
\cmidrule(l){9-11}
$N$ & & F1 & Precision & Recall & F1 & Precision & Recall & F1 & Precision & Recall \\ 
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{200}}}
& CIT	& 0.26\stdv{0.06}& \textbf{0.63}\stdv{0.11}& 0.18\stdv{0.04}
& 0.28\stdv{0.04}& \textbf{0.79}\stdv{0.09}& 0.18\stdv{0.03}
& 0.30\stdv{0.04}& \textbf{0.92}\stdv{0.06}& 0.19\stdv{0.03}	\\
& CIT + \textsc{dd}	& \textbf{0.59}\stdv{0.03}& 0.50\stdv{0.04}& \textbf{0.76}\stdv{0.04}
& \textbf{0.70}\stdv{0.03}& 0.69\stdv{0.04}& \textbf{0.73}\stdv{0.03}
& \textbf{0.78}\stdv{0.03}& 0.86\stdv{0.03}& \textbf{0.73}\stdv{0.03}\\
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{500}}}
& CIT	& 0.40\stdv{0.05}& \textbf{0.78}\stdv{0.08}& 0.29\stdv{0.04}
& 0.39\stdv{0.05}& \textbf{0.84}\stdv{0.07}& 0.26\stdv{0.04}
& 0.41\stdv{0.03}& \textbf{0.97}\stdv{0.02}& 0.26\stdv{0.02}	\\
& CIT + \textsc{dd}	& \textbf{0.60}\stdv{0.03}& 0.50\stdv{0.04}& \textbf{0.78}\stdv{0.04}
& \textbf{0.71}\stdv{0.04}& 0.66\stdv{0.04}& \textbf{0.80}\stdv{0.04}
& \textbf{0.82}\stdv{0.02}& 0.86\stdv{0.03}& \textbf{0.78}\stdv{0.03}\\
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{1000}}}
& CIT	& 0.50\stdv{0.05}& \textbf{0.86}\stdv{0.05}& 0.37\stdv{0.05}
& 0.47\stdv{0.04}& \textbf{0.89}\stdv{0.06}& 0.33\stdv{0.04}
& 0.49\stdv{0.04}& \textbf{0.97}\stdv{0.02}& 0.34\stdv{0.03}	\\
& CIT + \textsc{dd}	& \textbf{0.61}\stdv{0.03}& 0.50\stdv{0.04}& \textbf{0.81}\stdv{0.04}
& \textbf{0.74}\stdv{0.02}& 0.70\stdv{0.03}& \textbf{0.82}\stdv{0.03}
& \textbf{0.83}\stdv{0.02}& 0.86\stdv{0.03}& \textbf{0.81}\stdv{0.03}	\\
\bottomrule
\end{tabular}
\end{adjustbox}
\end{table*}




 
\subsection{Experimental Setup}
\paragraph{Correcting CIT with \textsc{deduce-dep}.} 
In this experiment, we utilize random CI queries within random Bayesian networks (BNs) and compare the performance of our method with a counterpart where our method was not applied. This allows us to isolate the algorithm-dependent factors and purely validate the intrinsic effectiveness of our method: \textit{how well does our method correct CI statements?} For this, we used the Erd\H{o}s--R\'enyi model \citep{erdHos1960evolution} to generate random DAGs. Based on these DAG topologies, we designed conditional probability tables with binary variables, i.e., \( P(V_i \mid Pa(V_i)) \) 
follows a Bernoulli distribution with its parameter uniformly sampled from $[0, 1]$. Following this process, we generated random BNs across 50 iterations, varying the number of variables (\( |\rmV| \in \{10, 20, 30\} \)) and the ratio of edges to variables (\( |\rmE| \mathbin{/} |\rmV| \in \{1.2, 1.5, 2\} \)). For each random BN, data instances were sampled with the number of instances varying across \( n \in \{200, 500, 1000\} \). Additionally, we composed random combinations of disjoint sets \( X \), \( Y \), and \( \*Z \) to formulate CI queries, repeating this process 20 times for each BN. The range of conditioning set sizes was set between 2 and 4.

\paragrapht{Causal discovery with \textsc{deduce-dep}.} 
We assess the performance improvement of HITON-PC with \textsc{deduce-dep} (\Cref{alg:hiton-pc-nonsym-dd}) and PC with \textsc{deduce-dep} (\Cref{alg:pc-dd}). We used semi-synthetic datasets for evaluation. We sampled data instances from three benchmark BNs (i.e., Sachs, Alarm, and Insurance). HITON-PC and PC are both constraint-based; however, the former learns structure from an empty graph, whereas the latter learns structure from a complete graph. 
This experiment allows us to examine the efficacy of our method under different algorithmic behaviors. We note that we focused on the skeleton discovery phase of the PC algorithm for evaluation. 

\paragrapht{Implementation detail.} We employed the $G$-test as the CIT. We set the default significance level for CIT to 0.05. The minimal conditioning set size $K$ is set to 1 for \textsc{deduce-dep}. We primarily utilized the following evaluation metrics: precision, recall, and F1. We consider the output from the causal discovery algorithm with CI oracle as ground truth, and we compare our method's output to assess performance improvement. 
For HITON-PC, we calculated the evaluation score by averaging the score for every local skeleton learned. For PC, we calculated the evaluation score based on the global skeleton learned.

\begin{figure*}
\includegraphics[width=.31\linewidth]{sections/figures/all_working_F1_stdev.pdf}\hfill \includegraphics[width=.31\linewidth]{sections/figures/all_working_Precision_stdev.pdf}\hfill \includegraphics[width=.31\linewidth]{sections/figures/all_working_Recall_stdev.pdf}
\caption{Performances (F1, precision, and recall) with varying dataset sizes. Blue (w/ DD) and red (as-is) lines are for with and without \textsc{deduce-dep}. Darker and lighter lines are for $\alpha=0.05$ and $0.01$, respectively. Error bars are standard deviation.}\label{fig:perf-all-main}
\end{figure*}


\subsection{Results of Correction Experiment} 
\label{sec:experiments-results-correction}

\Cref{table:correction-exp-v20-conf} presents the experimental results on random BN datasets with varying numbers of edges and data instances, where the number of variables is set to 20. 
It is evident that across all the cases listed, applying \textsc{deduce-dep} resulted in higher F1 scores compared to when it was not applied. 
While a significant increase in recall was accompanied by a corresponding decrease in precision, the overall improvement in F1 scores implies that our method does not indiscriminately retain any edge but rather accurately targets low-power tests to correct. However, a decrease in precision might indicate that the control of false positive rates might be important for further improving our method since statistical errors can also occur in CITs with dependence results. We also illustrate the results with similar experiments with the number of variables set to 10 and 30 in \Cref{fig:perf-all-F,fig:perf-all-R,fig:perf-all-P} in \Cref{appendix:experiments-additional}. Furthermore, we present the experimental results on continuous data with linear/non-linear relationships in \Cref{fig:continuous-K1} of \Cref{appendix:experiments-additional}.


One interesting finding from \Cref{table:correction-exp-v20-conf} is that as the number of edges increases and the amount of data decreases, the difference in F1 scores between using and not using our method becomes more pronounced. Considering the experimental setup, CI queries randomly generated from dense graphs are more likely to indicate actual dependence. However, under such graph structures with a small amount of data, the original CIT tends to produce more false negatives due to the decrease in statistical power. In contrast, our method effectively corrects these false negatives, leading to an improvement in F1 scores. This suggests that our approach can be particularly useful in situations with low data volume and dense graph structures.

\subsection{Results of Performance Experiment} 
\label{sec:experiments-results-performance}

In the performance experiment, we investigate how much \textsc{deduce-dep} can enhance the overall performance of causal discovery algorithms by correcting unreliable CITs. As shown in \Cref{fig:perf-all-main} (values are reported in \Cref{table:performance-exp-PC,table:performance-exp-HITON-PC} in \Cref{appendix:experiments-additional}), \textsc{deduce-dep} consistently improves performance in terms of F1 score regardless of the algorithm used, with a significant increase in recall. 

Notably, unlike the previous experiment, the precision when applying \textsc{deduce-dep} is quite comparable to cases where it is not applied.
In cases where \textsc{deduce-dep} was applied, there were instances where both recall and precision increased compared to scenarios without it. This phenomenon can be explained by the interplay of false positives and false negatives \citep{Armen2014EstimationAC}. False positives (in this context, wrongly added edges) occurring in the structure learning process can actually be induced by false negatives from previous steps. False positives, in turn, can incur false negatives (wrongly omitted edges), further propagating errors. Our experiment indicates that \textsc{deduce-dep} might help break this negative cycle to some extent.


\begin{figure}
    \centering
    \includegraphics[width=\linewidth]{sections/figures/all_working_SHD_CI95_1_latexfont.pdf}
    \caption{Results on real-world datasets measured in SHD. Darker and lighter lines are for $\alpha=0.05$ and $0.01$, respectively. Error bars represent 95\% confidence interval.}
    \label{fig:SHD}
\end{figure}

\paragrapht{Evaluation after edge orientation.} So far, our focus has been primarily on the skeleton discovery phase of structure learning algorithms. This is because our method intervenes specifically in the skeleton learning step, while the subsequent steps remain unchanged for algorithms like PC (and HITON-PC focuses solely on finding neighbor sets for individual variables, leaving no further steps after skeleton discovery). To understand the broader impact of our method, we extend our analysis to orient edges after the skeleton learning step of PC and evaluate the resulting structure using the structural Hamming distance (SHD). 

As shown in \Cref{fig:SHD}, \textsc{deduce-dep} indeed leads to improvements in SHD as well, consistent with our previous findings in \Cref{fig:perf-all-main}. This observation can be further illustrated through our motivating example in \Cref{sec:intro}. Suppose the PC algorithm tries to learn the structure in \Cref{fig:explain-deduce-rule}. By correctly retaining the edge between $X$ and $Y$ through deductive reasoning, we can anticipate accurately identifying colliders, thereby facilitating the correct orientation of other edges. This indicates the importance of robust skeleton discovery in enhancing the performance of structure learning.


\begin{figure*}
\hfill\includegraphics[width=.48\linewidth]{sections/figures/all_working_Time_stdev_transposed_stdev_1.pdf}\hfill \includegraphics[width=.48\linewidth]{sections/figures/all_working_CI_number_stdev_transposed_stdev_1.pdf}\hfill\null
\caption{Computational performances (wall-clock time in seconds and the number of CITs) with varying dataset sizes. Blue and red lines are for with and without \textsc{deduce-dep}. Darker and lighter lines are for $\alpha=0.05$ and $0.01$, respectively. Error bars are standard deviation.}
\label{fig:perf-all}
\end{figure*}



\subsection{Analysis}
\label{sec:experiments-analysis}



\begin{figure}
    \centering
    \includegraphics[width=\linewidth]{sections/figures/K_ablation_CI95-new-775-51.pdf}
    \caption{Ablation on $K$. (\textbf{Top}) F1 score. (\textbf{Bottom}) Computational performances measured as wall-clock time (seconds). Error bars represent 95\% confidence interval.}
    \label{fig:ablation-k}
\end{figure}


\paragrapht{Computational cost.}
Applying \textsc{deduce-dep} to causal discovery algorithms may lead to an increase in the total number of CITs performed since it conducts new CITs if needed. In practice, our method efficiently reuses previously conducted CI information during the structure learning process. \Cref{fig:perf-all} demonstrates the computational performance measured in wall-clock time (seconds) and the number of CITs, across varying dataset sizes.
We observe that computational costs are comparable to the original algorithm when data is scarce ($n = 200, 500, 1000$), indicating that our method efficiently corrects unreliable CI statements. 



\paragrapht{Ablation on $K$.} We investigate the effect of the size of the minimal conditioning set $K$ in \textsc{deduce-dep}. As shown in \Cref{fig:ablation-k}, a higher $K$ enhances computational efficiency by reducing the depth of recursion, whereas a lower $K$ tends to improve performance by allowing for more thorough exploration. Our method allows practitioners to tailor this trade-off between computational cost and performance to their specific needs and preferences.


\begin{figure}
\centering
\includegraphics[width=\linewidth]{sections/figures/with_reliability_criterion_F1_CI95_1_latexfont.pdf}
\caption{Performances with the reliability criterion (heuristic power rule) applied, measured in F1 score. Darker and lighter lines are for $\alpha=0.05$ and $0.01$, respectively. Error bars represent 95\% confidence interval.}
\label{fig:with-rc}
\end{figure}


\paragrapht{\textsc{deduce-dep} with reliability criterion.}
We investigate how our method performs in conjunction with the reliability criterion heuristic. Specifically, we applied the heuristic power rule to the HITON-PC with a threshold of 5. As shown in \Cref{fig:with-rc}, our method continues to improve performance over its counterpart. It is worth noting that this may not always be the case, as the reliability criterion heuristic distorts the whole structure learning process by selectively skipping certain CITs necessary for determining the underlying causal structure. We also observe only marginal performance improvement when data is scarce (e.g., \( n = 200 \)). We speculate that this is because skipping CITs based on heuristics deprived our method of opportunities to correct tests, limiting its effectiveness. 
 
\section{Related Works}\label{sec:related}

We categorize prior works addressing unreliable CITs into two main perspectives: namely, \textit{internal} approaches focusing solely on the test itself and \textit{external} approaches taking into account relationships with other tests. 

\subsection{Internal Approaches}
The majority of methods tackling reliability issues of CIT 
involves seeking solutions within the boundary of a single statistical test, such as enhancing the innate performance of the test or omitting the test itself. One prominent example is to employ heuristics for reliability criteria \citep{spirtes:etal00, tsamardinos:06a}, which decides whether to perform or omit the test solely based on the amount of data needed. Although this renders simple measures to tackle the unreliable CITs, this lacks theoretical soundness, and to our knowledge, it is only applicable to discrete data.

Another example involves opting for permutation-based tests over classical asymptotic ones (e.g., $\chi^2$ test, $G$-test) \citep{tsamardinos2010permutation}. As permutation-based tests exhibit better calibration under data-scarce scenarios, various works extend permutation-based CIT to tackle various regimes (e.g., continuous settings), with the prospect of broadly applying it to structure learning \citep{doran2014permutation, lee2017self, berrett2020conditional, zhang2022residual, kim2023conditional}.

Recent research suggested causal structure learning within the confines of low-order CITs \citep{wienobst2020recovering,kocaoglu2024characterization}. By excluding unreliable, high-order CITs, they attempt to yield more robust causal structures. However, such approaches may lack the full specification of a graph structure. 

\subsection{External Approaches} 
Another line of work goes beyond individual tests and considers relationships with other tests, addressing statistical errors of CITs with conflict resolution among inconsistent CI statements. By translating CI statements from data into logical constraints, researchers aim to search for a causal structure consistent with all the encoded constraints using various solvers (e.g., SAT, ASP solvers) \citep{hyttinen2013discovering, hyttinen2014constraint}. 
Another prominent example is utilizing rules from graphoid axioms \citep{bromberg09a, ma2023enabling}. This involves adopting either a preference-based argumentative framework \citep{bromberg09a} or off-the-shelf SMT solver \citep{ma2023enabling}, designed to facilitate reasoning with CI statements.  
However, they often involve complicated routines and high computational costs, limiting their practical applicability.


\subsection{Discussion}
In contrast to prior approaches, our work combines internal and external perspectives, considering the relationships with other CITs but strictly limited to lower-order ones that are deemed reliable.
This strategy allows \textsc{deduce-dep} to selectively apply rules derived from graphoid axioms, providing a more efficient and principled means of constraining higher-order CIT results and avoiding the complicated computation associated with all possible combinations of rules from graphoid axioms.

Additionally, \textsc{deduce-dep} actively executes new CITs, unlike other methods utilizing graphoid axioms. 
This allows us to effectively correct unreliable CITs. For instance, consider the scenario where a structure learning algorithm attempts to learn a causal graph in \Cref{fig:explain-deduce-rule}. Initially, the algorithm performs unconditional independence tests, yielding $( Y \Perp Z'' )$, which we assume to be correct. Subsequently, it no longer examines CI between $Y$ and $Z''$ given any conditioning set, so the result for $(Y \Perp Z'' \mid Z' )$ is not available. In such a case, inferring higher-order CI statements solely based on previously performed lower-order CIT results might be limited, potentially failing to correct $( X \Perp Y \mid Z', Z'')$. Without performing additional tests, like $( Y \Perp Z'' \mid Z' )$, the correction of unreliable CITs can be restricted. This suggests that correcting false negatives might require more than just resolving conflicts among existing CIT results. 

 
\section{Conclusion}\label{sec:conclusion}
We presented \textsc{deduce-dep}, a simple, principled, and practical correction method for addressing unreliable CITs using deductive reasoning. By leveraging rules derived from graphoid axioms, we explored the conditions for deducing high-order CI statements from low-order CI statements and integrated these rules into our algorithm. \textsc{deduce-dep} systematically replaces unreliable independence statements with deductively reasoned dependence statements from lower-order CITs. We showed how our method can be seamlessly integrated into causal discovery algorithms like HITON-PC or PC and provided empirical evidence of its efficacy. Despite its distinct results, there still remains a need to address dependence statements to prevent errors from false positives. Therefore, future research directions may involve combining our method with existing false positive control methods \citep{li2009controlling, strobl2019estimating} for a more robust causal discovery. Another promising future direction is extending our framework to local independence relationships, e.g., context-specific independence \citep{boutilier1996context,pensar2016role,hwang2023discovery}. 

\begin{acknowledgements} 
We would like to thank anonymous reviewers for their constructive comments.
This work was partly supported by the IITP (2022-0-00953-PICA/30\%) and NRF (RS-2023-00222663/30\%, RS-2023-00211904/40\%) grant funded by the Korean government.
\end{acknowledgements}


\bibliography{uai_references}

\newpage
\onecolumn
\title{Causal Discovery with Deductive Reasoning: One Less Problem \\ (Supplementary Material)}
\maketitle

\appendix
\section{Rules Derived from Graphoid Axioms}\label{appendix:graphoid}
We present rules derived from graphoid axioms \citep{pearl:paz87,geiger:90,bromberg09a}.
\[
\begin{aligned}
\text { (Symmetry) } & (\*X \Perp \*Y \mid \*Z) \Longleftrightarrow(\*Y \Perp \*X \mid \*Z) \\
\text { (Decomposition) } & (\*X \Perp \*Y, \*W \mid \*Z) \Longrightarrow(\*X \Perp \*Y \mid \*Z) \wedge(\*X \Perp \*W \mid \*Z) \\
\text { (Weak Union) } & (\*X \Perp \*Y, \*W \mid \*Z) \Longrightarrow(\*X \Perp \*Y \mid \*Z, \*W) \\
\text { (Contraction) } & (\*X \Perp \*Y \mid \*Z) \wedge(\*X \Perp \*W \mid \*Z, \*Y) \Longrightarrow(\*X \Perp \*Y, \*W \mid \*Z) \\
\text { (Intersection) } & (\*X \Perp \*Y \mid \*Z, \*W) \wedge(\*X \Perp \*W \mid \*Z, \*Y) \Longrightarrow(\*X \Perp \*Y, \*W \mid \*Z)
\end{aligned}
\]
Under a (causal) faithfulness assumption, we have a more relaxed set of rules as follows.
\[
\begin{aligned}
\text { (Symmetry) } & (\*X \Perp \*Y \mid \*Z) \Longleftrightarrow(\*Y \Perp \*X \mid \*Z) \\
\text { (Composition) } & (\*X \Perp \*Y \mid \*Z) \wedge(\*X \Perp \*W \mid \*Z)\Longrightarrow (\*X \Perp \*Y, \*W \mid \*Z) \\
\text { (Decomposition) } & (\*X \Perp \*Y, \*W \mid \*Z)\Longrightarrow(\*X \Perp \*Y \mid \*Z) \wedge(\*X \Perp \*W \mid \*Z) \\
\text { (Intersection) } & (\*X \Perp \*Y \mid \*Z, \*W) \wedge(\*X \Perp \*W \mid \*Z, \*Y) \Longrightarrow(\*X \Perp \*Y, \*W \mid \*Z) \\
\text { (Weak Union) } & (\*X \Perp \*Y, \*W \mid \*Z) \Longrightarrow (\*X \Perp \*Y \mid \*Z, \*W) \\
\text { (Contraction) } & (\*X \Perp \*Y \mid \*Z) \wedge(\*X \Perp \*W \mid \*Z, \*Y) \Longrightarrow(\*X \Perp \*Y, \*W \mid \*Z) \\
\text { (Weak Transitivity) } & (\*X \Perp \*Y \mid \*Z) \wedge(\*X \Perp \*Y \mid \*Z, W) \Longrightarrow(\*X \Perp W \mid \*Z) \vee(W \Perp \*Y \mid \*Z) \\
\text { (Chordality) } & (X  \Perp Y \mid W, Z) \wedge(W \Perp Z \mid X , Y) \Longrightarrow(X  \Perp Y \mid W) \vee(X  \Perp Y \mid Z)
\end{aligned}
\]
Each non-bold letter in weak transitivity and chordality is a variable (i.e., a singleton).

\section{Omitted Proofs}\label{appendix:proofs}

\propdeducerule*
\begin{proof}
We prove each item below.

(1) We begin with the definition of the contraction rule:
\begin{align*}
    (\*X \Perp \*Z'' \mid \*Z')\wedge (\*X &\Perp \*Y \mid \*Z',\*Z'') \\ 
    &\implies  (\*X \Perp \*Y,\*Z'' \mid \*Z').
\intertext{By the decomposition rule, we have:}
    &\implies  (\*X \Perp \*Z'' \mid \*Z') \wedge (\*X \Perp \*Y \mid \*Z').
\end{align*}

Now, by taking contraposition, we acquire
\begin{align*}
(\*X \not\Perp \*Z'' \mid \*Z') \vee (\*X &\not\Perp \*Y \mid \*Z') \implies  \\&(\*X \not\Perp \*Z'' \mid \*Z')\vee (\*X \not\Perp \*Y \mid \*Z',\*Z'').
\end{align*}
Therefore, if $(\*X \not\Perp \*Y \mid \*Z')$ and $(\*X \Perp \*Z'' \mid \*Z')$, then $(\*X \not\Perp \*Y \mid \*Z',\*Z'')$.


(2)
By the weak transitivity rule,
\begin{align*}
    (\*X \Perp \*Y \mid \*Z')\wedge (\*X \Perp &\*Y \mid \*Z',\*Z'') \implies \\ 
    &(\*X \Perp \*Z'' \mid \*Z') \vee (\*Y \Perp \*Z'' \mid \*Z').
\end{align*}
By contraposition, 
$(\*X \not\Perp \*Z'' \mid \*Z') \wedge (\*Y \not\Perp \*Z'' \mid \*Z') \implies  (\*X \not\Perp \*Y \mid \*Z')\vee (\*X \not\Perp \*Y \mid \*Z',\*Z'')$. 
Therefore, if \[(\*X\Perp \*Y \mid \*Z')\wedge (\*X \not\Perp \*Z''\mid \*Z')\wedge (\*Y \not\Perp \*Z'' \mid \*Z'),\] 
then $(\*X \not\Perp \*Y \mid \*Z',\*Z'')$.
\end{proof}


\section{Appendix for Experiments}\label{appendix:experiments}

\subsection{Experimental Details}\label{appendix:experiments-details}

\subsubsection{Datasets}

Sachs dataset \citep{sachs2005causal} pertains to protein expression in human immune system cells. It encompasses simultaneous measurements of 11 phosphorylated proteins and phospholipids obtained from thousands of individual primary immune system cells. The dataset comprises 11 vertices representing the different proteins and phospholipids, with a total of 17 edges indicating the relationships between them. On average, each node has a degree of 3.09, with the maximum in-degree being 3.

The ALARM dataset \citep{beinlich1989alarm}, short for ``A Logical Alarm Reduction Mechanism,'' represents a Bayesian network tailored to serve as an alarm message system for patient monitoring. It consists of 37 variables, each representing various factors of patient health and monitoring parameters. These nodes are interconnected by 46 edges, reflecting the relationships between different variables in the network. On average, each node has a degree of 2.49, with the maximum in-degree being 4.

Insurance dataset \citep{binder1997adaptive} is designed for evaluating car insurance risks. It comprises 27 variables representing different features related to insurance risk assessment. These variables are associated with each other by 52 edges.  On average, each node in the Insurance network has a degree of 3.85, with the maximum in-degree being 3.

\subsubsection{Implementation details}
We utilized several semi-synthetic BN datasets from the repository of the R package \texttt{bnlearn} \citep{JSSv035i03}.
All experiments were processed using Intel(R) Xeon(R) Gold 6342 CPU @ 2.80GHz.
Our code is available at \url{https://github.com/snu-causality-lab/deduce-dep}.


\begin{table*}[th]
\centering
\caption{$|\*V|=10$ with Standard Deviation.}\label{table:correction-exp-v10}
\begin{adjustbox}{width=.95\linewidth}
\begin{tabular}{@{}clccccccccc@{}}
\toprule
& Method & \multicolumn{3}{c}{\makecell{$e=12$}} & \multicolumn{3}{c}{\makecell{$e=15$}} & \multicolumn{3}{c}{\makecell{$e=20$}} \\
\cmidrule(r){3-5}
\cmidrule(l){6-8}
\cmidrule(l){9-11}
$N$ & & F1 & Precision & Recall & F1 & Precision & Recall & F1 & Precision & Recall \\ 
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{200}}}
& CIT	& 0.46\stdv{0.19}& 0.92\stdv{0.14}& 0.32\stdv{0.16}	& 0.44\stdv{0.17}& 0.96\stdv{0.09}& 0.30\stdv{0.14}	& 0.51\stdv{0.14}& 0.99\stdv{0.03}& 0.35\stdv{0.12}	\\
& CIT + \textsc{dd}	& 0.65\stdv{0.17}& 0.71\stdv{0.16}& 0.63\stdv{0.21}
& 0.67\stdv{0.12}& 0.82\stdv{0.13}& 0.59\stdv{0.15}
& 0.79\stdv{0.10}& 0.95\stdv{0.06}& 0.70\stdv{0.14}	\\
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{500}}}
& CIT	& 0.57\stdv{0.17}& 0.92\stdv{0.13}& 0.44\stdv{0.17}	& 0.62\stdv{0.14}& 0.96\stdv{0.07}& 0.47\stdv{0.14}	& 0.71\stdv{0.10}& 0.99\stdv{0.04}& 0.56\stdv{0.12}	\\
& CIT + \textsc{dd}& 0.68\stdv{0.13}& 0.69\stdv{0.14}& 0.71\stdv{0.18}
& 0.76\stdv{0.11}& 0.80\stdv{0.13}& 0.73\stdv{0.15}
& 0.87\stdv{0.08}& 0.93\stdv{0.06}& 0.82\stdv{0.13}	\\
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{1000}}}
& CIT	& 0.68\stdv{0.14}& 0.90\stdv{0.11}& 0.56\stdv{0.17}	& 0.70\stdv{0.12}& 0.96\stdv{0.08}& 0.57\stdv{0.14}	& 0.79\stdv{0.09}& 0.99\stdv{0.02}& 0.67\stdv{0.13}	\\
& CIT + \textsc{dd}	& 0.71\stdv{0.11}& 0.66\stdv{0.13}& 0.78\stdv{0.13}
& 0.81\stdv{0.09}& 0.82\stdv{0.12}& 0.82\stdv{0.12}
& 0.91\stdv{0.06}& 0.94\stdv{0.06}& 0.88\stdv{0.10}	\\
\bottomrule
\end{tabular}
\end{adjustbox}
\end{table*}




\begin{table*}[th]
\centering
\caption{$|\*V|=10$ with 95\% Confidence Interval.}\label{table:correction-exp-v10-conf}
\begin{adjustbox}{width=.95\linewidth}
\begin{tabular}{@{}clccccccccc@{}}
\toprule
& Method & \multicolumn{3}{c}{\makecell{$e=12$}} & \multicolumn{3}{c}{\makecell{$e=15$}} & \multicolumn{3}{c}{\makecell{$e=20$}} \\
\cmidrule(r){3-5}
\cmidrule(l){6-8}
\cmidrule(l){9-11}
$N$ & & F1 & Precision & Recall & F1 & Precision & Recall & F1 & Precision & Recall \\ 
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{200}}}
& CIT	& 0.46\stdv{0.05}& 0.92\stdv{0.04}& 0.32\stdv{0.05}
& 0.44\stdv{0.05}& 0.96\stdv{0.02}& 0.30\stdv{0.04}
& 0.51\stdv{0.04}& 0.99\stdv{0.01}& 0.35\stdv{0.03}	\\
& CIT + \textsc{dd}	& 0.65\stdv{0.05}& 0.71\stdv{0.04}& 0.63\stdv{0.06}
& 0.67\stdv{0.03}& 0.82\stdv{0.04}& 0.59\stdv{0.04}
& 0.79\stdv{0.03}& 0.95\stdv{0.02}& 0.70\stdv{0.04}	\\
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{500}}}
& CIT	& 0.57\stdv{0.05}& 0.92\stdv{0.04}& 0.44\stdv{0.05}
& 0.62\stdv{0.04}& 0.96\stdv{0.02}& 0.47\stdv{0.04}
& 0.71\stdv{0.03}& 0.99\stdv{0.01}& 0.56\stdv{0.03}	\\
& CIT + \textsc{dd}& 0.68\stdv{0.03}& 0.69\stdv{0.04}& 0.71\stdv{0.05}
& 0.76\stdv{0.03}& 0.80\stdv{0.03}& 0.73\stdv{0.04}
& 0.87\stdv{0.02}& 0.93\stdv{0.02}& 0.82\stdv{0.03}	\\
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{1000}}}
& CIT	& 0.68\stdv{0.04}& 0.90\stdv{0.03}& 0.56\stdv{0.05}
& 0.70\stdv{0.03}& 0.96\stdv{0.02}& 0.57\stdv{0.04}
& 0.79\stdv{0.03}& 0.99\stdv{0.01}& 0.67\stdv{0.04}	\\
& CIT + \textsc{dd}	& 0.71\stdv{0.03}& 0.66\stdv{0.04}& 0.78\stdv{0.04}
& 0.81\stdv{0.03}& 0.82\stdv{0.03}& 0.82\stdv{0.03}
& 0.91\stdv{0.02}& 0.94\stdv{0.02}& 0.88\stdv{0.03}	\\
\bottomrule
\end{tabular}
\end{adjustbox}
\end{table*}


\begin{table*}[t!]
\centering
\caption{$|\*V|=20$ with Standard Deviation.}\label{table:correction-exp-v20}
\begin{adjustbox}{width=0.95\linewidth}
\begin{tabular}{@{}clccccccccc@{}}
\toprule
& Method & \multicolumn{3}{c}{\makecell{$e=24$}} & \multicolumn{3}{c}{\makecell{$e=30$}} & \multicolumn{3}{c}{\makecell{$e=40$}} \\
\cmidrule(r){3-5}
\cmidrule(l){6-8}
\cmidrule(l){9-11}
$N$ & & F1 & Precision & Recall & F1 & Precision & Recall & F1 & Precision & Recall \\ 
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{200}}}
& CIT	& 0.26\stdv{0.21}& 0.63\stdv{0.41}& 0.18\stdv{0.16}
& 0.28\stdv{0.16}& 0.79\stdv{0.33}& 0.18\stdv{0.11}
& 0.30\stdv{0.13}& 0.92\stdv{0.22}& 0.19\stdv{0.09}	\\
& CIT + \textsc{dd}	& 0.59\stdv{0.11}& 0.50\stdv{0.13}& 0.76\stdv{0.14}
& 0.70\stdv{0.10}& 0.69\stdv{0.13}& 0.73\stdv{0.12}
& 0.78\stdv{0.10}& 0.86\stdv{0.11}& 0.73\stdv{0.12}\\
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{500}}}
& CIT	& 0.40\stdv{0.17}& 0.78\stdv{0.28}& 0.29\stdv{0.14}
& 0.39\stdv{0.17}& 0.84\stdv{0.27}& 0.26\stdv{0.13}
& 0.41\stdv{0.11}& 0.97\stdv{0.08}& 0.26\stdv{0.09}	\\
& CIT + \textsc{dd}	& 0.60\stdv{0.12}& 0.50\stdv{0.13}& 0.78\stdv{0.14}
& 0.71\stdv{0.13}& 0.66\stdv{0.16}& 0.80\stdv{0.14}
& 0.82\stdv{0.09}& 0.86\stdv{0.10}& 0.78\stdv{0.12}\\
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{1000}}}
& CIT	& 0.50\stdv{0.18}& 0.86\stdv{0.20}& 0.37\stdv{0.19}
& 0.47\stdv{0.16}& 0.89\stdv{0.20}& 0.33\stdv{0.13}
& 0.49\stdv{0.14}& 0.97\stdv{0.09}& 0.34\stdv{0.12}	\\
& CIT + \textsc{dd}	& 0.61\stdv{0.13}& 0.50\stdv{0.14}& 0.81\stdv{0.14}
& 0.74\stdv{0.09}& 0.70\stdv{0.12}& 0.82\stdv{0.12}
& 0.83\stdv{0.07}& 0.86\stdv{0.09}& 0.81\stdv{0.10}	\\
\bottomrule
\end{tabular}
\end{adjustbox}
\end{table*}


\begin{table*}[th]
\centering
\caption{$|\*V|=30$ with Standard Deviation.}\label{table:correction-exp-v30}
\begin{adjustbox}{width=.95\linewidth}
\begin{tabular}{@{}clccccccccc@{}}
\toprule
& Method & \multicolumn{3}{c}{\makecell{$e=36$}} & \multicolumn{3}{c}{\makecell{$e=45$}} & \multicolumn{3}{c}{\makecell{$e=60$}} \\
\cmidrule(r){3-5}
\cmidrule(l){6-8}
\cmidrule(l){9-11}
$N$ & & F1 & Precision & Recall & F1 & Precision & Recall & F1 & Precision & Recall \\ 
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{200}}}
& CIT	& 0.24\stdv{0.21}& 0.54\stdv{0.43}& 0.17\stdv{0.16}
& 0.23\stdv{0.14}& 0.69\stdv{0.36}& 0.14\stdv{0.10}
& 0.23\stdv{0.13}& 0.83\stdv{0.32}& 0.14\stdv{0.09}	\\
& CIT + \textsc{dd}	& 0.51\stdv{0.14}& 0.38\stdv{0.13}& 0.84\stdv{0.15}
& 0.64\stdv{0.13}& 0.57\stdv{0.16}& 0.77\stdv{0.13}
& 0.80\stdv{0.09}& 0.80\stdv{0.11}& 0.81\stdv{0.11}	\\
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{500}}}
& CIT	& 0.32\stdv{0.24}& 0.60\stdv{0.38}& 0.24\stdv{0.21}
& 0.30\stdv{0.17}& 0.77\stdv{0.32}& 0.19\stdv{0.12}
& 0.32\stdv{0.13}& 0.90\stdv{0.21}& 0.20\stdv{0.10}	\\
& CIT + \textsc{dd}	& 0.51\stdv{0.14}& 0.38\stdv{0.13}& 0.85\stdv{0.14}
& 0.67\stdv{0.11}& 0.58\stdv{0.15}& 0.83\stdv{0.10}
& 0.80\stdv{0.09}& 0.78\stdv{0.12}& 0.83\stdv{0.09}\\
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{1000}}}
& CIT	& 0.38\stdv{0.21}& 0.69\stdv{0.34}& 0.27\stdv{0.18}
& 0.36\stdv{0.15}& 0.80\stdv{0.27}& 0.25\stdv{0.12}
& 0.39\stdv{0.13}& 0.92\stdv{0.20}& 0.25\stdv{0.10}	\\
& CIT + \textsc{dd}	& 0.54\stdv{0.15}& 0.41\stdv{0.15}& 0.87\stdv{0.14}
& 0.65\stdv{0.12}& 0.56\stdv{0.15}& 0.83\stdv{0.12}
& 0.81\stdv{0.08}& 0.79\stdv{0.12}& 0.86\stdv{0.09}	\\
\bottomrule
\end{tabular}
\end{adjustbox}
\end{table*}



\begin{table*}[th]
\centering
\caption{$|\*V|=30$ with 95\% Confidence Interval.}\label{table:correction-exp-v30-conf}
\begin{adjustbox}{width=.95\linewidth}
\begin{tabular}{@{}clccccccccc@{}}
\toprule
& Method & \multicolumn{3}{c}{\makecell{$e=36$}} & \multicolumn{3}{c}{\makecell{$e=45$}} & \multicolumn{3}{c}{\makecell{$e=60$}} \\
\cmidrule(r){3-5}
\cmidrule(l){6-8}
\cmidrule(l){9-11}
$N$ & & F1 & Precision & Recall & F1 & Precision & Recall & F1 & Precision & Recall \\ 
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{200}}}
& CIT	& 0.24\stdv{0.06}& 0.54\stdv{0.12}& 0.17\stdv{0.04}
& 0.23\stdv{0.04}& 0.69\stdv{0.10}& 0.14\stdv{0.03}
& 0.23\stdv{0.04}& 0.83\stdv{0.09}& 0.14\stdv{0.02}	\\
& CIT + \textsc{dd}	& 0.51\stdv{0.04}& 0.38\stdv{0.03}& 0.84\stdv{0.04}
& 0.64\stdv{0.04}& 0.57\stdv{0.04}& 0.77\stdv{0.04}
& 0.80\stdv{0.02}& 0.80\stdv{0.03}& 0.81\stdv{0.03}	\\
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{500}}}
& CIT	& 0.32\stdv{0.07}& 0.60\stdv{0.10}& 0.24\stdv{0.06}
& 0.30\stdv{0.05}& 0.77\stdv{0.09}& 0.19\stdv{0.03}
& 0.32\stdv{0.04}& 0.90\stdv{0.06}& 0.20\stdv{0.03}	\\
& CIT + \textsc{dd}	& 0.51\stdv{0.04}& 0.38\stdv{0.04}& 0.85\stdv{0.04}
& 0.67\stdv{0.03}& 0.58\stdv{0.04}& 0.83\stdv{0.03}
& 0.80\stdv{0.02}& 0.78\stdv{0.03}& 0.83\stdv{0.02}\\
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{1000}}}
& CIT	& 0.38\stdv{0.06}& 0.69\stdv{0.09}& 0.27\stdv{0.05}
& 0.36\stdv{0.04}& 0.80\stdv{0.07}& 0.25\stdv{0.03}
& 0.39\stdv{0.03}& 0.92\stdv{0.05}& 0.25\stdv{0.03}	\\
& CIT + \textsc{dd}	& 0.54\stdv{0.04}& 0.41\stdv{0.04}& 0.87\stdv{0.04}
& 0.65\stdv{0.03}& 0.56\stdv{0.04}& 0.83\stdv{0.03}
& 0.81\stdv{0.02}& 0.79\stdv{0.03}& 0.86\stdv{0.03}	\\
\bottomrule
\end{tabular}
\end{adjustbox}
\end{table*}


\begin{figure*}
\hfill\includegraphics[height=.29\textheight]{sections/figures/fn_F1_CI95.pdf}\hfill \includegraphics[height=.29\textheight]{sections/figures/fn_F1_stdev.pdf}\hfill\null
\caption{Results of Correction Experiment (F1). Left: 95\% Confidence Interval; Right: Standard Deviation.}\label{fig:perf-all-F}
\end{figure*}

\begin{figure*}
\hfill\includegraphics[height=.29\textheight]{sections/figures/fn_Precision_CI95.pdf}\hfill \includegraphics[height=.29\textheight]{sections/figures/fn_Precision_stdev.pdf}\hfill\null
\caption{Results of Correction Experiment (Precision). Left: 95\% Confidence Interval; Right: Standard Deviation.}\label{fig:perf-all-P}
\end{figure*}

\begin{figure*}
\hfill\includegraphics[height=.29\textheight]{sections/figures/fn_Recall_CI95.pdf}\hfill \includegraphics[height=.29\textheight]{sections/figures/fn_Recall_stdev.pdf}\hfill\null
\caption{Results of Correction Experiment (Recall). Left: 95\% Confidence Interval; Right: Standard Deviation.}\label{fig:perf-all-R}
\end{figure*}








\begin{table*}[t!]
\centering
\caption{Results of Performance Experiment (PC).}\label{table:performance-exp-PC}
\begin{adjustbox}{width=0.95\linewidth}
\begin{tabular}{@{}clccccccccc@{}}
\toprule
& Method & \multicolumn{3}{c}{\makecell{Alarm}} & \multicolumn{3}{c}{\makecell{Sachs}} & \multicolumn{3}{c}{\makecell{Insurance}} \\
\cmidrule(r){3-5}
\cmidrule(l){6-8}
\cmidrule(l){9-11}
$N$ & & F1 & Precision & Recall & F1 & Precision & Recall & F1 & Precision & Recall \\ 
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{200}}}
& PC	& 0.41\stdv{0.04}	& 0.87\stdv{0.05}	& 0.27\stdv{0.03}	& 0.40\stdv{0.11}	& 0.92\stdv{0.12}	& 0.26\stdv{0.09}	& 0.37\stdv{0.03}	& 0.85\stdv{0.05}	& 0.24\stdv{0.02}	\\
& PC + \textsc{dd}	& 0.49\stdv{0.08}& 0.91\stdv{0.07}& 0.34\stdv{0.07}& 0.48\stdv{0.08}& 0.86\stdv{0.10}& 0.34\stdv{0.08}& 0.41\stdv{0.03}& 0.91\stdv{0.04}& 0.26\stdv{0.03}	\\
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{500}}}
& PC	& 0.50\stdv{0.07}	& 0.90\stdv{0.03}	& 0.35\stdv{0.06}	& 0.54\stdv{0.06}	& 1.00\stdv{0.00}	& 0.37\stdv{0.06}	& 0.46\stdv{0.04}	& 0.95\stdv{0.05}	& 0.30\stdv{0.03}	\\
& PC + \textsc{dd}	& 0.56\stdv{0.05}& 0.91\stdv{0.02}& 0.40\stdv{0.05}& 0.74\stdv{0.08}& 1.00\stdv{0.00}& 0.60\stdv{0.10}& 0.53\stdv{0.04}& 0.96\stdv{0.05}& 0.37\stdv{0.03}	\\
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{1000}}}
& PC	& 0.68\stdv{0.04}	& 0.97\stdv{0.05}	& 0.52\stdv{0.04}	& 0.72\stdv{0.07}	& 1.00\stdv{0.00}	& 0.57\stdv{0.09}	& 0.53\stdv{0.03}	& 1.00\stdv{0.00}	& 0.36\stdv{0.02}	\\
& PC + \textsc{dd}	& 0.71\stdv{0.05}& 0.96\stdv{0.05}& 0.57\stdv{0.05}& 0.78\stdv{0.06}& 1.00\stdv{0.00}& 0.64\stdv{0.08}& 0.59\stdv{0.03}& 1.00\stdv{0.00}& 0.41\stdv{0.03}	\\
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{2000}}}
& PC	& 0.78\stdv{0.03}	& 1.00\stdv{0.00}	& 0.64\stdv{0.04}	& 0.65\stdv{0.10}	& 1.00\stdv{0.00}	& 0.49\stdv{0.10}	& 0.55\stdv{0.04}	& 1.00\stdv{0.00}	& 0.38\stdv{0.03}	\\
& PC + \textsc{dd}	& 0.81\stdv{0.03}& 1.00\stdv{0.01}& 0.68\stdv{0.04}& 0.88\stdv{0.05}& 1.00\stdv{0.00}& 0.78\stdv{0.08}& 0.63\stdv{0.04}& 1.00\stdv{0.00}& 0.47\stdv{0.04}	\\
\bottomrule
\end{tabular}
\end{adjustbox}
\end{table*}


\begin{table*}[t!]
\centering
\caption{Results of Performance Experiment (HITON-PC).}\label{table:performance-exp-HITON-PC}
\begin{adjustbox}{width=0.95\linewidth}
\begin{tabular}{@{}clccccccccc@{}}
\toprule
& Method & \multicolumn{3}{c}{\makecell{Alarm}} & \multicolumn{3}{c}{\makecell{Sachs}} & \multicolumn{3}{c}{\makecell{Insurance}} \\
\cmidrule(r){3-5}
\cmidrule(l){6-8}
\cmidrule(l){9-11}
$N$ & & F1 & Precision & Recall & F1 & Precision & Recall & F1 & Precision & Recall \\ 
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{200}}}
& HITON	& 0.52\stdv{0.01}& 0.75\stdv{0.02}& 0.44\stdv{0.01} & 0.64\stdv{0.02}& 0.91\stdv{0.04}& 0.54\stdv{0.02}	& 0.38\stdv{0.01}& 0.75\stdv{0.02}& 0.29\stdv{0.01}	\\
& HITON + \textsc{dd}	& 0.55\stdv{0.01}& 0.77\stdv{0.01}& 0.48\stdv{0.01}& 0.69\stdv{0.02}& 0.88\stdv{0.01}& 0.61\stdv{0.02}& 0.42\stdv{0.01}& 0.77\stdv{0.02}& 0.33\stdv{0.01}	\\
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{500}}}
& HITON	& 0.58\stdv{0.01}& 0.83\stdv{0.02}& 0.50\stdv{0.01}	& 0.75\stdv{0.00}& 0.92\stdv{0.01}& 0.66\stdv{0.00}	& 0.46\stdv{0.01}& 0.85\stdv{0.01}& 0.35\stdv{0.01}	\\
& HITON + \textsc{dd}	& 0.66\stdv{0.01}& 0.88\stdv{0.01}& 0.57\stdv{0.02}& 0.78\stdv{0.02}& 0.93\stdv{0.02}& 0.70\stdv{0.02}& 0.49\stdv{0.01}& 0.85\stdv{0.02}& 0.38\stdv{0.01}	\\
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{1000}}}
& HITON	& 0.71\stdv{0.01}& 0.90\stdv{0.01}& 0.64\stdv{0.01}	& 0.78\stdv{0.02}& 0.99\stdv{0.00}& 0.67\stdv{0.02}	& 0.48\stdv{0.01}& 0.87\stdv{0.01}& 0.36\stdv{0.01}	\\
& HITON + \textsc{dd}	& 0.72\stdv{0.01}& 0.88\stdv{0.01}& 0.65\stdv{0.01}& 0.83\stdv{0.02}& 0.99\stdv{0.00}& 0.74\stdv{0.03}& 0.52\stdv{0.01}& 0.88\stdv{0.01}& 0.40\stdv{0.01}	\\
\midrule
\multirow{2}{*}{\rotatebox{0}{\makecell{2000}}}
& HITON	& 0.77\stdv{0.00}& 0.95\stdv{0.01}& 0.69\stdv{0.00}	& 0.86\stdv{0.00}& 1.00\stdv{0.00}& 0.77\stdv{0.00}	& 0.51\stdv{0.01}& 0.90\stdv{0.01}& 0.38\stdv{0.01}	\\
& HITON + \textsc{dd}	& 0.78\stdv{0.01}& 0.94\stdv{0.01}& 0.72\stdv{0.01}& 0.89\stdv{0.01}& 1.00\stdv{0.00}& 0.83\stdv{0.01}& 0.54\stdv{0.01}& 0.90\stdv{0.01}& 0.42\stdv{0.01}	\\
\bottomrule
\end{tabular}
\end{adjustbox}
\end{table*}



\subsection{Additional Experimental Results}\label{appendix:experiments-additional}

We provide all experimental results with varying numbers of nodes ($|\*V|=10, 20, 30$) in \Cref{fig:perf-all-F,fig:perf-all-P,fig:perf-all-R} and \Cref{table:correction-exp-v10,table:correction-exp-v10-conf,table:correction-exp-v20,table:correction-exp-v30,table:correction-exp-v30-conf,table:performance-exp-PC,table:performance-exp-HITON-PC}.


\clearpage\newpage

\subsection{Results of Correction Experiment on Continuous Data}
\begin{figure*}
\centering
\includegraphics[width=\linewidth]{sections/figures/new_fn_all_metrics_1.pdf}
\caption{Results of correction experiments on continuous data with linear/non-linear relationships and different graph topologies, Erd\H{o}s--R\'enyi (ER) and Scale-Free (SF). Error bars represent 95\% confidence interval.}
\label{fig:continuous-K1}
\end{figure*}


Although our experiments primarily focused on discrete data, it is worth noting that our method is versatile and applicable to various data types. To demonstrate this, we conducted additional evaluations using continuous data exhibiting both linear and non-linear relationships. We present the experimental results in \Cref{fig:continuous-K1}. We constructed DAGs with 10 variables and 24 edges, adopting both Erd\H{o}s--R\'enyi (ER) and Scale-Free (SF) models for DAG topologies. In parameterizing the Bayesian networks, we explored both linear and non-linear models. Specifically, we utilized multi-layer perceptrons to conduct non-linear parameterizations. For performing CIT, we employed partial correlation for linear settings and kernel-based CIT (KCI) \citep{zhang2012kernel} for non-linear settings. We employed a Gaussian kernel for KCI and set kernel width using the median heuristic.\footnote{We adopted KCI implementation from causal-learn Python package \citep{zheng2024causal}.}

Similar to our evaluations on discrete data, \Cref{fig:continuous-K1} illustrates that \textsc{deduce-dep} has the ability to effectively correct underpowered tests in a continuous data setting as well. This is evidenced by the increased recall and competitive performance in terms of precision. Notably, this improvement was consistently observed across different DAG topologies and Bayesian network parameterizations. This suggests that our method holds promise for a broader range of applications beyond discrete data analysis.




\clearpage\newpage
\section{Recursive Relationships Induced by \textsc{deduce-dep}}
\begin{figure*}[h!]
\tiny\centering
\begin{adjustbox}{height=.75\textheight}\rotatebox{90}{
\begin{tikzpicture}[x=30cm,y=6cm]
\node (BC-A) at (-0.19444444444444442,1) {$(B{\Perp} C|A)$};
\node (XA-BC) at (0.33333333333333337,2) {$(X{\Perp} A|BC)$};
\node (XC-) at (-0.15000000000000002,0) {$(X{\Perp} C)$};
\node (XA-B) at (-0.08333333333333331,1) {$(X{\Perp} A|B)$};
\node (AB-) at (0.25,0) {$(A{\Perp} B)$};
\node (XY-AC) at (-0.1111111111111111,2) {$(X{\Perp} Y|AC)$};
\node (AC-B) at (0.13888888888888884,1) {$(A{\Perp} C|B)$};
\node (YB-A) at (-0.3055555555555556,1) {$(Y{\Perp} B|A)$};
\node (YC-) at (0.15000000000000002,0) {$(Y{\Perp} C)$};
\node (XY-) at (-0.45,0) {$(X{\Perp} Y)$};
\node (AB-C) at (0.4722222222222222,1) {$(A{\Perp} B|C)$};
\node (YA-BC) at (0.4444444444444444,2) {$(Y{\Perp} A|BC)$};
\node (YA-B) at (0.02777777777777779,1) {$(Y{\Perp} A|B)$};
\node (XC-A) at (-0.3611111111111111,1) {$(X{\Perp} C|A)$};
\node (XY-ABC) at (0.0,3) {$(X{\Perp} Y|ABC)$};
\node (XY-C) at (0.19444444444444442,1) {$(X{\Perp} Y|C)$};
\node (XC-B) at (-0.02777777777777779,1) {$(X{\Perp} C|B)$};
\node (XB-AC) at (0.0,2) {$(X{\Perp} B|AC)$};
\node (XC-AB) at (-0.33333333333333337,2) {$(X{\Perp} C|AB)$};
\node (XB-) at (-0.25,0) {$(X{\Perp} B)$};
\node (YC-A) at (-0.25,1) {$(Y{\Perp} C|A)$};
\node (XA-) at (-0.35,0) {$(X{\Perp} A)$};
\node (XY-A) at (-0.4722222222222222,1) {$(X{\Perp} Y|A)$};
\node (BC-) at (0.44999999999999996,0) {$(B{\Perp} C)$};
\node (AC-) at (0.35,0) {$(A{\Perp} C)$};
\node (YC-B) at (0.08333333333333337,1) {$(Y{\Perp} C|B)$};
\node (XY-BC) at (0.2222222222222222,2) {$(X{\Perp} Y|BC)$};
\node (YB-AC) at (0.11111111111111116,2) {$(Y{\Perp} B|AC)$};
\node (YC-AB) at (-0.2222222222222222,2) {$(Y{\Perp} C|AB)$};
\node (XY-B) at (-0.1388888888888889,1) {$(X{\Perp} Y|B)$};
\node (XY-AB) at (-0.4444444444444444,2) {$(X{\Perp} Y|AB)$};
\node (XB-C) at (0.3055555555555556,1) {$(X{\Perp} B|C)$};
\node (XA-C) at (0.25,1) {$(X{\Perp} A|C)$};
\node (YB-) at (0.050000000000000044,0) {$(Y{\Perp} B)$};
\node (YA-) at (-0.04999999999999999,0) {$(Y{\Perp} A)$};
\node (XB-A) at (-0.4166666666666667,1) {$(X{\Perp} B|A)$};
\node (YB-C) at (0.41666666666666663,1) {$(Y{\Perp} B|C)$};
\node (YA-C) at (0.36111111111111116,1) {$(Y{\Perp} A|C)$};
\draw[-,betterred](XY-ABC) -- (XY-BC);
\draw[-,betterred](XY-ABC) -- (XA-BC);
\draw[-,betterred](XY-ABC) -- (YA-BC);
\draw[-,betterblue](XY-ABC) -- (XY-AC);
\draw[-,betterblue](XY-ABC) -- (XB-AC);
\draw[-,betterblue](XY-ABC) -- (YB-AC);
\draw[-,bettergreen](XY-ABC) -- (XY-AB);
\draw[-,bettergreen](XY-ABC) -- (XC-AB);
\draw[-,bettergreen](XY-ABC) -- (YC-AB);
\draw[-,betterred](XY-BC) -- (XY-C);
\draw[-,betterred](XY-BC) -- (XB-C);
\draw[-,betterred](XY-BC) -- (YB-C);
\draw[-,betterblue](XY-BC) -- (XY-B);
\draw[-,betterblue](XY-BC) -- (XC-B);
\draw[-,betterblue](XY-BC) -- (YC-B);
\draw[-,betterred](XA-BC) -- (XA-C);
\draw[-,betterred](XA-BC) -- (XB-C);
\draw[-,betterred](XA-BC) -- (AB-C);
\draw[-,betterblue](XA-BC) -- (XA-B);
\draw[-,betterblue](XA-BC) -- (XC-B);
\draw[-,betterblue](XA-BC) -- (AC-B);
\draw[-,betterred](YA-BC) -- (YA-C);
\draw[-,betterred](YA-BC) -- (YB-C);
\draw[-,betterred](YA-BC) -- (AB-C);
\draw[-,betterblue](YA-BC) -- (YA-B);
\draw[-,betterblue](YA-BC) -- (YC-B);
\draw[-,betterblue](YA-BC) -- (AC-B);
\draw[-,betterred](XY-AC) -- (XY-C);
\draw[-,betterred](XY-AC) -- (XA-C);
\draw[-,betterred](XY-AC) -- (YA-C);
\draw[-,betterblue](XY-AC) -- (XY-A);
\draw[-,betterblue](XY-AC) -- (XC-A);
\draw[-,betterblue](XY-AC) -- (YC-A);
\draw[-,betterred](XB-AC) -- (XB-C);
\draw[-,betterred](XB-AC) -- (XA-C);
\draw[-,betterred](XB-AC) -- (AB-C);
\draw[-,betterblue](XB-AC) -- (XB-A);
\draw[-,betterblue](XB-AC) -- (XC-A);
\draw[-,betterblue](XB-AC) -- (BC-A);
\draw[-,betterred](YB-AC) -- (YB-C);
\draw[-,betterred](YB-AC) -- (YA-C);
\draw[-,betterred](YB-AC) -- (AB-C);
\draw[-,betterblue](YB-AC) -- (YB-A);
\draw[-,betterblue](YB-AC) -- (YC-A);
\draw[-,betterblue](YB-AC) -- (BC-A);
\draw[-,betterred](XY-AB) -- (XY-B);
\draw[-,betterred](XY-AB) -- (XA-B);
\draw[-,betterred](XY-AB) -- (YA-B);
\draw[-,betterblue](XY-AB) -- (XY-A);
\draw[-,betterblue](XY-AB) -- (XB-A);
\draw[-,betterblue](XY-AB) -- (YB-A);
\draw[-,betterred](XC-AB) -- (XC-B);
\draw[-,betterred](XC-AB) -- (XA-B);
\draw[-,betterred](XC-AB) -- (AC-B);
\draw[-,betterblue](XC-AB) -- (XC-A);
\draw[-,betterblue](XC-AB) -- (XB-A);
\draw[-,betterblue](XC-AB) -- (BC-A);
\draw[-,betterred](YC-AB) -- (YC-B);
\draw[-,betterred](YC-AB) -- (YA-B);
\draw[-,betterred](YC-AB) -- (AC-B);
\draw[-,betterblue](YC-AB) -- (YC-A);
\draw[-,betterblue](YC-AB) -- (YB-A);
\draw[-,betterblue](YC-AB) -- (BC-A);
\draw[-,betterred](XY-C) -- (XY-);
\draw[-,betterred](XY-C) -- (XC-);
\draw[-,betterred](XY-C) -- (YC-);
\draw[-,betterred](XB-C) -- (XB-);
\draw[-,betterred](XB-C) -- (XC-);
\draw[-,betterred](XB-C) -- (BC-);
\draw[-,betterred](YB-C) -- (YB-);
\draw[-,betterred](YB-C) -- (YC-);
\draw[-,betterred](YB-C) -- (BC-);
\draw[-,betterred](XY-B) -- (XY-);
\draw[-,betterred](XY-B) -- (XB-);
\draw[-,betterred](XY-B) -- (YB-);
\draw[-,betterred](XC-B) -- (XC-);
\draw[-,betterred](XC-B) -- (XB-);
\draw[-,betterred](XC-B) -- (BC-);
\draw[-,betterred](YC-B) -- (YC-);
\draw[-,betterred](YC-B) -- (YB-);
\draw[-,betterred](YC-B) -- (BC-);
\draw[-,betterred](XA-C) -- (XA-);
\draw[-,betterred](XA-C) -- (XC-);
\draw[-,betterred](XA-C) -- (AC-);
\draw[-,betterred](AB-C) -- (AB-);
\draw[-,betterred](AB-C) -- (AC-);
\draw[-,betterred](AB-C) -- (BC-);
\draw[-,betterred](XA-B) -- (XA-);
\draw[-,betterred](XA-B) -- (XB-);
\draw[-,betterred](XA-B) -- (AB-);
\draw[-,betterred](AC-B) -- (AC-);
\draw[-,betterred](AC-B) -- (AB-);
\draw[-,betterred](AC-B) -- (BC-);
\draw[-,betterred](YA-C) -- (YA-);
\draw[-,betterred](YA-C) -- (YC-);
\draw[-,betterred](YA-C) -- (AC-);
\draw[-,betterred](YA-B) -- (YA-);
\draw[-,betterred](YA-B) -- (YB-);
\draw[-,betterred](YA-B) -- (AB-);
\draw[-,betterred](XY-A) -- (XY-);
\draw[-,betterred](XY-A) -- (XA-);
\draw[-,betterred](XY-A) -- (YA-);
\draw[-,betterred](XC-A) -- (XC-);
\draw[-,betterred](XC-A) -- (XA-);
\draw[-,betterred](XC-A) -- (AC-);
\draw[-,betterred](YC-A) -- (YC-);
\draw[-,betterred](YC-A) -- (YA-);
\draw[-,betterred](YC-A) -- (AC-);
\draw[-,betterred](XB-A) -- (XB-);
\draw[-,betterred](XB-A) -- (XA-);
\draw[-,betterred](XB-A) -- (AB-);
\draw[-,betterred](BC-A) -- (BC-);
\draw[-,betterred](BC-A) -- (AB-);
\draw[-,betterred](BC-A) -- (AC-);
\draw[-,betterred](YB-A) -- (YB-);
\draw[-,betterred](YB-A) -- (YA-);
\draw[-,betterred](YB-A) -- (AB-);

\end{tikzpicture}}\end{adjustbox}
\caption{Recursive relationships induced by \textsc{deduce-dep}. Note, however, that not all recursion is necessary due to early stopping and/or conditional dependence. Given one higher-order test, the three one-less-order tests connected with the same colored edges form a single condition to deduce the dependence of the higher-order test.}
\label{fig:call-tree}
\end{figure*}

In theory, the worst-case complexity of \textsc{deduce-dep} is exponential with respect to the worst-case depth. The worst-case scenario can be understood through a hypothetical call-tree exemplified in \Cref{fig:call-tree}, where every CIT invokes recursive calls. However, we emphasize that this is generally not the case in practice. As shown in \Cref{fig:perf-all}, the additional computational cost is not prohibitive due to cached previous CIT results and early stopping (i.e., not meeting conditions for deduction).
As a side note, we can introduce a new hyperparameter $H$ which limits the maximum depth of recursion. The worst-case depth would then depend on $H$, involving the trade-off between efficiency (lower $H$) and accuracy (higher $H$).

 

\end{document}
