\documentclass[accepted,american,hidelinks]{uai2023}

\usepackage[american]{babel}

\emergencystretch=1.4em
%\overfullrule=2mm

\usepackage{balance}


\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    %\bibliographystyle{abbrvnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}

\usepackage{url}            % simple URL typesetting
\usepackage{booktabs}       % professional-quality tables
\usepackage{amsfonts}       % blackboard math symbols
\usepackage{nicefrac}       % compact symbols for 1/2, etc.
\usepackage{microtype}      % microtypography
\usepackage{xcolor}         % colors


\usepackage{mathtools}

\usepackage{stfloats}

%\usepackage[colorlinks=true,linkcolor=black,citecolor=black,urlcolor=black]{hyperref}       % hyperlinks
\makeatletter
\def\Hy@Warning#1{} % squelch hyperref
\makeatother

\def\dense{\medmuskip=2.0mu plus 2.0mu minus 2.0mu
\thinmuskip=2.0mu
\thickmuskip=2.0mu plus 5.0mu}


\let\oldparagraph\paragraph
\def\paragraph#1{\oldparagraph{#1.}}

\usepackage{relsize}

\def\CC{C\nolinebreak[4]\hspace{-.05em}\raisebox{.4ex}{\relsize{-2}{\textbf{++}}}}



\usepackage{calc}
\usepackage{enumitem}
\usepackage{float}


\usepackage{amsmath}
\usepackage{amsthm}

\usepackage[capitalize,noabbrev]{cleveref}


\newtheorem{theorem}{Theorem}
%\newtheorem{definition}{Definition}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{observation}[theorem]{Observation}


\let\originalleft\left
\let\originalright\right
\renewcommand{\left}{\mathopen{}\mathclose\bgroup\originalleft}
\renewcommand{\right}{\aftergroup\egroup\originalright}



\usepackage{bbm}
\usepackage{caption}
\usepackage{pgfplots}
\usepackage{tikz}
\usetikzlibrary{trees, shapes}
\usetikzlibrary{shapes,decorations,arrows,calc,arrows.meta,fit,positioning}
\usetikzlibrary{shadows,shadings,shapes.symbols}
\tikzset{
    double color fill/.code 2 args={
        \pgfdeclareverticalshading[%
        tikz@axis@top,tikz@axis@middle,tikz@axis@bottom%
        ]{diagonalfill}{100bp}{%
            color(0bp)=(tikz@axis@bottom);
            color(50bp)=(tikz@axis@bottom);
            color(50bp)=(tikz@axis@middle);
            color(50bp)=(tikz@axis@top);
            color(100bp)=(tikz@axis@top)
        }
        \tikzset{shade, left color=#1, right color=#2, shading=diagonalfill}
    }
}
\usepgfplotslibrary{fillbetween}
\usepackage{graphicx}
\usetikzlibrary{decorations.pathreplacing, matrix}

\usepackage[boxed, linesnumbered, noend, noline]{algorithm2e}
\SetKwInput{KwData}{Input}
\SetKwInput{KwResult}{Output}


\newcommand{\wxstar}[1]{\vW^\star_{{#1}}}
\newcommand{\wx}[1]{\vW_{{#1}}}
\newcommand{\wxj}[2]{\vW_{{#1},{#2}}}
\newcommand{\wtx}[1]{\overline{\vW}_{{#1}}}
\newcommand{\wq}{\sum_{j=1}^{s-1} 2^{j/s-1} w_j}


\renewcommand\log{\ln}
\newcommand\disteq{\sim}
\newcommand\dist{\mathrm{dist}}

\newcommand\SPIV{{\tt SPIV}}
\newcommand\DD{{DD}}
\newcommand\SCOMP{{SCOMP}}
\newcommand\COMP{{COMP}}


\newcommand{\fB}{\mathfrak B}
\newcommand{\fT}{\mathfrak T}
\newcommand{\fZ}{\mathfrak Z}

\newcommand{\va}{\vec a}
\newcommand{\vA}{\vec A}
\newcommand{\vg}{\vec g}
\newcommand{\vh}{\vec h}
\newcommand{\vd}{\vec d}
\newcommand{\vD}{\vec D}
\newcommand{\vt}{\vec t}
\newcommand{\vw}{\vec w}
\newcommand{\vc}{\vec c}
\newcommand{\vs}{\vec s}
\newcommand{\vz}{\vec z}
\newcommand{\vX}{\vec X}
\newcommand{\vY}{\vec Y}
\newcommand{\vR}{\vec R}
\newcommand{\vH}{\vec H}
\newcommand{\vC}{\vec C}

\renewcommand{\epsilon}{\eps}


\newcommand\vI{\vec I}
\newcommand\vJ{\vec J}

\newcommand{\GG}{\mathbb G}
\newcommand{\limby}{\lim_{\beta \to \infty: \beta y =\Theta(1)}}

\newcommand\ALPHA{\vec\alpha}
\newcommand\MU{\vec\mu}
%\newcommand\vY{\vec Y}
\newcommand\vB{\vec B}
\newcommand\vr{\vec r}
\newcommand\vm{\vec m}
\newcommand\vn{\vec n}
\newcommand\NU{\vec\nu}
\newcommand\cMU{\check\MU}
\newcommand\cSIGMA{\check\SIGMA}

\newcommand\vU{\vec U}
\newcommand\hG{\hat\G}

\newcommand\GAMMA{{\vec\gamma}}
\newcommand\OMEGA{\vec\omega}
\newcommand\PSI{\vec\psi}
\newcommand\RHO{{\vec\rho}}
\newcommand\PHI{\vec\Phi}
\newcommand\VARPHI{\vec\varphi}

\newcommand\nix{\,\cdot\,}
\newcommand\vV{\vec V}
\newcommand\vW{\vec W}
\newcommand\vS{\vec S}
\newcommand\lam{\lambda}

\newcommand\reg{\G}
\newcommand\dd{{\mathrm d}}

\newcommand\GR{{\bf G}}
\newcommand\GRE{G_{\text{ER}}}
\newcommand\T{\vec T}
% \newcommand\vH{\vec H}

\renewcommand{\vec}[1]{\boldsymbol{#1}}

\newcommand\bem{\bf\em}
\newcommand\bemph[1]{{\bf\em #1}}

\newcommand\KL[2]{D_{\mathrm{KL}}\bc{{{#1}\|{#2}}}}

\newcommand\SIGMA{\vec\sigma}
\newcommand\CHI{\vec\chi}
\newcommand\TAU{\vec\tau}
\newcommand\cutm{\Delta_{\Box}}
\newcommand\Cutm{\cD_{\Box}}
\newcommand\CUTM{D_{\Box}}


\newcommand\fG{\mathfrak{G}}
\newcommand\fS{\mathfrak{S}}
\newcommand\fK{\mathfrak{K}}
\newcommand\fD{\mathfrak{D}}
\newcommand\fM{\mathfrak{M}}
\newcommand\fQ{\mathfrak{Q}}
\newcommand\fC{\mathfrak{C}}
\newcommand\fY{\mathfrak{Y}}
\newcommand\fA{\mathfrak{A}}
\newcommand\fF{\mathfrak{F}}
\newcommand\fP{\mathfrak{P}}
\newcommand\fp{\mathfrak{a}}
\newcommand\fr{\mathfrak{r}}

\newcommand\cA{\mathcal{A}}
\newcommand\cB{\mathcal{B}}
\newcommand\cC{\mathcal{C}}
\newcommand\cD{\mathcal{D}}
\newcommand\cF{\mathcal{F}}
\newcommand\cG{\mathcal{G}}
\newcommand\cE{\mathcal{E}}
\newcommand\cU{\mathcal{U}}
\newcommand\cN{\mathcal{N}}
\newcommand\cQ{\mathcal{Q}}
\newcommand\cH{\mathcal{H}}
\newcommand\cS{\mathcal{S}}
\newcommand\cT{\mathcal{T}}
\newcommand\cI{\mathcal{I}}
\newcommand\cK{\mathcal{K}}
\newcommand\cJ{\mathcal{J}}
\newcommand\cL{\mathcal{L}}
\newcommand\cM{\mathcal{M}}
\newcommand\cO{\mathcal{O}}
\newcommand\cP{\mathcal{P}}
\newcommand\cX{\mathcal{X}}
\newcommand\cY{\mathcal{Y}}
\newcommand\cV{\mathcal{V}}
\newcommand\cW{\mathcal{W}}
\newcommand\cZ{\mathcal{Z}}
\def\cR{{\mathcal R}}
\def\cE{{\mathcal E}}
\def\bC{{\bf C}}
\def\bT{{\bf T}}
\def\bM{{\bf M}}


\newcommand\fX{\mathfrak{X}}
\newcommand\ve{\vec e}
\newcommand\vu{\vec u}
%\newcommand\vv{\vec v}
\newcommand\vx{\vec x}
\newcommand\vZ{\vec Z}
\newcommand\vM{\vec M}
\newcommand\vy{\vec y}
\newcommand\THETA{\vec\theta}

\newcommand\atom{\delta}


\newcommand\eul{\mathrm{e}}
\newcommand\eps{\varepsilon}
\newcommand\del{\delta}
\newcommand\ZZ{\mathbb{Z}}
\newcommand\NN{\mathbb{N}}
\newcommand\ZZpos{\mathbb{Z}_{\geq0}}
\newcommand\Var{\mathrm{Var}}
\newcommand\Erw{\mathbb{E}}
\newcommand{\vecone}{\vec{1}}
\newcommand{\Vol}{\mathrm{Vol}}

\newcommand{\set}[1]{\left\{#1\right\}}
\newcommand{\Po}{{\rm Po}}
\newcommand{\Bin}{{\rm Bin}}
\newcommand{\Mult}{{\rm Mult}}
\newcommand{\Be}{{\rm Be}}

\newcommand\TV[1]{\left\|{#1}\right\|_{\mathrm{TV}}}
\newcommand\tv[1]{\|{#1}\|_{\mathrm{TV}}}
\newcommand\dTV{d_{\mathrm{TV}}}

\newcommand{\bink}[2] {{\binom{#1}{#2}}}

\newcommand\bc[1]{\left({#1}\right)}
\newcommand\cbc[1]{\left\{{#1}\right\}}
\newcommand\bcfr[2]{\bc{\frac{#1}{#2}}}
\newcommand{\bck}[1]{\left\langle{#1}\right\rangle}
\newcommand\brk[1]{\left\lbrack{#1}\right\rbrack}
\newcommand\scal[2]{\bck{{#1},{#2}}}
\newcommand\norm[1]{\left\|{#1}\right\|}
\newcommand\abs[1]{\left|{#1}\right|}
\newcommand\uppergauss[1]{\left\lceil{#1}\right\rceil}
\newcommand\lowergauss[1]{\left\lfloor{#1}\right\rfloor}
\newcommand\ug[1]{\left\lceil{#1}\right\rceil}
\newcommand\RR{\mathbb{R}}
\newcommand\RRpos{\RR_{\geq0}}
\newcommand{\Whp}{W.h.p.}
\newcommand{\whp}{w.h.p.}
\newcommand{\wupp}{w.u.p.p.}
\newcommand{\tensor}{\otimes}

\newcommand{\Karonski}{Karo\'nski}
\newcommand{\Erdos}{Erd\H{o}s}
\newcommand{\Renyi}{R\'enyi}
\newcommand{\Lovasz}{Lov\'asz}
\newcommand{\Juhasz}{Juh\'asz}
\newcommand{\Bollobas}{Bollob\'as}
\newcommand{\Furedi}{F\"uredi}
\newcommand{\Komlos}{Koml\'os}
\newcommand{\Luczak}{\L uczak}
\newcommand{\Mezard}{M\'ezard}
\newcommand{\Kucera}{Ku\v{c}era}
\newcommand{\Szemeredi}{Szemer\'edi}

\newcommand\pr{\mathbb{P}} 
\renewcommand\Pr{\pr} 
\newcommand{\eig}{\mathrm{Eig}}
\newcommand{\Pomast}{\cP^2_\ast (\Omega)}
\newcommand\ZZZ{\mathbb{Z}}
\newcommand{\planted}{\pi^{\mathrm{pl}}}
\newcommand{\dcond}{d_{\mathrm{cond}}}
\newcommand{\dsat}{d_{\mathrm{sat}}}

\newcommand{\cGg}{\cG_{\Gamma}}
\newcommand{\cGd}{\cG_{\Delta}}



\newcommand{\ecut}{e_{\text{cut}}}
\newcommand{\ecutm}{{e}^*_{\text{cut}}}
\newcommand{\euncut}{e_{\text{uncut}}}
\newcommand{\euncutm}{{e}^*_{\text{uncut}}}



\newcommand{\floor}[1]{\left\lfloor#1\right\rfloor}
\newcommand{\ceil}[1]{\left\lceil#1\right\rceil}
\newcommand{\rk}[1]{\rank(#1)}
\def\bin{{\bf Bin}}
\newcommand{\supp}[1]{{\text{supp}\left(#1\right)}}
\newcommand{\ind}[1]{\mathrm{ind}(#1)}
\def\geo{{\bf Geo}}
\def\con{{\bf Con}}


\newcommand\A{\vA}

\def\E{{\mathcal E}}
\def\G{{\vec G}}


\def\po{{\bf Po}}
\def\ex{{\mathbb E}}
\def\pr{{\mathbb P}}
\newcommand\expc[1]{{\left< #1\right>}}



\def\bfa{{\bf a}}
\def\bfb{{\bf b}}
\def\bfc{{\bf c}}
\def\bfd{{\vec d}}
\def\bfe{{\bf e}}
\def\bff{{\bf f}}
\def\bfg{{\bf g}}
\def\bfh{{\bf h}}
\def\bfi{{\bf i}}
\def\bfj{\vec j}
\def\bfk{\vk}
\def\bfl{{\bf l}}
\def\bfm{{\vec m}}
\def\bfn{{\vec n}}
\def\bfo{{\bf o}}
\def\bfp{{\bf p}}
\def\bfq{{\bf q}}
\def\bfr{{\bf r}}
\def\bfs{{\bf s}}
\def\bft{{\bf t}}
\def\bfu{{\bf u}}
\def\bfv{{\bf v}}
\def\bfw{{\bf w}}
\def\bfx{{\bf x}}
\def\bfy{{\bf y}}
\def\bfz{{\bf z}}

\def\bfA{{\bf A}}
\def\bfB{{\bf B}}
\def\bfC{{\bf C}}
\def\bfD{{\bf D}}
\def\bfE{{\bf E}}
\def\bfF{{\bf F}}
\def\bfG{{\bf G}}
\def\bfH{{\bf H}}
\def\bfI{{\bf I}}
\def\bfJ{{\bf J}}
\def\bfK{{\bf K}}
\def\bfL{{\bf L}}
\def\bfM{{\bf M}}
\def\bfN{{\bf N}}
\def\bfO{{\bf O}}
\def\bfP{{\bf P}}
\def\bfQ{{\bf Q}}
\def\bfR{{\bf R}}
\def\bfS{{\bf S}}
\def\bfT{{\bf T}}
\def\bfU{{\bf U}}
\def\bfV{{\bf V}}
\def\bfW{{\bf W}}
\def\bfX{{\bf X}}
\def\bfY{{\bf Y}}
\def\bfZ{{\bf Z}}

\def\bfgamma{{\pmb{\gamma}}}
\def\bfmu{{\pmb{\mu}}}
\def\bftheta{{\pmb{\theta}}}
\def\bfsigma{{\pmb{\sigma}}}
\def\bftau{{\pmb{\tau}}}

\usepackage{lipsum}

\def\bbF{{\mathbb F}}
\def\bbN{{\mathbb N}}
\def\bbC{{\mathbb C}}
\def\bbR{{\mathbb R}}
\def\bbZ{{\mathbb Z}}

\def\cH{{\mathcal H}}

\newcommand{\remove}[1]{}
\newcommand\eqn[1]{(\ref{#1})}


%\newcommand\coloneqq{:=}
\newcommand{\vecGamma}{\vec{\Gamma}}

\newcommand{\one}{V_1}
\newcommand{\zero}{V_0}
\newcommand{\zerominus}{V_{0-}}
\newcommand{\oneminusminus}{V_{1--}}
\newcommand{\zeroplus}{V_{0+}}
\newcommand{\oneplus}{V_{1+}}
\newcommand\w{{\omega}}


\newcommand{\muk}{\mu_{k}}
\newcommand{\sigmai}{\SIGMA_{i}}

\newcommand{\quantnsm}{\vec\Phi_i}
\newcommand{\quantns}{\vec\Psi_i}

\newcommand\mcount{m_{\mathrm{count}}}
\newcommand\mada{m_{\mathrm{ada}}}
\newcommand\madapt{m_{\mathrm{ad}}}
\newcommand\minf{m_{\mathrm{inf}}}
\newcommand{\minfm}[1]{\minf(#1)}
\newcommand\malg{m_{\mathtt{DD}}}
\newcommand\mDD{m_{\mathrm{DD}}}
\newcommand\mSC{m_{\mathrm{SPIV}}}

\newcommand\mseed{m_{\mathrm{seed}}}
\newcommand\mbulk{m_{\mathrm{bulk}}}

\newcommand{\gk}{G}
\newcommand{\gp}{\hat G}
\newcommand{\gd}{G'}

\newcommand{\aco}[1]{\textcolor{red}{#1}}
\newcommand{\pl}[1]{\textcolor{blue}{#1}}
\newcommand{\mhk}[1]{\textcolor{blue}{#1}}
\newcommand{\geb}[1]{\textcolor{green}{#1}}
\newcommand{\op}[1]{\textcolor{violet}{#1}}
\newcommand{\manpen}[1]{\textcolor{mvertexa}{#1}}
\newcommand{\nt}[1]{\textcolor{olive}{#1}}
\newcommand{\mr}[1]{\textcolor{cyan}{#1}}
\newcommand{\lk}[1]{\textcolor{magenta}{#1}}

\newcommand{\be}{\begin{equation}}
    \newcommand{\bel}[1]{\begin{equation}\lab{#1}\ }
        \newcommand{\ee}{\end{equation}}
    \newcommand{\bea}{\begin{eqnarray}}
        \newcommand{\eea}{\end{eqnarray}}
    \newcommand{\bean}{\begin{eqnarray*}}
        \newcommand{\eean}{\end{eqnarray*}}
    
    \newcommand{\vN}{\vec N}
    
    \newcommand{\mone}{\vm_1}
    \newcommand{\mzero}{\vm_0}

\graphicspath{{..}}
\pgfplotsset{compat=1.14}



\tikzstyle{node} = [shape=circle,draw=black]
\newcommand{\colLil}{violet}
\newcommand{\colOrange}{orange}
\newcommand{\colGreen}{green}
\newcommand{\colBlue}{blue}
\tikzstyle{node} = [shape=circle,draw=black]
\tikzstyle{dottetReplace} =[fill = violet!20!white]
\newcommand{\betweenDist}{0}




\newcommand{\pz}{\vec{\omega}}
\newcommand{\cc}{\vec{\omega}_c}
\newcommand{\activenodes}{\vec{X}^\star}
\newcommand{\candidates}{\mathcal{C}}
\newcommand{\distactive}[1]{t_{#1}^{\activenodes}}
\newcommand{\lca}{\cc}



\title{Inference of a Rumor's Source in the Independent Cascade Model}


% The standard author block has changed for UAI 2023 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is authomatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{Petra Berenbrink}
\author[2]{Max Hahn-Klimroth}
\author[3]{Dominik Kaaser}
\author[2]{Lena Krieg}
\author[1]{Malin Rau}

\affil[1]{%
    Universität Hamburg\\
    Hamburg, Germany
}
\affil[2]{%
    TU Dortmund University\\
    Dortmund, Germany
}
\affil[3]{%
    TU Hamburg,
    Hamburg, Germany
}


\iffalse
\usepackage{refcheck}

\makeatletter
\newcommand{\refcheckize}[1]{%
  \expandafter\let\csname @@\string#1\endcsname#1%
  \expandafter\DeclareRobustCommand\csname relax\string#1\endcsname[1]{%
    \csname @@\string#1\endcsname{##1}\@for\@temp:=##1\do{\wrtusdrf{\@temp}\wrtusdrf{{\@temp}}}}%
  \expandafter\let\expandafter#1\csname relax\string#1\endcsname
}
\makeatother

\refcheckize{\cref}
\refcheckize{\Cref}
\fi



\begin{document}
\maketitle
\allowdisplaybreaks

\begin{abstract}
We consider the so-called \emph{Independent Cascade Model} for rumor spreading or epidemic processes popularized by Kempe et al.\ (2003).
In this model, a node of a network is the source of a rumor -- it is \emph{informed}.
In discrete time steps, each informed node ``infects'' each of its uninformed neighbors with probability $p$.
While many facets of this process are studied in the literature, less is known about the inference problem: given a number of infected nodes in a network, can we learn the source of the rumor?
In the context of epidemiology, this problem is often referred to as the \emph{patient zero problem}.
It belongs to a broader class of problems where the goal is to infer parameters of the underlying spreading model.
In this work we present a maximum likelihood estimator for the rumor's source, given a snapshot of the process in terms of a set of active nodes $X$ after $t$ steps. Our results show that, for acyclic graphs, the likelihood estimator undergoes a phase transition as a function of $t$. We provide a rigorous analysis for two prominent classes of acyclic networks, namely $d$-regular trees and Galton-Watson trees, and verify empirically that our heuristics work well in various general networks.
\end{abstract}

\section{Introduction}

In this paper, we consider a stochastic diffusion process which models the spread of information or influence in networks. 
Influence propagation is motivated by many applications from various fields: in marketing these processes are studied to maximize the adoption of a new product; these processes are used to study how social media influencers manipulate humans in social networks, in epidemiology they are used to study the spread of viruses or disease \citep{Brauer2017,Kermack1927}, or, in general as processes that spread information in networks \citep{Becker_Coro_DAngelo_Gilbert_2020, Kempe2003,Lerman_Ghosh_2010,rum2007,Sadilek_Kautz_Silenzio_2021}.  

On a very high level these processes work as follows. 
Initially, a small subset of the vertices are in a distinguished state (they might have  a piece of information, or they are infected, depending on the application in mind).
In this paper, we will call these vertices \emph{informed}. 
Informed vertices can inform their neighbors and the rumor spreads as time passes by through the network. 
Most publications studying stochastic diffusion processes study these processes in a \emph{forward direction}, i.e., they consider how information spreads in a network, how many nodes will become informed,  or which nodes have the largest influence on the vertices in a social network. 
Those processes are well understood in simple networks \citep{britton_janson_martin-loef_2007,sir_random_rigor}. 
In this paper we study the problem in a \emph{backward direction}. Our goal is to detect the \emph{source} of the rumor, thus we study the learning problem of inferring $\pz$, a problem which received far less attention. 
In the disease spreading settings this problem is referred to as  the \emph{patient zero problem}.
This inference problem was studied with respect to the SI model from epidemiology rigorously \citep{DBLP:conf/sigmetrics/ShahZ12}. 
%Under the simple SI disease spreading model once infected nodes stay infected and they can infect randomly chosen neighboring nodes. 
% Ggf unser ICM vorher definieren
Understanding inference problems of this kind better will help us to find the source of outbreaks of infectious diseases like COVID-19 or to find the source of rumors. 
The latter might help to prevent political elections from being influenced by the outside world.



\paragraph{Model Overview}
In this paper we employ the well-known \emph{Independent Cascade Model} (ICM) \citep{Kempe2003}.
The process starts with an initial set of active nodes $I_0$ and it works in discrete steps.  When node $v$ first becomes active in step $t$, it is given a single chance (one shot) to activate each (currently inactive) neighbor with probability $p$. 
Whether or not $v$ succeeds to activate any of its neighbors, it cannot make any further attempts in later rounds. It remains inactive for all steps $t'>t$. 
The process runs until no more activations are possible. 
We assume that $I_0$ (the \emph{rumor's source}) contains only a single node denoted by $\pz$. 
We call all nodes that were activated at one point of time during the process \emph{informed}. 
Note that this  \emph{one-shot} property is  a very fitting model for rumor or disease spreading in social networks. 
Indeed, once a user hears about an article supporting her opinion, she will either ignore it or share it within her social contacts in the near future. 
Each of the possible recipients either ignores her opinion (does not get activated) or decides to share it again with their peers (gets activated).
Furthermore, users are unlikely to share the same article twice. 
In the case of disease spreading the informed vertices model the persons which caught the illness and the active ones model the persons which are infectious at any point of time. 
There are two well-known variants of the ICM. 
We study the variant in which we cannot distinguish formerly informed vertices from never informed vertices. 
Thus, a snapshot consists only of currently active vertices and the network graph itself which reduces the available information dramatically.


\paragraph{Teacher Student Model}
Our problem fits well into the so-called  \emph{teacher-student model}, introduced by \citet{Gardner_1989} in the context of studying fundamental properties of the Perceptron, a fairly simple binary classifier. 
This framework is frequently used to model (machine) learning tasks \citep{lenka_florent_stat_physics_inference,coja_spiv}. 
Suppose a \emph{teacher} samples a \emph{ground-truth} $\SIGMA$ from a distribution $\mu_P$ called \emph{teacher's prior}. 
Rather than directly revealing this ground-truth to a student, the teacher creates a condensed version $\hat \SIGMA$ of the ground-truth by means of a \emph{teacher's model} $\mu_M$. 
Now, in the so-called Bayes optimal case, the teacher conveys $\hat \SIGMA, \mu_P, \mu_M$ to her student. 
The student's task is to infer a non-trivial guess of $\SIGMA$ from the observed data ($\hat \SIGMA, \mu_P, \mu_M$). 
In the context of our contribution, the teacher's prior is the uniform distribution on all nodes of a given network $G$.
The ground-truth is a sample $\pz$ from this distribution. 
The vertex $\pz$ is the \emph{rumor's source}.
The teacher's model is the distribution of the outcome of the $t$-step forward process of the ICM on $G$ starting with the source $\pz$.
The condensed information $\hat \SIGMA$ that the student receives consists of the network $G$ and a set $\activenodes$ of active nodes at the same time step.
Note that the student does not receive any information about the time at which the set is drawn.
The student's task now is to infer $\pz$ from $G$ and $\activenodes$. 


\paragraph{Results in a Nutshell}
In our first result, we prove that our model is Bayes optimal (or, in terms of statistical physics, the \emph{Nishimori property} holds). 
Secondly, with respect to $d$-regular trees, we show that for a small spreading parameter $p(d-1) < 1$ it is not possible to infer the source of the rumor.
For $p(d-1) \gg 1$ we show that we can detect the source node with a very large probability. 
%The probability approaches one as a function of the time $t$ at which the snapshot was done. 
For $1 < p(d-1) = \Theta(1)$ we show that the source of the rumor can be inferred with a constant probability.
Furthermore, we bound the probability that the real rumor source $\pz$ is far away from our algorithm's output. 
%Another way to read our results is that inference of $\pz$ gets easier for increasing value of $(d-1) \cdot p$. 
Finally, we establish a similar phase transition with respect to Galton-Watson processes with offspring distribution $\Po( \lambda )$.


\paragraph{Related Work}
Forward propagation processes, like the epidemic models \citep{Brauer2017,SEIR,Kermack1927,SIS}, rumor spreading \citep{goffman1964generalization,Lerman_Ghosh_2010,rum2007,Sadilek_Kautz_Silenzio_2021}, information cascades \citep{Kempe2003,Zhao2011}, blog propagation models \citep{Leskovec2007PatternsOC}, and marketing strategies \citep{Becker_Coro_DAngelo_Gilbert_2020} have been studied extensively and for a long time within different research communities and we refrain from discussing the extensive literature here.
On the contrary, rigorous contributions on the corresponding inference problem, the source detection task, are scarce. 

To the best of our knowledge, there is only little rigorous knowledge on how to infer a rumor's source. With respect to the SI model, the rigorous contributions are by \citet{kazemitabar2019approximate, DBLP:conf/sigmetrics/ShahZ10, DBLP:conf/sigmetrics/ShahZ12}. \citet{DBLP:conf/sigmetrics/ShahZ10, DBLP:conf/sigmetrics/ShahZ12} prove that on some infinite acyclic networks like $d$-regular trees, super-critical Galton-Watson processes, and geometric graphs, approximate inference of $\pz$ is always possible in the SI model as long as the infinite tree satisfies certain expansion properties (for instance, in the $d$-regular case, this requires $d \geq 3$). 
Furthermore, they prove that the probability of declaring a \emph{far-apart} node as the rumor's source is small. 
The results are proved by a fundamental connection between a generalized Polya's urn and the SI model on acyclic networks.
\citet{DBLP:conf/icml/DawkinsLX21} introduce a statistical framework 
for the recovery of the diffusion source. Their framework covers the SI model but can be extended to other diffusion processes as well.
Finally, there is also a line of work towards the more general discrete time SIR model \citep{zhuchenying_sir, zhuying_ic} which includes the ICM as a special case (if individuals recover after one round). \citet{zhuchenying_sir} study a Jordan centrality based inference algorithm for $d$-regular trees. Our \cref{thm_lca_dreg} deepens some of their results with regard to the ICM. The exact connection will be discussed in \cref{sec_discussion_zhu}. 

Moreover, there are well explained heuristics towards the source detection task on various network types which are supported by extensive simulation studies \citep{Amoruso2020,Bindi2017,influencemaximation_chen_kdd, Jain2016,Ji2017AnAF,Prakash2013,Wang_Wang_Pei_Ye_2017}, in recent contributions also based on neural networks \citep{biazzo2021epidemic,Shah2020FindingPZ,Shu2021}. 
Finally, a related problem that has attracted attention recently is not to infer $\pz$ given $\activenodes$ but to infer the parameters of the underlying spreading model.
Over the last years, learning strategies towards this problem were proposed and studied experimentally \citep{reconstruction_epidemics_parameters,spread_covid_neurips}.

%\nopagebreak
\section{Model and Results}
We are given a communication network $G = (V,E)$ with $n$ vertices $\rho_1, \ldots, \rho_n$ and $m$ edges.  We assume $I_0=\{\pz\}$ and $\pz$ is chosen uniformly at random from all vertices.
Hence, the rumor originates at a single source.
A \emph{spreading parameter} $p \in (0,1)$ determines the  viciousness of the rumor.
The diffusion process in the ICM runs in discrete, synchronous steps.
Let $I_t$ be the set of vertices which are active in step $t$.
In step $t$ we will call the vertices in $I_0\cup I_1\cup \ldots \cup I_t$ \emph{informed}.
In every step $t\ge 1$ every  active node $\rho_i \in I_t$ activates any of its uninformed neighbors with probability $p$.  
All these  newly informed vertices form  the set $I_{t+1}$.  Note that  every node becomes active exactly once but vertices have potentially the chance to be activated by each of their neighbors.  
Note that it can happen that the process dies out at a step $t$. 
In this case for all $t' \geq t$ it holds that $I_{t'}= \emptyset$.




The \emph{inference problem} is defined as follows.
We observe the state of the network at an arbitrary time $t$. 
The task is to infer $\pz$ given only $(G, I_t)$ and the parameter $p$.
In this paper  we study the following variants of the problem.
\begin{itemize}
    \item \emph{Strong detection:} infer $\pz$ with high probability,
    \item\emph{Weak detection:}  infer $\pz$ with positive probability\footnote{We say that  a sequence of events $\cE_1, \cE_2, \ldots$ takes place with positive probability if $\lim_{t \to \infty} \Pr\bc{\cE_t} > 0$ and with high probability (w.h.p.) if $\lim_{t \to \infty} \Pr\bc{\cE_t} = 1$.}.
\end{itemize}

Our first result relates the probability that a given set $X$ is active conditioned on some node $v$ being the source to the probability that $v$ is the source conditioned on $X$ being active. 
This establishes the so-called \emph{Nishimori property} (or Bayes optimality) of our inference problem.
In terms of the teacher-student model, it states that the student has access to the teacher's prior and the teacher's model. 
Equivalently, in expectation there is no statistical difference between the ground-truth and a uniform sample from the posterior distribution \citep{lenka_florent_stat_physics_inference}. \Cref{thm_mle_general} applies to all types of networks as long as the rumor's source $\pz$ is chosen uniformly at random.


\begin{figure}[t]
    \centering
%    \begin{minipage}{0.4\textwidth}

            \tikzstyle{level 1}=[sibling angle=90,node, minimum size=0.5em,inner sep =0]
            \tikzstyle{level 2}=[sibling angle=39.5, node, minimum size=0.5em,inner sep =0]
            \tikzstyle{level 3}=[sibling angle=19, node, minimum size=0.5em,inner sep =0]
            \tikzstyle{level 4}=[sibling angle=13.2, node, minimum size=4pt,inner sep =0]
            \tikzstyle{edge from parent}=[draw, very thin]
            
            
            \newcommand{\infNodes}{ k32,  k41}
            \newcommand{\infLeafs}{k2222,k2223,k2311,k2313}
            \newcommand{\originNode}{k2}
            \newcommand{\candSub}{1}
            
            % \resizebox{\textwidth}{!}{
            \begin{tikzpicture}[decoration=brace,
              grow cyclic,
              shape = circle,
              level distance=2.5em,
              scale = 1.0]
              
            
            \node[circle, minimum size=1em, level 1] at (-\betweenDist,0) {} child [level 1] foreach \A in {k4, k1, k2, k3}
                { node[level 1](\A) {} child [level 2] foreach \B in {\A1, \A2, \A3}
                    { node[level 2](\B) {} child [level 3] foreach \C in {\B1, \B2, \B3}
                        { node[level 3](\C) {} child [level 4] foreach \D in {\C1, \C2, \C3}
                            {node[level 4](\D) {}} 
                        }
                    }
                };
                
            
            
            
            \node[level 1,dottetReplace, inner sep = 0.2mm] (origin) at (\originNode) {{$\footnotesize\lca$}};
            
            \node[level 2,dottetReplace] (origin) at (\originNode\candSub) {};
            \foreach \x in {1,2,3}{
                \node[level 3, dottetReplace] () at (\originNode\candSub\x){};
                \foreach \y in {1,2,3}{
                    \node[level 4, dottetReplace] () at (\originNode\candSub\x\y){};
                }
            }
            \foreach \x in \infLeafs
            {
                    \node[level 4, fill=\colOrange] (test) at (\x) {};
            }
            \foreach \x in \infNodes
            {
                    \node[level 2, fill=\colOrange] (test) at (\x) {};
            }
            
            %  \draw[\colBlue, rounded corners] (v11) rectangle (c123);%node[below left = 0.4ex and 2*\triWidth+2*\hDiff]{$T^{v}_{\activenodes}$};
            
            
            
            \end{tikzpicture}
            % }
            

                
%    \end{minipage}\begin{minipage}{0.6\textwidth}
    \caption{
    Visualization of a possible snapshot of the spreading process. 
    Here, $\cc$ spawned four sub-trees out of which three contain active elements of $\activenodes$ (orange nodes) and one does not contain active elements (purple). 
    Thus, the candidate set $\candidates$ of possible rumor's sources consists of all vertices in the purple sub-tree. 
    %Note that here only a finite part of the infinitely expanding 4-regular tree is presented.
    }
    \label{fig:circltree}
    %\end{minipage}
\end{figure}

\begin{theorem} \label{thm_mle_general} Let $G = (V, E)$ be an arbitrary network and  fix an arbitrary step $t$. 
Let $\activenodes$ be the set of active vertices in step $t$. For any $X\subseteq V$,
{\dense
\begin{align*}
\arg\max_{v \in V} \Pr \bc{ \activenodes = X \mid \pz = v} = \arg\max_{v \in V} \Pr \bc{ \pz = v \mid \activenodes = X}.
\end{align*}}
\end{theorem}

The above theorem is used to show the main results of this paper.
It allows us to calculate  $\Pr \bc{ \activenodes= X \mid \pz = v}$ instead of $\Pr \bc{ \pz = v \mid \activenodes=X }$,  which often is  more accessible.
Note that calculating the first probability is quite challenging in general networks $G$ since the entropy of $\activenodes$ is very large.


For the remaining analytical part of the paper we consider acyclic networks, namely $d$-regular trees and Galton-Watson trees with offspring distribution $\Po(\lambda)$.
In general, Galton-Watson trees with offspring distribution $\cD$ are defined by the following experiment.
We start with one node which spawns $\vec d_0 \sim \cD$ children. 
Recursively, any of the children $w_1, \ldots, w_{\vec d_0}$ spawns $\vec \eta_1, \ldots, \vec \eta_{\vec d_0} \sim  \cD $ children (and so on).
We call such a process super-critical, if with positive probability, the vertices spawned during the process form an infinite tree.
It is well known that a (not too dense) instance of an Erdős–Rényi random graph $\mathbb{G} \bc{ n, \frac{d}{n} }$ \emph{locally} looks like a Galton-Watson tree with offspring distribution $\Po(d)$.
In contrast, a random $d$-regular graph looks locally like a $d$-regular tree, provided $d$ is not too large.

We assume that a teacher fixes a time $t$ at which she observes the network. 
We assume that the student is not aware of the time when the process started. 
We define  $\activenodes$ as the set of nodes active in step $t$ of the ICM (starting from an unknown and randomly chosen source $\pz$).  
Note that $\activenodes$ can be empty. 
The set of candidate nodes $\candidates$ consists of all nodes $v$ that have the same distance in $G$ to each node in $\activenodes$, i.e., for all $x,x' \in \activenodes$ it holds that $\dist(x,v) = \dist(x',v)$.
Note that the set $\candidates$ is not empty since $\pz\in \candidates$ and that $\candidates$ is the whole graph if $\activenodes = \emptyset$.
The \emph{closest candidate} $\cc \in \candidates$ is defined as the node with minimum distance to all nodes in $\activenodes$, see \cref{fig:circltree} for an example. 
Our \emph{source detection heuristic} calculates the closest candidate $\cc\in \candidates$ and returns it as the estimated rumor source.
If  $\activenodes=\emptyset$  (the process died out before time $t$) or contains at most one node, the heuristic returns a failure.
The following results describe the probability of success or failure for this source detection heuristic with respect to the model's parameters.



The first theorem shows a phase transition between  weak and strong detection for $d$-regular trees.
We show that, for small $p\cdot(d-1) $ it is not possible to infer the source of the rumor. 
This is due to the huge likelihood that, in this setting,  the process dies out and $\activenodes=\emptyset$ or the corresponding set $\activenodes$ is very small which makes the inference impossible. 

For large $p\cdot (d-1)$ we show that the source node is the closest candidate with probability $1 - o_d(1)$.
Note that in this scenario, each active node will infect several nodes and it is unlikely that the process dies out.
The size of $\activenodes$ grows as a function of $d$ and $t$ which makes the prediction more and more reliable. 
For intermediate $p\cdot(d-1)$ we show that the closest candidate $\cc$ is the source of the rumor with a constant probability. 
Furthermore, we bound the probability that the real rumor source $\pz$ is far away from $\cc$. 
Intuitively this means that inference of $\pz$ gets easier for increasing values of $(d-1) \cdot p$. 


\begin{theorem}[$d$-regular trees]\label{thm_lca_dreg}
Let $G = (V, E)$ be an infinite $d$-regular tree and let  $\activenodes$ be the set of active nodes generated 
by the ICM with spreading parameter $p$ after $t = \omega(1)$ steps. 
Then, the following phase-transitions occur.
\begin{itemize}
    \item If $(d-1)\cdot p \leq 1$, any estimator fails at weak detection with probability $1-o_t(1)$.\footnote{We denote by $o_t(1)$ a quantity that tends to zero with $t \to \infty$.}
    \item If $1 < (d-1)\cdot p = \Theta(1)$ then the closest candidate $\cc$ is the source  of the rumor $\pz$ with constant probability (weak detection). 
    Furthermore, the probability that $\dist(\cc, \pz) > k$ is at most $\exp \bc{ - \Omega(k)}$.
    \item If $(d-1)\cdot p = \omega(1)$ then the closest candidate $\cc$ is the source of the rumor $\pz$  with probability $1 - o_{d}(1)$ (strong detection).
\end{itemize}
\end{theorem}





We also study the patient zero problem on  $\Po(\lambda)$-Galton-Watson trees. 
This model describes the local structure of Erdős-Rényi random graphs very well. 
We find a similar phase transition as before.

\begin{theorem}[Galton-Watson processes]\label{thm_lca_gw}
Let $G = (V, E)$ be an infinite tree generated by a $\Po(\lambda)$-Galton-Watson process. Let  $\activenodes$ be the set of active nodes generated 
by the ICM with spreading parameter $p$ after $t = \omega(1)$ steps.  Then, the following phase-transition occurs.
\begin{itemize}
    \item If $\lambda p \leq 1$, any estimator fails at weak detection with probability $1-o_t(1)$.
    \item If $1 < \lambda p = \Theta(1)$, then the closest candidate $\cc$ is the source  of the rumor $\pz$ with positive probability (weak detection).
    Furthermore, the probability that $\dist(\cc, \pz) > k$ is at most $\exp \bc{ - \Omega(k)}$.
    \item If $\lambda p = \omega(1)$, then the closest candidate $\cc$ is the source of the rumor $\pz$  with probability $1 - o_{\lambda}(1)$  (strong detection).
\end{itemize}
\end{theorem}
In \cref{thm_lca_dreg,thm_lca_gw} we assume that the underlying tree network is infinitely large. This is conceptually necessary. Indeed, the trivial algorithm that outputs one node uniformly at random succeeds at weak detection in finite networks. In the next section, we provide extensive simulations that verify the asymptotic statements of the theorems on small networks. Furthermore, we present simulation results on non-acyclic networks such as random geometric graphs and show that (the natural extension of) the closest candidate heuristic works well. 





\section{Simulations}




In this section we present simulation results that support and complement our main theorems for cyclic graphs.%, implementation details are given in the supplementary material.
More precisely, we run the simulations on random geometric graphs ($n = 10^5$ and expected degree $16$), Erdős-Rényi graphs with $n = 10^5$ nodes and expected node degree $4$, and random $4$-regular graphs in the configuration model.

Our algorithm is generalized to cyclic graphs as follows.
For $v\in V$ and $t'\ge 0$ let $N_{t'}(v)$ denote the set of nodes $w \in V$ that have distance at most $t'$ to $v$.
First we calculate 
\[ N_{t'}=\bigcap_{u \in \activenodes} N_{t'}(u).\]
Hence, $N_{t'}$ is the set of nodes with distance at most $t'$ to 
every node in $\activenodes$.
We then pick the minimum $t'$ such that $N_{t'} \neq \emptyset$ and return $N_{t'}$ as the candidate set.
Note that such a $t'$ exists since $\pz \in N_t$.

To generate our data we execute 100 independent simulation runs for each network, where we simulate the ICM for $8$ rounds.
The success rates are visualized in \cref{fig:plot-1}. In \cref{fig:plot-2} we present simulation data for random geometric graphs with expected node degree $16$, where we increased the number of rounds of the ICM. We show the success rates after $8$, $16$, and $32$ rounds. \Cref{fig:plot3} highlights the phase transition behavior of our algorithm. 
The data underlying the plots is given in the supplementary material.

Our simulation software is implemented in the \CC{} programming language.
As a source of randomness we use the Mersenne Twister \texttt{mt19937\_64} provided by the \CC{11} \texttt{\textless random\textgreater{}} library.
All simulations have been carried out on four machines equipped with two Intel(R) Xeon(R) E5-2630 v4 CPUs and 128 GiB of RAM, running the Linux 5.13 kernel.

A simulation run consists of three parts. First, we generate a network $G = (V, E)$.
To this end, we have implemented generators for Erdős-Rényi graphs, random $d$-regular graphs (configuration model \citep{Janson_2011}) and random geometric graphs \citep{Penrose2003}.
Second, we run the Independent Cascade Process for $t$ rounds  starting from a randomly chosen node $\pz \in V$.
Finally, we run the generalized variant of our source detection algorithm.

% 
% For random geometric graphs (with $n = 10^5$ nodes and expected node degree $16$), we present our data in \cref{fig:plot-2}. % \cref{tab:table1}. 
% Erdős-Rényi graphs with $n = 10^5$ nodes and expected node degree $4$ are shown in \cref{fig:plot-2}. % \cref{tab:table2}.
% Our data for random $4$-regular graphs in the configuration model can be found in \cref{fig:plot3}. %  \cref{tab:table3}. 
% The tables show for various spreading probabilities $p$ the number of successes in detecting the source, the numbers of errors for the two error cases $\pz \not\in N_{t'}$ and $\activenodes = 0$, and the average and maximum distance of $\pz$ to nodes in $N_{t'}$.


\begin{figure*}[t]
\begin{minipage}{0.30\textwidth}
\begin{figure}[H]
\def\ps{} %\def\ps{\small}
%\begin{minipage}{1.8in}

% GNUPLOT: LaTeX picture with Postscript
\begingroup
  \makeatletter
  \providecommand\color[2][]{%
    \GenericError{(gnuplot) \space\space\space\@spaces}{%
      Package color not loaded in conjunction with
      terminal option `colourtext'%
    }{See the gnuplot documentation for explanation.%
    }{Either use 'blacktext' in gnuplot or load the package
      color.sty in LaTeX.}%
    \renewcommand\color[2][]{}%
  }%
  \providecommand\includegraphics[2][]{%
    \GenericError{(gnuplot) \space\space\space\@spaces}{%
      Package graphicx or graphics not loaded%
    }{See the gnuplot documentation for explanation.%
    }{The gnuplot epslatex terminal needs graphicx.sty or graphics.sty.}%
    \renewcommand\includegraphics[2][]{}%
  }%
  \providecommand\rotatebox[2]{#2}%
  \@ifundefined{ifGPcolor}{%
    \newif\ifGPcolor
    \GPcolorfalse
  }{}%
  \@ifundefined{ifGPblacktext}{%
    \newif\ifGPblacktext
    \GPblacktexttrue
  }{}%
  % define a \g@addto@macro without @ in the name:
  \let\gplgaddtomacro\g@addto@macro
  % define empty templates for all commands taking text:
  \gdef\gplbacktext{}%
  \gdef\gplfronttext{}%
  \makeatother
  \ifGPblacktext
    % no textcolor at all
    \def\colorrgb#1{}%
    \def\colorgray#1{}%
  \else
    % gray or color?
    \ifGPcolor
      \def\colorrgb#1{\color[rgb]{#1}}%
      \def\colorgray#1{\color[gray]{#1}}%
      \expandafter\def\csname LTw\endcsname{\color{white}}%
      \expandafter\def\csname LTb\endcsname{\color{black}}%
      \expandafter\def\csname LTa\endcsname{\color{black}}%
      \expandafter\def\csname LT0\endcsname{\color[rgb]{1,0,0}}%
      \expandafter\def\csname LT1\endcsname{\color[rgb]{0,1,0}}%
      \expandafter\def\csname LT2\endcsname{\color[rgb]{0,0,1}}%
      \expandafter\def\csname LT3\endcsname{\color[rgb]{1,0,1}}%
      \expandafter\def\csname LT4\endcsname{\color[rgb]{0,1,1}}%
      \expandafter\def\csname LT5\endcsname{\color[rgb]{1,1,0}}%
      \expandafter\def\csname LT6\endcsname{\color[rgb]{0,0,0}}%
      \expandafter\def\csname LT7\endcsname{\color[rgb]{1,0.3,0}}%
      \expandafter\def\csname LT8\endcsname{\color[rgb]{0.5,0.5,0.5}}%
    \else
      % gray
      \def\colorrgb#1{\color{black}}%
      \def\colorgray#1{\color[gray]{#1}}%
      \expandafter\def\csname LTw\endcsname{\color{white}}%
      \expandafter\def\csname LTb\endcsname{\color{black}}%
      \expandafter\def\csname LTa\endcsname{\color{black}}%
      \expandafter\def\csname LT0\endcsname{\color{black}}%
      \expandafter\def\csname LT1\endcsname{\color{black}}%
      \expandafter\def\csname LT2\endcsname{\color{black}}%
      \expandafter\def\csname LT3\endcsname{\color{black}}%
      \expandafter\def\csname LT4\endcsname{\color{black}}%
      \expandafter\def\csname LT5\endcsname{\color{black}}%
      \expandafter\def\csname LT6\endcsname{\color{black}}%
      \expandafter\def\csname LT7\endcsname{\color{black}}%
      \expandafter\def\csname LT8\endcsname{\color{black}}%
    \fi
  \fi
    \setlength{\unitlength}{0.0500bp}%
    \ifx\gptboxheight\undefined%
      \newlength{\gptboxheight}%
      \newlength{\gptboxwidth}%
      \newsavebox{\gptboxtext}%
    \fi%
    \setlength{\fboxrule}{0.5pt}%
    \setlength{\fboxsep}{1pt}%
    \definecolor{tbcol}{rgb}{1,1,1}%
\begin{picture}(3024.00,2014.00)%
    \gplgaddtomacro\gplbacktext{%
      \csname LTb\endcsname%%
      \put(321,302){\makebox(0,0)[r]{\strut{}$0.0$}}%
      \put(321,624){\makebox(0,0)[r]{\strut{}$0.2$}}%
      \put(321,946){\makebox(0,0)[r]{\strut{}$0.4$}}%
      \put(321,1268){\makebox(0,0)[r]{\strut{}$0.6$}}%
      \put(321,1590){\makebox(0,0)[r]{\strut{}$0.8$}}%
      \put(321,1912){\makebox(0,0)[r]{\strut{}$1.0$}}%
      \put(453,82){\makebox(0,0){\strut{}$0.0$}}%
      \put(937,82){\makebox(0,0){\strut{}$0.2$}}%
      \put(1420,82){\makebox(0,0){\strut{}$0.4$}}%
      \put(1904,82){\makebox(0,0){\strut{}$0.6$}}%
      \put(2387,82){\makebox(0,0){\strut{}$0.8$}}%
      \put(2871,82){\makebox(0,0){\strut{}$1.0$}}%
    }%
    \gplgaddtomacro\gplfronttext{%
      \csname LTb\endcsname%%
      \put(-152,1107){\rotatebox{-270}{\makebox(0,0){\strut{}success rate}}}%
      \put(1662,-138){\makebox(0,0){\strut{}probability $p$}}%
      \csname LTb\endcsname%%
      \put(2280,915){\makebox(0,0)[r]{\strut{}\ps ER}}%
      \csname LTb\endcsname%%
      \put(2280,695){\makebox(0,0)[r]{\strut{}\ps CM}}%
      \csname LTb\endcsname%%
      \put(2280,475){\makebox(0,0)[r]{\strut{}\ps RGG}}%
      \csname LTb\endcsname%%
      \put(1662,1912){\makebox(0,0){\strut{}}}%
    }%
    \gplbacktext
    \put(0,0){\includegraphics[width={151.20bp},height={100.70bp}]{plot-plot1}}%
    \gplfronttext
  \end{picture}%
\endgroup


%\vspace{0.5ex}
\caption{Visualization of success rates for Erdős-Rényi graphs (ER), random regular graphs (CM), and random geometric graphs (RGG).}
\label{fig:plot-1}
%\end{minipage}
\end{figure}
\end{minipage}\hfill
\begin{minipage}{0.30\textwidth}
\begin{figure}[H]
\def\ps{} %\def\ps{\small}
%\begin{minipage}{1.8in}

% GNUPLOT: LaTeX picture with Postscript
\begingroup
  \makeatletter
  \providecommand\color[2][]{%
    \GenericError{(gnuplot) \space\space\space\@spaces}{%
      Package color not loaded in conjunction with
      terminal option `colourtext'%
    }{See the gnuplot documentation for explanation.%
    }{Either use 'blacktext' in gnuplot or load the package
      color.sty in LaTeX.}%
    \renewcommand\color[2][]{}%
  }%
  \providecommand\includegraphics[2][]{%
    \GenericError{(gnuplot) \space\space\space\@spaces}{%
      Package graphicx or graphics not loaded%
    }{See the gnuplot documentation for explanation.%
    }{The gnuplot epslatex terminal needs graphicx.sty or graphics.sty.}%
    \renewcommand\includegraphics[2][]{}%
  }%
  \providecommand\rotatebox[2]{#2}%
  \@ifundefined{ifGPcolor}{%
    \newif\ifGPcolor
    \GPcolorfalse
  }{}%
  \@ifundefined{ifGPblacktext}{%
    \newif\ifGPblacktext
    \GPblacktexttrue
  }{}%
  % define a \g@addto@macro without @ in the name:
  \let\gplgaddtomacro\g@addto@macro
  % define empty templates for all commands taking text:
  \gdef\gplbacktext{}%
  \gdef\gplfronttext{}%
  \makeatother
  \ifGPblacktext
    % no textcolor at all
    \def\colorrgb#1{}%
    \def\colorgray#1{}%
  \else
    % gray or color?
    \ifGPcolor
      \def\colorrgb#1{\color[rgb]{#1}}%
      \def\colorgray#1{\color[gray]{#1}}%
      \expandafter\def\csname LTw\endcsname{\color{white}}%
      \expandafter\def\csname LTb\endcsname{\color{black}}%
      \expandafter\def\csname LTa\endcsname{\color{black}}%
      \expandafter\def\csname LT0\endcsname{\color[rgb]{1,0,0}}%
      \expandafter\def\csname LT1\endcsname{\color[rgb]{0,1,0}}%
      \expandafter\def\csname LT2\endcsname{\color[rgb]{0,0,1}}%
      \expandafter\def\csname LT3\endcsname{\color[rgb]{1,0,1}}%
      \expandafter\def\csname LT4\endcsname{\color[rgb]{0,1,1}}%
      \expandafter\def\csname LT5\endcsname{\color[rgb]{1,1,0}}%
      \expandafter\def\csname LT6\endcsname{\color[rgb]{0,0,0}}%
      \expandafter\def\csname LT7\endcsname{\color[rgb]{1,0.3,0}}%
      \expandafter\def\csname LT8\endcsname{\color[rgb]{0.5,0.5,0.5}}%
    \else
      % gray
      \def\colorrgb#1{\color{black}}%
      \def\colorgray#1{\color[gray]{#1}}%
      \expandafter\def\csname LTw\endcsname{\color{white}}%
      \expandafter\def\csname LTb\endcsname{\color{black}}%
      \expandafter\def\csname LTa\endcsname{\color{black}}%
      \expandafter\def\csname LT0\endcsname{\color{black}}%
      \expandafter\def\csname LT1\endcsname{\color{black}}%
      \expandafter\def\csname LT2\endcsname{\color{black}}%
      \expandafter\def\csname LT3\endcsname{\color{black}}%
      \expandafter\def\csname LT4\endcsname{\color{black}}%
      \expandafter\def\csname LT5\endcsname{\color{black}}%
      \expandafter\def\csname LT6\endcsname{\color{black}}%
      \expandafter\def\csname LT7\endcsname{\color{black}}%
      \expandafter\def\csname LT8\endcsname{\color{black}}%
    \fi
  \fi
    \setlength{\unitlength}{0.0500bp}%
    \ifx\gptboxheight\undefined%
      \newlength{\gptboxheight}%
      \newlength{\gptboxwidth}%
      \newsavebox{\gptboxtext}%
    \fi%
    \setlength{\fboxrule}{0.5pt}%
    \setlength{\fboxsep}{1pt}%
    \definecolor{tbcol}{rgb}{1,1,1}%
\begin{picture}(3024.00,2014.00)%
    \gplgaddtomacro\gplbacktext{%
      \csname LTb\endcsname%%
      \put(321,302){\makebox(0,0)[r]{\strut{}$0.0$}}%
      \put(321,624){\makebox(0,0)[r]{\strut{}$0.2$}}%
      \put(321,946){\makebox(0,0)[r]{\strut{}$0.4$}}%
      \put(321,1268){\makebox(0,0)[r]{\strut{}$0.6$}}%
      \put(321,1590){\makebox(0,0)[r]{\strut{}$0.8$}}%
      \put(321,1912){\makebox(0,0)[r]{\strut{}$1.0$}}%
      \put(453,82){\makebox(0,0){\strut{}$0.0$}}%
      \put(937,82){\makebox(0,0){\strut{}$0.2$}}%
      \put(1420,82){\makebox(0,0){\strut{}$0.4$}}%
      \put(1904,82){\makebox(0,0){\strut{}$0.6$}}%
      \put(2387,82){\makebox(0,0){\strut{}$0.8$}}%
      \put(2871,82){\makebox(0,0){\strut{}$1.0$}}%
    }%
    \gplgaddtomacro\gplfronttext{%
      \csname LTb\endcsname%%
      \put(-152,1107){\rotatebox{-270}{\makebox(0,0){\strut{}success rate}}}%
      \put(1662,-138){\makebox(0,0){\strut{}probability $p$}}%
      \csname LTb\endcsname%%
      \put(2280,915){\makebox(0,0)[r]{\strut{}\ps 8 rounds}}%
      \csname LTb\endcsname%%
      \put(2280,695){\makebox(0,0)[r]{\strut{}\ps 16 rounds}}%
      \csname LTb\endcsname%%
      \put(2280,475){\makebox(0,0)[r]{\strut{}\ps 32 rounds}}%
      \csname LTb\endcsname%%
      \put(1662,3824){\makebox(0,0){\strut{}}}%
    }%
    \gplbacktext
    \put(0,0){\includegraphics[width={151.20bp},height={100.70bp}]{plot-plot2}}%
    \gplfronttext
  \end{picture}%
\endgroup


%\vspace{0.5ex}
\caption{Visualization of success rates in a random geometric graph with expected node degree $16$ after $8$, $16$, and $32$ rounds of the ICM.}
\label{fig:plot-2}
%\end{minipage}
\end{figure}
\end{minipage}\hfill
\begin{minipage}{0.30\textwidth}
\begin{figure}[H]
\def\ps{} %\def\ps{\small}
%\begin{minipage}{1.8in}

% GNUPLOT: LaTeX picture with Postscript
\begingroup
  \makeatletter
  \providecommand\color[2][]{%
    \GenericError{(gnuplot) \space\space\space\@spaces}{%
      Package color not loaded in conjunction with
      terminal option `colourtext'%
    }{See the gnuplot documentation for explanation.%
    }{Either use 'blacktext' in gnuplot or load the package
      color.sty in LaTeX.}%
    \renewcommand\color[2][]{}%
  }%
  \providecommand\includegraphics[2][]{%
    \GenericError{(gnuplot) \space\space\space\@spaces}{%
      Package graphicx or graphics not loaded%
    }{See the gnuplot documentation for explanation.%
    }{The gnuplot epslatex terminal needs graphicx.sty or graphics.sty.}%
    \renewcommand\includegraphics[2][]{}%
  }%
  \providecommand\rotatebox[2]{#2}%
  \@ifundefined{ifGPcolor}{%
    \newif\ifGPcolor
    \GPcolorfalse
  }{}%
  \@ifundefined{ifGPblacktext}{%
    \newif\ifGPblacktext
    \GPblacktexttrue
  }{}%
  % define a \g@addto@macro without @ in the name:
  \let\gplgaddtomacro\g@addto@macro
  % define empty templates for all commands taking text:
  \gdef\gplbacktext{}%
  \gdef\gplfronttext{}%
  \makeatother
  \ifGPblacktext
    % no textcolor at all
    \def\colorrgb#1{}%
    \def\colorgray#1{}%
  \else
    % gray or color?
    \ifGPcolor
      \def\colorrgb#1{\color[rgb]{#1}}%
      \def\colorgray#1{\color[gray]{#1}}%
      \expandafter\def\csname LTw\endcsname{\color{white}}%
      \expandafter\def\csname LTb\endcsname{\color{black}}%
      \expandafter\def\csname LTa\endcsname{\color{black}}%
      \expandafter\def\csname LT0\endcsname{\color[rgb]{1,0,0}}%
      \expandafter\def\csname LT1\endcsname{\color[rgb]{0,1,0}}%
      \expandafter\def\csname LT2\endcsname{\color[rgb]{0,0,1}}%
      \expandafter\def\csname LT3\endcsname{\color[rgb]{1,0,1}}%
      \expandafter\def\csname LT4\endcsname{\color[rgb]{0,1,1}}%
      \expandafter\def\csname LT5\endcsname{\color[rgb]{1,1,0}}%
      \expandafter\def\csname LT6\endcsname{\color[rgb]{0,0,0}}%
      \expandafter\def\csname LT7\endcsname{\color[rgb]{1,0.3,0}}%
      \expandafter\def\csname LT8\endcsname{\color[rgb]{0.5,0.5,0.5}}%
    \else
      % gray
      \def\colorrgb#1{\color{black}}%
      \def\colorgray#1{\color[gray]{#1}}%
      \expandafter\def\csname LTw\endcsname{\color{white}}%
      \expandafter\def\csname LTb\endcsname{\color{black}}%
      \expandafter\def\csname LTa\endcsname{\color{black}}%
      \expandafter\def\csname LT0\endcsname{\color{black}}%
      \expandafter\def\csname LT1\endcsname{\color{black}}%
      \expandafter\def\csname LT2\endcsname{\color{black}}%
      \expandafter\def\csname LT3\endcsname{\color{black}}%
      \expandafter\def\csname LT4\endcsname{\color{black}}%
      \expandafter\def\csname LT5\endcsname{\color{black}}%
      \expandafter\def\csname LT6\endcsname{\color{black}}%
      \expandafter\def\csname LT7\endcsname{\color{black}}%
      \expandafter\def\csname LT8\endcsname{\color{black}}%
    \fi
  \fi
    \setlength{\unitlength}{0.0500bp}%
    \ifx\gptboxheight\undefined%
      \newlength{\gptboxheight}%
      \newlength{\gptboxwidth}%
      \newsavebox{\gptboxtext}%
    \fi%
    \setlength{\fboxrule}{0.5pt}%
    \setlength{\fboxsep}{1pt}%
    \definecolor{tbcol}{rgb}{1,1,1}%
\begin{picture}(3024.00,2014.00)%
    \gplgaddtomacro\gplbacktext{%
      \csname LTb\endcsname%%
      \put(453,82){\makebox(0,0){\strut{}$0$}}%
      \put(695,82){\makebox(0,0){\strut{}$1$}}%
      \put(937,82){\makebox(0,0){\strut{}$2$}}%
      \put(1178,82){\makebox(0,0){\strut{}$3$}}%
      \put(1420,82){\makebox(0,0){\strut{}$4$}}%
      \put(1662,82){\makebox(0,0){\strut{}$5$}}%
      \put(1904,82){\makebox(0,0){\strut{}$6$}}%
      \put(2146,82){\makebox(0,0){\strut{}$7$}}%
      \put(2387,82){\makebox(0,0){\strut{}$8$}}%
      \put(2629,82){\makebox(0,0){\strut{}$9$}}%
      \put(2871,82){\makebox(0,0){\strut{}$10$}}%
    }%
    \gplgaddtomacro\gplfronttext{%
      \csname LTb\endcsname%%
      \put(244,1107){\rotatebox{-270}{\makebox(0,0){\strut{}fraction of candidates}}}%
      \put(1662,-138){\makebox(0,0){\strut{}distance to $\pz$}}%
      \csname LTb\endcsname%%
      \put(1113,1739){\makebox(0,0)[r]{\strut{}0.45}}%
      \csname LTb\endcsname%%
      \put(1113,1519){\makebox(0,0)[r]{\strut{}0.50}}%
      \csname LTb\endcsname%%
      \put(1113,1299){\makebox(0,0)[r]{\strut{}0.55}}%
      \csname LTb\endcsname%%
      \put(1662,5736){\makebox(0,0){\strut{}}}%
    }%
    \gplbacktext
    \put(0,0){\includegraphics[width={151.20bp},height={100.70bp}]{plot-plot3}}%
    \gplfronttext
  \end{picture}%
\endgroup


%\vspace{0.5ex}
%\begin{minipage}{\columnwidth-1.8in}
\caption{Histogram of the distribution of the distances of the candidates returned by our heuristic to $\pz$ for $p = 0.45, 0.5, 0.55$.}
\label{fig:plot3}
%\end{minipage}
\end{figure}
\end{minipage}
\end{figure*}









\section{Analysis}
In this section we formally prove our main results, \cref{thm_lca_dreg,thm_lca_gw}. The proof of \cref{thm_mle_general} is given in the supplementary material.
The main proof strategy of \cref{thm_lca_dreg,thm_lca_gw} is to interpret the transmission process as a special type of percolation on the underlying network. As in \cref{thm_lca_gw} the underlying network itself is random, it turns out to be technically non-trivial to pin down the exact distributions involved in this process due to subtle rare events that might yield either very small or large node degrees. The Poisson thinning technique allows us to carry out the calculations smoothly.


\subsection{Proof of Theorem \ref{thm_lca_dreg}}
A crucial observation in the proof of \cref{thm_lca_dreg} is the following. 
If node $v$ gets activated during the spreading process by node $w$, it has $d-1$ additional neighbors $v_1, \ldots, v_{d-1}$ except $w$ which we call \emph{children} of $v$. 
Any of those children gets activated with probability $p$ independently from everything else in the next step. 
Suppose without loss of generality that $v_1, \ldots, v_{\vec d_0}$ are the activated children where $\vec d_0 \sim \Bin( d-1, p )$. 
In each of those children $v_i$, a new and independent rumor spreading process starts in the tree rooted at $v_i$ and directed away from $v$. 
As this tree is, itself, $d$-regular, this process has the same distribution as $\vec d_0$ independent Galton-Watson processes with offspring distribution $\Bin \bc{ d-1, p }$. 
Depending on $(d-1) p$, few, some or many of those processes will die out eventually.




To prove our result, we need some additional notation, see \cref{fig:Notation_Tree}. 
Given a node $v$, we can direct the tree away from $v$ and denote the set of subtrees rooted at $v$'s children by $\cT^v$. 
Most interesting to our proof are the subtrees that contain active nodes. 
We denote them by $\cT_{\activenodes }^{v}$ and denote 
\[Y_v = Y_v( \activenodes ) = \abs{\cT_{\activenodes }^{v}}.\] 
Note that all candidates but one have at most one subtree containing active nodes. 
Only the closest candidate can have more than one.
Finally, let $ \distactive{v}$ denote the distance from $v$ to any of the vertices in $\activenodes$.


\begin{proposition}\label{prop_dreg}
If $\rho_t$ is the probability that at time $t$ there are no more active vertices under the spreading model on an infinite $d$-regular tree with infection probability $p$, we get
\[\rho_t = (1-p+p \rho_{t-1})^{d-1}\]
and the ultimate extinction probability of the spreading process is the smallest fixed-point of $x \mapsto (1 - p + p x)^{d-1}$.
Furthermore for $v \in \cC$
{\dense
\begin{align*}
    & \Pr \bc{ Y_v \bc{ \activenodes } = k \mid \pz = v} = \\
    &\begin{cases}
        \sum\limits_{d_0 = k}^d \Pr ( \Bin(d,p) = \vec d_0) \binom{\vec d_0}{k} \rho_{\distactive{v}}^{\vec d_0 - k}(1-\rho_{\distactive{v}})^k  &\!\!\!\!\!\!\text{if }v = \lca\\ 
        \sum\limits_{d_0 = 1}^d \Pr ( \Bin(d,p) = \vec d_0) \binom{\vec d_0}{1}\rho_{\distactive{v}}^{\vec d_0 -1}(1-\rho_{\distactive{v}}) & \!\!\!\!\!\!\text{if }v \not = \lca. \\
    \end{cases}
\end{align*}
}
\end{proposition}
\begin{proof}
The first part, namely that $\rho_t = (1-p+p \rho_{t-1})^{d-1}$, is an application of probability generating functions and their connection to branching processes. The probability generating function $f_{\Bin(n,p)}$ of a Binomial distribution with parameters $n$ and $p$ reads
\[ f_{\Bin(n,p)}(s) = \Erw \brk{ s^{ \Bin(n,p) } } = \bc{ 1 - p + ps }^n. \]
Now, let $p_k = \Pr \bc{ \Bin(d-1, p) = k}$. It is immediate that
\begin{align*}
    \rho_{t} = p_0 + p_1 \rho_{t-1} + \ldots + p_{d-1} \rho_{t-1}^{d-1} = \sum_{k = 0}^{d-1} p_k \rho_{t-1}^k 
\end{align*}
which is exactly the probability generating function of the Binomial distribution. A detailed explanation and a formal proof of this statement can be, for instance, found in \citep{branching_2020}. 

Let us now suppose that $v = \lca$. Let $\cV_0$ be the event that $v$ has exactly $\vec d_0$ children that get activated by $v$, where $k \leq \vec d_0 \leq d$. 
Clearly, $\Pr \bc{ \cV_0 } = \Pr \bc{ \Bin \bc{ d, p } = \vec d_0 }$ and of course, $\vec d_0$ needs to be at least $k$, as otherwise the probability of having $k$ active sub-trees would be zero. 
Given $\cV_0$, we find that the probability of observing exactly $k$ active sub-trees is the probability that exactly $k$ out of $\vec d_0$ independent Galton-Watson processes with offspring distribution $\Bin(d-1, p)$ survived the first $\distactive{v}$ steps. 
Therefore, the number of active sub-trees at time $t$ is distributed as $ \Bin \bc{ \vec d_0, 1 - \rho_{\distactive{v}} } $ given $\cV_0$ and the first part of the formula follows.

If, on the contrary, $v$ is not the closest candidate but a node that has a different distance from $\activenodes$, we observe that out of the original $1 \leq \vec d_0 \leq d$ Galton-Watson processes originating in the children of $v$, exactly one process needs to survive and $\vec d_0 - 1$ need to become extinct by time $\distactive{v}$. The proposition follows. %TODO: wording.
\end{proof}

\begin{figure}[t]
    \centering
%    \begin{minipage}{0.7 \textwidth}
            \tikzstyle{smallNode} = [shape=circle,minimum size = 4pt, draw=black, inner sep =0, fill = \colOrange]
            \tikzstyle{smallNodeNotInf} = [shape=circle,minimum size = 3.5pt, draw=black, inner sep =0]
            \newcommand{\tri}[4]{ \draw[rounded corners, draw=black,#4] #1--#2--#3 --#1; } 
            \newcommand{\vDiff}{0.8}
            \newcommand{\hDiff}{0.9}
            \newcommand{\triWidth}{0.8}
            \newcommand{\triHeight}{1.5}
            \newcommand{\padding}{0.2}
            \newcommand{\smallwidth}{0.3}
            % \tikzstyle{every node}=[shape =circle, draw=black]
            
            
            
            
            % \newcommand{\lca}{\omega_{\mathrm{LCA}}}
            % \newcommand{\pz}{\vec{\omega}}
            % \newcommand{\activenodes}{\vec{X}^\star}
            % \newcommand{\candidates}{\mathcal{C}}
            
            \tikzset{snake it/.style={decorate, decoration={snake,amplitude=.3mm, segment length=3mm}}}
            
            % \resizebox{\textwidth}{!}{
            \begin{tikzpicture}[decoration=brace,
              grow cyclic,
              shape = circle,
              level distance=2em,
              scale = 0.9]
              
            %   \input{circleTree}
              
                \node[node,dottetReplace, inner sep =1] (root) at (0,0) {\tiny$\lca$};
                
                \node[node] (c1) at (-2.2,-0.3) {};
                \node[node] (c11) at ($(c1)+(-\hDiff,-\vDiff)$) {};
                % \node[node] (c12) at ($(c1)+(0,-\vDiff)$) {};
                \node[node] (c13) at ($(c1)+(\hDiff,-\vDiff)$) {};
                \draw(c1) edge (c11);
                % \draw(c1) edge (c12);
                \draw(c1) edge (c13);
                \tri{(c11)}{+(-\triWidth,-\triHeight)}{+(\triWidth,-\triHeight)}{}
                % \tri{(c12)}{+(-\triWidth,-\triHeight)}{+(\triWidth,-\triHeight)}
                \tri{(c13)}{+(-\triWidth,-\triHeight)}{+(\triWidth,-\triHeight)}{}
                 \draw[\colGreen, rounded corners] ($(c1.north)+(-\hDiff,0)+(-\triWidth,0)+(-\padding,\padding)$) rectangle ($(c1)+(\hDiff,-\vDiff)+(\triWidth,-\triHeight)+(\padding,-\padding)$);% node[below right = 0.4ex and 0]{$T_{\activenodes}^{\lca}$};
                 
                \node[smallNode] (s12) at ($(c11) + (-\triWidth,-\triHeight) + (\smallwidth,0.1) $) {};
                \node[smallNodeNotInf] (s13) at ($(c11) + (-\triWidth,-\triHeight) + (2*\smallwidth,0.1) $) {};
                \node[smallNodeNotInf] (s14) at ($(c11) + (-\triWidth,-\triHeight) + (3*\smallwidth,0.1) $) {};
                \node[smallNode] (s15) at ($(c11) + (-\triWidth,-\triHeight) + (4*\smallwidth,0.1) $) {};
                \draw[snake it] (c11) -- (s12);
                \draw[snake it] (c11) -- (s13);
                \draw[snake it] (c11) -- (s14);
                \draw[snake it] (c11) -- (s15);
                
                \node[smallNode] (s22) at ($(c13) + (-\triWidth,-\triHeight) + (\smallwidth,0.1) $) {};
                \node[smallNodeNotInf] (s23) at ($(c13) + (-\triWidth,-\triHeight) + (2*\smallwidth,0.1) $) {};
                \node[smallNodeNotInf] (s24) at ($(c13) + (-\triWidth,-\triHeight) + (3*\smallwidth,0.1) $) {};
                \node[smallNodeNotInf] (s25) at ($(c13) + (-\triWidth,-\triHeight) + (4*\smallwidth,0.1) $) {};
                \draw[snake it] (c13) -- (s22);
                \draw[snake it] (c13) -- (s25);
                \draw[snake it] (c13) -- (s23);
                \draw[snake it] (c13) -- (s24);
                
                \node[node] (c2) at (2.2,-0.3) {};
                \node[node] (c21) at ($(c2)+(-\hDiff,-\vDiff)$) {};
                % \node[node] (c22) at ($(c2)+(0,-\vDiff)$) {};
                \node[node] (c23) at ($(c2)+(\hDiff,-\vDiff)$) {};
                \draw(c2) edge (c21);
                % \draw(c2) edge (c22);
                \draw(c2) edge (c23);
                \tri{(c21)}{+(-\triWidth,-\triHeight)}{+(\triWidth,-\triHeight)}{}
                % \tri{(c22)}{+(-\triWidth,-\triHeight)}{+(\triWidth,-\triHeight)}
                \tri{(c23)}{+(-\triWidth,-\triHeight)}{+(\triWidth,-\triHeight)}{}{}
                 \draw[\colGreen, rounded corners] ($(c2.north)+(-\hDiff,0)+(-\triWidth,0)+(-\padding,\padding)$) rectangle ($(c2)+(\hDiff,-\vDiff)+(\triWidth,-\triHeight)+(\padding,-\padding)$);
                 \draw[\colBlue, rounded corners] ($(c1.north)+(-\hDiff,0)+(-\triWidth,0)+(-\padding,0)+(-\padding,0)+(0,0.6)$) rectangle ($(c2)+(\hDiff,-\vDiff)+(\triWidth,-\triHeight)+(2*\padding,-2*\padding)$);%node[below left = 0.4ex and 2*\triWidth+2*\hDiff]{$T^{v}_{\activenodes}$};
                 
                
                \node[smallNode] (s32) at ($(c21) + (-\triWidth,-\triHeight) + (\smallwidth,0.1) $) {};
                \node[smallNodeNotInf] (s33)at ($(c21) + (-\triWidth,-\triHeight) + (2*\smallwidth,0.1) $) {};
                \node[smallNodeNotInf] (s34)at ($(c21) + (-\triWidth,-\triHeight) + (3*\smallwidth,0.1) $) {};
                \node[smallNodeNotInf] (s35)at ($(c21) + (-\triWidth,-\triHeight) + (4*\smallwidth,0.1) $) {};
                \draw[snake it] (c21) -- (s32);
                \draw[snake it] (c21) -- (s33);
                \draw[snake it] (c21) -- (s34);
                \draw[snake it] (c21) -- (s35);
                 
                 
                \node[smallNodeNotInf] (s42) at ($(c23) + (-\triWidth,-\triHeight) + (\smallwidth,0.1) $) {};
                \node[smallNodeNotInf] (s43)at ($(c23) + (-\triWidth,-\triHeight) + (2*\smallwidth,0.1) $) {};
                \node[smallNodeNotInf] (s44)at ($(c23) + (-\triWidth,-\triHeight) + (3*\smallwidth,0.1) $) {};
                \node[smallNodeNotInf] (s45)at ($(c23) + (-\triWidth,-\triHeight) + (4*\smallwidth,0.1) $) {};
                \draw[snake it] (c23) -- (s42);
                \draw[snake it] (c23) -- (s43);
                \draw[snake it] (c23) -- (s44);
                \draw[snake it] (c23) -- (s45);
                 
                
                \node[node,dottetReplace] (c3) at ($(root)+(0,\vDiff)+(0,0.2)$) {$\scriptsize v$};
                \node[node,dottetReplace] (c31) at ($(c3)+(-\hDiff,0.4*\vDiff)$) {};
                % \node[node] (c32) at ($(c3)+(0,-\vDiff)$) {};
                \node[node,dottetReplace] (c33) at ($(c3)+(\hDiff,0.4*\vDiff)$) {};
                \draw(c3) edge (c31);
                % \draw(c3) edge (c32);
                \draw(c3) edge (c33);
                \tri{(c31)}{+(-\triWidth,\triHeight)}{+(\triWidth,\triHeight)}{dottetReplace}
                % \tri{(c32)}{+(-\triWidth,-\triHeight)}{+(\triWidth,-\triHeight)}
                \tri{(c33)}{+(-\triWidth,\triHeight)}{+(\triWidth,\triHeight)}{dottetReplace}
                \node[node,dottetReplace] (c31) at (c31) {};
                % \node[node] (c32) at ($(c3)+(0,-\vDiff)$) {};
                \node[node,dottetReplace] (c33) at (c33) {};
                
                
                
                \node[smallNodeNotInf] (s52) at ($(c33) + (-\triWidth,\triHeight) + (\smallwidth,-0.1) $) {};
                \node[smallNodeNotInf] (s53)at ($(c33) + (-\triWidth,\triHeight) + (2*\smallwidth,-0.1) $) {};
                \node[smallNodeNotInf] (s54)at ($(c33) + (-\triWidth,\triHeight) + (3*\smallwidth,-0.1) $) {};
                \node[smallNodeNotInf] (s55)at ($(c33) + (-\triWidth,\triHeight) + (4*\smallwidth,-0.1) $) {};
                \draw[snake it] (c33) -- (s52);
                \draw[snake it] (c33) -- (s53);
                \draw[snake it] (c33) -- (s54);
                \draw[snake it] (c33) -- (s55);
                \node[smallNodeNotInf] (s62) at ($(c31) + (-\triWidth,\triHeight) + (\smallwidth,-0.1) $) {};
                \node[smallNodeNotInf] (s63)at ($(c31) + (-\triWidth,\triHeight) + (2*\smallwidth,-0.1) $) {};
                \node[smallNodeNotInf] (s64)at ($(c31) + (-\triWidth,\triHeight) + (3*\smallwidth,-0.1) $) {};
                \node[smallNodeNotInf] (s65)at ($(c31) + (-\triWidth,\triHeight) + (4*\smallwidth,-0.1) $) {};
                \draw[snake it] (c31) -- (s62);
                \draw[snake it] (c31) -- (s63);
                \draw[snake it] (c31) -- (s64);
                \draw[snake it] (c31) -- (s65);
                % \draw[rounded corners, \colLil]($(c33) +(\triWidth,-\triHeight)+ (\padding,-\padding) $)  -- ($(c31) +(-\triWidth,-\triHeight)+ (-\padding,-\padding)$ ) -- ($(c31) +(-\triWidth,0)+ (\padding,)$ ) -- ($ (root) +(-3*\padding, -3*\padding )$)-- ($ (root) +(-3*\padding, 3*\padding )$)-- ($ (root) +(3, 3*\padding )$) -- node[above right= 0.4ex]{$\mathcal{P}$}($(c3.north east) + (\padding,\padding)  +(0,0.6)$) -- ($(c33.east)+(\padding,0)$) -- cycle;
                
                 \draw[-|, dashed, rounded corners] ($(root.east)+(\padding, 0)$) -- node[above right =-0.9ex and 0.1ex]{$t^*_v$} ($(c3.north east)+(\padding, 0)+(0.0,0)  $);
                %  \draw[dashed] ($(root.north east)+(\padding, 0)$) --  ; 
                 \draw[|-|] ($(root.east)+(0,0)+(0.2, 0 )$) -- node[above right=0.8ex and 0.1ex] {$t^*_{\lca}$} ($(root.east)+ (0,-0.3)+(0,0-\triHeight-\vDiff)+(0.2, 0 )$) ;
                
                % -- ($(c33) + (\padding, 0) $)
                
                
                \draw (root) edge (c1);
                \draw (root) edge (c2);
                \draw (root) edge (c3);
                
                
                \node[] (legend) at ($(c11) + (0, -\triHeight) + (-0.5, -1)+ (-\betweenDist/2,0)$) {};
                \draw[] ($(legend) + (-\padding, \padding)$) rectangle ($(legend) + 3*(\padding, -\padding)+ (7, 0)$) ; 
                \node[smallNode, label={[label distance=4]0:$\activenodes$}] (activeNodes) at ($(legend) + (\padding, -\padding)$) {} ;
                \node[node, dottetReplace, label={[label distance=0.1]0:$\candidates$}] (candidates) at ($(legend) + (\padding, -\padding)+ (2, 0)$) {};
                \node[shape = rectangle, draw = \colBlue, label={[label distance=0.1]0:$\cT^{v}_{\activenodes}$}] (blue) at ($(legend) + (\padding, -\padding)+ 2*(2, 0)$) {};
                \node[shape = rectangle, draw = \colGreen, label={[label distance=0.1]0:$\cT_{\activenodes}^{\lca}$}] (blue) at ($(legend) + (\padding, -\padding)+ 3*(2, 0)$) {};
                  
                % \node[] (legend) at ($(c3) + (4*\triWidth, \triHeight) + (\hDiff, 0.5)+(0.4,-0.5)$) {};
                % \draw[] ($(legend) + (-2*\padding, \padding)$) rectangle ($(legend) + 3*(\padding, -\padding)+ 4*(0.25, -1.2)$) ; 
                % \node[smallNode, label={[label distance=4]0:$\activenodes$}] (activeNodes) at ($(legend) + (\padding, -2*\padding)$) {} ;
                % \node[node, dottetReplace, label={[label distance=0.1]0:$\candidates$}] (candidates) at ($(legend) + (\padding, -2*\padding)+ (0, -1.5)$) {};
                % \node[shape = rectangle, draw = \colBlue, label={[label distance=0.1]0:$\cT^{v}_{\activenodes}$}] (blue) at ($(legend) + (\padding, -2*\padding)+ 2*(0, -1.5)$) {};
                % \node[shape = rectangle, draw = \colGreen, label={[label distance=0.1]0:$\cT_{\activenodes}^{\lca}$}] (blue) at ($(legend) + (\padding, -2*\padding)+ 3*(0, -1.5)$) {};
            \end{tikzpicture}
% }


% Legenden nach unten in einer Zeile (T_X*), Kasten kleiner
% todo (max): \distactive{v} instead of t_{v}^*

%    \end{minipage}\begin{minipage}{0.3 \textwidth}
    \caption{
    Here, $\cc$ spawned three sub-trees out of which two contain active elements of $\activenodes$ (orange) and one does not contain active elements (purple). Thus, $\candidates$ consists of all vertices in the purple sub-tree rooted at $\cc$.
    }
%    \end{minipage}
    \label{fig:Notation_Tree}
\end{figure}

\begin{proof}[Proof of \cref{thm_lca_dreg} (i)]
To prove the first part of \cref{thm_lca_dreg}, it suffices to apply the first part of \cref{prop_dreg}. Indeed, if $(d-1)p \leq 1$, we find that the smallest fixed-point of $x \mapsto (1 - p + p x)^{d-1}$ is $x = 1$. Therefore, $\rho_t = 1 - o_t(1)$. Furthermore, as $p$ is a constant, we have that in this case $d = \Theta(1)$ as well. Therefore, a union bound over the at most $d$ possible independent Galton-Watson processes with offspring distribution $\Bin(d-1, p)$ originated in the children of $\pz$, yields that with probability $1 - o_t(1)$, we find $\activenodes = \emptyset$. In this case, detection clearly fails with high probability.
\end{proof}



\begin{proof}[Proof of \cref{thm_lca_dreg} (ii)]
We start with the part of the theorem that claims that weak detection succeeds by the source detection heuristic, namely that $\Pr \bc{\lca = \pz} = \Omega(1)$. We find that $\lca = \pz$ with probability one if the set of possible candidate nodes $\cC$ has size 1. Therefore, it suffices to prove that $\Pr \bc{ \abs{ \cC } = 1 } = \Omega(1)$. This is the case if (and only if), the rumor's source $\pz$ propagated the rumor to all of its $d$ children and all $d$ independent Galton-Watson processes with offspring distribution $\Bin(d-1, p)$ originated in the children of $\pz$ did not become extinct. Let $\vec d_0$ denote the number of children of $\pz$ that get activated. Clearly,
\begin{align} \label{eq_dreg_ii_i}
    \Pr \bc{ \vec d_0 = d } = \Pr \bc{ \Bin(d, p) = d } = p^d = \Omega(1)
\end{align}
as, by assumption, $p$ and $d$ are constants.
Furthermore, since $1 < (d-1)p = \Theta(1)$ holds, the smallest fixed point of $x \mapsto (1 - p + p x)^{d-1}$ is a real number between zero and one. Therefore, by \cref{prop_dreg}, there are constants $0 < \gamma_1 \leq \gamma_2 < 1$ such that
\begin{align} \label{eq_dreg_ii_ii}
    \gamma_1 - o_t(1) < \rho_t < \gamma_2 + o_t(1). 
\end{align}
Thus, it follows that the source detection heuristic succeeds at weak detection if $1 < (d-1)p = \Theta(1)$ with probability $1 - o_t(1)$ by \eqref{eq_dreg_ii_i} -- \eqref{eq_dreg_ii_ii}.

It is left to prove that under the same assumptions we have
\begin{align} \label{eq_dreg_ii_iii}
\Pr \bc{ \dist \bc{ \lca, \pz } \geq k  } \leq \exp \bc{ - \Omega(k) }.
\end{align}
Suppose that $ \abs{\distactive{\pz} - \distactive{\lca} } = \dist \bc{ \lca, \pz } = k > 3$. Therefore, there is a unique path $P_{\lca, \pz}$ in $G$ that connects $\lca$ and $\pz$ with $k-2$ internal vertices. All of those internal vertices will get activated at most once during the spreading process. Therefore, for any of those $k-2$ steps, the process needs to either activate only exactly one child or, it activates more than one child, but the remaining processes have died out at the observation time. The probability that a node spawns exactly one active child is given by
\begin{align} \label{eq_dreg_onechild}
    \Pr \bc{ \Bin( d, p ) = 1 } = d p (1-p)^{d-1}
\end{align}
which is bounded away from zero and one if $d, p = \Theta(1)$.
By \eqref{eq_dreg_ii_i} -- \eqref{eq_dreg_ii_ii} as well as \eqref{eq_dreg_onechild}, we find that there is a sequence of constants $\cbc{\gamma_i}_{i=1 \ldots k-2}$ dependent only on $p, d,$ and $k$ all of which  are uniformly bounded away from zero and one. Therefore,  
\[\Pr \bc{ \dist \bc{ \lca, \pz } \geq k  } \leq \max_{i=1 \ldots k-2} \gamma_i^k = \exp \bc{ - \Omega(k) } \]
which implies \eqref{eq_dreg_ii_iii}.
\end{proof}

The last part of \cref{thm_lca_dreg} states that the source detection heuristic succeeds at strong detection with high probability if $(d-1)p \in \omega(1)$. We start with the following simple observation which is an immediate consequence of Chernoff bounds applied to the $\Bin \bc{(d-1), p}$ distribution.
\begin{observation} \label{lem_dreg_iii_i}
If $(d-1)p \in \omega(1)$, we find that if node $v$ gets activated, the number of activated children $\omega_v$ satisfies $\omega_v \geq {(d-1)p}/{2} = \omega(1)$ with high probability. 
\end{observation}
Next, we show that out of $\vec d_0 = \omega(1)$ independent Galton-Watson processes with offspring distribution $\Bin \bc{(d-1), p}$ at least a $(1 - \eps)$-fraction will not become eventually extinct with high probability for any $\eps > 0$.
\begin{lemma} \label{lem_dreg_iii_ii}
Suppose that $\ell$ Galton-Watson processes with offspring distribution $\Bin \bc{(d-1), p}$ start independently under the condition that $(d-1)p \in \omega(1)$. Let $\vec Y$ denote the number of processes that do not ultimately become extinct. Then, $\Pr \bc{ \vec Y \geq (1 - o(1)) \ell} \geq 1 - o(1)$.
\end{lemma}
\begin{proof}
By \cref{prop_dreg}, we have that the probability that one of the processes becomes extinct is $p_e = o(1)$. Thus, the number of not-extinct processes is Binomially distributed with parameters $\ell$ and $1 - p_e$. Therefore, the lemma follows from Chernoff bounds.
\end{proof}

\begin{proof}[Proof of \cref{thm_lca_dreg} (iii)]
By \cref{lem_dreg_iii_i,lem_dreg_iii_ii} we directly get that, with high probability, all but $o(dp)$ of the processes started in the children of $\pz$ are still active at the observation time.

Suppose that $\pz \neq \lca$ and $\dist(\pz, \lca) = k \geq 1$. This implies that either $k$ times only exactly one child is activated or, given that multiple children are activated, only exactly one of those spreading processes survived eventually. For a specific step $1 \leq i \leq k-1$, the probability that this occurs is by \cref{lem_dreg_iii_i,lem_dreg_iii_ii} at most $\gamma_i = o(1)$. Therefore,
$\Pr \bc{ \lca \neq \pz \mid \activenodes} = o(1)$
which implies the third part of \cref{thm_lca_dreg}.
\end{proof}

\subsection{Proof of Theorem \ref{thm_lca_gw}}
The main proof strategy is similar to the proof of \cref{thm_lca_dreg}. However, one fundamental difference makes the analysis more involved: In the $d$-regular case an activated node spawns a random number of independent Galton-Watson processes with offspring distribution $\Bin(d-1, p)$. This is not the case in the setting of \cref{thm_lca_gw}. Here, the underlying network itself is a Galton-Watson process with offspring distribution $\Po(\lambda)$. Fortunately, we can apply the \emph{Poisson thinning principle} \citep{Kingman1992}.

\begin{observation}
Let $\vec X \sim \Po(\lambda)$ and furthermore, given $\vec X$, let $\vec Y \sim \Bin \bc{ \vec X, p }$. Then $\vec Y \sim \Po( \lambda p )$.
\end{observation}

This Poisson thinning principle implies that in distribution we can analyze the following spreading process: Once $v$ gets activated, it spawns $\vec d_0 \sim \Po(\lambda p)$ active children and thus $\vec d_0$ independent Galton-Watson processes with offspring distribution $\Po(\lambda p)$.
%Once $v$ gets activated, it spawns $\vec d_0 \sim \Po(\lambda p)$ active children and thus $\vec d_0$ independent Galton-Watson processes with offspring distribution $\Po(\lambda p)$. 
The following proposition characterizes the extinction probability of such processes; the formal proof can be found in the supplementary materials.

\begin{proposition}\label{prop_gwa}
If $\bar{x}_t$ is the probability that at time $t$ there are no more active vertices under the spreading model on a super-critical Galton-Watson tree with offspring distribution $\Po (\lambda p)$, we find
$\bar{x}_t = \exp \bc{- \lambda p (1 - \bar{x}_{t-1})}$
and the ultimate extinction probability of the spreading process is the smallest fixed-point of $\bar{x} \mapsto \exp \bc{- \lambda p (1 - \bar{x})}$.
Furthermore for $v \in \cC$
{\dense%
\begin{align*}
    &\Pr \bc{ Y_v \bc{ \activenodes } = k \mid \pz = v} = \\
    &\begin{cases}
        \sum\limits_{d_0 = k}^d \Pr ( \Po( \lambda p ) = \vec d_0) \binom{\vec d_0}{k} \rho_{\distactive{v}}^{\vec d_0 - k}(1-\rho_{\distactive{v}})^k & \!\!\!\!\!\!\text{if }v = \lca\\ 
        \sum\limits_{d_0 = 1}^d \Pr ( \Po( \lambda p ) = \vec d_0) \binom{\vec d_0}{1}\rho_{\distactive{v}}^{\vec d_0 -1}(1-\rho_{\distactive{v}}) & \!\!\!\!\!\!\text{if }v \not = \lca. \\
    \end{cases}
\end{align*}}
\end{proposition}

\begin{proof}[Proof of \cref{thm_lca_gw} (i)]
As in the $d$-regular case, the first part of \cref{thm_lca_gw} follows by the first part of \cref{prop_gwa} and a large deviation bound. Details can be found in the supplementary material.
\end{proof}



\begin{proof}[Proof of \cref{thm_lca_gw} (ii)]
Again, as in the $d$-regular case, we start proving the weak detection property of the source detection heuristic. Thus, we aim to prove $\Pr \bc{ \abs{ \cC } = 1 } = \Omega(1)$. 

This is the case if (and only if) $\pz$ propagated the rumor to all of its $\vec d_{\pz} \sim \Po \bc{ \lambda }$ children and all $\vec d_0$ independent Galton-Watson processes with offspring distribution $\Po \bc{\lambda p}$ rooted at the children of $\pz$ did not die out eventually. Let $\vec d_0$ denote the number of children of $\pz$ that get activated. We first need to calculate the probability that $\vec d_0 = \vec d_{\pz}$. To this end, let
\begin{align*}
    I_0(x) = \sum_{k=0}^\infty \frac{1}{k! \Gamma(k + 1)} \bc{\frac{x}{2}}^{2k} = \frac{\exp \bc{x}}{\sqrt{2 \pi x}} \bc{ 1 + O \bc{ \frac{1}{x}}\!} 
\end{align*}
denote the modified Bessel function of order zero \citep{besselfunctions}.

We have
\begin{align*} 
    \Pr \bc{ \vec d_0 = \vec d_{\pz} \mid \vec d_{\pz}} = \frac{ (\lambda p)^{\vec d_{\pz}} \exp \bc{- \lambda p}}{\vec d_{\pz}!}. 
\end{align*}
Therefore, by the law of total probability,
{\dense %footnotesize  
\allowdisplaybreaks
\begin{align} \label{eq_gw_ii_i} 
    \Pr & \bc{ \text{all children of $\pz$ get activated} } \\
    & = \sum_{k = 1}^\infty \Pr \bc{\vec d_{\pz} = k } \frac{ (\lambda p)^{k} \exp \bc{- \lambda p}}{k!} \notag \\ 
    &= \sum_{k = 1}^\infty \frac{\lambda^k \exp \bc{ - \lambda}}{k!} \frac{ (\lambda p)^{k} \exp \bc{- \lambda p}}{k!} \notag \\
    & = \exp \bc{ - \lambda (1 + p) } \sum_{k = 1}^\infty \frac{ \bc{\lambda^{2} p}^k }{ (k!)^2} \notag \\
    & = \exp \bc{ - \lambda (1 + p) } \bc{ I_0( 2 \lambda \sqrt{p} ) - 1 } \notag \\
    & = \exp \bc{ - \lambda (1 + p) } \bc{ \frac{\exp \bc{ 2 \lambda \sqrt{p} }}{ \sqrt{4 \pi \lambda \sqrt{p}} } - 1 + O \bc{ \bc{\lambda \sqrt{p}}^{-1} } } \notag \\
    & = \frac{\exp \bc{ - \lambda (1 - \sqrt{p})^2} }{\sqrt{4 \pi \lambda \sqrt{p}}} - \exp \bc{ - \lambda (1 + p) } \notag \\ & \phantom{{}={}} + O \bc{ \exp \bc{ - \lambda (1 + p) } \bc{\lambda \sqrt{p}}^{-3/2} }. \notag
\end{align}
}
If $\lambda$ and $p$ are constants, it is immediate from \eqref{eq_gw_ii_i} that there is a constant $\gamma > 0$ such that
\[
    \Pr \bc{ \text{all children of $\pz$ get activated} } > \gamma.
\]
Finally, since $1 < \lambda p = \Theta(1)$ by assumption, the smallest fixed point of $\bar{x} \mapsto \exp \bc{ - \lambda p (1 - \bar{x}) }$ is a real number between zero and one. Therefore, by \cref{prop_gwa}, there are constants $0 < \gamma_1 \leq \gamma_2 < 1$ such that
\begin{align} \label{eq_gw_ii_ii}
    \gamma_1 - o_t(1) < \bar{x}_t < \gamma_2 + o_t(1). 
\end{align}
Therefore, the source detection heuristic succeeds at weak detection if $1 < \lambda p = \Theta(1)$ with probability $1 - o_t(1)$ by \eqref{eq_gw_ii_i} -- \eqref{eq_gw_ii_ii}.

Again, we are left to prove the decay property, namely
\begin{align} \label{eq_gw_ii_iii}
\Pr \bc{ \dist \bc{ \lca, \pz } \geq k  } \leq \exp \bc{ - \Omega(k) }.
\end{align}
Suppose that $ \abs{\distactive{\pz} - \distactive{\lca} } = \dist \bc{ \lca, \pz } = k > 3$. As before we find a unique path $P_{\lca, \pz}$ in $G$ that connects $\lca$ and $\pz$ with $k-2$ internal vertices. Exactly as in the $d$-regular case, these internal vertices will get activated exactly once during the spreading process and so, in every step, the process needs to either activate exactly one child or all but one of the remaining processes ultimately become extinct. The probability that a node spawns exactly one child is given by
\begin{align} \label{eq_gw_onechild}
    \Pr \bc{ \Po( \lambda p ) = 1 } = \lambda p \exp(- \lambda p)
\end{align}
which is a real number bounded away from zero and one if $\lambda p = \Theta(1)$.
By \eqref{eq_gw_ii_i} -- \eqref{eq_gw_ii_ii} and \eqref{eq_gw_onechild}, there is a sequence of constants $\cbc{\gamma_i}_{i=1 \ldots k-2}$ dependent only on $\gamma(p, \lambda, k)$ and uniformly bounded away from zero and one, such that  
\[\Pr \bc{ \dist \bc{ \lca, \pz } \geq k  } \leq \max_{i=1 \ldots k-2} \gamma_i^k = \exp \bc{ - \Omega(k) } \]
and \eqref{eq_gw_ii_iii} follows.
\end{proof}

\noindent We start the proof of \cref{thm_lca_gw} (iii) with the following lemma.

\begin{lemma} \label{lem_gw_iii_i}
If $\lambda p \to \infty$, then, with high probability, an activated node $v$ satisfies the following.
\begin{itemize}
    \item $\deg(v) \geq \frac{\lambda}{2} = \omega(1)$ with high probability,
    \item The number of activated children $\vec \omega_v$ satisfies $\vec \omega_v \geq \frac{\lambda p}{2} = \omega(1)$ with high probability.
\end{itemize}
\end{lemma}
\begin{proof}
This is an immediate consequence of Chernoff bounds applied to the $\Po(\lambda)$ and, respectively, $\Po(\lambda p)$ distribution given that $\lambda p \to \infty$.
\end{proof}
As a next observation, we claim that if $\vec \omega_v$ (as given by \cref{lem_gw_iii_i}) Galton-Watson processes with offspring distribution $\Po(\lambda p)$ are initialized independently, at least a $(1 - \eps)$-fraction will survive with high probability for any $\eps > 0$.
\begin{lemma} \label{lem_gw_iii_ii}
Suppose that $X$ Galton-Watson processes with offspring distribution $\Po(\gamma)$ start independently under the condition that $\gamma \to \infty$. Let $\vec Y$ denote the number of such processes that did not ultimately become extinct. Then, $\Pr \bc{ \vec Y \geq (1 - o(1)) X} \geq 1 - o(1)$.
\end{lemma}
\begin{proof}
By \cref{prop_gwa}, the probability that one specific process out of the $X$ processes becomes extinct is $p_e = o(1)$. Thus, as in the $d$-regular case, we have that the number of not-extinct processes is $\Bin(X, 1-p_e)$-distributed. The lemma follows from Chernoff bounds.
\end{proof}

\begin{proof}[Proof of \cref{thm_lca_gw} (iii)]
As in the $d$-regular case, the previous lemmas imply that with high probability $\omega(1)$ of the processes started in the children of $\pz$ are still active. 

Suppose that $\pz \neq \lca$ and $\dist(\pz, \lca) = k \geq 1$. This implies that either $k$ times only one active child is spawned or, if multiple children are spawned, only exactly one rumor spreading process rooted in those children survives eventually. But by \cref{lem_gw_iii_i,lem_gw_iii_ii}, we find that this probability is $o(1)$ for all $k \geq 1$.
\end{proof}




















\section{Discussion} 

\paragraph{Comparison with the results of \citet{zhuchenying_sir}} \label{sec_discussion_zhu}
{Let us briefly discuss how our results of \cref{thm_lca_dreg} extend and strengthen the results obtained by \citet{zhuchenying_sir}. 
They use a Jordan centrality based estimator and show that if $(d-1)p > 1$, their algorithm outputs a vertex $v$ which is with high probability close to $\pz$. 
Moreover, the probability that their algorithm outputs a vertex at distance $k$ from $\pz$ decreases polynomially in $k$. 
Our \cref{thm_lca_dreg} is based on a different estimator which enables us to refine and extend these results. 
We split the case $(d-1)p > 1$ into two cases.
More precisely, if $(d-1)p \gg 1$ our algorithm returns w.h.p. the correct source. 
If $1 < (d-1)p = O(1)$ the returned node is the correct source with constant probability. 
Furthermore, we show that the probability for the algorithm to output a node at distance $k$ from $\pz$ decreases exponentially fast in $k$ (as opposed to polynomial decrease). 
Finally, in \cref{thm_lca_dreg} (i), we also prove an impossibility result. 
Inference is impossible with high probability if $(d-1)p < 1$.}

\paragraph{Conclusion}
We pin down exact information-theoretic phase-transitions in the source detection task on important tree-network models by proving that as soon as weak detection is possible information-theoretically, the efficient closest candidate heuristic succeeds at this task. Those findings imply, of course, the same result for $\mathbb{G}(n,p)$ and random $d$-regular graphs as long as they are sufficiently sparse and the spreading process ran for only $o(\log(n))$ steps as those random networks are then, locally, given by the described tree-networks with high probability.

Furthermore, we show empirically that the source detection heuristic performs well on non-acyclic networks and seems to be a very decent and efficient estimator of the rumor's source. A natural question is whether it is possible to prove similar information-theoretic bounds for non-acyclic networks. While this seems to be a very challenging task in general, it might become accessible if we restrict ourselves to specific random networks or networks with a specific tree-width. 

Finally, on the empirical side, it is an interesting question whether the rumor's source of the ICM can be learned by graph neural networks. This seems challenging as only few vertices are active and the network would need to learn possible propagation paths in a graph given only a snapshot of the network.



\subsubsection*{Acknowledgements}
% only in final version
Petra Berenbrink, Max Hahn-Klimroth, Lena Krieg, and Malin Rau obtained support from the German Research Council, grant DFG FOR 2975.


\bibliography{berenbrink_676}


% \newpage
% \onecolumn
% \aistatstitle{Instructions for Paper Submissions to AISTATS 2023: \\
% Supplementary Materials}

\end{document}
