# avg_degree_tables_min.py
import math, random, numpy as np, torch
from torch_geometric.datasets import Reddit, Amazon, WikiCS, CoraFull, Coauthor, Planetoid
from torch_geometric.utils import subgraph, to_undirected, degree

# Dataset keys processed by main(); each key must be one load_graph() recognises.
DATASETS = [
    # "reddit",
    "amazon-computers",
    "amazon-photo",
    "wikics",
    "corafull",
    "coauthor-cs",
    "coauthor-physics",
    "planetoid-cora",
    "planetoid-citeseer",
    "planetoid-pubmed",
]

# Root directory handed to every dataset constructor in load_graph().
ROOT = "data"

# Seed applied to torch, numpy and random at the start of main().
SEED = 42

# Fractions of the full node count N at which subgraphs are sampled.
FRACTIONS = [0.01, 0.02, 0.05, 0.10, 0.20, 0.50, 1.00]

# Lower bound applied to every sampled subgraph size.
MIN_N = 100

def load_graph(name):
    """Load a dataset by key and return ``(num_nodes, edge_index)``.

    Args:
        name: Dataset key, matched case-insensitively. Recognised keys are
            ``"reddit"``, ``"amazon-computers"``, ``"amazon-photo"``,
            ``"wikics"``, ``"corafull"``, ``"coauthor-cs"``,
            ``"coauthor-physics"`` and any ``"planetoid-<name>"``.

    Returns:
        A tuple of the graph's node count and its edge index after
        ``to_undirected`` has been applied.

    Raises:
        ValueError: If ``name`` is not a recognised key.
    """
    n = name.lower()
    if n == "reddit":
        d = Reddit(ROOT)[0]
    elif n in {"amazon-computers", "amazon-photo"}:
        d = Amazon(ROOT, name="Computers" if n == "amazon-computers" else "Photo")[0]
    elif n == "wikics":
        d = WikiCS(ROOT)[0]
    elif n == "corafull":
        d = CoraFull(ROOT)[0]
    elif n in {"coauthor-cs", "coauthor-physics"}:
        # Compare the whole key: a substring test for "cs" also matches
        # "coauthor-physics" ("physi-cs") and would load CS for both keys.
        d = Coauthor(ROOT, name="CS" if n == "coauthor-cs" else "Physics")[0]
    elif n.startswith("planetoid-"):
        # "planetoid-cora" -> "Cora", "planetoid-pubmed" -> "Pubmed", ...
        d = Planetoid(ROOT, name=n.split("-")[-1].capitalize())[0]
    else:
        raise ValueError(f"Unknown dataset: {name}")
    # Symmetrise so that every edge is stored in both directions.
    d.edge_index = to_undirected(d.edge_index, num_nodes=d.num_nodes)
    return d.num_nodes, d.edge_index

@torch.no_grad()
def induced(E, nodes, num_nodes=None):
    """Return the relabelled edge index of the subgraph induced by ``nodes``.

    Args:
        E: Edge index of the full graph.
        nodes: Node subset to keep (index tensor, boolean mask, or list).
        num_nodes: Optional node count of the full graph. When omitted and
            ``nodes`` is an index tensor, it is inferred as one more than the
            largest id found in either ``nodes`` or ``E``.

    Returns:
        The edge index restricted to edges whose endpoints are both in
        ``nodes``, with node ids relabelled by ``subgraph``.
    """
    # Without an explicit node count, subgraph() sizes its node mask from the
    # largest id in E alone; a sampled node with a higher id (e.g. an isolated
    # node) would then index past the end of that mask.
    if (
        num_nodes is None
        and torch.is_tensor(nodes)
        and nodes.dtype != torch.bool
        and nodes.numel() > 0
    ):
        num_nodes = int(nodes.max()) + 1
        if E.numel() > 0:
            num_nodes = max(num_nodes, int(E.max()) + 1)
    return subgraph(nodes, E, relabel_nodes=True, num_nodes=num_nodes)[0]
def avg_deg(E, n):
    """Return the mean degree over ``n`` nodes as a Python float.

    Degrees are counted from the first row of the edge index ``E``; nodes
    that never appear there contribute a degree of zero to the mean.
    """
    per_node = degree(E[0], num_nodes=n)
    mean_value = per_node.mean()
    return float(mean_value)
def sizes(N, fractions=None, min_n=None):
    """Return the sorted, de-duplicated subgraph sizes to sample for ``N`` nodes.

    Each positive fraction ``f`` with ``ceil(f * N) <= N`` contributes the size
    ``ceil(f * N)``, raised to at least ``min_n`` but never above ``N``. The
    full size ``N`` is always included.

    Args:
        N: Number of nodes in the full graph.
        fractions: Fractions of ``N`` to sample; defaults to ``FRACTIONS``.
        min_n: Lower bound on each size; defaults to ``MIN_N``.

    Returns:
        An ascending list of distinct sizes, each in ``[1, N]``; an empty list
        when ``N <= 0``.
    """
    if fractions is None:
        fractions = FRACTIONS
    if min_n is None:
        min_n = MIN_N
    if N <= 0:
        return []
    chosen = {N}
    for f in fractions:
        if f <= 0:
            continue
        k = math.ceil(f * N)
        if k > N:
            continue
        # Clamp to N: when N < min_n the floor would otherwise request more
        # nodes than the graph has.
        chosen.add(min(N, max(min_n, k)))
    return sorted(chosen)

def main():
    """Print a markdown table of average degree for random induced subgraphs.

    For every dataset in ``DATASETS`` a single random node permutation is
    drawn; each size from ``sizes(N)`` takes a prefix of that permutation, so
    the samples for one dataset are nested. Each row reports the sample size,
    its fraction of the full graph, the average degree of the induced
    subgraph, and that average divided by ``log n``.
    """
    # Seed all three RNG sources so repeated runs draw the same permutations.
    torch.manual_seed(SEED)
    np.random.seed(SEED)
    random.seed(SEED)

    table = []
    for dataset in DATASETS:
        total, edges = load_graph(dataset)
        order = torch.randperm(total)
        for count in sizes(total):
            sub_edges = induced(edges, order[:count])
            mean_degree = avg_deg(sub_edges, count)
            # log(1) == 0, so fall back to a divisor of 1 for tiny samples.
            divisor = math.log(count) if count > 1 else 1.0
            table.append(
                (
                    dataset,
                    count,
                    round(count / total, 4),
                    round(mean_degree, 3),
                    round(mean_degree / divisor, 3),
                )
            )

    # Order rows by dataset key, then by sample size.
    table.sort(key=lambda row: row[:2])

    # markdown table
    print("# Average degree vs size (aligned by n/N)\n")
    print("| dataset | n | n/N | avg_degree | avg_degree/log n |")
    print("|---|---:|---:|---:|---:|")
    for dataset, count, frac, mean_degree, ratio in table:
        print(f"| {dataset} | {count} | {frac} | {mean_degree} | {ratio} |")

# Run the table generation only when executed as a script, not on import.
if __name__ == "__main__":
    main()