{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "a51f6a44",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/deyu/.local/lib/python3.10/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import time\n",
    "import math\n",
    "import copy\n",
    "import random\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "from torch.utils.data import DataLoader, Dataset, TensorDataset\n",
    "from torch.utils.data.sampler import SubsetRandomSampler\n",
    "import numpy as np\n",
    "import scipy\n",
    "import scipy.stats as st\n",
    "from numpy.linalg import eig, eigh\n",
    "from sklearn.metrics import roc_auc_score, mean_absolute_error, accuracy_score\n",
    "\n",
    "import networkx as nx\n",
    "import torch_geometric\n",
    "from torch_geometric.datasets import Planetoid, Coauthor, Flickr, PPI, GitHub, WikiCS, FacebookPagePage\n",
    "from torch_geometric.utils import to_scipy_sparse_matrix, to_undirected, degree\n",
    "from ogb.nodeproppred.dataset_pyg import PygNodePropPredDataset\n",
    "from ogb.linkproppred.dataset_pyg import PygLinkPropPredDataset\n",
    "from torch_geometric.utils import get_laplacian\n",
    "from torch_geometric.transforms import ToUndirected\n",
    "\n",
    "import time\n",
    "from scipy.io import loadmat\n",
    "from collections import Counter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "88372be9",
   "metadata": {},
   "outputs": [],
   "source": [
    "def check_symmetric(a, rtol=1e-05, atol=1e-05):\n",
    "    return np.allclose(a, a.T, rtol=rtol, atol=atol)\n",
    "\n",
    "def count_parameters(model):\n",
    "    return sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
    "\n",
    "def init_params(module):\n",
    "    if isinstance(module, nn.Linear):\n",
    "        module.weight.data.normal_(mean=0.0, std=0.01)\n",
    "        if module.bias is not None:\n",
    "            module.bias.data.zero_()\n",
    "\n",
    "def mean_confidence_interval(data, confidence=0.95):\n",
    "    a = 1.0 * np.array(data)\n",
    "    n = len(a)\n",
    "    m, se = np.mean(a), st.sem(a)\n",
    "    h = se * st.t.ppf((1 + confidence) / 2., n-1)\n",
    "    return m, h\n",
    "\n",
    "def seed_everything(seed):\n",
    "    random.seed(seed)\n",
    "    os.environ['PYTHONHASHSEED'] = str(seed)\n",
    "    np.random.seed(seed)\n",
    "    torch.manual_seed(seed)\n",
    "    torch.cuda.manual_seed(seed)\n",
    "    torch.cuda.manual_seed_all(seed)\n",
    "    \n",
    "def connected_components(sparse_adj):\n",
    "    G = nx.from_scipy_sparse_matrix(sparse_adj)\n",
    "    cc = nx.connected_components(G)\n",
    "    \n",
    "    components = []\n",
    "    lens = []\n",
    "\n",
    "    for c in cc:\n",
    "        c = list(c)\n",
    "        components.append(c)\n",
    "        lens.append(len(c))\n",
    "\n",
    "    return lens, components\n",
    "\n",
    "def normalize_sparse_adj(A):\n",
    "    deg = np.array(A.sum(axis=0)).flatten()\n",
    "    D_ = scipy.sparse.diags(deg ** -0.5)\n",
    "    A_ = D_.dot(A.dot(D_))\n",
    "    L_ = scipy.sparse.eye(adj.shape[0]) - A_\n",
    "    return L_\n",
    "\n",
    "def normalize_adj(adj):\n",
    "    \"\"\" Symmetrically normalize adjacency matrix.\"\"\"\n",
    "    \"\"\" Copy from https://github.com/tkipf/gcn \"\"\"\n",
    "    adj = sp.coo_matrix(adj)\n",
    "    rowsum = np.array(adj.sum(1))\n",
    "    d_inv_sqrt = np.power(rowsum, -0.5).flatten()\n",
    "    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.\n",
    "    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)\n",
    "    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo()\n",
    "\n",
    "\n",
    "def Eigen(adj, d):\n",
    "    \n",
    "    adj = normalize_adj(adj)\n",
    "    lamb, X = sp.linalg.eigs(adj, d)\n",
    "    lamb, X = lamb.real, X.real\n",
    "    X = X[:, np.argsort(lamb)]\n",
    "\n",
    "    return X\n",
    "\n",
    "\n",
    "def Eigen_multi(adj, d):\n",
    "    \"\"\"\n",
    "    Handle if the graph has multiple connected components\n",
    "    Arguments are the same as Eigen\n",
    "    \"\"\"\n",
    "    G = nx.from_scipy_sparse_matrix(adj)\n",
    "    comp = list(nx.connected_components(G))\n",
    "    X = np.zeros((adj.shape[0],d))\n",
    "    for i in range(len(comp)):\n",
    "        node_index = np.array(list(comp[i]))\n",
    "        d_temp = min(len(node_index) - 2, d)\n",
    "        if d_temp < 1:\n",
    "            continue\n",
    "        adj_temp = adj[node_index,:][:,node_index].asfptype()\n",
    "        X[node_index,:d_temp] = Eigen(adj_temp, d_temp)\n",
    "    return X\n",
    "\n",
    "\n",
    "def arxiv_split_to_mask(data, split):    \n",
    "    train_mask = torch.LongTensor([0]*data.num_nodes)\n",
    "    train_mask[split['train']] = 1\n",
    "    train_mask = train_mask.bool()\n",
    "    \n",
    "    val_mask = torch.LongTensor([0]*data.num_nodes)\n",
    "    val_mask[split['valid']] = 1\n",
    "    val_mask = val_mask.bool()\n",
    "    \n",
    "    test_mask = torch.LongTensor([0]*data.num_nodes)\n",
    "    test_mask[split['test']] = 1\n",
    "    test_mask = test_mask.bool()\n",
    "    \n",
    "    data.train_mask = train_mask\n",
    "    data.val_mask = val_mask\n",
    "    data.test_mask = test_mask\n",
    "    \n",
    "    data.y = data.y.view(-1)\n",
    "    \n",
    "    return data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "ca98a2d7",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = loadmat('data/Harvard1.mat')\n",
    "# Counter(data['local_info'][:, 5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b8034552",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "86e50e1b",
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = Planetoid('data/plane', 'Pubmed')\n",
    "\n",
    "# dataset = Coauthor('data/coauthor', 'Physics')\n",
    "# dataset = Coauthor('data/coauthor', 'CS')\n",
    "\n",
    "# dataset = FacebookPagePage('data/facebook')\n",
    "# dataset = GitHub('data/github')\n",
    "\n",
    "# dataset = Flickr('data/flickr')\n",
    "# dataset = PygNodePropPredDataset('ogbn-arxiv', root='data', transform=ToUndirected())\n",
    "# dataset = PPI('data/ppi')\n",
    "\n",
    "# dataset = Amazon('data/computer', 'Computers')\n",
    "# dataset = Amazon('data/photo', 'Photo')\n",
    "# dataset = WikiCS('data/wiki', is_undirected=True)\n",
    "\n",
    "# dataset = TUDataset('data/MUTAG', 'MUTAG')\n",
    "\n",
    "# dataset = Airports('data/air', 'USA')\n",
    "# dataset = Airports('data/air', 'Brazil')\n",
    "# dataset = Airports('data/air', 'Europe')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e83393c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = dataset[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "61f3c97c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Data(x=[19717, 500], edge_index=[2, 88648], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4cebceed",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "c064ef20",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.is_directed()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "97a4842c",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "25e7d8b0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Part EVD\n",
    "\n",
    "index, attr = get_laplacian(data.edge_index, normalization='sym')\n",
    "L = to_scipy_sparse_matrix(index, attr)\n",
    "e, u = scipy.sparse.linalg.eigsh(L, k=100, which='SM', tol=1e-3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6830d22c",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c0830ed5",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0fb91136",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Full EVD\n",
    "\n",
    "L = torch.FloatTensor(L.todense())\n",
    "e, u = torch.linalg.eigh(L)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "257f32e7",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "85182782",
   "metadata": {},
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "module 'networkx' has no attribute 'from_scipy_sparse_matrix'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[8], line 4\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;66;03m# detecting connected components\u001b[39;00m\n\u001b[1;32m      3\u001b[0m adj \u001b[38;5;241m=\u001b[39m to_scipy_sparse_matrix(data\u001b[38;5;241m.\u001b[39medge_index)\n\u001b[0;32m----> 4\u001b[0m lens, components \u001b[38;5;241m=\u001b[39m \u001b[43mconnected_components\u001b[49m\u001b[43m(\u001b[49m\u001b[43madj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      5\u001b[0m \u001b[38;5;28mprint\u001b[39m(lens)\n",
      "Cell \u001b[0;32mIn[2], line 29\u001b[0m, in \u001b[0;36mconnected_components\u001b[0;34m(sparse_adj)\u001b[0m\n\u001b[1;32m     28\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mconnected_components\u001b[39m(sparse_adj):\n\u001b[0;32m---> 29\u001b[0m     G \u001b[38;5;241m=\u001b[39m \u001b[43mnx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_scipy_sparse_matrix\u001b[49m(sparse_adj)\n\u001b[1;32m     30\u001b[0m     cc \u001b[38;5;241m=\u001b[39m nx\u001b[38;5;241m.\u001b[39mconnected_components(G)\n\u001b[1;32m     32\u001b[0m     components \u001b[38;5;241m=\u001b[39m []\n",
      "\u001b[0;31mAttributeError\u001b[0m: module 'networkx' has no attribute 'from_scipy_sparse_matrix'"
     ]
    }
   ],
   "source": [
    "# detecting connected components\n",
    "\n",
    "adj = to_scipy_sparse_matrix(data.edge_index)\n",
    "lens, components = connected_components(adj)\n",
    "print(lens)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0371cb9d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "29bfd507",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.e = torch.FloatTensor(e)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "e0ef2e37",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.u = torch.FloatTensor(u)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "f9070a3d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Data(x=[19717, 500], edge_index=[2, 88648], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717], e=[100], u=[19717, 100])"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4cea5aa0",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "a4d9e2ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "torch.save(data, 'data/pubmed-3.pt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "36b94767",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "79f78912",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "58068a44",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7eec9ff8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# prime_index = components[0]\n",
    "\n",
    "# train_index = torch.where(data.train_mask)[0]\n",
    "# valid_index = torch.where(data.val_mask)[0]\n",
    "# test_index = torch.where(data.test_mask)[0]\n",
    "\n",
    "# prime_test_index = torch.LongTensor(np.intersect1d(prime_index, test_index))\n",
    "\n",
    "# prime_test_edge_index = torch_geometric.utils.subgraph(prime_test_index, data.edge_index, relabel_nodes=True)[0]\n",
    "# prime_test_adj = to_scipy_sparse_matrix(prime_test_edge_index)\n",
    "# lens, components = connected_components(prime_test_adj)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dc6b448f",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3e36cb06",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
