{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "16e6533c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import csv\n",
    "from collections import defaultdict\n",
    "import pickle\n",
    "import torch\n",
    "import dgl\n",
    "import os\n",
    "import numpy as np\n",
    "\n",
    "# Root folder of the DBLP data. The trailing slash is kept because other\n",
    "# cells build file names with plain string concatenation on `datapath`.\n",
    "datapath = '../data/DBLP/'\n",
    "\n",
    "# The `statistics` pickle holds a pair that unpacks into (num_task, num_class).\n",
    "# NOTE: pickle.load can execute arbitrary code; only open trusted files.\n",
    "with open(os.path.join(datapath, 'statistics'), 'rb') as file:\n",
    "    num_task, num_class = pickle.load(file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "0b9359aa",
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import deque \n",
    "\n",
    "def _build_adjacency(g):\n",
    "    \"\"\"Return a dict mapping each node that has an edge to its neighbours.\n",
    "\n",
    "    Edge direction is ignored. The edge list is read from ``g.edges()`` once.\n",
    "    For a node ``v`` the list holds the destinations of its outgoing edges\n",
    "    first, then the sources of its incoming edges, each group in the order\n",
    "    in which ``g.edges()`` reports them. Nodes without edges get no entry.\n",
    "    \"\"\"\n",
    "    src, dst = g.edges()\n",
    "    src, dst = src.tolist(), dst.tolist()\n",
    "    adjacency = {}\n",
    "    for s, d in zip(src, dst):\n",
    "        adjacency.setdefault(s, []).append(d)\n",
    "    # Separate second pass so in-neighbours always follow the out-neighbours.\n",
    "    for s, d in zip(src, dst):\n",
    "        adjacency.setdefault(d, []).append(s)\n",
    "    return adjacency\n",
    "\n",
    "\n",
    "def dfs(g, u, visited, adjacency=None):\n",
    "    \"\"\"Collect every node reachable from ``u`` when edge direction is ignored.\n",
    "\n",
    "    Despite the name, nodes are taken from the front of a FIFO queue, so the\n",
    "    traversal order is breadth-first.\n",
    "\n",
    "    Args:\n",
    "        g: graph whose ``edges()`` returns a ``(src, dst)`` pair of tensors.\n",
    "        u: id of the start node.\n",
    "        visited: set of node ids that were already explored. It is updated in\n",
    "            place, and nodes already in it are skipped.\n",
    "        adjacency: optional result of ``_build_adjacency(g)``. Pass it when\n",
    "            calling ``dfs`` repeatedly on the same graph so the edge list is\n",
    "            not scanned again; it is built on demand when omitted.\n",
    "\n",
    "    Returns:\n",
    "        ``(count, subnodes)``: the number of newly visited nodes and those\n",
    "        nodes in visiting order.\n",
    "    \"\"\"\n",
    "    if adjacency is None:\n",
    "        adjacency = _build_adjacency(g)\n",
    "    q = deque([u])\n",
    "    subnodes = []\n",
    "    while q:\n",
    "        v = q.popleft()\n",
    "        if v in visited:\n",
    "            # A node can be queued several times before it is first expanded.\n",
    "            continue\n",
    "        visited.add(v)\n",
    "        subnodes.append(v)\n",
    "        q.extend(n for n in adjacency.get(v, ()) if n not in visited)\n",
    "    return len(subnodes), subnodes\n",
    "\n",
    "\n",
    "def get_largest_cluster(g):\n",
    "    \"\"\"Return the node ids of the largest weakly connected component of ``g``.\n",
    "\n",
    "    The adjacency list is built once and shared by all traversals, so the\n",
    "    edge list is not re-scanned for every visited node.\n",
    "\n",
    "    Args:\n",
    "        g: graph exposing ``nodes()`` (tensor of node ids) and ``edges()``\n",
    "            (``(src, dst)`` pair of tensors).\n",
    "\n",
    "    Returns:\n",
    "        List of node ids in visiting order. Among components of equal size\n",
    "        the one whose node list compares greatest is chosen. An empty list\n",
    "        is returned when the graph has no nodes.\n",
    "    \"\"\"\n",
    "    adjacency = _build_adjacency(g)\n",
    "    visited = set()\n",
    "    components = []\n",
    "    for u in g.nodes().tolist():\n",
    "        if u not in visited:\n",
    "            components.append(dfs(g, u, visited, adjacency))\n",
    "    if not components:\n",
    "        return []\n",
    "    # Tuples compare by size first, then by node list.\n",
    "    return max(components)[1]\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "37677be6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Only one time slot is processed: the unconditional `break` at the end of\n",
    "# the body leaves the loop after its first pass.\n",
    "for time_slot in range(num_task):\n",
    "    with open(datapath + f'graph_{time_slot}_by_edges', 'rb') as file:\n",
    "        g = pickle.load(file)\n",
    "    # The file handle is not needed for the component search.\n",
    "    subnodes = get_largest_cluster(g)\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "cbef04d9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Graph(num_nodes=11757, num_edges=98516,\n",
       "      ndata_schemes={'num_new_nodes': Scheme(shape=(), dtype=torch.int64), 'x': Scheme(shape=(244,), dtype=torch.int64), 'node_idxs': Scheme(shape=(), dtype=torch.int64), 'y': Scheme(shape=(), dtype=torch.int64), '_ID': Scheme(shape=(), dtype=torch.int64)}\n",
       "      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the subgraph of `g` restricted to `subnodes`, the node list returned\n",
    "# by get_largest_cluster.\n",
    "g.subgraph(subnodes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "cdb051f7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Graph(num_nodes=61185, num_edges=2109512,\n",
       "      ndata_schemes={'x': Scheme(shape=(244,), dtype=torch.int64), 'y': Scheme(shape=(), dtype=torch.int64)}\n",
       "      edata_schemes={})"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the summary repr of the graph currently bound to `g`\n",
    "# (node/edge counts and feature schemes).\n",
    "g"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "050a38b1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "1\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "Cell \u001b[0;32mIn [26], line 3\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(datapath\u001b[38;5;241m+\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mgraph_whole\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrb\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m file:\n\u001b[1;32m      2\u001b[0m     g \u001b[38;5;241m=\u001b[39m pickle\u001b[38;5;241m.\u001b[39mload(file)\n\u001b[0;32m----> 3\u001b[0m     subnodes \u001b[38;5;241m=\u001b[39m \u001b[43mget_largest_cluster\u001b[49m\u001b[43m(\u001b[49m\u001b[43mg\u001b[49m\u001b[43m)\u001b[49m\n",
      "Cell \u001b[0;32mIn [24], line 27\u001b[0m, in \u001b[0;36mget_largest_cluster\u001b[0;34m(g)\u001b[0m\n\u001b[1;32m     25\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m u \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m visited:\n\u001b[1;32m     26\u001b[0m         \u001b[38;5;28mprint\u001b[39m (\u001b[38;5;28mlen\u001b[39m(visited))\n\u001b[0;32m---> 27\u001b[0m         count_list\u001b[38;5;241m.\u001b[39mappend(\u001b[43mdfs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mg\u001b[49m\u001b[43m,\u001b[49m\u001b[43mu\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvisited\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m     28\u001b[0m count_list\u001b[38;5;241m.\u001b[39msort(reverse\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m     29\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m count_list[\u001b[38;5;241m0\u001b[39m][\u001b[38;5;241m1\u001b[39m]\n",
      "Cell \u001b[0;32mIn [24], line 14\u001b[0m, in \u001b[0;36mdfs\u001b[0;34m(g, u, visited)\u001b[0m\n\u001b[1;32m     12\u001b[0m subnodes\u001b[38;5;241m.\u001b[39mappend(v)\n\u001b[1;32m     13\u001b[0m count \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m---> 14\u001b[0m node_list \u001b[38;5;241m=\u001b[39m g\u001b[38;5;241m.\u001b[39medges()[\u001b[38;5;241m1\u001b[39m][(\u001b[43mg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43medges\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m==\u001b[39m v)\u001b[38;5;241m.\u001b[39mnonzero()\u001b[38;5;241m.\u001b[39mview(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)]\u001b[38;5;241m.\u001b[39mtolist()\n\u001b[1;32m     15\u001b[0m node_list \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m g\u001b[38;5;241m.\u001b[39medges()[\u001b[38;5;241m0\u001b[39m][(g\u001b[38;5;241m.\u001b[39medges()[\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m==\u001b[39m v)\u001b[38;5;241m.\u001b[39mnonzero()\u001b[38;5;241m.\u001b[39mview(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)]\u001b[38;5;241m.\u001b[39mtolist()\n\u001b[1;32m     16\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m n \u001b[38;5;129;01min\u001b[39;00m node_list:\n",
      "File \u001b[0;32m~/miniconda3/lib/python3.8/site-packages/dgl/view.py:166\u001b[0m, in \u001b[0;36mHeteroEdgeView.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    164\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m    165\u001b[0m     \u001b[38;5;124;03m\"\"\"Return all the edges.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 166\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_graph\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mall_edges\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/miniconda3/lib/python3.8/site-packages/dgl/heterograph.py:3417\u001b[0m, in \u001b[0;36mDGLHeteroGraph.all_edges\u001b[0;34m(self, form, order, etype)\u001b[0m\n\u001b[1;32m   3347\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mall_edges\u001b[39m(\u001b[38;5;28mself\u001b[39m, form\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124muv\u001b[39m\u001b[38;5;124m'\u001b[39m, order\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124meid\u001b[39m\u001b[38;5;124m'\u001b[39m, etype\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[1;32m   3348\u001b[0m     \u001b[38;5;124;03m\"\"\"Return all edges with the specified edge type.\u001b[39;00m\n\u001b[1;32m   3349\u001b[0m \n\u001b[1;32m   3350\u001b[0m \u001b[38;5;124;03m    Parameters\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   3415\u001b[0m \u001b[38;5;124;03m    out_edges\u001b[39;00m\n\u001b[1;32m   3416\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[0;32m-> 3417\u001b[0m     src, dst, eid \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_graph\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43medges\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_etype_id\u001b[49m\u001b[43m(\u001b[49m\u001b[43metype\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morder\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   3418\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m form \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mall\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[1;32m   3419\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m src, dst, eid\n",
      "File \u001b[0;32m~/miniconda3/lib/python3.8/site-packages/dgl/heterograph_index.py:609\u001b[0m, in \u001b[0;36mHeteroGraphIndex.edges\u001b[0;34m(self, etype, order)\u001b[0m\n\u001b[1;32m    606\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m order \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124msrcdst\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124meid\u001b[39m\u001b[38;5;124m'\u001b[39m]:\n\u001b[1;32m    607\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m DGLError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExpect order to be one of None, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124msrcdst\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124meid\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    608\u001b[0m                    \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgot \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(order))\n\u001b[0;32m--> 609\u001b[0m edge_array \u001b[38;5;241m=\u001b[39m \u001b[43m_CAPI_DGLHeteroEdges\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mint\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43metype\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morder\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    610\u001b[0m src \u001b[38;5;241m=\u001b[39m F\u001b[38;5;241m.\u001b[39mfrom_dgl_nd(edge_array(\u001b[38;5;241m0\u001b[39m))\n\u001b[1;32m    611\u001b[0m dst \u001b[38;5;241m=\u001b[39m F\u001b[38;5;241m.\u001b[39mfrom_dgl_nd(edge_array(\u001b[38;5;241m1\u001b[39m))\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "# Load the pickled full graph, then search for its largest connected\n",
    "# component once the file has been closed.\n",
    "with open(datapath + 'graph_whole', 'rb') as file:\n",
    "    g = pickle.load(file)\n",
    "subnodes = get_largest_cluster(g)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "9c56b217",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "ceb2af18",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Graph(num_nodes=18007, num_edges=99446,\n",
       "      ndata_schemes={'num_new_nodes': Scheme(shape=(), dtype=torch.int64), 'x': Scheme(shape=(244,), dtype=torch.int64), 'node_idxs': Scheme(shape=(), dtype=torch.int64), 'y': Scheme(shape=(), dtype=torch.int64)}\n",
       "      edata_schemes={})"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the summary repr of the graph currently bound to `g`.\n",
    "g"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f6486f9",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
