{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import scipy.sparse as sp\n",
    "import numpy as np\n",
    "from torch_geometric_signed_directed.data import SignedData, DirectedData, load_signed_real_data, load_directed_real_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "183 298 False\n",
      "251 515 False\n",
      "183 325 False\n"
     ]
    }
   ],
   "source": [
    "for name in ['Cornell', 'Wisconsin', 'Texas']:\n",
    "        directed_dataset = load_directed_real_data(\n",
    "                dataset='WebKB', root='../tmp_data/', name=name)\n",
    "        print(directed_dataset.num_nodes, directed_dataset.num_edges, directed_dataset.is_weighted)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2277 36101 False\n",
      "5201 222134 True\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/ubuntu/anaconda3/envs/py37/lib/python3.7/site-packages/torch_geometric/data/dataset.py:152: UserWarning: The `pre_transform` argument differs from the one used in the pre-processed version of this dataset. If you want to make use of another pre-processing technique, make sure to sure to delete '../tmp_data/wikipedianetwork/squirrel/processed' first\n",
      "  f\"The `pre_transform` argument differs from the one used in \"\n"
     ]
    }
   ],
   "source": [
    "for name in ['chameleon', 'squirrel']:\n",
    "    directed_dataset = load_directed_real_data(\n",
    "        dataset='wikipedianetwork', root='../tmp_data/wikipedianetwork', name=name)\n",
    "    print(directed_dataset.num_nodes, directed_dataset.num_edges, directed_dataset.is_weighted)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2388953 5018445 False\n",
      "245 8912 True\n",
      "3075 721432 True\n"
     ]
    }
   ],
   "source": [
    "for dataset_name in ['wikitalk', 'telegram', 'migration']:\n",
    "        directed_dataset = load_directed_real_data(\n",
    "            dataset=dataset_name, root='../tmp_data/'+dataset_name)\n",
    "        print(directed_dataset.num_nodes, directed_dataset.num_edges, directed_dataset.is_weighted)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "268.36842105263156 29159.36842105263\n",
      "1222 19024 True\n",
      "2995 8416 False\n",
      "3312 4715 False\n",
      "11701 297110 False\n"
     ]
    }
   ],
   "source": [
    "num_nodes_array = np.zeros(19)\n",
    "num_edges_array = np.zeros(19)\n",
    "for year in range(2001, 2020):\n",
    "    directed_dataset = load_directed_real_data(\n",
    "        dataset='lead_lag'+str(year), root='./tmp_data/lead_lag/')\n",
    "    num_nodes_array[year-2001] = directed_dataset.num_nodes\n",
    "    num_edges_array[year-2001] = directed_dataset.num_edges\n",
    "print(num_nodes_array.mean(), num_edges_array.mean())\n",
    "for dataset_name in ['blog', 'cora_ml', 'citeseer', 'wikics']:\n",
    "    directed_dataset = load_directed_real_data(\n",
    "        dataset=dataset_name, root='./tmp_data/'+dataset_name)\n",
    "    print(directed_dataset.num_nodes, directed_dataset.num_edges, directed_dataset.is_weighted)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "25 148 182 False True\n",
      "3058 7996 3864 False True\n",
      "7634 135753 37579 True True\n",
      "1193 1069319 353930 False True\n",
      "306 64408 29228 False True\n",
      "3783 22650 1536 True True\n",
      "5881 32029 3563 True True\n",
      "131580 589888 121322 True False\n",
      "82140 380933 119548 True False\n"
     ]
    }
   ],
   "source": [
    "for dataset_name in ['Sampson', 'PPI', 'wikirfa', 'SP1500', 'rainfall', 'bitcoin_alpha', 'bitcoin_otc', 'epinions', 'slashdot']:\n",
    "    signed_dataset = load_signed_real_data(\n",
    "        root='../tmp_data/', dataset=dataset_name)\n",
    "    signed_dataset.separate_positive_negative()\n",
    "    print(signed_dataset.num_nodes, len(signed_dataset.A_p.nonzero()[0]), len(signed_dataset.A_n.nonzero()[0]), signed_dataset.is_directed, signed_dataset.is_weighted)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "444.0 84677.19047619047 112014.80952380953\n",
      "430.0 84456.85714285714 100013.14285714286\n",
      "451.0 148527.7619047619 54312.76190476191\n"
     ]
    }
   ],
   "source": [
    "num_nodes_array = np.zeros(21)\n",
    "num_edges_pos_array = np.zeros(21)\n",
    "num_edges_neg_array = np.zeros(21)\n",
    "for year in range(2000, 2021):\n",
    "    signed_dataset = load_signed_real_data(\n",
    "        dataset='FiLL-pvCLCL'+str(year), root='../tmp_data/FiLL/')\n",
    "    signed_dataset.separate_positive_negative()\n",
    "    num_nodes_array[year-2000], num_edges_pos_array[year-2000], num_edges_neg_array[year-2000] = signed_dataset.num_nodes, len(signed_dataset.A_p.nonzero()[0]), len(signed_dataset.A_n.nonzero()[0])\n",
    "print(num_nodes_array.mean(), num_edges_pos_array.mean(), num_edges_neg_array.mean())\n",
    "\n",
    "num_nodes_array = np.zeros(21)\n",
    "num_edges_pos_array = np.zeros(21)\n",
    "num_edges_neg_array = np.zeros(21)\n",
    "for year in range(2000, 2021):\n",
    "    signed_dataset = load_signed_real_data(\n",
    "        dataset='FiLL-OPCL'+str(year), root='../tmp_data/FiLL/')\n",
    "    signed_dataset.separate_positive_negative()\n",
    "    num_nodes_array[year-2000], num_edges_pos_array[year-2000], num_edges_neg_array[year-2000] = signed_dataset.num_nodes, len(signed_dataset.A_p.nonzero()[0]), len(signed_dataset.A_n.nonzero()[0])\n",
    "print(num_nodes_array.mean(), num_edges_pos_array.mean(), num_edges_neg_array.mean())\n",
    "\n",
    "num_nodes_array = np.zeros(21)\n",
    "num_edges_pos_array = np.zeros(21)\n",
    "num_edges_neg_array = np.zeros(21)\n",
    "for year in range(2000, 2021):\n",
    "    signed_dataset = load_signed_real_data(\n",
    "        dataset='Fin_YNet'+str(year), root='../tmp_data/Fin_YNet/')\n",
    "    signed_dataset.separate_positive_negative()\n",
    "    num_nodes_array[year-2000], num_edges_pos_array[year-2000], num_edges_neg_array[year-2000] = signed_dataset.num_nodes, len(signed_dataset.A_p.nonzero()[0]), len(signed_dataset.A_n.nonzero()[0])\n",
    "print(num_nodes_array.mean(), num_edges_pos_array.mean(), num_edges_neg_array.mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.7.11 ('py37': conda)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.11"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "95aa51ff06b76c3082fe9756423211ea07cd0dd935e712d4ff620b8d6044080e"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
