{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d7054c11",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from scipy.sparse import csr_matrix, diags\n",
    "import numpy as np\n",
    "from sklearn.metrics.cluster import normalized_mutual_info_score as nmi\n",
    "import time\n",
    "import matplotlib.pyplot as plt\n",
    "from os import listdir\n",
    "from time import time\n",
    "\n",
    "import sys\n",
    "sys.path += ['utils/']  \n",
    "\n",
    "from node2vec.model import Node2Vec\n",
    "\n",
    "from node_embedding import *\n",
    "from dcsbm import *\n",
    "\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "\n",
    "directory = 'dataset/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "d3543d39",
   "metadata": {},
   "outputs": [],
   "source": [
    "def RunSimulation(dim):\n",
    "\n",
    "    datasets = ['amazon', 'dblp', 'livejournal', 'youtube']\n",
    "    \n",
    "    tv = []\n",
    "    tnsv = []\n",
    "    sv = []\n",
    "    snsv = []\n",
    "\n",
    "    for name in datasets:\n",
    "\n",
    "        EL = pd.read_csv(directory + name + '.csv')[['id1', 'id2']]\n",
    "        n = len(pd.concat([EL.id1, EL.id2]).unique())\n",
    "\n",
    "        ℓtrue = pd.read_csv(directory + name + '_label.csv').set_index('node')\n",
    "        ℓtrue = ℓtrue.loc[np.arange(n)].label.values\n",
    "        n_clusters = len(np.unique(ℓtrue))\n",
    "\n",
    "        A = csr_matrix((np.ones(len(EL)), (EL.id1, EL.id2)), shape = (n,n))    \n",
    "        d = A@np.ones(A.shape[0])\n",
    "\n",
    "        t0 = time()\n",
    "        res = NodeEmbedding(A, dim, n_epochs = 50, walk_length = 3, k = 1, verbose = True, η = 1., sym = True)\n",
    "        t = time() - t0\n",
    "        s = computeScore(res.X, ℓtrue, n_trials = 1, norm_bool = True)\n",
    "\n",
    "        t0 = time()\n",
    "        X = Node2VecNS(A, dim, verbose = True)\n",
    "        tns = time() - t0\n",
    "        sns = computeScore(X, ℓtrue, n_trials = 1)\n",
    "    \n",
    "        tv.append(t)\n",
    "        tnsv.append(tns)\n",
    "        sv.append(s)\n",
    "        snsv.append(sns)\n",
    "       \n",
    "    df = pd.DataFrame(columns = datasets)\n",
    "    df_time = pd.DataFrame(columns = datasets)\n",
    "    \n",
    "    row = dict(zip(datasets, sv))\n",
    "    df = df.append(row, ignore_index = True)\n",
    "    row = dict(zip(datasets, tv))\n",
    "    df_time = df_time.append(row, ignore_index = True)\n",
    "    \n",
    "    row = dict(zip(datasets, snsv))\n",
    "    df = df.append(row, ignore_index = True)\n",
    "    row = dict(zip(datasets, tnsv))\n",
    "    df_time = df_time.append(row, ignore_index = True)\n",
    "    \n",
    "   # save the result\n",
    "    try:\n",
    "        nn = (np.max([int(x.split('_')[1]) for x in listdir('saved_files/real_graphs/perf/')]))\n",
    "        df.to_csv('saved_files/real_graphs/perf/v_' + str(nn+1) + '_.csv', index = False)\n",
    "        df_time.to_csv('saved_files/real_graphs/time/v_' + str(nn+1) + '_.csv', index = False)\n",
    "\n",
    "    except:\n",
    "        df.to_csv('saved_files/real_graphs/perf/v_' + str(1) + '_.csv', index = False)\n",
    "        df_time.to_csv('saved_files/real_graphs/time/v_' + str(1) + '_.csv', index = False)\n",
    "        \n",
    "    return"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "ada850fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# n_sim = 10\n",
    "# dim = 32\n",
    "\n",
    "# for i in range(n_sim):\n",
    "#     RunSimulation(dim)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "c1f1347c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "         amazon      dblp  livejournal   youtube\n",
      "index                                           \n",
      "0      0.936627  0.554096     0.907121  0.607016\n",
      "1      0.934032  0.508152     0.909789  0.646905\n",
      "          amazon        dblp  livejournal    youtube\n",
      "index                                               \n",
      "0       3.004778   20.845956    30.225652   9.312429\n",
      "1      16.604569  145.069742   166.920541  64.386668\n",
      "\n",
      "\n",
      "         amazon      dblp  livejournal   youtube\n",
      "index                                           \n",
      "0      0.001966  0.002051     0.000818  0.000960\n",
      "1      0.003293  0.002359     0.000409  0.001013\n",
      "         amazon      dblp  livejournal   youtube\n",
      "index                                           \n",
      "0      0.426105  2.264605     2.136620  0.696179\n",
      "1      0.803745  6.656247    12.593651  2.748380\n"
     ]
    }
   ],
   "source": [
    "files = listdir('saved_files/real_graphs/perf/')\n",
    "\n",
    "df_list_perf = []\n",
    "df_list_time = []\n",
    "\n",
    "for f in files:\n",
    "    df_list_perf.append(pd.read_csv('saved_files/real_graphs/perf/' + f))\n",
    "    df_list_time.append(pd.read_csv('saved_files/real_graphs/time/' + f))\n",
    "    \n",
    "df_list_perf = pd.concat(df_list_perf).reset_index()\n",
    "df_list_time = pd.concat(df_list_time).reset_index()\n",
    "\n",
    "print(df_list_perf.groupby('index').mean())\n",
    "print(df_list_time.groupby('index').mean())\n",
    "print('\\n')\n",
    "print(df_list_perf.groupby('index').std())\n",
    "print(df_list_time.groupby('index').std())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "6dd4391f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(files)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1fbbe1da",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
