{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import networkx as nx\n",
    "import codecs\n",
    "import pickle\n",
    "import os\n",
    "import math\n",
    "import itertools as it\n",
    "from scipy import sparse\n",
    "from utils import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# temporal aggregation interval\n",
    "AGGR_TIME = 10*60\n",
    "\n",
    "# temporal coupling weight\n",
    "temp_edge_weight_list = [1]#,2,4,8,16]\n",
    "\n",
    "# temporal coupling repr\n",
    "rep_time_list = ['Non']\n",
    "#rep_time_list = ['Inverse']\n",
    "\n",
    "# dataset_list = ['LyonSchool', 'InVS15', 'SFHH', 'LH10', 'Thiers13']\n",
    "dataset_list = ['LyonSchool']\n",
    "# dataset_list = ['InVS15']\n",
    "#dataset_list = ['SFHH']\n",
    "#dataset_list = ['LH10']\n",
    "# dataset_list = ['Thiers13']\n",
    "\n",
    "# supra_rep_list = ['Dyn', 'Stat']\n",
    "supra_rep_list = ['Dyn']\n",
    "\n",
    "# random walks window sizes (only for \"Dyn\")\n",
    "window_list = [10]\n",
    "\n",
    "# final tensor way (only for \"Dyn\")\n",
    "ways_list = ['3Redu' ,'4']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def save_supra_tensor_sparse_3Way(dataset, supra_tensor, supra_rep, temp_edge_weight=None, rep_time=None, window_size=None, tensor_way=None):\n",
    "    target_dir = '../preprocessed/SupraAdjacencyTensorSparse/%s/%s' %(dataset, supra_rep)\n",
    "    os.makedirs(target_dir, exist_ok = True)\n",
    "    if supra_rep == 'Stat':\n",
    "        #save p(i,j,k)\n",
    "        pickle.dump(supra_tensor, open( target_dir + '/Aggtime%dWay%s.pkl' % (AGGR_TIME, tensor_way), \"wb\" ), protocol=pickle.HIGHEST_PROTOCOL)\n",
    "        #save p(i), p(j), p(k)\n",
    "        pickle.dump(supra_tensor.sum(axis=(1,2)), open( target_dir + '/Aggtime%dWay%s_V.pkl' % (AGGR_TIME, tensor_way), \"wb\" ), protocol=pickle.HIGHEST_PROTOCOL)\n",
    "        pickle.dump(supra_tensor.sum(axis=(0,2)), open( target_dir + '/Aggtime%dWay%s_Z.pkl' % (AGGR_TIME, tensor_way), \"wb\" ), protocol=pickle.HIGHEST_PROTOCOL)\n",
    "        pickle.dump(supra_tensor.sum(axis=(0,1)), open( target_dir + '/Aggtime%dWay%s_W.pkl' % (AGGR_TIME, tensor_way), \"wb\" ), protocol=pickle.HIGHEST_PROTOCOL)\n",
    "    else:\n",
    "        #save p(i,j,k)\n",
    "        pickle.dump(supra_tensor, open( (target_dir + '/Aggtime%dWeight%dReptime%sWindow%dWay%s.pkl' %\\\n",
    "                    (AGGR_TIME, temp_edge_weight, rep_time, window_size, tensor_way)), \"wb\" ), protocol=pickle.HIGHEST_PROTOCOL)\n",
    "        #save p(i), p(j), p(k)\n",
    "        pickle.dump(supra_tensor.sum(axis=(1,2)), open( (target_dir + '/Aggtime%dWeight%dReptime%sWindow%dWay%s_V.pkl' %\\\n",
    "                    (AGGR_TIME, temp_edge_weight, rep_time, window_size, tensor_way)), \"wb\" ), protocol=pickle.HIGHEST_PROTOCOL)\n",
    "        pickle.dump(supra_tensor.sum(axis=(0,2)), open( (target_dir + '/Aggtime%dWeight%dReptime%sWindow%dWay%s_Z.pkl' %\\\n",
    "                    (AGGR_TIME, temp_edge_weight, rep_time, window_size, tensor_way)), \"wb\" ), protocol=pickle.HIGHEST_PROTOCOL)\n",
    "        pickle.dump(supra_tensor.sum(axis=(0,1)), open( (target_dir + '/Aggtime%dWeight%dReptime%sWindow%dWay%s_W.pkl' %\\\n",
    "                    (AGGR_TIME, temp_edge_weight, rep_time, window_size, tensor_way)), \"wb\" ), protocol=pickle.HIGHEST_PROTOCOL)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def save_supra_tensor_sparse_4Way(dataset, supra_tensor, supra_rep, temp_edge_weight=None, rep_time=None, window_size=None, tensor_way=None):\n",
    "    target_dir = '../preprocessed/SupraAdjacencyTensorSparse/%s/%s' %(dataset, supra_rep)\n",
    "    os.makedirs(target_dir, exist_ok = True)\n",
    "    if supra_rep == 'Stat':\n",
    "        #save p(i,j,k,l)\n",
    "        pickle.dump(supra_tensor, open( target_dir + '/Aggtime%dWay%s.pkl' % (AGGR_TIME, tensor_way), \"wb\" ), protocol=pickle.HIGHEST_PROTOCOL)\n",
    "        #save p(i), p(j), p(k), p(l)\n",
    "        pickle.dump(supra_tensor.sum(axis=(1,2,3)), open( target_dir + '/Aggtime%dWay%s_V.pkl' % (AGGR_TIME, tensor_way), \"wb\" ), protocol=pickle.HIGHEST_PROTOCOL)\n",
    "        pickle.dump(supra_tensor.sum(axis=(0,2,3)), open( target_dir + '/Aggtime%dWay%s_Z.pkl' % (AGGR_TIME, tensor_way), \"wb\" ), protocol=pickle.HIGHEST_PROTOCOL)\n",
    "        pickle.dump(supra_tensor.sum(axis=(0,1,3)), open( target_dir + '/Aggtime%dWay%s_W.pkl' % (AGGR_TIME, tensor_way), \"wb\" ), protocol=pickle.HIGHEST_PROTOCOL)\n",
    "        pickle.dump(supra_tensor.sum(axis=(0,1,2)), open( target_dir + '/Aggtime%dWay%s_T.pkl' % (AGGR_TIME, tensor_way), \"wb\" ), protocol=pickle.HIGHEST_PROTOCOL)\n",
    "    else:\n",
    "        #save p(i,j,k,l)\n",
    "        pickle.dump(supra_tensor, open( (target_dir + '/Aggtime%dWeight%dReptime%sWindow%dWay%s.pkl' %\\\n",
    "                    (AGGR_TIME, temp_edge_weight, rep_time, window_size, tensor_way)), \"wb\" ), protocol=pickle.HIGHEST_PROTOCOL)\n",
    "        #save p(i), p(j), p(k), p(l)\n",
    "        pickle.dump(supra_tensor.sum(axis=(1,2,3)), open( (target_dir + '/Aggtime%dWeight%dReptime%sWindow%dWay%s_V.pkl' %\\\n",
    "                    (AGGR_TIME, temp_edge_weight, rep_time, window_size, tensor_way)), \"wb\" ), protocol=pickle.HIGHEST_PROTOCOL)\n",
    "        pickle.dump(supra_tensor.sum(axis=(0,2,3)), open( (target_dir + '/Aggtime%dWeight%dReptime%sWindow%dWay%s_Z.pkl' %\\\n",
    "                    (AGGR_TIME, temp_edge_weight, rep_time, window_size, tensor_way)), \"wb\" ), protocol=pickle.HIGHEST_PROTOCOL)\n",
    "        pickle.dump(supra_tensor.sum(axis=(0,1,3)), open( (target_dir + '/Aggtime%dWeight%dReptime%sWindow%dWay%s_W.pkl' %\\\n",
    "                    (AGGR_TIME, temp_edge_weight, rep_time, window_size, tensor_way)), \"wb\" ), protocol=pickle.HIGHEST_PROTOCOL)\n",
    "        pickle.dump(supra_tensor.sum(axis=(0,1,2)), open( (target_dir + '/Aggtime%dWeight%dReptime%sWindow%dWay%s_T.pkl' %\\\n",
    "                    (AGGR_TIME, temp_edge_weight, rep_time, window_size, tensor_way)), \"wb\" ), protocol=pickle.HIGHEST_PROTOCOL)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# SPARSE TENSORS STAT, DYN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "for dataset in dataset_list:\n",
    "    for supra_rep in supra_rep_list:\n",
    "        target_dir = '../preprocessed/SupraAdjacencyMatrixAllNodes/%s/%s' %(dataset, supra_rep)\n",
    "        if supra_rep == 'Stat':\n",
    "            # read networkx stat\n",
    "            supra_H = nx.read_gpickle(target_dir + '/Aggtime%d.gpickle' % (AGGR_TIME))\n",
    "            node_name = list(supra_H.nodes())\n",
    "            nr_nodes = np.unique([int(n_node.split('-')[0]) for n_node in node_name]).shape[0]\n",
    "            nr_times = np.unique([int(n_node.split('-')[1]) for n_node in node_name]).shape[0]\n",
    "            ordered_nodelist = list(map(lambda x: str(x[0])+'-'+str(x[1]), it.product(range(nr_nodes), range(nr_times))))\n",
    "            # compute probability matrix\n",
    "            supra_P = nx.to_scipy_sparse_matrix(supra_H.to_undirected(), nodelist=ordered_nodelist, format='csr', dtype=np.float32)\n",
    "            # reshape to 3way and save\n",
    "            supra_T = reshape_supra_to_3way_sparse(supra_H, supra_P/supra_P.sum(), reduce_contexts=True)\n",
    "            save_supra_tensor_sparse_3Way(dataset, supra_T, supra_rep, tensor_way='3Redu')\n",
    "            \n",
    "        else:\n",
    "            for temp_edge_weight in temp_edge_weight_list:\n",
    "                for rep_time in rep_time_list:\n",
    "                    for window_size in window_list:\n",
    "                        # read networkx supra-dyn\n",
    "                        supra_H = nx.read_gpickle(target_dir + '/Aggtime%dWeight%dReptime%s.gpickle' % (AGGR_TIME, temp_edge_weight, rep_time))\n",
    "                        # compute probability matrix from random walks\n",
    "                        supra_P = make_random_walks_from_supra_allnodes(supra_H.to_undirected(), window_size)\n",
    "                        for tensor_way in ways_list:\n",
    "                            if tensor_way == '3Redu':\n",
    "                                # reshape to 3way and save\n",
    "                                supra_T = reshape_supra_to_3way_sparse(supra_H, supra_P, reduce_contexts=True)\n",
    "                                save_supra_tensor_sparse_3Way(dataset, supra_T, supra_rep, temp_edge_weight, rep_time, window_size, tensor_way)\n",
    "                            if tensor_way == '4':\n",
    "                                # reshape to 4way and save\n",
    "                                supra_T = reshape_supra_to_4way_sparse(supra_H, supra_P)\n",
    "                                save_supra_tensor_sparse_4Way(dataset, supra_T, supra_rep, temp_edge_weight, rep_time, window_size, tensor_way)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# SPARSE TENSOR STAT|DYN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "for dataset in dataset_list:\n",
    "    target_dir = '../preprocessed/SupraAdjacencyMatrixAllNodes/%s/%s' %(dataset, 'Dyn')\n",
    "    stat_dir = '../preprocessed/SupraAdjacencyMatrixAllNodes/%s/%s' %(dataset, 'Stat')\n",
    "            \n",
    "    for temp_edge_weight in temp_edge_weight_list:\n",
    "        for rep_time in rep_time_list:\n",
    "            for window_size in window_list:\n",
    "                \n",
    "                #Dyn\n",
    "                supra_H = nx.read_gpickle(target_dir + '/Aggtime%dWeight%dReptime%s.gpickle' % (AGGR_TIME, temp_edge_weight, rep_time))\n",
    "                # compute probability matrix from random walks\n",
    "                supra_P = make_random_walks_from_supra_allnodes(supra_H.to_undirected(), window_size)\n",
    "\n",
    "                #Stat\n",
    "                stat_H = nx.read_gpickle(stat_dir + '/Aggtime%d.gpickle' % (AGGR_TIME))\n",
    "                node_name = list(stat_H.nodes())\n",
    "                nr_nodes = np.unique([int(n_node.split('-')[0]) for n_node in node_name]).shape[0]\n",
    "                nr_times = np.unique([int(n_node.split('-')[1]) for n_node in node_name]).shape[0]\n",
    "                ordered_nodelist = list(map(lambda x: str(x[0])+'-'+str(x[1]), it.product(range(nr_nodes), range(nr_times))))\n",
    "                \n",
    "                # compute probability matrix\n",
    "                stat_P = nx.to_scipy_sparse_matrix(stat_H.to_undirected(), nodelist=ordered_nodelist, format='csr', dtype=np.float32)\n",
    "                # average probability matrices\n",
    "                supra_P = ((supra_P + stat_P/stat_P.sum())/2).astype(np.float32, copy=False)\n",
    "\n",
    "                for tensor_way in ways_list:\n",
    "\n",
    "                    if tensor_way == '3Redu':\n",
    "                        # reshape to 3way and save\n",
    "                        supra_T = reshape_supra_to_3way_sparse(supra_H, supra_P, reduce_contexts=True)\n",
    "                        save_supra_tensor_sparse_3Way(dataset, supra_T, 'StatDyn', temp_edge_weight, rep_time, window_size, tensor_way)\n",
    "\n",
    "                    if tensor_way == '4':\n",
    "                        # reshape to 4way and save\n",
    "                        supra_T = reshape_supra_to_4way_sparse(supra_H, supra_P)\n",
    "                        save_supra_tensor_sparse_4Way(dataset, supra_T, 'StatDyn', temp_edge_weight, rep_time, window_size, tensor_way)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
