{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import networkx as nx\n",
    "import codecs\n",
    "import pickle\n",
    "import os\n",
    "import math\n",
    "import itertools as it"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Width of one aggregation time slice, in the units of the raw timestamp\n",
    "# column (10*60 = 600; presumably seconds -- TODO confirm against the data files).\n",
    "AGGR_TIME = 10*60\n",
    "\n",
    "# Weights to try for the temporal coupling edges, i.e. the edges that link a node\n",
    "# to its own next active copy in the 'Dyn' supra-adjacency representation.\n",
    "temp_edge_weight_list = [1]#,2,4,8,16]\n",
    "\n",
    "# How 'Dyn' edge weights depend on the time gap between the two slices:\n",
    "# 'Non' leaves the weights unchanged, 'Inverse' divides them by the gap.\n",
    "rep_time_list = ['Non']\n",
    "#rep_time_list = ['Inverse']\n",
    "\n",
    "# Datasets to process; each name selects one tij_<name> contact file.\n",
    "# dataset_list = ['LyonSchool', 'InVS15', 'SFHH', 'LH10', 'Thiers13']\n",
    "dataset_list = ['LyonSchool']\n",
    "# dataset_list = ['InVS15']\n",
    "#dataset_list = ['SFHH']\n",
    "#dataset_list = ['LH10']\n",
    "# dataset_list = ['Thiers13']\n",
    "\n",
    "# Supra-adjacency representations to build: 'Dyn' (dynamic) and 'Stat' (static).\n",
    "supra_rep_list = ['Dyn', 'Stat']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# code node ID and timestamp into new \"temporal\" node ID of the form \"node-time\"\n",
    "def add_temporal_edge(g, n1, t1, n2, t2, weight):\n",
    "    n1_t1 = '%d-%d' % (n1, t1)\n",
    "    n2_t2 = '%d-%d' % (n2, t2)\n",
    "    \n",
    "    g.add_edge(n1_t1, n2_t2, weight = weight)\n",
    "\n",
    "# get first event for node n strictly after time t0\n",
    "def get_next_event(df_tnet, n, t0):\n",
    "    df = df_tnet[(df_tnet.tslice > t0) & ((df_tnet.i == n) | (df_tnet.j == n)) ]\n",
    "    if len(df) > 0:\n",
    "        return df.iloc[0].tslice\n",
    "    else:\n",
    "        return None\n",
    "def get_previous_event(df_tnet, n, t0):\n",
    "    df = df_tnet[(df_tnet.tslice < t0) & ((df_tnet.i == n) | (df_tnet.j == n)) ]\n",
    "    if len(df) > 0:\n",
    "        return df.iloc[-1].tslice\n",
    "    else:\n",
    "        return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_temp_data(dataset):\n",
    "    \"\"\"Load the contact list of ``dataset`` and aggregate it into time slices.\n",
    "\n",
    "    Reads the tab-separated (t, i, j) contact file, drops self-contacts, stores\n",
    "    every pair as (smaller id, larger id), removes duplicated events and counts\n",
    "    the contacts per (tslice, i, j). A slice is AGGR_TIME time units wide and\n",
    "    slice 0 starts at the timestamp of the first row of the file.\n",
    "\n",
    "    Side effect: the list of active 'node-time' labels is pickled to\n",
    "    ../preprocessed/SupraAdjacencyMatrix/<dataset>/PatActiveTimeAggtime<AGGR_TIME>.pkl\n",
    "    unless that file already exists (an existing file is never refreshed).\n",
    "\n",
    "    Returns:\n",
    "        partial_times: sorted list of the slice indices that contain contacts;\n",
    "            consecutive entries may be far apart (e.g. overnight gaps).\n",
    "        s_temp_net: Series indexed by (tslice, i, j) with the contact count,\n",
    "            named 'weight'.\n",
    "        df_tnet: the same data as a DataFrame with columns tslice, i, j, weight.\n",
    "    \"\"\"\n",
    "    df_temp_net = pd.read_csv('../data/Data_SocioPatterns_20s_nonights/tij_%s.dat_nonights.dat' % dataset,\n",
    "                              sep='\\t', header=None,\n",
    "                              names=['t', 'i', 'j'])\n",
    "    # slice each contact event belongs to, measured from the first row's timestamp\n",
    "    df_temp_net['tslice'] = np.floor((df_temp_net.t - df_temp_net.t.iloc[0]) / AGGR_TIME)\n",
    "\n",
    "    # drop self-contacts; copy so the assignment below works on an independent\n",
    "    # frame instead of a view of the raw data (avoids SettingWithCopyWarning)\n",
    "    df_temp_net = df_temp_net[df_temp_net.i != df_temp_net.j].copy()\n",
    "    # contacts are undirected: store every pair as (smaller id, larger id)\n",
    "    swapped = df_temp_net['i'] > df_temp_net['j']\n",
    "    df_temp_net.loc[swapped, ['i', 'j']] = df_temp_net.loc[swapped, ['j', 'i']].values\n",
    "    df_temp_net = df_temp_net.drop_duplicates(['t', 'i', 'j'])\n",
    "\n",
    "    # number of contacts within each (slice, i, j), used as the edge weight\n",
    "    s_temp_net = df_temp_net.groupby(['tslice', 'i', 'j']).size().rename('weight')\n",
    "\n",
    "    # all slices that contain at least one contact\n",
    "    partial_times = sorted(list(s_temp_net.index.levels[0]))\n",
    "\n",
    "    # convenience: dataframe version of the series above\n",
    "    df_tnet = s_temp_net.reset_index()\n",
    "\n",
    "    # 'node-time' label of every (node, slice) with at least one contact; sorted\n",
    "    # so the stored list does not depend on per-process string hashing\n",
    "    active_labels = set()\n",
    "    for tslice, i, j in s_temp_net.index:\n",
    "        active_labels.add('%d-%d' % (i, tslice))\n",
    "        active_labels.add('%d-%d' % (j, tslice))\n",
    "    pat_active_time = sorted(active_labels)\n",
    "\n",
    "    target_dir = '../preprocessed/SupraAdjacencyMatrix/%s' % dataset\n",
    "    os.makedirs(target_dir, exist_ok=True)\n",
    "\n",
    "    pat_active_time_file_name = \"../preprocessed/SupraAdjacencyMatrix/%s/PatActiveTimeAggtime%d.pkl\" % (dataset, AGGR_TIME)\n",
    "    if not os.path.isfile(pat_active_time_file_name):\n",
    "        # context manager closes the handle even if pickling fails\n",
    "        with open(pat_active_time_file_name, 'wb') as out_file:\n",
    "            pickle.dump(pat_active_time, out_file)\n",
    "\n",
    "    return partial_times, s_temp_net, df_tnet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_dyn_supra(partial_times,s_temp_net, df_tnet, temp_edge_weight, rep_time):\n",
    "    \"\"\"Build the dynamic supra-adjacency graph as a directed networkx graph.\n",
    "\n",
    "    For every contact (n1, n2) with weight w in slice t0, and for each endpoint n\n",
    "    that has a later contact in slice t_next (see get_next_event):\n",
    "      * temporal edge  n at t0      -> n at t_next, weight temp_edge_weight\n",
    "      * cross edge     other at t0  -> n at t_next, weight w\n",
    "    With rep_time == 'Inverse' both weights are divided by (t_next - t0); any\n",
    "    other value (e.g. 'Non') leaves them unchanged.\n",
    "\n",
    "    Args:\n",
    "        partial_times: iterable of slice indices to process.\n",
    "        s_temp_net: Series indexed by (tslice, i, j) holding contact weights.\n",
    "        df_tnet: DataFrame version of s_temp_net, used to look up next events.\n",
    "        temp_edge_weight: base weight of the temporal edges.\n",
    "        rep_time: 'Inverse' or 'Non'.\n",
    "\n",
    "    Returns:\n",
    "        nx.DiGraph whose nodes are 'node-time' strings.\n",
    "    \"\"\"\n",
    "    supra_G = nx.DiGraph() # directed\n",
    "    # loop over time slices\n",
    "    for t0 in partial_times:\n",
    "        # loop over all edges at time t0\n",
    "        for (n1, n2), w in s_temp_net[t0].items():\n",
    "            # future event times for nodes n1 and n2\n",
    "            t1 = get_next_event(df_tnet, n1, t0)\n",
    "            t2 = get_next_event(df_tnet, n2, t0)\n",
    "\n",
    "            if t1 is not None:\n",
    "                temporal_weight_1 = temp_edge_weight\n",
    "                cross_weight_1 = w\n",
    "                if rep_time == 'Inverse':\n",
    "                    gap_1 = float(t1 - t0)\n",
    "                    temporal_weight_1 = temporal_weight_1 / gap_1\n",
    "                    cross_weight_1 = cross_weight_1 / gap_1\n",
    "                add_temporal_edge(supra_G, n2, t0, n1, t1, weight=cross_weight_1) # add \"cross\" edge\n",
    "                add_temporal_edge(supra_G, n1, t0, n1, t1, weight=temporal_weight_1) # add \"temporal\" edge\n",
    "\n",
    "            if t2 is not None:\n",
    "                temporal_weight_2 = temp_edge_weight\n",
    "                # start again from the raw contact weight w: the rescaling applied\n",
    "                # for n1 above must not leak into the weight of the edge towards n2\n",
    "                cross_weight_2 = w\n",
    "                if rep_time == 'Inverse':\n",
    "                    gap_2 = float(t2 - t0)\n",
    "                    temporal_weight_2 = temporal_weight_2 / gap_2\n",
    "                    cross_weight_2 = cross_weight_2 / gap_2\n",
    "                add_temporal_edge(supra_G, n2, t0, n2, t2, weight=temporal_weight_2) # add \"temporal\" edge\n",
    "                add_temporal_edge(supra_G, n1, t0, n2, t2, weight=cross_weight_2) # add \"cross\" edge\n",
    "    return supra_G"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_stat_supra(partial_times, s_temp_net, df_tnet):\n",
    "    \"\"\"Build the static supra-adjacency graph as a directed networkx graph.\n",
    "\n",
    "    Every contact (n1, n2) with weight w in slice t0 becomes a pair of opposite\n",
    "    edges between 'n1-t0' and 'n2-t0'; no edge connects different slices.\n",
    "\n",
    "    Args:\n",
    "        partial_times: iterable of slice indices to process.\n",
    "        s_temp_net: Series indexed by (tslice, i, j) holding contact weights.\n",
    "        df_tnet: not used here; kept so the signature parallels make_dyn_supra.\n",
    "\n",
    "    Returns:\n",
    "        nx.DiGraph whose nodes are 'node-time' strings.\n",
    "    \"\"\"\n",
    "    supra_G = nx.DiGraph() # directed\n",
    "    # loop over time slices\n",
    "    for t0 in partial_times:\n",
    "        # loop over all edges at time t0\n",
    "        for (n1, n2), w in s_temp_net[t0].items():\n",
    "            # add both directions of the contact inside the layer t0\n",
    "            add_temporal_edge(supra_G, n2, t0, n1, t0, weight=w)\n",
    "            add_temporal_edge(supra_G, n1, t0, n2, t0, weight=w)\n",
    "    return supra_G"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def save_supra(dataset, supra_G, supra_rep, temp_edge_weight=None, rep_time=None):\n",
    "    \"\"\"Pickle ``supra_G`` below ../preprocessed/SupraAdjacencyMatrix/<dataset>/<supra_rep>/.\n",
    "\n",
    "    The file is named 'Aggtime<AGGR_TIME>.gpickle' when supra_rep == 'Stat' and\n",
    "    'Aggtime<AGGR_TIME>Weight<temp_edge_weight>Reptime<rep_time>.gpickle' otherwise.\n",
    "    The weight is rendered with ``%d``, so it has to be a number and any\n",
    "    fractional part is dropped from the file name.\n",
    "\n",
    "    Args:\n",
    "        dataset: dataset name, used as a directory component.\n",
    "        supra_G: graph object to serialise.\n",
    "        supra_rep: representation name, used as a directory component.\n",
    "        temp_edge_weight: temporal edge weight (needed unless supra_rep == 'Stat').\n",
    "        rep_time: time representation name (needed unless supra_rep == 'Stat').\n",
    "    \"\"\"\n",
    "    target_dir = '../preprocessed/SupraAdjacencyMatrix/%s/%s' %(dataset, supra_rep)\n",
    "    os.makedirs(target_dir, exist_ok = True)\n",
    "    if supra_rep == 'Stat':\n",
    "        file_name = '/Aggtime%d.gpickle' % (AGGR_TIME)\n",
    "    else:\n",
    "        file_name = '/Aggtime%dWeight%dReptime%s.gpickle' % (AGGR_TIME, temp_edge_weight, rep_time)\n",
    "    # nx.write_gpickle no longer exists in networkx 3.x; a plain pickle dump with\n",
    "    # the highest protocol is its documented replacement\n",
    "    with open(target_dir + file_name, 'wb') as out_file:\n",
    "        pickle.dump(supra_G, out_file, pickle.HIGHEST_PROTOCOL)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def save_supra_allnodes(dataset, supra_G, supra_rep, temp_edge_weight=None, rep_time=None):\n",
    "    \"\"\"Pickle ``supra_G`` below ../preprocessed/SupraAdjacencyMatrixAllNodes/<dataset>/<supra_rep>/.\n",
    "\n",
    "    The file is named 'Aggtime<AGGR_TIME>.gpickle' when supra_rep == 'Stat' and\n",
    "    'Aggtime<AGGR_TIME>Weight<temp_edge_weight>Reptime<rep_time>.gpickle' otherwise.\n",
    "    The weight is rendered with ``%d``, so it has to be a number and any\n",
    "    fractional part is dropped from the file name.\n",
    "\n",
    "    Args:\n",
    "        dataset: dataset name, used as a directory component.\n",
    "        supra_G: graph object to serialise.\n",
    "        supra_rep: representation name, used as a directory component.\n",
    "        temp_edge_weight: temporal edge weight (needed unless supra_rep == 'Stat').\n",
    "        rep_time: time representation name (needed unless supra_rep == 'Stat').\n",
    "    \"\"\"\n",
    "    target_dir = '../preprocessed/SupraAdjacencyMatrixAllNodes/%s/%s' %(dataset, supra_rep)\n",
    "    os.makedirs(target_dir, exist_ok = True)\n",
    "    if supra_rep == 'Stat':\n",
    "        file_name = '/Aggtime%d.gpickle' % (AGGR_TIME)\n",
    "    else:\n",
    "        file_name = '/Aggtime%dWeight%dReptime%s.gpickle' % (AGGR_TIME, temp_edge_weight, rep_time)\n",
    "    # nx.write_gpickle no longer exists in networkx 3.x; a plain pickle dump with\n",
    "    # the highest protocol is its documented replacement\n",
    "    with open(target_dir + file_name, 'wb') as out_file:\n",
    "        pickle.dump(supra_G, out_file, pickle.HIGHEST_PROTOCOL)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_inactive_nodes_to_supra(supra_G):\n",
    "    \"\"\"Return a relabelled copy of ``supra_G`` padded with every missing (node, time) pair.\n",
    "\n",
    "    Each 'node-time' label is parsed into two integers. Node ids and times are\n",
    "    then replaced by their position among the sorted unique values, and every\n",
    "    'position-position' combination that is not present yet is added as a node\n",
    "    without edges.\n",
    "    \"\"\"\n",
    "    # parse the labels once: (node id, time) as integers\n",
    "    parsed = []\n",
    "    for label in supra_G.nodes():\n",
    "        parts = label.split('-')\n",
    "        parsed.append((int(parts[0]), int(parts[1])))\n",
    "\n",
    "    # rank of every node id / time among the sorted unique values\n",
    "    node_ids = np.unique([node for node, _ in parsed])\n",
    "    time_ids = np.unique([time for _, time in parsed])\n",
    "    node_rank = dict(zip(node_ids, range(len(node_ids))))\n",
    "    time_rank = dict(zip(time_ids, range(len(time_ids))))\n",
    "\n",
    "    relabel = {}\n",
    "    for node, time in parsed:\n",
    "        relabel['%s-%s' % (node, time)] = '%s-%s' % (node_rank[node], time_rank[time])\n",
    "    supra_H = nx.relabel_nodes(supra_G, relabel)\n",
    "\n",
    "    # full grid of labels; whatever the relabelled graph lacks is added as is\n",
    "    full_grid = {'%s-%s' % pair for pair in it.product(range(len(node_ids)), range(len(time_ids)))}\n",
    "    supra_H.add_nodes_from(full_grid.difference(relabel.values()))\n",
    "    return supra_H"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build and save every configured supra-adjacency graph for every dataset.\n",
    "for dataset in dataset_list:\n",
    "    partial_times, s_temp_net, df_tnet = load_temp_data(dataset)\n",
    "    for supra_rep in supra_rep_list:\n",
    "        if supra_rep == 'Stat':\n",
    "            supra_G = make_stat_supra(partial_times, s_temp_net, df_tnet)\n",
    "            # NOTE(review): here save_supra receives the graph returned by\n",
    "            # add_inactive_nodes_to_supra (relabelled and padded), whereas the 'Dyn'\n",
    "            # branch calls save_supra before that step -- confirm which is intended.\n",
    "            supra_G = add_inactive_nodes_to_supra(supra_G)\n",
    "            save_supra(dataset, supra_G, supra_rep)\n",
    "        elif supra_rep == 'Dyn':\n",
    "            for temp_edge_weight in temp_edge_weight_list:\n",
    "                for rep_time in rep_time_list:\n",
    "                    supra_G = make_dyn_supra(partial_times, s_temp_net, df_tnet, temp_edge_weight, rep_time)\n",
    "                    save_supra(dataset, supra_G, supra_rep, temp_edge_weight, rep_time)\n",
    "                    supra_G = add_inactive_nodes_to_supra(supra_G)\n",
    "                    save_supra_allnodes(dataset, supra_G, supra_rep, temp_edge_weight, rep_time)\n",
    "        else:\n",
    "            # fail loudly instead of re-saving the graph left over from the\n",
    "            # previous iteration under the name of an unsupported representation\n",
    "            raise ValueError('unknown supra representation: %r' % (supra_rep,))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
