{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# Dataset key; it selects the sub-folder and file stem under ../processed_data.\n",
    "dataset = 'oag'\n",
    "\n",
    "# Load the interaction table for that dataset.\n",
    "csv_path = f'../processed_data/{dataset}/ml_{dataset}.csv'\n",
    "data = pd.read_csv(csv_path)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shift every timestamp by the ts value of the row labelled 0.\n",
    "first_ts = data.loc[0, 'ts']\n",
    "data['ts'] = data['ts'].sub(first_ts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>u</th>\n",
       "      <th>i</th>\n",
       "      <th>ts</th>\n",
       "      <th>label_u</th>\n",
       "      <th>label_i</th>\n",
       "      <th>idx</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2183</td>\n",
       "      <td>2714</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2183</td>\n",
       "      <td>2714</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1705</td>\n",
       "      <td>7014</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6396</td>\n",
       "      <td>8112</td>\n",
       "      <td>56</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1061</td>\n",
       "      <td>2714</td>\n",
       "      <td>64</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24487</th>\n",
       "      <td>8164</td>\n",
       "      <td>8172</td>\n",
       "      <td>3998</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>24488</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24488</th>\n",
       "      <td>8164</td>\n",
       "      <td>6182</td>\n",
       "      <td>3998</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>24489</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24489</th>\n",
       "      <td>8172</td>\n",
       "      <td>6182</td>\n",
       "      <td>3998</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>24490</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24490</th>\n",
       "      <td>870</td>\n",
       "      <td>6901</td>\n",
       "      <td>3998</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>24491</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24491</th>\n",
       "      <td>624</td>\n",
       "      <td>870</td>\n",
       "      <td>3998</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>24492</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>24492 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          u     i    ts  label_u  label_i    idx\n",
       "0      2183  2714     0        1        3      1\n",
       "1      2183  2714     0        1        3      2\n",
       "2      1705  7014     7        3        0      3\n",
       "3      6396  8112    56        1        3      4\n",
       "4      1061  2714    64        2        3      5\n",
       "...     ...   ...   ...      ...      ...    ...\n",
       "24487  8164  8172  3998        2        2  24488\n",
       "24488  8164  6182  3998        2        2  24489\n",
       "24489  8172  6182  3998        2        2  24490\n",
       "24490   870  6901  3998        1        2  24491\n",
       "24491   624   870  3998        2        1  24492\n",
       "\n",
       "[24492 rows x 6 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One (node, ts) frame per edge endpoint, each exposing the node id as 'user'.\n",
    "endpoint_frames = [\n",
    "    data[[endpoint, 'ts']].rename(columns={endpoint: 'user'})\n",
    "    for endpoint in ('u', 'i')\n",
    "]\n",
    "u_df, i_df = endpoint_frames\n",
    "\n",
    "# Stack both endpoint views; the original row labels are kept, so each label appears twice.\n",
    "combined_df = pd.concat(endpoint_frames)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user</th>\n",
       "      <th>ts</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2183</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2183</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1705</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6396</td>\n",
       "      <td>56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1061</td>\n",
       "      <td>64</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24487</th>\n",
       "      <td>8172</td>\n",
       "      <td>3998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24488</th>\n",
       "      <td>6182</td>\n",
       "      <td>3998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24489</th>\n",
       "      <td>6182</td>\n",
       "      <td>3998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24490</th>\n",
       "      <td>6901</td>\n",
       "      <td>3998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24491</th>\n",
       "      <td>870</td>\n",
       "      <td>3998</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>48984 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       user    ts\n",
       "0      2183     0\n",
       "1      2183     0\n",
       "2      1705     7\n",
       "3      6396    56\n",
       "4      1061    64\n",
       "...     ...   ...\n",
       "24487  8172  3998\n",
       "24488  6182  3998\n",
       "24489  6182  3998\n",
       "24490  6901  3998\n",
       "24491   870  3998\n",
       "\n",
       "[48984 rows x 2 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "combined_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Largest ts seen for each node, counting appearances on either side of an edge.\n",
    "max_timestamps = combined_df.groupby('user', as_index=False)['ts'].max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user</th>\n",
       "      <th>ts</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>3929</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>2150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>3501</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>3683</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>3993</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8316</th>\n",
       "      <td>8317</td>\n",
       "      <td>2690</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8317</th>\n",
       "      <td>8318</td>\n",
       "      <td>3388</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8318</th>\n",
       "      <td>8319</td>\n",
       "      <td>3950</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8319</th>\n",
       "      <td>8320</td>\n",
       "      <td>3451</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8320</th>\n",
       "      <td>8321</td>\n",
       "      <td>3836</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8321 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      user    ts\n",
       "0        1  3929\n",
       "1        2  2150\n",
       "2        3  3501\n",
       "3        4  3683\n",
       "4        5  3993\n",
       "...    ...   ...\n",
       "8316  8317  2690\n",
       "8317  8318  3388\n",
       "8318  8319  3950\n",
       "8319  8320  3451\n",
       "8320  8321  3836\n",
       "\n",
       "[8321 rows x 2 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "max_timestamps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep every column except those whose name starts with 'Unnamed'.\n",
    "keep_columns = ~data.columns.str.startswith('Unnamed')\n",
    "data = data.loc[:, keep_columns]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>u</th>\n",
       "      <th>i</th>\n",
       "      <th>ts</th>\n",
       "      <th>label_u</th>\n",
       "      <th>label_i</th>\n",
       "      <th>idx</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2183</td>\n",
       "      <td>2714</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2183</td>\n",
       "      <td>2714</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1705</td>\n",
       "      <td>7014</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6396</td>\n",
       "      <td>8112</td>\n",
       "      <td>56</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1061</td>\n",
       "      <td>2714</td>\n",
       "      <td>64</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24487</th>\n",
       "      <td>8164</td>\n",
       "      <td>8172</td>\n",
       "      <td>3998</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>24488</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24488</th>\n",
       "      <td>8164</td>\n",
       "      <td>6182</td>\n",
       "      <td>3998</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>24489</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24489</th>\n",
       "      <td>8172</td>\n",
       "      <td>6182</td>\n",
       "      <td>3998</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>24490</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24490</th>\n",
       "      <td>870</td>\n",
       "      <td>6901</td>\n",
       "      <td>3998</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>24491</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24491</th>\n",
       "      <td>624</td>\n",
       "      <td>870</td>\n",
       "      <td>3998</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>24492</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>24492 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          u     i    ts  label_u  label_i    idx\n",
       "0      2183  2714     0        1        3      1\n",
       "1      2183  2714     0        1        3      2\n",
       "2      1705  7014     7        3        0      3\n",
       "3      6396  8112    56        1        3      4\n",
       "4      1061  2714    64        2        3      5\n",
       "...     ...   ...   ...      ...      ...    ...\n",
       "24487  8164  8172  3998        2        2  24488\n",
       "24488  8164  6182  3998        2        2  24489\n",
       "24489  8172  6182  3998        2        2  24490\n",
       "24490   870  6901  3998        1        2  24491\n",
       "24491   624   870  3998        2        1  24492\n",
       "\n",
       "[24492 rows x 6 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach each endpoint's last-seen timestamp. Both frames carry a 'ts' column (and, on the\n",
    "# second pass, a 'user' column), so pandas appends the suffix to the right-hand copies,\n",
    "# giving 'tslast_u_ts', 'userlast_i_ts' and 'tslast_i_ts'.\n",
    "for endpoint, suffix in (('u', 'last_u_ts'), ('i', 'last_i_ts')):\n",
    "    data = data.merge(\n",
    "        max_timestamps,\n",
    "        left_on=endpoint,\n",
    "        right_on='user',\n",
    "        suffixes=('', suffix),\n",
    "        sort=False,\n",
    "    )\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Put the rows back in idx order and renumber the index from 0.\n",
    "data = data.sort_values('idx', ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>u</th>\n",
       "      <th>i</th>\n",
       "      <th>ts</th>\n",
       "      <th>label_u</th>\n",
       "      <th>label_i</th>\n",
       "      <th>idx</th>\n",
       "      <th>user</th>\n",
       "      <th>tslast_u_ts</th>\n",
       "      <th>userlast_i_ts</th>\n",
       "      <th>tslast_i_ts</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2183</td>\n",
       "      <td>2714</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2183</td>\n",
       "      <td>3685</td>\n",
       "      <td>2714</td>\n",
       "      <td>790</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2183</td>\n",
       "      <td>2714</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2183</td>\n",
       "      <td>3685</td>\n",
       "      <td>2714</td>\n",
       "      <td>790</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1705</td>\n",
       "      <td>7014</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1705</td>\n",
       "      <td>3972</td>\n",
       "      <td>7014</td>\n",
       "      <td>3774</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6396</td>\n",
       "      <td>8112</td>\n",
       "      <td>56</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>6396</td>\n",
       "      <td>3864</td>\n",
       "      <td>8112</td>\n",
       "      <td>3984</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1061</td>\n",
       "      <td>2714</td>\n",
       "      <td>64</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>1061</td>\n",
       "      <td>64</td>\n",
       "      <td>2714</td>\n",
       "      <td>790</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24487</th>\n",
       "      <td>8164</td>\n",
       "      <td>8172</td>\n",
       "      <td>3998</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>24488</td>\n",
       "      <td>8164</td>\n",
       "      <td>3998</td>\n",
       "      <td>8172</td>\n",
       "      <td>3998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24488</th>\n",
       "      <td>8164</td>\n",
       "      <td>6182</td>\n",
       "      <td>3998</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>24489</td>\n",
       "      <td>8164</td>\n",
       "      <td>3998</td>\n",
       "      <td>6182</td>\n",
       "      <td>3998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24489</th>\n",
       "      <td>8172</td>\n",
       "      <td>6182</td>\n",
       "      <td>3998</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>24490</td>\n",
       "      <td>8172</td>\n",
       "      <td>3998</td>\n",
       "      <td>6182</td>\n",
       "      <td>3998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24490</th>\n",
       "      <td>870</td>\n",
       "      <td>6901</td>\n",
       "      <td>3998</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>24491</td>\n",
       "      <td>870</td>\n",
       "      <td>3998</td>\n",
       "      <td>6901</td>\n",
       "      <td>3998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24491</th>\n",
       "      <td>624</td>\n",
       "      <td>870</td>\n",
       "      <td>3998</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>24492</td>\n",
       "      <td>624</td>\n",
       "      <td>3998</td>\n",
       "      <td>870</td>\n",
       "      <td>3998</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>24492 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          u     i    ts  label_u  label_i    idx  user  tslast_u_ts  \\\n",
       "0      2183  2714     0        1        3      1  2183         3685   \n",
       "1      2183  2714     0        1        3      2  2183         3685   \n",
       "2      1705  7014     7        3        0      3  1705         3972   \n",
       "3      6396  8112    56        1        3      4  6396         3864   \n",
       "4      1061  2714    64        2        3      5  1061           64   \n",
       "...     ...   ...   ...      ...      ...    ...   ...          ...   \n",
       "24487  8164  8172  3998        2        2  24488  8164         3998   \n",
       "24488  8164  6182  3998        2        2  24489  8164         3998   \n",
       "24489  8172  6182  3998        2        2  24490  8172         3998   \n",
       "24490   870  6901  3998        1        2  24491   870         3998   \n",
       "24491   624   870  3998        2        1  24492   624         3998   \n",
       "\n",
       "       userlast_i_ts  tslast_i_ts  \n",
       "0               2714          790  \n",
       "1               2714          790  \n",
       "2               7014         3774  \n",
       "3               8112         3984  \n",
       "4               2714          790  \n",
       "...              ...          ...  \n",
       "24487           8172         3998  \n",
       "24488           6182         3998  \n",
       "24489           6182         3998  \n",
       "24490           6901         3998  \n",
       "24491            870         3998  \n",
       "\n",
       "[24492 rows x 10 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Discard the join-key columns that the two merges copied in from max_timestamps.\n",
    "redundant_keys = ['user', 'userlast_i_ts']\n",
    "data = data.drop(redundant_keys, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Give the merged timestamp columns their final names.\n",
    "final_names = {'tslast_u_ts': 'last_u_ts', 'tslast_i_ts': 'last_i_ts'}\n",
    "data = data.rename(final_names, axis='columns')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>u</th>\n",
       "      <th>i</th>\n",
       "      <th>ts</th>\n",
       "      <th>label_u</th>\n",
       "      <th>label_i</th>\n",
       "      <th>idx</th>\n",
       "      <th>last_u_ts</th>\n",
       "      <th>last_i_ts</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2183</td>\n",
       "      <td>2714</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3685</td>\n",
       "      <td>790</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2183</td>\n",
       "      <td>2714</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>3685</td>\n",
       "      <td>790</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1705</td>\n",
       "      <td>7014</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>3972</td>\n",
       "      <td>3774</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6396</td>\n",
       "      <td>8112</td>\n",
       "      <td>56</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>3864</td>\n",
       "      <td>3984</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1061</td>\n",
       "      <td>2714</td>\n",
       "      <td>64</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>64</td>\n",
       "      <td>790</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24487</th>\n",
       "      <td>8164</td>\n",
       "      <td>8172</td>\n",
       "      <td>3998</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>24488</td>\n",
       "      <td>3998</td>\n",
       "      <td>3998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24488</th>\n",
       "      <td>8164</td>\n",
       "      <td>6182</td>\n",
       "      <td>3998</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>24489</td>\n",
       "      <td>3998</td>\n",
       "      <td>3998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24489</th>\n",
       "      <td>8172</td>\n",
       "      <td>6182</td>\n",
       "      <td>3998</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>24490</td>\n",
       "      <td>3998</td>\n",
       "      <td>3998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24490</th>\n",
       "      <td>870</td>\n",
       "      <td>6901</td>\n",
       "      <td>3998</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>24491</td>\n",
       "      <td>3998</td>\n",
       "      <td>3998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24491</th>\n",
       "      <td>624</td>\n",
       "      <td>870</td>\n",
       "      <td>3998</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>24492</td>\n",
       "      <td>3998</td>\n",
       "      <td>3998</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>24492 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          u     i    ts  label_u  label_i    idx  last_u_ts  last_i_ts\n",
       "0      2183  2714     0        1        3      1       3685        790\n",
       "1      2183  2714     0        1        3      2       3685        790\n",
       "2      1705  7014     7        3        0      3       3972       3774\n",
       "3      6396  8112    56        1        3      4       3864       3984\n",
       "4      1061  2714    64        2        3      5         64        790\n",
       "...     ...   ...   ...      ...      ...    ...        ...        ...\n",
       "24487  8164  8172  3998        2        2  24488       3998       3998\n",
       "24488  8164  6182  3998        2        2  24489       3998       3998\n",
       "24489  8172  6182  3998        2        2  24490       3998       3998\n",
       "24490   870  6901  3998        1        2  24491       3998       3998\n",
       "24491   624   870  3998        2        1  24492       3998       3998\n",
       "\n",
       "[24492 rows x 8 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the enriched table back over the file that was loaded at the top.\n",
    "# index=False keeps the RangeIndex out of the file; without it the CSV gains an\n",
    "# unnamed leading column that comes back as 'Unnamed: 0' on the next read_csv.\n",
    "# NOTE(review): this overwrites the notebook's own input, so a second Run All starts\n",
    "# from a file that already contains last_u_ts / last_i_ts.\n",
    "data.to_csv(f'../processed_data/{dataset}/ml_{dataset}.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the two .npy arrays stored next to the CSV (numpy is already imported in the first cell).\n",
    "node = np.load(f'../processed_data/{dataset}/ml_{dataset}_node.npy')\n",
    "edge = np.load(f'../processed_data/{dataset}/ml_{dataset}.npy')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ncem",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
