{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from scipy import sparse\n",
    "\n",
    "# Directory the egodata_<index>.tsv files are read from.\n",
    "datadir = os.path.join('..', 'dat', 'lastfm_filtered')\n",
    "# Largest file index to load (inclusive, see file_idx below).\n",
    "max_file = 430000\n",
    "# max_file = 20000  # presumably a smaller subset for quick runs -- TODO confirm\n",
    "# Step between consecutive file indices.\n",
    "increments = 10000\n",
    "# 0, increments, 2*increments, ..., max_file. The stop value is max_file + increments\n",
    "# because np.arange excludes its stop.\n",
    "file_idx = np.arange(0,max_file+increments, step=increments)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# userid -> list of friend ids, for every user whose record lists at least one friend.\n",
    "friend_data = {}\n",
    "# Stays empty: activity parsing is disabled. The earlier, commented-out version read\n",
    "# lines[idx + 3] as space-separated 'a:b' tokens unless that line contained 'None'.\n",
    "activity = {}\n",
    "\n",
    "# A user record starts every RECORD_LEN lines; the first one is at line index 1.\n",
    "RECORD_LEN = 5\n",
    "\n",
    "for f_idx in file_idx:\n",
    "    path = os.path.join(datadir, 'egodata_' + str(f_idx) + '.tsv')\n",
    "    with open(path, 'r') as fh:\n",
    "        lines = fh.readlines()\n",
    "\n",
    "    # Visit only the record header lines (indices 1, 6, 11, ...).\n",
    "    for idx in range(1, len(lines), RECORD_LEN):\n",
    "        header = lines[idx]\n",
    "        if header.startswith('#'):\n",
    "            continue\n",
    "        # split() without an argument tolerates repeated whitespace, where split(' ')\n",
    "        # would yield empty tokens that int() rejects.\n",
    "        fields = header.split()\n",
    "        if not fields:\n",
    "            # Blank line where a header was expected; nothing to parse.\n",
    "            continue\n",
    "        userid = int(fields[0])\n",
    "        hasfriends = int(fields[1])\n",
    "        if not hasfriends:\n",
    "            continue\n",
    "        # The line right after the header holds the friend ids, or contains 'None'.\n",
    "        friends_line = lines[idx + 1]\n",
    "        if 'None' in friends_line:\n",
    "            continue\n",
    "        friends = [int(tok) for tok in friends_line.split()]\n",
    "        if friends:\n",
    "            friend_data[userid] = friends"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Num. unique users: 421864\n"
     ]
    }
   ],
   "source": [
    "# Ids that appear inside at least one friend list.\n",
    "listed_ids = set().union(*friend_data.values())\n",
    "# Ids that have a friend list of their own.\n",
    "owner_ids = set(friend_data.keys())\n",
    "users = listed_ids | owner_ids\n",
    "print(\"Num. unique users:\", len(users))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mean. num friendships: 71.16750021434083\n"
     ]
    }
   ],
   "source": [
    "# Friend-list length of every user that has an entry in friend_data.\n",
    "friend_counts = list(map(len, friend_data.values()))\n",
    "print(\"Mean. num friendships:\", np.mean(friend_counts))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Map every user id to its row/column position in the adjacency matrix.\n",
    "u_idx = {uid: i for i, uid in enumerate(users)}\n",
    "n_users = len(users)\n",
    "\n",
    "# Collect the (row, column) coordinates of all edges: row = user, column = friend.\n",
    "# Edges are stored as listed; the reverse edge exists only if the friend's own list\n",
    "# contains the user.\n",
    "rows, cols = [], []\n",
    "for u, friends in friend_data.items():\n",
    "    fids = [u_idx[x] for x in friends]\n",
    "    rows.extend([u_idx[u]] * len(fids))\n",
    "    cols.extend(fids)\n",
    "\n",
    "# Sparse (CSR) instead of np.zeros((n_users, n_users)): a dense float64 matrix for the\n",
    "# 421864 users reported above would need about 1.4 TB. Shape, dtype (float64) and the\n",
    "# 0/1 entries are the same; call adj_mat.toarray() only for small user sets.\n",
    "adj_mat = sparse.csr_matrix(\n",
    "    (np.ones(len(rows)), (rows, cols)), shape=(n_users, n_users)\n",
    ")\n",
    "# csr_matrix sums duplicate coordinates, so reset the stored entries to exactly 1.\n",
    "adj_mat.data[:] = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
