{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append(\"../infrastructure\")\n",
    "from maximal_independent_set import maximal_n_hop_independent_set\n",
    "from collections import defaultdict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Process facebook graph\n",
    "\n",
    "def kth_order_neighborhood(network, node, k):\n",
    "    if k == 0:\n",
    "        return {node}\n",
    "    \n",
    "    neighbors = set([node])\n",
    "    visited = set([node])\n",
    "\n",
    "    for _ in range(k):\n",
    "        temp_neighbors = set()\n",
    "        for neighbor in neighbors:\n",
    "            temp_neighbors.update(set(network[neighbor]))\n",
    "        temp_neighbors -= visited\n",
    "        neighbors = temp_neighbors\n",
    "        visited.update(temp_neighbors)\n",
    "\n",
    "    return neighbors\n",
    "\n",
    "def find_best_5_hop_ind_set(graph):\n",
    "    best_set_size = float('-inf')\n",
    "    best_effective_set_size = float('-inf')\n",
    "    best_effective_set = None\n",
    "\n",
    "    for _ in range(10):\n",
    "        # Run your code here\n",
    "        ind_set = maximal_n_hop_independent_set(graph, 5)\n",
    "        \n",
    "        # go through each element of result and check if \n",
    "        # its first and second order neighborhoods are both non-empty\n",
    "        effective_ind_set = []\n",
    "        for node in ind_set:\n",
    "            if graph[node] != [] and [graph[nb] for nb in graph[node]] != []:\n",
    "                effective_ind_set.append(node)\n",
    "                \n",
    "        # Update the largest value if necessary\n",
    "        if len(effective_ind_set) > best_effective_set_size:\n",
    "            best_effective_set_size = len(effective_ind_set)\n",
    "            best_effective_set = effective_ind_set\n",
    "            best_set_size = len(ind_set)\n",
    "\n",
    "    return best_set_size, best_effective_set_size, best_effective_set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original graph size: 4039\n",
      "3 3\n"
     ]
    }
   ],
   "source": [
    "file_path = \"./data/facebook_combined.txt\"\n",
    "\n",
    "def process_file_to_graph_fb(file_path):\n",
    "    graph = defaultdict(list)\n",
    "    \n",
    "    with open(file_path, 'r') as file:\n",
    "        for line in file:\n",
    "            node1, node2 = map(int, line.split())\n",
    "            graph[node1].append(node2)\n",
    "            graph[node2].append(node1)\n",
    "    \n",
    "    return graph\n",
    "\n",
    "graph = process_file_to_graph_fb(file_path)\n",
    "print(\"Original graph size:\", len(graph))\n",
    "best_set_size, best_effective_set_size, best_effective_set = find_best_5_hop_ind_set(graph)\n",
    "print(best_set_size, best_effective_set_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original graph size: 37700\n",
      "211 211\n"
     ]
    }
   ],
   "source": [
    "# Github ML graph\n",
    "\n",
    "def process_file_to_graph_csv(file_path):\n",
    "    graph = defaultdict(list)\n",
    "    \n",
    "    with open(file_path, 'r') as file:\n",
    "        next(file)  # Skip the first row\n",
    "        for line in file:\n",
    "            node1, node2 = map(int, line.split(','))\n",
    "            graph[node1].append(node2)\n",
    "            graph[node2].append(node1)\n",
    "    \n",
    "    return graph\n",
    "\n",
    "file_path = \"./data/git_web_ml/musae_git_edges.csv\"\n",
    "graph = process_file_to_graph_csv(file_path)\n",
    "print(\"Original graph size:\", len(graph))\n",
    "best_set_size, best_effective_set_size, best_effective_set = find_best_5_hop_ind_set(graph)\n",
    "print(best_set_size, best_effective_set_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original graph size: 28281\n",
      "474 474\n"
     ]
    }
   ],
   "source": [
    "# Deezer Europe graph  \n",
    "\n",
    "file_path = \"./data/deezer_europe/deezer_europe_edges.csv\"\n",
    "graph = process_file_to_graph_csv(file_path)\n",
    "print(\"Original graph size:\", len(graph))\n",
    "best_set_size, best_effective_set_size, best_effective_set = find_best_5_hop_ind_set(graph)\n",
    "print(best_set_size, best_effective_set_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original graph size: 54573\n",
      "327 327\n",
      "Original graph size: 47538\n",
      "475 475\n",
      "Original graph size: 41773\n",
      "1183 1183\n"
     ]
    }
   ],
   "source": [
    "# Deezer Europe clean graph  \n",
    "\n",
    "file_path = \"./data/deezer_clean_data/HR_edges.csv\"\n",
    "graph = process_file_to_graph_csv(file_path)\n",
    "print(\"Original graph size:\", len(graph))\n",
    "best_set_size, best_effective_set_size, best_effective_set = find_best_5_hop_ind_set(graph)\n",
    "print(best_set_size, best_effective_set_size)\n",
    "\n",
    "file_path = \"./data/deezer_clean_data/HU_edges.csv\"\n",
    "graph = process_file_to_graph_csv(file_path)\n",
    "print(\"Original graph size:\", len(graph))\n",
    "best_set_size, best_effective_set_size, best_effective_set = find_best_5_hop_ind_set(graph)\n",
    "print(best_set_size, best_effective_set_size)\n",
    "\n",
    "file_path = \"./data/deezer_clean_data/RO_edges.csv\"\n",
    "graph = process_file_to_graph_csv(file_path)\n",
    "print(\"Original graph size:\", len(graph))\n",
    "best_set_size, best_effective_set_size, best_effective_set = find_best_5_hop_ind_set(graph)\n",
    "print(best_set_size, best_effective_set_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Facebook pages clean graph  \n",
    "\n",
    "file_path = \"./data/facebook_clean_data/artist_edges.csv\"\n",
    "graph = process_file_to_graph_csv(file_path)\n",
    "print(\"Original graph size:\", len(graph))\n",
    "best_set_size, best_effective_set_size, best_effective_set = find_best_5_hop_ind_set(graph)\n",
    "print(best_set_size, best_effective_set_size)\n",
    "\n",
    "file_path = \"./data/facebook_clean_data/athletes_edges.csv\"\n",
    "graph = process_file_to_graph_csv(file_path)\n",
    "print(\"Original graph size:\", len(graph))\n",
    "best_set_size, best_effective_set_size, best_effective_set = find_best_5_hop_ind_set(graph)\n",
    "print(best_set_size, best_effective_set_size)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
