{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "0cb86ffd",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using backend: pytorch\n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "sys.path.append('..')\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from pyvis.network import Network\n",
    "from tqdm import tqdm\n",
    "import os\n",
    "import dgl\n",
    "import torch\n",
    "import networkx as nx\n",
    "from dataset import load_graph_dataset\n",
    "from igraph import *\n",
    "from dgl.data import CoraGraphDataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "adfba2c3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  NumNodes: 2708\n",
      "  NumEdges: 10556\n",
      "  NumFeats: 1433\n",
      "  NumClasses: 7\n",
      "  NumTrainingSamples: 140\n",
      "  NumValidationSamples: 500\n",
      "  NumTestSamples: 1000\n",
      "Done loading data from cached files.\n"
     ]
    }
   ],
   "source": [
    "# Load the preprocessed node-level and edge-level influence tables.\n",
    "df_node_infl = pd.read_csv('df_node_preprocessed.csv')\n",
    "df_edge_infl = pd.read_csv('df_edge_preprocessed_0.csv')\n",
    "\n",
    "# Load the 'cora' dataset: graph, features, labels, split masks and class count.\n",
    "graph, feat, labels, train_mask, val_mask, test_mask, number_classes = load_graph_dataset('cora')\n",
    "\n",
    "# Convert labels and the three split masks to integer NumPy arrays.\n",
    "labels, train_mask, val_mask, test_mask = (\n",
    "    t.numpy().astype(int) for t in (labels, train_mask, val_mask, test_mask)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "02e8ee40",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One row per node: label plus train/val/test flags (columns are 0-3 at this point).\n",
    "df_node = pd.DataFrame([labels, train_mask, val_mask, test_mask]).T\n",
    "train_index = np.sort(df_node_infl['node_id'].values)\n",
    "\n",
    "# Look up every node's influence in a single vectorized pass. The previous loop ran\n",
    "# `i in train_index` (a linear scan) and a boolean filter of df_node_infl for each node,\n",
    "# which is quadratic in the number of nodes.\n",
    "# keep='first' matches the old `.values[0]`; nodes absent from df_node_infl get 0.0.\n",
    "infl_by_node = (\n",
    "    df_node_infl\n",
    "    .drop_duplicates('node_id', keep='first')\n",
    "    .set_index('node_id')['actual influence']\n",
    ")\n",
    "node_infl_list = infl_by_node.reindex(range(len(df_node)), fill_value=0.0).tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "14dec8cc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach the per-node influence values as a new 'infl' column.\n",
    "df_node['infl'] = node_infl_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "da202e3e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the node table without the index; the headers are still 0-3 plus 'infl' here.\n",
    "df_node.to_csv('df_node_infl_v2.csv', index = False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3c052bc3",
   "metadata": {},
   "source": [
    "##### Initialize the graph and set up its vertices"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0b34c7e6",
   "metadata": {},
   "source": [
    "Attach the node id, label, and train/val/test flags to each vertex; edge influence is stored as the edge weight."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "c83f4eab",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows whose source and destination coincide (self-loops) are kept in their own frame.\n",
    "df_self_loop = df_edge_infl[df_edge_infl['from_edges'] == df_edge_infl['to_edges']]\n",
    "\n",
    "# Remaining rows: take every second one, renumber from 0 and force integer node ids.\n",
    "# NOTE(review): the [1::2] step presumably drops the mirrored copy of each undirected\n",
    "# edge and relies on the row ordering of the CSV -- TODO confirm.\n",
    "df_edge = (\n",
    "    df_edge_infl\n",
    "    .loc[lambda d: d['from_edges'] != d['to_edges']]\n",
    "    .iloc[1::2]\n",
    "    .reset_index(drop=True)\n",
    "    .astype({'from_edges': int, 'to_edges': int})\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "1d291abc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pull the edge endpoints and edge influence out as plain NumPy arrays.\n",
    "f_l = df_edge['from_edges'].to_numpy()\n",
    "t_l = df_edge['to_edges'].to_numpy()\n",
    "infl = df_edge['influence'].to_numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "5ceda3f6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Undirected igraph graph with one vertex per node in the dataset.\n",
    "g = Graph(directed=False)\n",
    "g.add_vertices(len(train_mask))\n",
    "\n",
    "# Set each vertex attribute for all vertices at once (one value per vertex, in order).\n",
    "g.vs['id'] = list(range(g.vcount()))\n",
    "g.vs['label'] = list(labels)\n",
    "g.vs['train'] = list(train_mask)\n",
    "g.vs['val'] = list(val_mask)\n",
    "g.vs['test'] = list(test_mask)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "202d169d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add every edge in a single call. Calling add_edges once per edge is much slower\n",
    "# because the graph is updated on each call; the resulting edge order is unchanged.\n",
    "g.add_edges(list(zip(f_l.tolist(), t_l.tolist())))\n",
    "\n",
    "# Edge weight = edge influence, aligned with the insertion order above.\n",
    "g.es['weight'] = infl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "76fdf02b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Give the node table readable headers (previously 0-3 and 'infl').\n",
    "df_node.columns = ['label', 'train', 'val', 'test', 'influence']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "27dc45be",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add the node id as the first column, named 'name'.\n",
    "# NOTE(review): DataFrame.insert raises if 'name' already exists, so this cell cannot be re-run as-is.\n",
    "df_node.insert(loc=0, column='name', value=range(len(df_node)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "0d1cddec",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rename the edge columns positionally.\n",
    "# NOTE(review): assumes df_edge has exactly three columns ordered from_edges, to_edges, influence -- confirm against the CSV.\n",
    "df_edge.columns = ['src', 'target', 'einfluence']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "9f32d7b0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export the node and edge tables as CSV files under Cyto/ (index omitted).\n",
    "df_node.to_csv('Cyto/node_influence.csv', index = False)\n",
    "df_edge.to_csv('Cyto/edge_influence.csv', index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "62f9bdbd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>label</th>\n",
       "      <th>train</th>\n",
       "      <th>val</th>\n",
       "      <th>test</th>\n",
       "      <th>influence</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5.340206</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2703</th>\n",
       "      <td>2703</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2704</th>\n",
       "      <td>2704</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2705</th>\n",
       "      <td>2705</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2706</th>\n",
       "      <td>2706</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2707</th>\n",
       "      <td>2707</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2708 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      name  label  train  val  test  influence\n",
       "0        0      4      0    0     1   0.000000\n",
       "1        1      4      0    0     1   0.000000\n",
       "2        2      4      0    1     0   0.000000\n",
       "3        3      4      1    0     0   5.340206\n",
       "4        4      4      0    0     1   0.000000\n",
       "...    ...    ...    ...  ...   ...        ...\n",
       "2703  2703      3      0    1     0   0.000000\n",
       "2704  2704      4      0    0     1   0.000000\n",
       "2705  2705      4      0    0     0   0.000000\n",
       "2706  2706      3      0    0     0   0.000000\n",
       "2707  2707      3      0    0     0   0.000000\n",
       "\n",
       "[2708 rows x 6 columns]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the node table after the rename and the added 'name' column.\n",
    "df_node"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "4c9ac332",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>src</th>\n",
       "      <th>target</th>\n",
       "      <th>einfluence</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2014</td>\n",
       "      <td>1281</td>\n",
       "      <td>2.605583</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>554</td>\n",
       "      <td>1152</td>\n",
       "      <td>2.116021</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1857</td>\n",
       "      <td>2297</td>\n",
       "      <td>1.551643</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1931</td>\n",
       "      <td>1152</td>\n",
       "      <td>1.129093</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>645</td>\n",
       "      <td>1091</td>\n",
       "      <td>1.122829</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5273</th>\n",
       "      <td>1898</td>\n",
       "      <td>2337</td>\n",
       "      <td>-0.549112</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5274</th>\n",
       "      <td>138</td>\n",
       "      <td>142</td>\n",
       "      <td>-0.570400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5275</th>\n",
       "      <td>1692</td>\n",
       "      <td>2224</td>\n",
       "      <td>-0.620110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5276</th>\n",
       "      <td>2079</td>\n",
       "      <td>1829</td>\n",
       "      <td>-0.636412</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5277</th>\n",
       "      <td>741</td>\n",
       "      <td>1423</td>\n",
       "      <td>-0.923729</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5278 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       src  target  einfluence\n",
       "0     2014    1281    2.605583\n",
       "1      554    1152    2.116021\n",
       "2     1857    2297    1.551643\n",
       "3     1931    1152    1.129093\n",
       "4      645    1091    1.122829\n",
       "...    ...     ...         ...\n",
       "5273  1898    2337   -0.549112\n",
       "5274   138     142   -0.570400\n",
       "5275  1692    2224   -0.620110\n",
       "5276  2079    1829   -0.636412\n",
       "5277   741    1423   -0.923729\n",
       "\n",
       "[5278 rows x 3 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the edge table after the column rename.\n",
    "df_edge"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "2ac4ed26",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2014"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Scratch check: source node id of the first edge row, as an integer.\n",
    "df_edge.loc[0, ['src']].values[0].astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "26e245d5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5.340205937078736"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Scratch check: influence value stored for the row with index label 3.\n",
    "df_node.loc[3, ].influence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "8fdbac38",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gather the node attributes of both endpoints of every edge.\n",
    "# This uses two vectorized lookups instead of a Python loop doing about a dozen\n",
    "# row-wise `.loc` calls per edge. df_node still has its default 0..N-1 index, so a\n",
    "# label lookup by node id selects that node's row. An id that is missing from\n",
    "# df_node raises KeyError, as it did before.\n",
    "node_cols = ['label', 'train', 'val', 'test', 'influence']\n",
    "src_attrs = df_node.loc[df_edge['src'].astype(int).to_numpy(), node_cols]\n",
    "tgt_attrs = df_node.loc[df_edge['target'].astype(int).to_numpy(), node_cols]\n",
    "\n",
    "# Source-node attributes, one entry per edge in df_edge row order.\n",
    "src_node_label = src_attrs['label'].astype(int).tolist()\n",
    "src_node_train = src_attrs['train'].astype(int).tolist()\n",
    "src_node_val = src_attrs['val'].astype(int).tolist()\n",
    "src_node_test = src_attrs['test'].astype(int).tolist()\n",
    "src_node_infl = src_attrs['influence'].tolist()\n",
    "\n",
    "# Target-node attributes, in the same edge order.\n",
    "tgt_node_label = tgt_attrs['label'].astype(int).tolist()\n",
    "tgt_node_train = tgt_attrs['train'].astype(int).tolist()\n",
    "tgt_node_val = tgt_attrs['val'].astype(int).tolist()\n",
    "tgt_node_test = tgt_attrs['test'].astype(int).tolist()\n",
    "tgt_node_infl = tgt_attrs['influence'].tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "2020b422",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Edge table extended with the attributes of both endpoints; df_edge itself is left untouched.\n",
    "df_combined = df_edge.assign(\n",
    "    src_node_label=src_node_label,\n",
    "    src_node_train=src_node_train,\n",
    "    src_node_val=src_node_val,\n",
    "    src_node_test=src_node_test,\n",
    "    src_node_infl=src_node_infl,\n",
    "    tgt_node_label=tgt_node_label,\n",
    "    tgt_node_train=tgt_node_train,\n",
    "    tgt_node_val=tgt_node_val,\n",
    "    tgt_node_test=tgt_node_test,\n",
    "    tgt_node_infl=tgt_node_infl,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "08dc5fa1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export the combined edge + endpoint-attribute table as CSV (index omitted).\n",
    "df_combined.to_csv('Cyto/df_combined.csv', index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "8cd0ff8f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>influence</th>\n",
       "      <th>label</th>\n",
       "      <th>name</th>\n",
       "      <th>selected</th>\n",
       "      <th>shared name</th>\n",
       "      <th>src_node_infl</th>\n",
       "      <th>src_node_label</th>\n",
       "      <th>src_node_test</th>\n",
       "      <th>src_node_train</th>\n",
       "      <th>src_node_val</th>\n",
       "      <th>test</th>\n",
       "      <th>tgt_node_infl</th>\n",
       "      <th>tgt_node_label</th>\n",
       "      <th>tgt_node_test</th>\n",
       "      <th>tgt_node_train</th>\n",
       "      <th>tgt_node_val</th>\n",
       "      <th>train</th>\n",
       "      <th>val</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1195</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>177</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>697</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>2</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>176</th>\n",
       "      <td>5.340206</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>False</td>\n",
       "      <td>3</td>\n",
       "      <td>5.340206</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>5.340206</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>428</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>False</td>\n",
       "      <td>4</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1948</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>4</td>\n",
       "      <td>2702</td>\n",
       "      <td>False</td>\n",
       "      <td>2702</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1950</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>3</td>\n",
       "      <td>2703</td>\n",
       "      <td>False</td>\n",
       "      <td>2703</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1947</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>4</td>\n",
       "      <td>2704</td>\n",
       "      <td>False</td>\n",
       "      <td>2704</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1955</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>3</td>\n",
       "      <td>2706</td>\n",
       "      <td>False</td>\n",
       "      <td>2706</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1954</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>3</td>\n",
       "      <td>2707</td>\n",
       "      <td>False</td>\n",
       "      <td>2707</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2647 rows × 18 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      influence  label  name  selected  shared name  src_node_infl  \\\n",
       "1195   0.000000      4     0     False            0       0.000000   \n",
       "177    0.000000      4     1     False            1       0.000000   \n",
       "697    0.000000      4     2     False            2       0.000000   \n",
       "176    5.340206      4     3     False            3       5.340206   \n",
       "428    0.000000      4     4     False            4       0.000000   \n",
       "...         ...    ...   ...       ...          ...            ...   \n",
       "1948   0.000000      4  2702     False         2702       0.000000   \n",
       "1950   0.000000      3  2703     False         2703       0.000000   \n",
       "1947   0.000000      4  2704     False         2704       0.000000   \n",
       "1955   0.000000      3  2706     False         2706            NaN   \n",
       "1954   0.000000      3  2707     False         2707       0.000000   \n",
       "\n",
       "      src_node_label  src_node_test  src_node_train  src_node_val  test  \\\n",
       "1195             4.0            1.0             0.0           0.0     1   \n",
       "177              4.0            1.0             0.0           0.0     1   \n",
       "697              4.0            0.0             0.0           1.0     0   \n",
       "176              4.0            0.0             1.0           0.0     0   \n",
       "428              4.0            1.0             0.0           0.0     1   \n",
       "...              ...            ...             ...           ...   ...   \n",
       "1948             4.0            0.0             0.0           1.0     0   \n",
       "1950             3.0            0.0             0.0           1.0     0   \n",
       "1947             4.0            1.0             0.0           0.0     1   \n",
       "1955             NaN            NaN             NaN           NaN     0   \n",
       "1954             3.0            0.0             0.0           0.0     0   \n",
       "\n",
       "      tgt_node_infl  tgt_node_label  tgt_node_test  tgt_node_train  \\\n",
       "1195       0.000000             4.0            1.0             0.0   \n",
       "177        0.000000             4.0            1.0             0.0   \n",
       "697        0.000000             4.0            0.0             0.0   \n",
       "176        5.340206             4.0            0.0             1.0   \n",
       "428        0.000000             4.0            1.0             0.0   \n",
       "...             ...             ...            ...             ...   \n",
       "1948       0.000000             4.0            0.0             0.0   \n",
       "1950       0.000000             3.0            0.0             0.0   \n",
       "1947       0.000000             4.0            1.0             0.0   \n",
       "1955       0.000000             3.0            0.0             0.0   \n",
       "1954            NaN             NaN            NaN             NaN   \n",
       "\n",
       "      tgt_node_val  train  val  \n",
       "1195           0.0      0    0  \n",
       "177            0.0      0    0  \n",
       "697            1.0      0    1  \n",
       "176            0.0      1    0  \n",
       "428            0.0      0    0  \n",
       "...            ...    ...  ...  \n",
       "1948           1.0      0    1  \n",
       "1950           1.0      0    1  \n",
       "1947           0.0      0    0  \n",
       "1955           0.0      0    0  \n",
       "1954           NaN      0    0  \n",
       "\n",
       "[2647 rows x 18 columns]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read a node table back from disk and show it ordered by the 'name' column.\n",
    "# NOTE(review): no cell above writes this file -- presumably exported from Cytoscape; TODO confirm.\n",
    "pd.read_csv('Cyto/cyto_node.csv').sort_values(['name'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "c252d267",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fold class label and train flag into one integer per node: 2 * label + train.\n",
    "new_label_visulize = df_node['label'].values * 2 + df_node['train'].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "5f93e6de",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Store the combined label/train code as the 'newlab' column.\n",
    "df_node['newlab'] = new_label_visulize"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "22f89b8e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export the node table, now including 'newlab', as CSV (index omitted).\n",
    "df_node.to_csv('Cyto/node_influence_v2.csv', index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "925aff1a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>label</th>\n",
       "      <th>train</th>\n",
       "      <th>val</th>\n",
       "      <th>test</th>\n",
       "      <th>influence</th>\n",
       "      <th>newlab</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5.340206</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2703</th>\n",
       "      <td>2703</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2704</th>\n",
       "      <td>2704</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2705</th>\n",
       "      <td>2705</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2706</th>\n",
       "      <td>2706</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2707</th>\n",
       "      <td>2707</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2708 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      name  label  train  val  test  influence  newlab\n",
       "0        0      4      0    0     1   0.000000       8\n",
       "1        1      4      0    0     1   0.000000       8\n",
       "2        2      4      0    1     0   0.000000       8\n",
       "3        3      4      1    0     0   5.340206       9\n",
       "4        4      4      0    0     1   0.000000       8\n",
       "...    ...    ...    ...  ...   ...        ...     ...\n",
       "2703  2703      3      0    1     0   0.000000       6\n",
       "2704  2704      4      0    0     1   0.000000       8\n",
       "2705  2705      4      0    0     0   0.000000       8\n",
       "2706  2706      3      0    0     0   0.000000       6\n",
       "2707  2707      3      0    0     0   0.000000       6\n",
       "\n",
       "[2708 rows x 7 columns]"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_node"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "e972d0c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pull the 'src' and 'target' columns of df_combined out as NumPy arrays;\n",
    "# the neighbour helpers below take them as the two edge-endpoint arrays.\n",
    "src, tgt = (df_combined[col].values for col in ('src', 'target'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "4a4baa91",
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_one_hop_node(nd, src, tgt):\n",
    "    \"\"\"Return the direct neighbours of node ``nd`` in an edge list.\n",
    "\n",
    "    ``src`` and ``tgt`` are parallel NumPy arrays of edge endpoints. Edges are\n",
    "    followed in both directions: the result lists the ``tgt`` entry of every\n",
    "    edge whose ``src`` equals ``nd``, then the ``src`` entry of every edge\n",
    "    whose ``tgt`` equals ``nd``. Duplicates are kept and nothing is sorted.\n",
    "    \"\"\"\n",
    "    outgoing = tgt[src == nd]\n",
    "    incoming = src[tgt == nd]\n",
    "    return list(np.concatenate((outgoing, incoming)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "c85f3b02",
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_one_hop_node_list(nd_list, src, tgt):\n",
    "    \"\"\"Return the sorted, de-duplicated neighbours of every node in ``nd_list``.\n",
    "\n",
    "    Args:\n",
    "        nd_list: iterable of node ids to expand.\n",
    "        src, tgt: parallel NumPy arrays of edge endpoints; every edge is\n",
    "            followed in both directions.\n",
    "\n",
    "    Returns:\n",
    "        Ascending list of the distinct nodes that share an edge with at least\n",
    "        one member of ``nd_list``. A member of ``nd_list`` is only included\n",
    "        when it is itself a neighbour of some member.\n",
    "    \"\"\"\n",
    "    # Materialise first: np.isin does not treat sets or one-shot iterators\n",
    "    # as a sequence of test elements.\n",
    "    seeds = np.asarray(list(nd_list))\n",
    "    # One membership test per endpoint array replaces a full edge scan per seed.\n",
    "    reached = np.concatenate((tgt[np.isin(src, seeds)], src[np.isin(tgt, seeds)]))\n",
    "    # np.unique sorts and removes duplicates in a single pass.\n",
    "    return list(np.unique(reached))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "181c4ac8",
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_two_hop_subgraph_node(nd, src, tgt):\n",
    "    \"\"\"Return ``nd`` plus every node within two hops of it, sorted ascending.\n",
    "\n",
    "    The first ring comes from ``find_one_hop_node`` and the second ring from\n",
    "    ``find_one_hop_node_list`` applied to the first; the three groups are\n",
    "    merged into one de-duplicated list.\n",
    "    \"\"\"\n",
    "    first_ring = find_one_hop_node(nd, src, tgt)\n",
    "    second_ring = find_one_hop_node_list(first_ring, src, tgt)\n",
    "    return sorted({nd, *first_ring, *second_ring})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "608d5923",
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_k_hop_subgraph_node(nd, src, tgt, k):\n",
    "    \"\"\"Return ``nd`` and all nodes reachable from it in at most ``k`` hops.\n",
    "\n",
    "    Each of the ``k`` rounds adds the neighbours (via\n",
    "    ``find_one_hop_node_list``) of everything collected so far. The result is\n",
    "    a sorted, de-duplicated list; when no round runs it is just ``[nd]``.\n",
    "    \"\"\"\n",
    "    reached = [nd]\n",
    "    for _ in range(k):\n",
    "        ring = find_one_hop_node_list(reached, src, tgt)\n",
    "        reached = sorted(set(reached + ring))\n",
    "    return reached"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "13475b20",
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_subgraph(nd, src, tgt, k, df, val):\n",
    "    \"\"\"Build an integer label vector marking the k-hop neighbourhood of ``nd``.\n",
    "\n",
    "    Returns an int array with one entry per row of ``df``: ``val`` at the\n",
    "    positions returned by ``find_k_hop_subgraph_node(nd, src, tgt, k)`` and 0\n",
    "    everywhere else. Node ids are used directly as array positions.\n",
    "    \"\"\"\n",
    "    marks = np.zeros(len(df), dtype=int)\n",
    "    marks[find_k_hop_subgraph_node(nd, src, tgt, k)] = val\n",
    "    return marks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f15b6d7",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "c691357d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sorted node ids within 3 hops of node 479 (the start node is included).\n",
    "idx_1 = find_k_hop_subgraph_node(479, src, tgt, 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "660d4c25",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sorted node ids within 2 hops of node 1793 (the start node is included).\n",
    "idx_2 = find_k_hop_subgraph_node(1793, src, tgt, 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "8a2eba44",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[631,\n",
       " 632,\n",
       " 633,\n",
       " 1278,\n",
       " 1279,\n",
       " 1280,\n",
       " 1281,\n",
       " 1329,\n",
       " 1484,\n",
       " 1598,\n",
       " 1645,\n",
       " 1777,\n",
       " 1797,\n",
       " 1848,\n",
       " 1850,\n",
       " 1861,\n",
       " 1976,\n",
       " 1978,\n",
       " 2013,\n",
       " 2014,\n",
       " 2084,\n",
       " 2265,\n",
       " 2267,\n",
       " 2296,\n",
       " 2419,\n",
       " 2600]"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "find_k_hop_subgraph_node(2014, src, tgt, 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "558d1159",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label vector over all rows of df_node: 0 = in neither neighbourhood,\n",
    "# 1 = in idx_1, 2 = in idx_2. idx_2 is written last, so a node that belongs\n",
    "# to both ends up labelled 2.\n",
    "new_lab_subgraph = np.zeros(len(df_node), dtype=int)\n",
    "new_lab_subgraph[idx_1], new_lab_subgraph[idx_2] = 1, 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "ba5ceb6d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 0, 0, ..., 0, 0, 0])"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_lab_subgraph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "09157dc2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach the per-node subgraph label array as a new column of df_node.\n",
    "df_node['new_lab_subgraph'] = new_lab_subgraph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "59707cb1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>label</th>\n",
       "      <th>train</th>\n",
       "      <th>val</th>\n",
       "      <th>test</th>\n",
       "      <th>influence</th>\n",
       "      <th>newlab</th>\n",
       "      <th>new_lab_subgraph</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>478</th>\n",
       "      <td>478</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>479</th>\n",
       "      <td>479</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.327821</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>480</th>\n",
       "      <td>480</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>545</th>\n",
       "      <td>545</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>593</th>\n",
       "      <td>593</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2406</th>\n",
       "      <td>2406</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2449</th>\n",
       "      <td>2449</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2460</th>\n",
       "      <td>2460</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2468</th>\n",
       "      <td>2468</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.298210</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2569</th>\n",
       "      <td>2569</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3.645985</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>214 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      name  label  train  val  test  influence  newlab  new_lab_subgraph\n",
       "478    478      1      0    0     1   0.000000       2                 1\n",
       "479    479      1      1    0     0   1.327821       3                 1\n",
       "480    480      1      0    0     1   0.000000       2                 1\n",
       "545    545      5      0    0     0   0.000000      10                 1\n",
       "593    593      3      0    0     1   0.000000       6                 2\n",
       "...    ...    ...    ...  ...   ...        ...     ...               ...\n",
       "2406  2406      2      0    0     1   0.000000       4                 2\n",
       "2449  2449      2      0    0     1   0.000000       4                 2\n",
       "2460  2460      2      0    0     0   0.000000       4                 2\n",
       "2468  2468      2      1    0     0   1.298210       5                 2\n",
       "2569  2569      0      1    0     0   3.645985       1                 2\n",
       "\n",
       "[214 rows x 8 columns]"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_node.loc[df_node['new_lab_subgraph'] > 0] "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "e630d4bf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the rows with a non-zero subgraph label and renumber them from 0.\n",
    "df_node_sub = df_node.loc[df_node['new_lab_subgraph'] > 0].reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "06771fb8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write only the subgraph rows to CSV; index=False leaves the row index out of the file.\n",
    "df_node_sub.to_csv('Cyto/node_influence_v2_with_subgraph_only.csv', index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "c212131e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>label</th>\n",
       "      <th>train</th>\n",
       "      <th>val</th>\n",
       "      <th>test</th>\n",
       "      <th>influence</th>\n",
       "      <th>newlab</th>\n",
       "      <th>new_lab_subgraph</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>478</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>479</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.327821</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>480</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>545</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>593</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>209</th>\n",
       "      <td>2406</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>210</th>\n",
       "      <td>2449</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>211</th>\n",
       "      <td>2460</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>212</th>\n",
       "      <td>2468</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.298210</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>213</th>\n",
       "      <td>2569</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3.645985</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>214 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     name  label  train  val  test  influence  newlab  new_lab_subgraph\n",
       "0     478      1      0    0     1   0.000000       2                 1\n",
       "1     479      1      1    0     0   1.327821       3                 1\n",
       "2     480      1      0    0     1   0.000000       2                 1\n",
       "3     545      5      0    0     0   0.000000      10                 1\n",
       "4     593      3      0    0     1   0.000000       6                 2\n",
       "..    ...    ...    ...  ...   ...        ...     ...               ...\n",
       "209  2406      2      0    0     1   0.000000       4                 2\n",
       "210  2449      2      0    0     1   0.000000       4                 2\n",
       "211  2460      2      0    0     0   0.000000       4                 2\n",
       "212  2468      2      1    0     0   1.298210       5                 2\n",
       "213  2569      0      1    0     0   3.645985       1                 2\n",
       "\n",
       "[214 rows x 8 columns]"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_node_sub"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "f3d49007",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the full node table (all rows of df_node) to CSV; index=False leaves the row index out of the file.\n",
    "df_node.to_csv('Cyto/node_influence_v2_with_subgraph.csv', index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "81bd19e8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[478, 479, 480, 616, 858, 859, 860, 915, 1590, 1649]"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "find_two_hop_subgraph_node(479, src, tgt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "8b68d8fa",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[478,\n",
       " 479,\n",
       " 480,\n",
       " 545,\n",
       " 616,\n",
       " 619,\n",
       " 858,\n",
       " 859,\n",
       " 860,\n",
       " 915,\n",
       " 1162,\n",
       " 1249,\n",
       " 1590,\n",
       " 1649,\n",
       " 1881,\n",
       " 2046,\n",
       " 2141,\n",
       " 2207]"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "find_k_hop_subgraph_node(479, src, tgt, 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "31020bcf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[478, 859, 860, 915]"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "find_one_hop_node(479, src, tgt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "6bbfd21e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[478, 479, 480, 616, 858, 859, 860, 915, 1590, 1649]"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "find_one_hop_node_list([478, 859, 860, 915], src, tgt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "eb4609a5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-0.00995183,  0.50716101,  0.04089684, ...,  0.        ,\n",
       "        0.        ,  0.        ])"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the influence table from complete_node/cora_all_nodes.csv.\n",
    "# NOTE: 'acctual_influence' (sic) is the column header as spelled in the CSV;\n",
    "# do not correct the spelling here without also changing the file.\n",
    "infl_all_node = pd.read_csv('complete_node/cora_all_nodes.csv')\n",
    "infl_all_node['acctual_influence'].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "ab15133b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3.3212399264439227"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "infl_all_node['acctual_influence'].values[3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "9cc76d38",
   "metadata": {},
   "outputs": [],
   "source": [
    "# .values drops the CSV's index, so the assignment is purely positional.\n",
    "# NOTE(review): assumes row i of infl_all_node describes the node in row i of df_node - confirm.\n",
    "df_node['allinfluence'] = infl_all_node['acctual_influence'].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "92ff2431",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rename by label instead of overwriting df_node.columns positionally: the\n",
    "# positional form raises as soon as the column count changes (e.g. when this\n",
    "# cell is re-run after later cells add columns) and mislabels data if the\n",
    "# order changes. rename() skips labels that are absent, so re-running is safe.\n",
    "df_node = df_node.rename(columns={'influence': 'feature influence',\n",
    "                                  'allinfluence': 'node influence'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "13c0dd80",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>label</th>\n",
       "      <th>train</th>\n",
       "      <th>val</th>\n",
       "      <th>test</th>\n",
       "      <th>feature influence</th>\n",
       "      <th>newlab</th>\n",
       "      <th>new_lab_subgraph</th>\n",
       "      <th>node influence</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5.340206</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>3.321240</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4.843376</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>2.666335</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>24</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.269817</td>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "      <td>0.330703</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>34</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.729692</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>0.295677</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>52</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.781227</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.698564</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2569</th>\n",
       "      <td>2569</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3.645985</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1.517363</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2658</th>\n",
       "      <td>2658</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>-1.688545</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.050587</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2669</th>\n",
       "      <td>2669</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4.865627</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>2.667773</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2673</th>\n",
       "      <td>2673</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5.169648</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>3.439219</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2681</th>\n",
       "      <td>2681</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5.038287</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>2.680147</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>140 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      name  label  train  val  test  feature influence  newlab  \\\n",
       "3        3      4      1    0     0           5.340206       9   \n",
       "7        7      4      1    0     0           4.843376       9   \n",
       "24      24      5      1    0     0           1.269817      11   \n",
       "34      34      4      1    0     0           0.729692       9   \n",
       "52      52      0      1    0     0           1.781227       1   \n",
       "...    ...    ...    ...  ...   ...                ...     ...   \n",
       "2569  2569      0      1    0     0           3.645985       1   \n",
       "2658  2658      0      1    0     0          -1.688545       1   \n",
       "2669  2669      4      1    0     0           4.865627       9   \n",
       "2673  2673      4      1    0     0           5.169648       9   \n",
       "2681  2681      4      1    0     0           5.038287       9   \n",
       "\n",
       "      new_lab_subgraph  node influence  \n",
       "3                    0        3.321240  \n",
       "7                    0        2.666335  \n",
       "24                   0        0.330703  \n",
       "34                   0        0.295677  \n",
       "52                   0        0.698564  \n",
       "...                ...             ...  \n",
       "2569                 2        1.517363  \n",
       "2658                 0        0.050587  \n",
       "2669                 0        2.667773  \n",
       "2673                 0        3.439219  \n",
       "2681                 0        2.680147  \n",
       "\n",
       "[140 rows x 9 columns]"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_node.loc[df_node['feature influence'] != 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bfc20eff",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The 'node influence' column of df_node as a NumPy array.\n",
    "a = df_node['node influence'].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "id": "2500ff62",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the column directly rather than through the scratch global `a`, so\n",
    "# this cell gives the same result regardless of which cells ran before it.\n",
    "# NOTE(review): the column is named 'floor' but np.round rounds to the nearest\n",
    "# integer (0.507161 -> 1.0, -0.009952 -> -0.0); confirm whether np.floor was intended.\n",
    "df_node['node influence floor'] = np.round(df_node['node influence'].values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "id": "9f952308",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>label</th>\n",
       "      <th>train</th>\n",
       "      <th>val</th>\n",
       "      <th>test</th>\n",
       "      <th>feature influence</th>\n",
       "      <th>newlab</th>\n",
       "      <th>new_lab_subgraph</th>\n",
       "      <th>node influence</th>\n",
       "      <th>node influence floor</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>-0.009952</td>\n",
       "      <td>-0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>0.507161</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>0.040897</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5.340206</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>3.321240</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>0.224352</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2703</th>\n",
       "      <td>2703</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2704</th>\n",
       "      <td>2704</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2705</th>\n",
       "      <td>2705</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2706</th>\n",
       "      <td>2706</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2707</th>\n",
       "      <td>2707</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2708 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      name  label  train  val  test  feature influence  newlab  \\\n",
       "0        0      4      0    0     1           0.000000       8   \n",
       "1        1      4      0    0     1           0.000000       8   \n",
       "2        2      4      0    1     0           0.000000       8   \n",
       "3        3      4      1    0     0           5.340206       9   \n",
       "4        4      4      0    0     1           0.000000       8   \n",
       "...    ...    ...    ...  ...   ...                ...     ...   \n",
       "2703  2703      3      0    1     0           0.000000       6   \n",
       "2704  2704      4      0    0     1           0.000000       8   \n",
       "2705  2705      4      0    0     0           0.000000       8   \n",
       "2706  2706      3      0    0     0           0.000000       6   \n",
       "2707  2707      3      0    0     0           0.000000       6   \n",
       "\n",
       "      new_lab_subgraph  node influence  node influence floor  \n",
       "0                    0       -0.009952                  -0.0  \n",
       "1                    0        0.507161                   1.0  \n",
       "2                    0        0.040897                   0.0  \n",
       "3                    0        3.321240                   3.0  \n",
       "4                    0        0.224352                   0.0  \n",
       "...                ...             ...                   ...  \n",
       "2703                 0        0.000000                   0.0  \n",
       "2704                 0        0.000000                   0.0  \n",
       "2705                 0        0.000000                   0.0  \n",
       "2706                 0        0.000000                   0.0  \n",
       "2707                 0        0.000000                   0.0  \n",
       "\n",
       "[2708 rows x 10 columns]"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_node"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "id": "206afda4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Influence scores rounded to the nearest integer, kept as a separate array.\n",
    "a_round = a.round()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d8d61aba",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "dbd3c48d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(8.324715061615962, -1.893823854403763)"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Largest and smallest node-influence score.\n",
    "a.max(), a.min()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "id": "ddfb18ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the export folder first: to_csv does not create missing directories.\n",
    "os.makedirs('Cyto', exist_ok=True)\n",
    "df_node.to_csv('Cyto/node_influence_v3.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "id": "8bf218de",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0"
      ]
     },
     "execution_count": 91,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Median (0.5 quantile) of the node-influence scores.\n",
    "np.quantile(a, 0.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "id": "e041318d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-0.025246072633933574"
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Lower quartile (0.25 quantile) of the node-influence scores.\n",
    "np.quantile(a, 0.25)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "id": "157a62ce",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.022678508043895748"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Upper quartile (0.75 quantile) of the node-influence scores.\n",
    "np.quantile(a, 0.75)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "id": "2b878f10",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8.324715061615962"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The 1.0 quantile, i.e. the maximum node-influence score.\n",
    "np.quantile(a, q=1.0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "id": "5623bcb7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[-1.893823854403763,\n",
       " -0.06779943837581183,\n",
       " -0.03361225334601836,\n",
       " -0.017145409043098397,\n",
       " -0.004935021810501881,\n",
       " 0.0,\n",
       " 0.0,\n",
       " 0.012106110833315085,\n",
       " 0.036975930436210475,\n",
       " 0.133771273290131,\n",
       " 8.324715061615962]"
      ]
     },
     "execution_count": 111,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Decile boundaries of the node-influence scores: quantiles at 0.0, 0.1, ..., 1.0.\n",
    "[np.quantile(a, q) for q in np.linspace(0, 1, 11)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "id": "69ac8fdf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])"
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Eleven evenly spaced probabilities from 0 to 1. linspace fixes the number of points and\n",
    "# the endpoint, whereas a float-step arange can gain or lose its last element to rounding.\n",
    "np.linspace(0, 1, 11)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "id": "f8229f5e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]"
      ]
     },
     "execution_count": 118,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "id": "f0315b1d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Decile probabilities 0.0, 0.1, ..., 1.0. linspace fixes the number of points and the\n",
    "# endpoint, whereas a float-step arange can gain or lose its last element to rounding.\n",
    "quant = list(np.linspace(0, 1, 11))\n",
    "# Node-influence value at each of those probabilities, indexed by probability.\n",
    "s = df_node['node influence'].quantile(quant)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "id": "a0829a51",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      -0.009952\n",
       "1       0.507161\n",
       "2       0.040897\n",
       "3       3.321240\n",
       "4       0.224352\n",
       "          ...   \n",
       "2703    0.000000\n",
       "2704    0.000000\n",
       "2705    0.000000\n",
       "2706    0.000000\n",
       "2707    0.000000\n",
       "Name: node influence, Length: 2708, dtype: float64"
      ]
     },
     "execution_count": 158,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_node['node influence']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "id": "fc804263",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-0.00995183,  0.50716101,  0.04089684, ...,  0.        ,\n",
       "        0.        ,  0.        ])"
      ]
     },
     "execution_count": 135,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_node['node influence'].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "id": "99c00cab",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 984., 2588., 2197., ..., 1672., 1673., 1674.])"
      ]
     },
     "execution_count": 145,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# method='first' gives tied scores distinct ranks in row order, so every rank is unique.\n",
    "rank = df_node['node influence'].rank(method='first')\n",
    "rank.to_numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "id": "f5313661",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cut the ranks into 10 quantile bins and keep only the integer bin codes (0-9).\n",
    "quantile_label = pd.qcut(rank.to_numpy(), q=10).codes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "id": "4202bf44",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([3, 9, 8, ..., 6, 6, 6], dtype=int8)"
      ]
     },
     "execution_count": 150,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "quantile_label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "id": "fa7fecb3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "([0, 1, 2, 3, 4, 5, 6, 7, 8, 9],\n",
       " [-1.893823854403763,\n",
       "  -0.06779943837581183,\n",
       "  -0.03361225334601836,\n",
       "  -0.017145409043098397,\n",
       "  -0.004935021810501881,\n",
       "  0.0,\n",
       "  0.0,\n",
       "  0.012106110833315071,\n",
       "  0.036975930436210475,\n",
       "  0.133771273290131,\n",
       "  8.324715061615962])"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Bin ids 0-9 shown next to the eleven decile boundary values held in `s`.\n",
    "list(np.arange(10)), s.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "id": "24cdec1c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.1809453471196455"
      ]
     },
     "execution_count": 161,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fraction of nodes whose influence score is exactly zero.\n",
    "(a == 0).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "id": "9968457e",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_node['quantile_label'] = quantile_label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "id": "ae21d6ec",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Re-export the node table now that it carries the quantile_label column.\n",
    "df_node.to_csv('Cyto/node_influence_v3.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "id": "a107dee8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,\n",
       "       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,\n",
       "       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,\n",
       "       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,\n",
       "       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,\n",
       "       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,\n",
       "       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,\n",
       "       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5,\n",
       "       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\n",
       "       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\n",
       "       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\n",
       "       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\n",
       "       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\n",
       "       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\n",
       "       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\n",
       "       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\n",
       "       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\n",
       "       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\n",
       "       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\n",
       "       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\n",
       "       5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,\n",
       "       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,\n",
       "       6, 6, 6, 6, 6, 6], dtype=int8)"
      ]
     },
     "execution_count": 156,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Quantile labels of the nodes whose influence is exactly zero. Because the ranks break\n",
    "# ties by row order, these identical scores end up spread over several bins.\n",
    "# A single .loc[rows, column] lookup replaces the chained df.loc[rows][column] form.\n",
    "df_node.loc[df_node['node influence'] == 0, 'quantile_label'].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "id": "961cb188",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the current node table to the Cyto export file (same path as the earlier exports).\n",
    "df_node.to_csv('Cyto/node_influence_v3.csv', index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
