{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "daa5de9b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "import math\n",
    "from tqdm import tqdm\n",
    "import networkx as nx"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f9800636",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The raw CSV has no header row: read it as-is, then label the four columns.\n",
    "edge_columns = ['actual_influence', 'predicted_influence', 'from_edges', 'to_edges']\n",
    "df_edge = pd.read_csv('edge_feature/citeseer_edge_influence_002.csv', header=None)\n",
    "df_edge.columns = edge_columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "6fa4316c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_actual_edge_influence(df_edge):\n",
    "    \"\"\"Return the actual influence of every edge after checking edge symmetry.\n",
    "\n",
    "    Every (from_edges, to_edges) row must have a mirrored (to_edges, from_edges)\n",
    "    row carrying the same ``actual_influence``.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df_edge : pandas.DataFrame\n",
    "        Must provide the columns ``from_edges``, ``to_edges`` and\n",
    "        ``actual_influence``. The index labels are not used.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list\n",
    "        One influence value per row, in row order. When the same directed edge\n",
    "        is listed several times, the value of its first occurrence is used.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    IndexError\n",
    "        If an edge has no mirrored row.\n",
    "    AssertionError\n",
    "        If an edge and its mirror carry different influence values.\n",
    "    \"\"\"\n",
    "    sources = df_edge['from_edges'].values\n",
    "    targets = df_edge['to_edges'].values\n",
    "    influences = df_edge['actual_influence'].values\n",
    "\n",
    "    # Index the first-seen influence of each directed edge once, so every row is\n",
    "    # resolved with two dictionary lookups instead of two full-table scans.\n",
    "    first_influence = {}\n",
    "    for f, t, value in zip(sources, targets, influences):\n",
    "        first_influence.setdefault((f, t), value)\n",
    "\n",
    "    new_infl = []\n",
    "    for f, t in tqdm(zip(sources, targets), total=len(df_edge)):\n",
    "        if (t, f) not in first_influence:\n",
    "            raise IndexError(f'edge ({f}, {t}) has no reverse edge ({t}, {f})')\n",
    "        act_1 = first_influence[(f, t)]\n",
    "        act_2 = first_influence[(t, f)]\n",
    "        assert act_1 == act_2, f'influence mismatch for edge ({f}, {t}): {act_1} != {act_2}'\n",
    "        new_infl.append(act_1)\n",
    "    return new_infl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "4b3aece5",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████| 12431/12431 [00:14<00:00, 840.80it/s]\n"
     ]
    }
   ],
   "source": [
    "# Attach the symmetry-checked influence as a new column on a copy of the raw\n",
    "# table, then discard the two original influence columns.\n",
    "symmetric_influence = get_actual_edge_influence(df_edge)\n",
    "df_edge_copy = df_edge.assign(influence=symmetric_influence)\n",
    "influence_source_columns = ['actual_influence', 'predicted_influence']\n",
    "df_edge_preprocessed = df_edge_copy.drop(columns=influence_source_columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "ecf68814",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Order edges from highest to lowest influence and renumber the rows 0..n-1.\n",
    "df_edge_preprocessed = (\n",
    "    df_edge_preprocessed\n",
    "    .sort_values('influence', ascending=False)\n",
    "    .reset_index(drop=True)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "efef2906",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the sorted edge table (both directions of every edge) without the index column.\n",
    "df_edge_preprocessed.to_csv('citeseer_df_edge_preprocessed_two_edge.csv', index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "d1709634",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reload the table from the CSV written in the previous cell.\n",
    "df_edge_infl = pd.read_csv('citeseer_df_edge_preprocessed_two_edge.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "988d48ac",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>from_edges</th>\n",
       "      <th>to_edges</th>\n",
       "      <th>influence</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2655.0</td>\n",
       "      <td>42.0</td>\n",
       "      <td>2.179906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>42.0</td>\n",
       "      <td>2655.0</td>\n",
       "      <td>2.179906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>48.0</td>\n",
       "      <td>2185.0</td>\n",
       "      <td>1.988350</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2185.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>1.988350</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>62.0</td>\n",
       "      <td>1620.0</td>\n",
       "      <td>1.724335</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12426</th>\n",
       "      <td>85.0</td>\n",
       "      <td>2893.0</td>\n",
       "      <td>-1.078100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12427</th>\n",
       "      <td>367.0</td>\n",
       "      <td>117.0</td>\n",
       "      <td>-1.348294</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12428</th>\n",
       "      <td>117.0</td>\n",
       "      <td>367.0</td>\n",
       "      <td>-1.348294</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12429</th>\n",
       "      <td>107.0</td>\n",
       "      <td>3193.0</td>\n",
       "      <td>-1.912171</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12430</th>\n",
       "      <td>3193.0</td>\n",
       "      <td>107.0</td>\n",
       "      <td>-1.912171</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>12431 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       from_edges  to_edges  influence\n",
       "0          2655.0      42.0   2.179906\n",
       "1            42.0    2655.0   2.179906\n",
       "2            48.0    2185.0   1.988350\n",
       "3          2185.0      48.0   1.988350\n",
       "4            62.0    1620.0   1.724335\n",
       "...           ...       ...        ...\n",
       "12426        85.0    2893.0  -1.078100\n",
       "12427       367.0     117.0  -1.348294\n",
       "12428       117.0     367.0  -1.348294\n",
       "12429       107.0    3193.0  -1.912171\n",
       "12430      3193.0     107.0  -1.912171\n",
       "\n",
       "[12431 rows x 3 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the reloaded edge table.\n",
    "df_edge_infl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "8e69bea4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows whose start and end node are the same, i.e. self-loops.\n",
    "is_self_loop = df_edge_infl['from_edges'] == df_edge_infl['to_edges']\n",
    "df_self_loop = df_edge_infl[is_self_loop]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "32884629",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/zizhang/anaconda3/lib/python3.8/site-packages/pandas/core/indexing.py:1951: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  self.obj[selected_item_labels] = value\n",
      "/home/zizhang/anaconda3/lib/python3.8/site-packages/pandas/core/indexing.py:1773: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  self._setitem_single_column(ilocs[0], value, pi)\n"
     ]
    }
   ],
   "source": [
    "# Drop self-loops, then cast the node ids to int. ``astype`` builds a new frame,\n",
    "# so nothing is written into a slice of df_edge_infl (no SettingWithCopyWarning)\n",
    "# and the integer dtype is applied regardless of how ``.loc`` assignment treats\n",
    "# the existing float columns.\n",
    "not_self_loop = df_edge_infl['from_edges'] != df_edge_infl['to_edges']\n",
    "df_edge = (\n",
    "    df_edge_infl.loc[not_self_loop]\n",
    "    .astype({'from_edges': int, 'to_edges': int})\n",
    "    .reset_index(drop=True)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "3fdcfd7c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export the self-loop-free edge table; the index column is not written.\n",
    "df_edge.to_csv('Cyto/citeseer_edge_influence_new_version.csv', index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "9ab6f21a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>from_edges</th>\n",
       "      <th>to_edges</th>\n",
       "      <th>influence</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2655</td>\n",
       "      <td>42</td>\n",
       "      <td>2.179906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>42</td>\n",
       "      <td>2655</td>\n",
       "      <td>2.179906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>48</td>\n",
       "      <td>2185</td>\n",
       "      <td>1.988350</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2185</td>\n",
       "      <td>48</td>\n",
       "      <td>1.988350</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>62</td>\n",
       "      <td>1620</td>\n",
       "      <td>1.724335</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9099</th>\n",
       "      <td>85</td>\n",
       "      <td>2893</td>\n",
       "      <td>-1.078100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9100</th>\n",
       "      <td>367</td>\n",
       "      <td>117</td>\n",
       "      <td>-1.348294</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9101</th>\n",
       "      <td>117</td>\n",
       "      <td>367</td>\n",
       "      <td>-1.348294</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9102</th>\n",
       "      <td>107</td>\n",
       "      <td>3193</td>\n",
       "      <td>-1.912171</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9103</th>\n",
       "      <td>3193</td>\n",
       "      <td>107</td>\n",
       "      <td>-1.912171</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>9104 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      from_edges  to_edges  influence\n",
       "0           2655        42   2.179906\n",
       "1             42      2655   2.179906\n",
       "2             48      2185   1.988350\n",
       "3           2185        48   1.988350\n",
       "4             62      1620   1.724335\n",
       "...          ...       ...        ...\n",
       "9099          85      2893  -1.078100\n",
       "9100         367       117  -1.348294\n",
       "9101         117       367  -1.348294\n",
       "9102         107      3193  -1.912171\n",
       "9103        3193       107  -1.912171\n",
       "\n",
       "[9104 rows x 3 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the edge table after self-loops were removed.\n",
    "df_edge"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "17be33cc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start from an empty undirected graph.\n",
    "G = nx.Graph()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "6119d0f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add every row as a weighted edge in a single call instead of three label\n",
    "# lookups per row; this also no longer requires df_edge to have a 0..n-1 index.\n",
    "# G is undirected, so the two directions of an edge collapse into one edge whose\n",
    "# weight is the one from the row added last.\n",
    "G.add_weighted_edges_from(\n",
    "    df_edge[['from_edges', 'to_edges', 'influence']].itertuples(index=False, name=None)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 193,
   "id": "a712676a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Nodes of the connected component with the most nodes, in ascending order.\n",
    "largest_component = max(nx.connected_components(G), key=len)\n",
    "component_node_list = sorted(largest_component)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 194,
   "id": "860383d1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the edges whose two endpoints both belong to the largest connected\n",
    "# component. Membership is tested against a set (constant-time lookups) rather\n",
    "# than scanning component_node_list once per endpoint.\n",
    "component_nodes = set(component_node_list)\n",
    "in_component = (\n",
    "    df_edge['from_edges'].isin(component_nodes)\n",
    "    & df_edge['to_edges'].isin(component_nodes)\n",
    ")\n",
    "component_edges = df_edge.loc[in_component]\n",
    "\n",
    "# Plain Python lists, in the original row order, for the next cell.\n",
    "from_edges_list = component_edges['from_edges'].astype(int).tolist()\n",
    "to_edges_list = component_edges['to_edges'].astype(int).tolist()\n",
    "influence_list = component_edges['influence'].tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 195,
   "id": "3a747c55",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the frame column by column. Compared with transposing a list of lists,\n",
    "# the node ids never pass through float64 and empty lists still give a valid\n",
    "# three-column frame.\n",
    "df_edge_new = pd.DataFrame({\n",
    "    'from_edges': from_edges_list,\n",
    "    'to_edges': to_edges_list,\n",
    "    'influence': influence_list,\n",
    "})\n",
    "# Pin the dtypes explicitly so they do not depend on the list contents.\n",
    "df_edge_new = df_edge_new.astype({'from_edges': int, 'to_edges': int, 'influence': float})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 197,
   "id": "fe114fb0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the edges of the largest component; the index column is not written.\n",
    "df_edge_new.to_csv('Cyto/edge_influence_new_version.csv', index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 198,
   "id": "275f6941",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reload the table from the CSV written in the previous cell.\n",
    "df_edge_new = pd.read_csv('Cyto/edge_influence_new_version.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 221,
   "id": "b1138db2",
   "metadata": {},
   "outputs": [],
   "source": [
    "orig_index = []\n",
    "redundant_index = []\n",
    "for i in range(len(df_edge_new)):\n",
    "    temp_f = df_edge_new.loc[i, ['from_edges']].values[0]\n",
    "    temp_t = df_edge_new.loc[i, ['to_edges']].values[0]\n",
    "    \n",
    "    temp_index = df_edge_new.loc[(df_edge_new['from_edges'] == temp_t) & \n",
    "                                 (df_edge_new['to_edges'] == temp_f)].index.values\n",
    "    if temp_index.size > 0:\n",
    "        redundant_index.append(temp_index[0])\n",
    "        orig_index.append(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 223,
   "id": "5534d589",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>from_edges</th>\n",
       "      <th>to_edges</th>\n",
       "      <th>influence</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2014</td>\n",
       "      <td>1281</td>\n",
       "      <td>2.764419</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1281</td>\n",
       "      <td>2014</td>\n",
       "      <td>2.764419</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1152</td>\n",
       "      <td>554</td>\n",
       "      <td>1.886169</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>554</td>\n",
       "      <td>1152</td>\n",
       "      <td>1.886169</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2297</td>\n",
       "      <td>1857</td>\n",
       "      <td>1.535915</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10133</th>\n",
       "      <td>2079</td>\n",
       "      <td>2452</td>\n",
       "      <td>-0.730289</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10134</th>\n",
       "      <td>365</td>\n",
       "      <td>645</td>\n",
       "      <td>-0.889518</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10135</th>\n",
       "      <td>645</td>\n",
       "      <td>365</td>\n",
       "      <td>-0.889518</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10136</th>\n",
       "      <td>1423</td>\n",
       "      <td>741</td>\n",
       "      <td>-1.046440</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10137</th>\n",
       "      <td>741</td>\n",
       "      <td>1423</td>\n",
       "      <td>-1.046440</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10138 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       from_edges  to_edges  influence\n",
       "0            2014      1281   2.764419\n",
       "1            1281      2014   2.764419\n",
       "2            1152       554   1.886169\n",
       "3             554      1152   1.886169\n",
       "4            2297      1857   1.535915\n",
       "...           ...       ...        ...\n",
       "10133        2079      2452  -0.730289\n",
       "10134         365       645  -0.889518\n",
       "10135         645       365  -0.889518\n",
       "10136        1423       741  -1.046440\n",
       "10137         741      1423  -1.046440\n",
       "\n",
       "[10138 rows x 3 columns]"
      ]
     },
     "execution_count": 223,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the reloaded edge table.\n",
    "df_edge_new"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 224,
   "id": "aae75a40",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10133</th>\n",
       "      <td>10132</td>\n",
       "      <td>10133</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10134</th>\n",
       "      <td>10135</td>\n",
       "      <td>10134</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10135</th>\n",
       "      <td>10134</td>\n",
       "      <td>10135</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10136</th>\n",
       "      <td>10137</td>\n",
       "      <td>10136</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10137</th>\n",
       "      <td>10136</td>\n",
       "      <td>10137</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10138 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           0      1\n",
       "0          1      0\n",
       "1          0      1\n",
       "2          3      2\n",
       "3          2      3\n",
       "4          5      4\n",
       "...      ...    ...\n",
       "10133  10132  10133\n",
       "10134  10135  10134\n",
       "10135  10134  10135\n",
       "10136  10137  10136\n",
       "10137  10136  10137\n",
       "\n",
       "[10138 rows x 2 columns]"
      ]
     },
     "execution_count": 224,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Side by side: row of the reverse edge (column 0) and row of the edge itself (column 1).\n",
    "pd.DataFrame([redundant_index, orig_index]).T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 232,
   "id": "19c5afa5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Array copies of the two index lists: a = reverse-edge rows, b = the rows themselves.\n",
    "a, b = np.array(redundant_index), np.array(orig_index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 234,
   "id": "cfd0d85a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Walk the (reverse row, own row) pairs and keep the reverse row unless the own\n",
    "# row has already been kept, so a mirrored pair contributes a single row.\n",
    "# kept_rows mirrors new_index as a set: the membership test no longer scans a\n",
    "# growing list on every iteration.\n",
    "new_index = []\n",
    "kept_rows = set()\n",
    "for reverse_row, own_row in zip(a, b):\n",
    "    if own_row in kept_rows:\n",
    "        continue\n",
    "    new_index.append(reverse_row)\n",
    "    kept_rows.add(reverse_row)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 240,
   "id": "792df010",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pull out the selected rows and renumber them 0..n-1.\n",
    "df_edge_new_0 = df_edge_new.loc[new_index].reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 242,
   "id": "4328aec3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Overwrite the CSV written earlier with the selected rows only.\n",
    "df_edge_new_0.to_csv('Cyto/edge_influence_new_version.csv', index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 253,
   "id": "0a15f970",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>from_edges</th>\n",
       "      <th>to_edges</th>\n",
       "      <th>influence</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1281</td>\n",
       "      <td>2014</td>\n",
       "      <td>2.764419</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>554</td>\n",
       "      <td>1152</td>\n",
       "      <td>1.886169</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1857</td>\n",
       "      <td>2297</td>\n",
       "      <td>1.535915</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>645</td>\n",
       "      <td>1091</td>\n",
       "      <td>1.161760</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1931</td>\n",
       "      <td>1152</td>\n",
       "      <td>1.141255</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5064</th>\n",
       "      <td>2337</td>\n",
       "      <td>1898</td>\n",
       "      <td>-0.691945</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5065</th>\n",
       "      <td>2079</td>\n",
       "      <td>1829</td>\n",
       "      <td>-0.707967</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5066</th>\n",
       "      <td>2079</td>\n",
       "      <td>2452</td>\n",
       "      <td>-0.730289</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5067</th>\n",
       "      <td>645</td>\n",
       "      <td>365</td>\n",
       "      <td>-0.889518</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5068</th>\n",
       "      <td>741</td>\n",
       "      <td>1423</td>\n",
       "      <td>-1.046440</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5069 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      from_edges  to_edges  influence\n",
       "0           1281      2014   2.764419\n",
       "1            554      1152   1.886169\n",
       "2           1857      2297   1.535915\n",
       "3            645      1091   1.161760\n",
       "4           1931      1152   1.141255\n",
       "...          ...       ...        ...\n",
       "5064        2337      1898  -0.691945\n",
       "5065        2079      1829  -0.707967\n",
       "5066        2079      2452  -0.730289\n",
       "5067         645       365  -0.889518\n",
       "5068         741      1423  -1.046440\n",
       "\n",
       "[5069 rows x 3 columns]"
      ]
     },
     "execution_count": 253,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the rows selected through new_index.\n",
    "df_edge_new_0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 254,
   "id": "f0888d2c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shift the influence values so that the smallest one becomes exactly 1.\n",
    "# ndarray.min() replaces the builtin min(), which loops over the array in Python\n",
    "# and returns an order-dependent result when NaN is present.\n",
    "influence_values = df_edge_new_0['influence'].values\n",
    "infl_rescale = influence_values - influence_values.min() + 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 255,
   "id": "da904057",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Store the shifted influence (minimum mapped to 1) as an extra column.\n",
    "df_edge_new_0['influence rescale'] = infl_rescale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 256,
   "id": "0090e961",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the table again to the same CSV path, now with the 'influence rescale' column.\n",
    "df_edge_new_0.to_csv('Cyto/edge_influence_new_version.csv', index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 258,
   "id": "c1b67d6a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([4.81085954, 3.93260959, 3.58235495, ..., 1.31615112, 1.15692171,\n",
       "       1.        ])"
      ]
     },
     "execution_count": 258,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the rescaled influence values.\n",
    "infl_rescale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 261,
   "id": "d1820621",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2.046440093646652"
      ]
     },
     "execution_count": 261,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Constant offset that the rescaling adds to every influence value.\n",
    "- min(df_edge_new_0.influence.values) + 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 267,
   "id": "faf47c1b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>label</th>\n",
       "      <th>train</th>\n",
       "      <th>val</th>\n",
       "      <th>test</th>\n",
       "      <th>feature influence</th>\n",
       "      <th>newlab</th>\n",
       "      <th>new_lab_subgraph</th>\n",
       "      <th>node influence</th>\n",
       "      <th>node influence floor</th>\n",
       "      <th>quantile_label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>-0.009952</td>\n",
       "      <td>-0.0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>0.507161</td>\n",
       "      <td>1.0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>0.040897</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5.340206</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>3.321240</td>\n",
       "      <td>3.0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>0.224352</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2703</th>\n",
       "      <td>2703</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2704</th>\n",
       "      <td>2704</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2705</th>\n",
       "      <td>2705</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2706</th>\n",
       "      <td>2706</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2707</th>\n",
       "      <td>2707</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2708 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      name  label  train  val  test  feature influence  newlab  \\\n",
       "0        0      4      0    0     1           0.000000       8   \n",
       "1        1      4      0    0     1           0.000000       8   \n",
       "2        2      4      0    1     0           0.000000       8   \n",
       "3        3      4      1    0     0           5.340206       9   \n",
       "4        4      4      0    0     1           0.000000       8   \n",
       "...    ...    ...    ...  ...   ...                ...     ...   \n",
       "2703  2703      3      0    1     0           0.000000       6   \n",
       "2704  2704      4      0    0     1           0.000000       8   \n",
       "2705  2705      4      0    0     0           0.000000       8   \n",
       "2706  2706      3      0    0     0           0.000000       6   \n",
       "2707  2707      3      0    0     0           0.000000       6   \n",
       "\n",
       "      new_lab_subgraph  node influence  node influence floor  quantile_label  \n",
       "0                    0       -0.009952                  -0.0               3  \n",
       "1                    0        0.507161                   1.0               9  \n",
       "2                    0        0.040897                   0.0               8  \n",
       "3                    0        3.321240                   3.0               9  \n",
       "4                    0        0.224352                   0.0               9  \n",
       "...                ...             ...                   ...             ...  \n",
       "2703                 0        0.000000                   0.0               6  \n",
       "2704                 0        0.000000                   0.0               6  \n",
       "2705                 0        0.000000                   0.0               6  \n",
       "2706                 0        0.000000                   0.0               6  \n",
       "2707                 0        0.000000                   0.0               6  \n",
       "\n",
       "[2708 rows x 11 columns]"
      ]
     },
     "execution_count": 267,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_node = pd.read_csv('Cyto/node_influence_v3.csv')\n",
    "df_node"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 285,
   "id": "b20ce629",
   "metadata": {},
   "outputs": [],
   "source": [
    "a = df_node['node influence'].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 287,
   "id": "cd1bd2ab",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([False, False, False, ...,  True,  True,  True])"
      ]
     },
     "execution_count": 287,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a == 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 288,
   "id": "343c82f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "b = df_node['node influence floor'].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 290,
   "id": "abc8619c",
   "metadata": {},
   "outputs": [],
   "source": [
    "b[a == 0] = 99"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 292,
   "id": "a09b266c",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_node['node influence modified'] = b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 293,
   "id": "bc7a0d98",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_node.to_csv('Cyto/node_influence_v3.csv', index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eb36c170",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
