{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "bae25b5c",
   "metadata": {},
   "source": [
    "# Edge Context Matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "43185476",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "120cdb43",
   "metadata": {},
   "source": [
    "## News"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eefe69f3",
   "metadata": {},
   "source": [
    "**Load `PENS` News Dataset**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f58a1c60-70a6-4139-aefe-b02638c2b339",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the PENS news table; read_table uses a tab separator by default.\n",
    "news_df = pd.read_table(\"./Data/PENS/news.tsv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "04b45c29-f14f-4040-8079-9458f243ca4a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>News ID</th>\n",
       "      <th>Category</th>\n",
       "      <th>Topic</th>\n",
       "      <th>Headline</th>\n",
       "      <th>News body</th>\n",
       "      <th>Title entity</th>\n",
       "      <th>Entity content</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>N10000</td>\n",
       "      <td>sports</td>\n",
       "      <td>soccer</td>\n",
       "      <td>Predicting Atlanta United's lineup against Col...</td>\n",
       "      <td>Only FIVE internationals allowed, count em, FI...</td>\n",
       "      <td>{\"Atlanta United's\": 'Atlanta United FC'}</td>\n",
       "      <td>{'Atlanta United FC': {'type': 'item', 'id': '...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>N10001</td>\n",
       "      <td>news</td>\n",
       "      <td>newspolitics</td>\n",
       "      <td>Mitch McConnell: DC statehood push is 'full bo...</td>\n",
       "      <td>WASHINGTON -- Senate Majority Leader Mitch McC...</td>\n",
       "      <td>{'DC': 'Washington, D.C.'}</td>\n",
       "      <td>{'Washington, D.C.': {'type': 'item', 'id': 'Q...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>N10002</td>\n",
       "      <td>news</td>\n",
       "      <td>newsus</td>\n",
       "      <td>Home In North Highlands Damaged By Fire</td>\n",
       "      <td>NORTH HIGHLANDS (CBS13)   Fire damaged a home ...</td>\n",
       "      <td>{}</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>N10003</td>\n",
       "      <td>news</td>\n",
       "      <td>newspolitics</td>\n",
       "      <td>Meghan McCain blames 'liberal media' and 'thir...</td>\n",
       "      <td>Meghan McCain is speaking out after a journali...</td>\n",
       "      <td>{}</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>N10004</td>\n",
       "      <td>news</td>\n",
       "      <td>newsworld</td>\n",
       "      <td>Today in History: Aug 1</td>\n",
       "      <td>1714: George I becomes King Georg Ludwig, Elec...</td>\n",
       "      <td>{}</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>113757</th>\n",
       "      <td>N123757</td>\n",
       "      <td>sports</td>\n",
       "      <td>soccer_fifa_wwc</td>\n",
       "      <td>Hope who? Alyssa Naeher's penalty save sends U...</td>\n",
       "      <td>LYON, France   At the conclusion of the United...</td>\n",
       "      <td>{'USWNT': \"United States women's national socc...</td>\n",
       "      <td>{\"United States women's national soccer team\":...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>113758</th>\n",
       "      <td>N123758</td>\n",
       "      <td>sports</td>\n",
       "      <td>baseball_mlb</td>\n",
       "      <td>Chris Sale Explains What Specifically Has Gone...</td>\n",
       "      <td>The first half of Chris Sale's season could be...</td>\n",
       "      <td>{}</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>113759</th>\n",
       "      <td>N123759</td>\n",
       "      <td>sports</td>\n",
       "      <td>basketball_nba_videos</td>\n",
       "      <td>Raptor fans jam streets to celebrate 1st NBA t...</td>\n",
       "      <td>Canadians are celebrating the country's first ...</td>\n",
       "      <td>{'NBA': 'National Basketball Association'}</td>\n",
       "      <td>{'National Basketball Association': {'type': '...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>113760</th>\n",
       "      <td>N123760</td>\n",
       "      <td>news</td>\n",
       "      <td>newspolitics</td>\n",
       "      <td>Judge won't allow Flynn to fire his attorneys</td>\n",
       "      <td>A federal judge denied the request by Michael ...</td>\n",
       "      <td>{'Flynn': 'Michael Flynn'}</td>\n",
       "      <td>{'Michael Flynn': {'type': 'item', 'id': 'Q683...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>113761</th>\n",
       "      <td>N123761</td>\n",
       "      <td>sports</td>\n",
       "      <td>football_nfl</td>\n",
       "      <td>Worley thinks he and Conley will rival greates...</td>\n",
       "      <td>Confidence imparts a wonderful inspiration on ...</td>\n",
       "      <td>{'Conley': 'Mike Conley Jr.'}</td>\n",
       "      <td>{'Mike Conley Jr.': {'type': 'item', 'id': 'Q9...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>113762 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        News ID Category                  Topic  \\\n",
       "0        N10000   sports                 soccer   \n",
       "1        N10001     news           newspolitics   \n",
       "2        N10002     news                 newsus   \n",
       "3        N10003     news           newspolitics   \n",
       "4        N10004     news              newsworld   \n",
       "...         ...      ...                    ...   \n",
       "113757  N123757   sports        soccer_fifa_wwc   \n",
       "113758  N123758   sports           baseball_mlb   \n",
       "113759  N123759   sports  basketball_nba_videos   \n",
       "113760  N123760     news           newspolitics   \n",
       "113761  N123761   sports           football_nfl   \n",
       "\n",
       "                                                 Headline  \\\n",
       "0       Predicting Atlanta United's lineup against Col...   \n",
       "1       Mitch McConnell: DC statehood push is 'full bo...   \n",
       "2                 Home In North Highlands Damaged By Fire   \n",
       "3       Meghan McCain blames 'liberal media' and 'thir...   \n",
       "4                                 Today in History: Aug 1   \n",
       "...                                                   ...   \n",
       "113757  Hope who? Alyssa Naeher's penalty save sends U...   \n",
       "113758  Chris Sale Explains What Specifically Has Gone...   \n",
       "113759  Raptor fans jam streets to celebrate 1st NBA t...   \n",
       "113760      Judge won't allow Flynn to fire his attorneys   \n",
       "113761  Worley thinks he and Conley will rival greates...   \n",
       "\n",
       "                                                News body  \\\n",
       "0       Only FIVE internationals allowed, count em, FI...   \n",
       "1       WASHINGTON -- Senate Majority Leader Mitch McC...   \n",
       "2       NORTH HIGHLANDS (CBS13)   Fire damaged a home ...   \n",
       "3       Meghan McCain is speaking out after a journali...   \n",
       "4       1714: George I becomes King Georg Ludwig, Elec...   \n",
       "...                                                   ...   \n",
       "113757  LYON, France   At the conclusion of the United...   \n",
       "113758  The first half of Chris Sale's season could be...   \n",
       "113759  Canadians are celebrating the country's first ...   \n",
       "113760  A federal judge denied the request by Michael ...   \n",
       "113761  Confidence imparts a wonderful inspiration on ...   \n",
       "\n",
       "                                             Title entity  \\\n",
       "0               {\"Atlanta United's\": 'Atlanta United FC'}   \n",
       "1                              {'DC': 'Washington, D.C.'}   \n",
       "2                                                      {}   \n",
       "3                                                      {}   \n",
       "4                                                      {}   \n",
       "...                                                   ...   \n",
       "113757  {'USWNT': \"United States women's national socc...   \n",
       "113758                                                 {}   \n",
       "113759         {'NBA': 'National Basketball Association'}   \n",
       "113760                         {'Flynn': 'Michael Flynn'}   \n",
       "113761                      {'Conley': 'Mike Conley Jr.'}   \n",
       "\n",
       "                                           Entity content  \n",
       "0       {'Atlanta United FC': {'type': 'item', 'id': '...  \n",
       "1       {'Washington, D.C.': {'type': 'item', 'id': 'Q...  \n",
       "2                                                      {}  \n",
       "3                                                      {}  \n",
       "4                                                      {}  \n",
       "...                                                   ...  \n",
       "113757  {\"United States women's national soccer team\":...  \n",
       "113758                                                 {}  \n",
       "113759  {'National Basketball Association': {'type': '...  \n",
       "113760  {'Michael Flynn': {'type': 'item', 'id': 'Q683...  \n",
       "113761  {'Mike Conley Jr.': {'type': 'item', 'id': 'Q9...  \n",
       "\n",
       "[113762 rows x 7 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "news_df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "12c34ab7",
   "metadata": {},
   "source": [
    "**Extract News IDs**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "f7da16fe",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the distinct values of the 'News ID' column.\n",
    "news_ids = pd.unique(news_df[\"News ID\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "c4ce0d8c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['N10000', 'N10001', 'N10002', ..., 'N123759', 'N123760', 'N123761'],\n",
       "      dtype=object)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "news_ids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "3f94975a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "113762"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(news_ids)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bc09092b",
   "metadata": {},
   "source": [
    "**Sort Extracted News IDs**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "aca7e650",
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "\n",
    "\n",
    "def alphanum_key(s):\n",
    "    \"\"\"Build a natural-sort key for the string `s`.\n",
    "\n",
    "    The string is split into alternating text and ASCII digit runs. Digit\n",
    "    runs become ints and text is lower-cased, so that e.g. 'N9' orders\n",
    "    before 'N10'.\n",
    "\n",
    "    Args:\n",
    "        s: String to build a key for.\n",
    "\n",
    "    Returns:\n",
    "        A list whose even positions hold lower-cased text (possibly empty)\n",
    "        and whose odd positions hold ints.\n",
    "    \"\"\"\n",
    "    # With a capturing group, re.split places the matched digit runs at the\n",
    "    # odd indices. Converting by position instead of by str.isdigit() avoids\n",
    "    # calling int() on text made of non-ASCII digit characters (such as\n",
    "    # superscripts), which isdigit() accepts but the pattern never isolates,\n",
    "    # and keeps element types aligned across keys so they stay comparable.\n",
    "    tokens = re.split('([0-9]+)', s)\n",
    "    return [int(tok) if idx % 2 else tok.lower() for idx, tok in enumerate(tokens)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "df7ff5c0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Order the IDs with alphanum_key so digit runs compare as numbers.\n",
    "news_ids = list(news_ids)\n",
    "news_ids.sort(key=alphanum_key)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "fb6d2b78",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['N10000',\n",
       " 'N10001',\n",
       " 'N10002',\n",
       " 'N10003',\n",
       " 'N10004',\n",
       " 'N10005',\n",
       " 'N10006',\n",
       " 'N10007',\n",
       " 'N10008',\n",
       " 'N10009',\n",
       " 'N10010',\n",
       " 'N10011',\n",
       " 'N10012',\n",
       " 'N10013',\n",
       " 'N10014',\n",
       " 'N10015',\n",
       " 'N10016',\n",
       " 'N10017',\n",
       " 'N10018',\n",
       " 'N10019',\n",
       " 'N10020',\n",
       " 'N10021',\n",
       " 'N10022',\n",
       " 'N10023',\n",
       " 'N10024',\n",
       " 'N10025',\n",
       " 'N10026',\n",
       " 'N10027',\n",
       " 'N10028',\n",
       " 'N10029',\n",
       " 'N10030',\n",
       " 'N10031',\n",
       " 'N10032',\n",
       " 'N10033',\n",
       " 'N10034',\n",
       " 'N10035',\n",
       " 'N10036',\n",
       " 'N10037',\n",
       " 'N10038',\n",
       " 'N10039',\n",
       " 'N10040',\n",
       " 'N10041',\n",
       " 'N10042',\n",
       " 'N10043',\n",
       " 'N10044',\n",
       " 'N10045',\n",
       " 'N10046',\n",
       " 'N10047',\n",
       " 'N10048',\n",
       " 'N10049',\n",
       " 'N10050',\n",
       " 'N10051',\n",
       " 'N10052',\n",
       " 'N10053',\n",
       " 'N10054',\n",
       " 'N10055',\n",
       " 'N10056',\n",
       " 'N10057',\n",
       " 'N10058',\n",
       " 'N10059',\n",
       " 'N10060',\n",
       " 'N10061',\n",
       " 'N10062',\n",
       " 'N10063',\n",
       " 'N10064',\n",
       " 'N10065',\n",
       " 'N10066',\n",
       " 'N10067',\n",
       " 'N10068',\n",
       " 'N10069',\n",
       " 'N10070',\n",
       " 'N10071',\n",
       " 'N10072',\n",
       " 'N10073',\n",
       " 'N10074',\n",
       " 'N10075',\n",
       " 'N10076',\n",
       " 'N10077',\n",
       " 'N10078',\n",
       " 'N10079',\n",
       " 'N10080',\n",
       " 'N10081',\n",
       " 'N10082',\n",
       " 'N10083',\n",
       " 'N10084',\n",
       " 'N10085',\n",
       " 'N10086',\n",
       " 'N10087',\n",
       " 'N10088',\n",
       " 'N10089',\n",
       " 'N10090',\n",
       " 'N10091',\n",
       " 'N10092',\n",
       " 'N10093',\n",
       " 'N10094',\n",
       " 'N10095',\n",
       " 'N10096',\n",
       " 'N10097',\n",
       " 'N10098',\n",
       " 'N10099',\n",
       " 'N10100',\n",
       " 'N10101',\n",
       " 'N10102',\n",
       " 'N10103',\n",
       " 'N10104',\n",
       " 'N10105',\n",
       " 'N10106',\n",
       " 'N10107',\n",
       " 'N10108',\n",
       " 'N10109',\n",
       " 'N10110',\n",
       " 'N10111',\n",
       " 'N10112',\n",
       " 'N10113',\n",
       " 'N10114',\n",
       " 'N10115',\n",
       " 'N10116',\n",
       " 'N10117',\n",
       " 'N10118',\n",
       " 'N10119',\n",
       " 'N10120',\n",
       " 'N10121',\n",
       " 'N10122',\n",
       " 'N10123',\n",
       " 'N10124',\n",
       " 'N10125',\n",
       " 'N10126',\n",
       " 'N10127',\n",
       " 'N10128',\n",
       " 'N10129',\n",
       " 'N10130',\n",
       " 'N10131',\n",
       " 'N10132',\n",
       " 'N10133',\n",
       " 'N10134',\n",
       " 'N10135',\n",
       " 'N10136',\n",
       " 'N10137',\n",
       " 'N10138',\n",
       " 'N10139',\n",
       " 'N10140',\n",
       " 'N10141',\n",
       " 'N10142',\n",
       " 'N10143',\n",
       " 'N10144',\n",
       " 'N10145',\n",
       " 'N10146',\n",
       " 'N10147',\n",
       " 'N10148',\n",
       " 'N10149',\n",
       " 'N10150',\n",
       " 'N10151',\n",
       " 'N10152',\n",
       " 'N10153',\n",
       " 'N10154',\n",
       " 'N10155',\n",
       " 'N10156',\n",
       " 'N10157',\n",
       " 'N10158',\n",
       " 'N10159',\n",
       " 'N10160',\n",
       " 'N10161',\n",
       " 'N10162',\n",
       " 'N10163',\n",
       " 'N10164',\n",
       " 'N10165',\n",
       " 'N10166',\n",
       " 'N10167',\n",
       " 'N10168',\n",
       " 'N10169',\n",
       " 'N10170',\n",
       " 'N10171',\n",
       " 'N10172',\n",
       " 'N10173',\n",
       " 'N10174',\n",
       " 'N10175',\n",
       " 'N10176',\n",
       " 'N10177',\n",
       " 'N10178',\n",
       " 'N10179',\n",
       " 'N10180',\n",
       " 'N10181',\n",
       " 'N10182',\n",
       " 'N10183',\n",
       " 'N10184',\n",
       " 'N10185',\n",
       " 'N10186',\n",
       " 'N10187',\n",
       " 'N10188',\n",
       " 'N10189',\n",
       " 'N10190',\n",
       " 'N10191',\n",
       " 'N10192',\n",
       " 'N10193',\n",
       " 'N10194',\n",
       " 'N10195',\n",
       " 'N10196',\n",
       " 'N10197',\n",
       " 'N10198',\n",
       " 'N10199',\n",
       " 'N10200',\n",
       " 'N10201',\n",
       " 'N10202',\n",
       " 'N10203',\n",
       " 'N10204',\n",
       " 'N10205',\n",
       " 'N10206',\n",
       " 'N10207',\n",
       " 'N10208',\n",
       " 'N10209',\n",
       " 'N10210',\n",
       " 'N10211',\n",
       " 'N10212',\n",
       " 'N10213',\n",
       " 'N10214',\n",
       " 'N10215',\n",
       " 'N10216',\n",
       " 'N10217',\n",
       " 'N10218',\n",
       " 'N10219',\n",
       " 'N10220',\n",
       " 'N10221',\n",
       " 'N10222',\n",
       " 'N10223',\n",
       " 'N10224',\n",
       " 'N10225',\n",
       " 'N10226',\n",
       " 'N10227',\n",
       " 'N10228',\n",
       " 'N10229',\n",
       " 'N10230',\n",
       " 'N10231',\n",
       " 'N10232',\n",
       " 'N10233',\n",
       " 'N10234',\n",
       " 'N10235',\n",
       " 'N10236',\n",
       " 'N10237',\n",
       " 'N10238',\n",
       " 'N10239',\n",
       " 'N10240',\n",
       " 'N10241',\n",
       " 'N10242',\n",
       " 'N10243',\n",
       " 'N10244',\n",
       " 'N10245',\n",
       " 'N10246',\n",
       " 'N10247',\n",
       " 'N10248',\n",
       " 'N10249',\n",
       " 'N10250',\n",
       " 'N10251',\n",
       " 'N10252',\n",
       " 'N10253',\n",
       " 'N10254',\n",
       " 'N10255',\n",
       " 'N10256',\n",
       " 'N10257',\n",
       " 'N10258',\n",
       " 'N10259',\n",
       " 'N10260',\n",
       " 'N10261',\n",
       " 'N10262',\n",
       " 'N10263',\n",
       " 'N10264',\n",
       " 'N10265',\n",
       " 'N10266',\n",
       " 'N10267',\n",
       " 'N10268',\n",
       " 'N10269',\n",
       " 'N10270',\n",
       " 'N10271',\n",
       " 'N10272',\n",
       " 'N10273',\n",
       " 'N10274',\n",
       " 'N10275',\n",
       " 'N10276',\n",
       " 'N10277',\n",
       " 'N10278',\n",
       " 'N10279',\n",
       " 'N10280',\n",
       " 'N10281',\n",
       " 'N10282',\n",
       " 'N10283',\n",
       " 'N10284',\n",
       " 'N10285',\n",
       " 'N10286',\n",
       " 'N10287',\n",
       " 'N10288',\n",
       " 'N10289',\n",
       " 'N10290',\n",
       " 'N10291',\n",
       " 'N10292',\n",
       " 'N10293',\n",
       " 'N10294',\n",
       " 'N10295',\n",
       " 'N10296',\n",
       " 'N10297',\n",
       " 'N10298',\n",
       " 'N10299',\n",
       " 'N10300',\n",
       " 'N10301',\n",
       " 'N10302',\n",
       " 'N10303',\n",
       " 'N10304',\n",
       " 'N10305',\n",
       " 'N10306',\n",
       " 'N10307',\n",
       " 'N10308',\n",
       " 'N10309',\n",
       " 'N10310',\n",
       " 'N10311',\n",
       " 'N10312',\n",
       " 'N10313',\n",
       " 'N10314',\n",
       " 'N10315',\n",
       " 'N10316',\n",
       " 'N10317',\n",
       " 'N10318',\n",
       " 'N10319',\n",
       " 'N10320',\n",
       " 'N10321',\n",
       " 'N10322',\n",
       " 'N10323',\n",
       " 'N10324',\n",
       " 'N10325',\n",
       " 'N10326',\n",
       " 'N10327',\n",
       " 'N10328',\n",
       " 'N10329',\n",
       " 'N10330',\n",
       " 'N10331',\n",
       " 'N10332',\n",
       " 'N10333',\n",
       " 'N10334',\n",
       " 'N10335',\n",
       " 'N10336',\n",
       " 'N10337',\n",
       " 'N10338',\n",
       " 'N10339',\n",
       " 'N10340',\n",
       " 'N10341',\n",
       " 'N10342',\n",
       " 'N10343',\n",
       " 'N10344',\n",
       " 'N10345',\n",
       " 'N10346',\n",
       " 'N10347',\n",
       " 'N10348',\n",
       " 'N10349',\n",
       " 'N10350',\n",
       " 'N10351',\n",
       " 'N10352',\n",
       " 'N10353',\n",
       " 'N10354',\n",
       " 'N10355',\n",
       " 'N10356',\n",
       " 'N10357',\n",
       " 'N10358',\n",
       " 'N10359',\n",
       " 'N10360',\n",
       " 'N10361',\n",
       " 'N10362',\n",
       " 'N10363',\n",
       " 'N10364',\n",
       " 'N10365',\n",
       " 'N10366',\n",
       " 'N10367',\n",
       " 'N10368',\n",
       " 'N10369',\n",
       " 'N10370',\n",
       " 'N10371',\n",
       " 'N10372',\n",
       " 'N10373',\n",
       " 'N10374',\n",
       " 'N10375',\n",
       " 'N10376',\n",
       " 'N10377',\n",
       " 'N10378',\n",
       " 'N10379',\n",
       " 'N10380',\n",
       " 'N10381',\n",
       " 'N10382',\n",
       " 'N10383',\n",
       " 'N10384',\n",
       " 'N10385',\n",
       " 'N10386',\n",
       " 'N10387',\n",
       " 'N10388',\n",
       " 'N10389',\n",
       " 'N10390',\n",
       " 'N10391',\n",
       " 'N10392',\n",
       " 'N10393',\n",
       " 'N10394',\n",
       " 'N10395',\n",
       " 'N10396',\n",
       " 'N10397',\n",
       " 'N10398',\n",
       " 'N10399',\n",
       " 'N10400',\n",
       " 'N10401',\n",
       " 'N10402',\n",
       " 'N10403',\n",
       " 'N10404',\n",
       " 'N10405',\n",
       " 'N10406',\n",
       " 'N10407',\n",
       " 'N10408',\n",
       " 'N10409',\n",
       " 'N10410',\n",
       " 'N10411',\n",
       " 'N10412',\n",
       " 'N10413',\n",
       " 'N10414',\n",
       " 'N10415',\n",
       " 'N10416',\n",
       " 'N10417',\n",
       " 'N10418',\n",
       " 'N10419',\n",
       " 'N10420',\n",
       " 'N10421',\n",
       " 'N10422',\n",
       " 'N10423',\n",
       " 'N10424',\n",
       " 'N10425',\n",
       " 'N10426',\n",
       " 'N10427',\n",
       " 'N10428',\n",
       " 'N10429',\n",
       " 'N10430',\n",
       " 'N10431',\n",
       " 'N10432',\n",
       " 'N10433',\n",
       " 'N10434',\n",
       " 'N10435',\n",
       " 'N10436',\n",
       " 'N10437',\n",
       " 'N10438',\n",
       " 'N10439',\n",
       " 'N10440',\n",
       " 'N10441',\n",
       " 'N10442',\n",
       " 'N10443',\n",
       " 'N10444',\n",
       " 'N10445',\n",
       " 'N10446',\n",
       " 'N10447',\n",
       " 'N10448',\n",
       " 'N10449',\n",
       " 'N10450',\n",
       " 'N10451',\n",
       " 'N10452',\n",
       " 'N10453',\n",
       " 'N10454',\n",
       " 'N10455',\n",
       " 'N10456',\n",
       " 'N10457',\n",
       " 'N10458',\n",
       " 'N10459',\n",
       " 'N10460',\n",
       " 'N10461',\n",
       " 'N10462',\n",
       " 'N10463',\n",
       " 'N10464',\n",
       " 'N10465',\n",
       " 'N10466',\n",
       " 'N10467',\n",
       " 'N10468',\n",
       " 'N10469',\n",
       " 'N10470',\n",
       " 'N10471',\n",
       " 'N10472',\n",
       " 'N10473',\n",
       " 'N10474',\n",
       " 'N10475',\n",
       " 'N10476',\n",
       " 'N10477',\n",
       " 'N10478',\n",
       " 'N10479',\n",
       " 'N10480',\n",
       " 'N10481',\n",
       " 'N10482',\n",
       " 'N10483',\n",
       " 'N10484',\n",
       " 'N10485',\n",
       " 'N10486',\n",
       " 'N10487',\n",
       " 'N10488',\n",
       " 'N10489',\n",
       " 'N10490',\n",
       " 'N10491',\n",
       " 'N10492',\n",
       " 'N10493',\n",
       " 'N10494',\n",
       " 'N10495',\n",
       " 'N10496',\n",
       " 'N10497',\n",
       " 'N10498',\n",
       " 'N10499',\n",
       " 'N10500',\n",
       " 'N10501',\n",
       " 'N10502',\n",
       " 'N10503',\n",
       " 'N10504',\n",
       " 'N10505',\n",
       " 'N10506',\n",
       " 'N10507',\n",
       " 'N10508',\n",
       " 'N10509',\n",
       " 'N10510',\n",
       " 'N10511',\n",
       " 'N10512',\n",
       " 'N10513',\n",
       " 'N10514',\n",
       " 'N10515',\n",
       " 'N10516',\n",
       " 'N10517',\n",
       " 'N10518',\n",
       " 'N10519',\n",
       " 'N10520',\n",
       " 'N10521',\n",
       " 'N10522',\n",
       " 'N10523',\n",
       " 'N10524',\n",
       " 'N10525',\n",
       " 'N10526',\n",
       " 'N10527',\n",
       " 'N10528',\n",
       " 'N10529',\n",
       " 'N10530',\n",
       " 'N10531',\n",
       " 'N10532',\n",
       " 'N10533',\n",
       " 'N10534',\n",
       " 'N10535',\n",
       " 'N10536',\n",
       " 'N10537',\n",
       " 'N10538',\n",
       " 'N10539',\n",
       " 'N10540',\n",
       " 'N10541',\n",
       " 'N10542',\n",
       " 'N10543',\n",
       " 'N10544',\n",
       " 'N10545',\n",
       " 'N10546',\n",
       " 'N10547',\n",
       " 'N10548',\n",
       " 'N10549',\n",
       " 'N10550',\n",
       " 'N10551',\n",
       " 'N10552',\n",
       " 'N10553',\n",
       " 'N10554',\n",
       " 'N10555',\n",
       " 'N10556',\n",
       " 'N10557',\n",
       " 'N10558',\n",
       " 'N10559',\n",
       " 'N10560',\n",
       " 'N10561',\n",
       " 'N10562',\n",
       " 'N10563',\n",
       " 'N10564',\n",
       " 'N10565',\n",
       " 'N10566',\n",
       " 'N10567',\n",
       " 'N10568',\n",
       " 'N10569',\n",
       " 'N10570',\n",
       " 'N10571',\n",
       " 'N10572',\n",
       " 'N10573',\n",
       " 'N10574',\n",
       " 'N10575',\n",
       " 'N10576',\n",
       " 'N10577',\n",
       " 'N10578',\n",
       " 'N10579',\n",
       " 'N10580',\n",
       " 'N10581',\n",
       " 'N10582',\n",
       " 'N10583',\n",
       " 'N10584',\n",
       " 'N10585',\n",
       " 'N10586',\n",
       " 'N10587',\n",
       " 'N10588',\n",
       " 'N10589',\n",
       " 'N10590',\n",
       " 'N10591',\n",
       " 'N10592',\n",
       " 'N10593',\n",
       " 'N10594',\n",
       " 'N10595',\n",
       " 'N10596',\n",
       " 'N10597',\n",
       " 'N10598',\n",
       " 'N10599',\n",
       " 'N10600',\n",
       " 'N10601',\n",
       " 'N10602',\n",
       " 'N10603',\n",
       " 'N10604',\n",
       " 'N10605',\n",
       " 'N10606',\n",
       " 'N10607',\n",
       " 'N10608',\n",
       " 'N10609',\n",
       " 'N10610',\n",
       " 'N10611',\n",
       " 'N10612',\n",
       " 'N10613',\n",
       " 'N10614',\n",
       " 'N10615',\n",
       " 'N10616',\n",
       " 'N10617',\n",
       " 'N10618',\n",
       " 'N10619',\n",
       " 'N10620',\n",
       " 'N10621',\n",
       " 'N10622',\n",
       " 'N10623',\n",
       " 'N10624',\n",
       " 'N10625',\n",
       " 'N10626',\n",
       " 'N10627',\n",
       " 'N10628',\n",
       " 'N10629',\n",
       " 'N10630',\n",
       " 'N10631',\n",
       " 'N10632',\n",
       " 'N10633',\n",
       " 'N10634',\n",
       " 'N10635',\n",
       " 'N10636',\n",
       " 'N10637',\n",
       " 'N10638',\n",
       " 'N10639',\n",
       " 'N10640',\n",
       " 'N10641',\n",
       " 'N10642',\n",
       " 'N10643',\n",
       " 'N10644',\n",
       " 'N10645',\n",
       " 'N10646',\n",
       " 'N10647',\n",
       " 'N10648',\n",
       " 'N10649',\n",
       " 'N10650',\n",
       " 'N10651',\n",
       " 'N10652',\n",
       " 'N10653',\n",
       " 'N10654',\n",
       " 'N10655',\n",
       " 'N10656',\n",
       " 'N10657',\n",
       " 'N10658',\n",
       " 'N10659',\n",
       " 'N10660',\n",
       " 'N10661',\n",
       " 'N10662',\n",
       " 'N10663',\n",
       " 'N10664',\n",
       " 'N10665',\n",
       " 'N10666',\n",
       " 'N10667',\n",
       " 'N10668',\n",
       " 'N10669',\n",
       " 'N10670',\n",
       " 'N10671',\n",
       " 'N10672',\n",
       " 'N10673',\n",
       " 'N10674',\n",
       " 'N10675',\n",
       " 'N10676',\n",
       " 'N10677',\n",
       " 'N10678',\n",
       " 'N10679',\n",
       " 'N10680',\n",
       " 'N10681',\n",
       " 'N10682',\n",
       " 'N10683',\n",
       " 'N10684',\n",
       " 'N10685',\n",
       " 'N10686',\n",
       " 'N10687',\n",
       " 'N10688',\n",
       " 'N10689',\n",
       " 'N10690',\n",
       " 'N10691',\n",
       " 'N10692',\n",
       " 'N10693',\n",
       " 'N10694',\n",
       " 'N10695',\n",
       " 'N10696',\n",
       " 'N10697',\n",
       " 'N10698',\n",
       " 'N10699',\n",
       " 'N10700',\n",
       " 'N10701',\n",
       " 'N10702',\n",
       " 'N10703',\n",
       " 'N10704',\n",
       " 'N10705',\n",
       " 'N10706',\n",
       " 'N10707',\n",
       " 'N10708',\n",
       " 'N10709',\n",
       " 'N10710',\n",
       " 'N10711',\n",
       " 'N10712',\n",
       " 'N10713',\n",
       " 'N10714',\n",
       " 'N10715',\n",
       " 'N10716',\n",
       " 'N10717',\n",
       " 'N10718',\n",
       " 'N10719',\n",
       " 'N10720',\n",
       " 'N10721',\n",
       " 'N10722',\n",
       " 'N10723',\n",
       " 'N10724',\n",
       " 'N10725',\n",
       " 'N10726',\n",
       " 'N10727',\n",
       " 'N10728',\n",
       " 'N10729',\n",
       " 'N10730',\n",
       " 'N10731',\n",
       " 'N10732',\n",
       " 'N10733',\n",
       " 'N10734',\n",
       " 'N10735',\n",
       " 'N10736',\n",
       " 'N10737',\n",
       " 'N10738',\n",
       " 'N10739',\n",
       " 'N10740',\n",
       " 'N10741',\n",
       " 'N10742',\n",
       " 'N10743',\n",
       " 'N10744',\n",
       " 'N10745',\n",
       " 'N10746',\n",
       " 'N10747',\n",
       " 'N10748',\n",
       " 'N10749',\n",
       " 'N10750',\n",
       " 'N10751',\n",
       " 'N10752',\n",
       " 'N10753',\n",
       " 'N10754',\n",
       " 'N10755',\n",
       " 'N10756',\n",
       " 'N10757',\n",
       " 'N10758',\n",
       " 'N10759',\n",
       " 'N10760',\n",
       " 'N10761',\n",
       " 'N10762',\n",
       " 'N10763',\n",
       " 'N10764',\n",
       " 'N10765',\n",
       " 'N10766',\n",
       " 'N10767',\n",
       " 'N10768',\n",
       " 'N10769',\n",
       " 'N10770',\n",
       " 'N10771',\n",
       " 'N10772',\n",
       " 'N10773',\n",
       " 'N10774',\n",
       " 'N10775',\n",
       " 'N10776',\n",
       " 'N10777',\n",
       " 'N10778',\n",
       " 'N10779',\n",
       " 'N10780',\n",
       " 'N10781',\n",
       " 'N10782',\n",
       " 'N10783',\n",
       " 'N10784',\n",
       " 'N10785',\n",
       " 'N10786',\n",
       " 'N10787',\n",
       " 'N10788',\n",
       " 'N10789',\n",
       " 'N10790',\n",
       " 'N10791',\n",
       " 'N10792',\n",
       " 'N10793',\n",
       " 'N10794',\n",
       " 'N10795',\n",
       " 'N10796',\n",
       " 'N10797',\n",
       " 'N10798',\n",
       " 'N10799',\n",
       " 'N10800',\n",
       " 'N10801',\n",
       " 'N10802',\n",
       " 'N10803',\n",
       " 'N10804',\n",
       " 'N10805',\n",
       " 'N10806',\n",
       " 'N10807',\n",
       " 'N10808',\n",
       " 'N10809',\n",
       " 'N10810',\n",
       " 'N10811',\n",
       " 'N10812',\n",
       " 'N10813',\n",
       " 'N10814',\n",
       " 'N10815',\n",
       " 'N10816',\n",
       " 'N10817',\n",
       " 'N10818',\n",
       " 'N10819',\n",
       " 'N10820',\n",
       " 'N10821',\n",
       " 'N10822',\n",
       " 'N10823',\n",
       " 'N10824',\n",
       " 'N10825',\n",
       " 'N10826',\n",
       " 'N10827',\n",
       " 'N10828',\n",
       " 'N10829',\n",
       " 'N10830',\n",
       " 'N10831',\n",
       " 'N10832',\n",
       " 'N10833',\n",
       " 'N10834',\n",
       " 'N10835',\n",
       " 'N10836',\n",
       " 'N10837',\n",
       " 'N10838',\n",
       " 'N10839',\n",
       " 'N10840',\n",
       " 'N10841',\n",
       " 'N10842',\n",
       " 'N10843',\n",
       " 'N10844',\n",
       " 'N10845',\n",
       " 'N10846',\n",
       " 'N10847',\n",
       " 'N10848',\n",
       " 'N10849',\n",
       " 'N10850',\n",
       " 'N10851',\n",
       " 'N10852',\n",
       " 'N10853',\n",
       " 'N10854',\n",
       " 'N10855',\n",
       " 'N10856',\n",
       " 'N10857',\n",
       " 'N10858',\n",
       " 'N10859',\n",
       " 'N10860',\n",
       " 'N10861',\n",
       " 'N10862',\n",
       " 'N10863',\n",
       " 'N10864',\n",
       " 'N10865',\n",
       " 'N10866',\n",
       " 'N10867',\n",
       " 'N10868',\n",
       " 'N10869',\n",
       " 'N10870',\n",
       " 'N10871',\n",
       " 'N10872',\n",
       " 'N10873',\n",
       " 'N10874',\n",
       " 'N10875',\n",
       " 'N10876',\n",
       " 'N10877',\n",
       " 'N10878',\n",
       " 'N10879',\n",
       " 'N10880',\n",
       " 'N10881',\n",
       " 'N10882',\n",
       " 'N10883',\n",
       " 'N10884',\n",
       " 'N10885',\n",
       " 'N10886',\n",
       " 'N10887',\n",
       " 'N10888',\n",
       " 'N10889',\n",
       " 'N10890',\n",
       " 'N10891',\n",
       " 'N10892',\n",
       " 'N10893',\n",
       " 'N10894',\n",
       " 'N10895',\n",
       " 'N10896',\n",
       " 'N10897',\n",
       " 'N10898',\n",
       " 'N10899',\n",
       " 'N10900',\n",
       " 'N10901',\n",
       " 'N10902',\n",
       " 'N10903',\n",
       " 'N10904',\n",
       " 'N10905',\n",
       " 'N10906',\n",
       " 'N10907',\n",
       " 'N10908',\n",
       " 'N10909',\n",
       " 'N10910',\n",
       " 'N10911',\n",
       " 'N10912',\n",
       " 'N10913',\n",
       " 'N10914',\n",
       " 'N10915',\n",
       " 'N10916',\n",
       " 'N10917',\n",
       " 'N10918',\n",
       " 'N10919',\n",
       " 'N10920',\n",
       " 'N10921',\n",
       " 'N10922',\n",
       " 'N10923',\n",
       " 'N10924',\n",
       " 'N10925',\n",
       " 'N10926',\n",
       " 'N10927',\n",
       " 'N10928',\n",
       " 'N10929',\n",
       " 'N10930',\n",
       " 'N10931',\n",
       " 'N10932',\n",
       " 'N10933',\n",
       " 'N10934',\n",
       " 'N10935',\n",
       " 'N10936',\n",
       " 'N10937',\n",
       " 'N10938',\n",
       " 'N10939',\n",
       " 'N10940',\n",
       " 'N10941',\n",
       " 'N10942',\n",
       " 'N10943',\n",
       " 'N10944',\n",
       " 'N10945',\n",
       " 'N10946',\n",
       " 'N10947',\n",
       " 'N10948',\n",
       " 'N10949',\n",
       " 'N10950',\n",
       " 'N10951',\n",
       " 'N10952',\n",
       " 'N10953',\n",
       " 'N10954',\n",
       " 'N10955',\n",
       " 'N10956',\n",
       " 'N10957',\n",
       " 'N10958',\n",
       " 'N10959',\n",
       " 'N10960',\n",
       " 'N10961',\n",
       " 'N10962',\n",
       " 'N10963',\n",
       " 'N10964',\n",
       " 'N10965',\n",
       " 'N10966',\n",
       " 'N10967',\n",
       " 'N10968',\n",
       " 'N10969',\n",
       " 'N10970',\n",
       " 'N10971',\n",
       " 'N10972',\n",
       " 'N10973',\n",
       " 'N10974',\n",
       " 'N10975',\n",
       " 'N10976',\n",
       " 'N10977',\n",
       " 'N10978',\n",
       " 'N10979',\n",
       " 'N10980',\n",
       " 'N10981',\n",
       " 'N10982',\n",
       " 'N10983',\n",
       " 'N10984',\n",
       " 'N10985',\n",
       " 'N10986',\n",
       " 'N10987',\n",
       " 'N10988',\n",
       " 'N10989',\n",
       " 'N10990',\n",
       " 'N10991',\n",
       " 'N10992',\n",
       " 'N10993',\n",
       " 'N10994',\n",
       " 'N10995',\n",
       " 'N10996',\n",
       " 'N10997',\n",
       " 'N10998',\n",
       " 'N10999',\n",
       " ...]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "news_ids"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "51f37346",
   "metadata": {},
   "source": [
    "## Summary"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c10148d9",
   "metadata": {},
   "source": [
    "**Load Summary Dataset**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "bae9b1ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "summ_df = pd.read_csv(\"./Data/summ.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "263dde40",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SummID</th>\n",
       "      <th>NewsID</th>\n",
       "      <th>UserID</th>\n",
       "      <th>Summary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>S1</td>\n",
       "      <td>N24324</td>\n",
       "      <td>U335175</td>\n",
       "      <td>Heat Wave Is Going to Hit Europe, Prepare to C...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>S2</td>\n",
       "      <td>N93272</td>\n",
       "      <td>U335175</td>\n",
       "      <td>Everything you need to know to plan your dream...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>S3</td>\n",
       "      <td>N43563</td>\n",
       "      <td>U146053</td>\n",
       "      <td>Baby Boomers Love City Life as Much As Millenials</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>S4</td>\n",
       "      <td>N97393</td>\n",
       "      <td>U146053</td>\n",
       "      <td>Piping plover forces Sandy Hook to cancel all ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>S5</td>\n",
       "      <td>N104663</td>\n",
       "      <td>U146053</td>\n",
       "      <td>Trip Advisor's Guide to Eating America: for th...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>777529</th>\n",
       "      <td>S777530</td>\n",
       "      <td>N51965</td>\n",
       "      <td>U215323</td>\n",
       "      <td>Ranking Penguin's possible picks for NHL draft</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>777530</th>\n",
       "      <td>S777531</td>\n",
       "      <td>N49096</td>\n",
       "      <td>U215323</td>\n",
       "      <td>Homeland Still Exposed to ISIS</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>777531</th>\n",
       "      <td>S777532</td>\n",
       "      <td>N44501</td>\n",
       "      <td>U215323</td>\n",
       "      <td>US-Iran tension in Photos</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>777532</th>\n",
       "      <td>S777533</td>\n",
       "      <td>N59444</td>\n",
       "      <td>U215323</td>\n",
       "      <td>Former Steeler's RB Le'Veon Bell left bare as ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>777533</th>\n",
       "      <td>S777534</td>\n",
       "      <td>N81907</td>\n",
       "      <td>U215323</td>\n",
       "      <td>NFL standouts whose careers have never gone po...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>777534 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         SummID   NewsID   UserID  \\\n",
       "0            S1   N24324  U335175   \n",
       "1            S2   N93272  U335175   \n",
       "2            S3   N43563  U146053   \n",
       "3            S4   N97393  U146053   \n",
       "4            S5  N104663  U146053   \n",
       "...         ...      ...      ...   \n",
       "777529  S777530   N51965  U215323   \n",
       "777530  S777531   N49096  U215323   \n",
       "777531  S777532   N44501  U215323   \n",
       "777532  S777533   N59444  U215323   \n",
       "777533  S777534   N81907  U215323   \n",
       "\n",
       "                                                  Summary  \n",
       "0       Heat Wave Is Going to Hit Europe, Prepare to C...  \n",
       "1       Everything you need to know to plan your dream...  \n",
       "2       Baby Boomers Love City Life as Much As Millenials  \n",
       "3       Piping plover forces Sandy Hook to cancel all ...  \n",
       "4       Trip Advisor's Guide to Eating America: for th...  \n",
       "...                                                   ...  \n",
       "777529     Ranking Penguin's possible picks for NHL draft  \n",
       "777530                    Homeland Still Exposed to ISIS   \n",
       "777531                          US-Iran tension in Photos  \n",
       "777532  Former Steeler's RB Le'Veon Bell left bare as ...  \n",
       "777533  NFL standouts whose careers have never gone po...  \n",
       "\n",
       "[777534 rows x 4 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "summ_df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1c60c969",
   "metadata": {},
   "source": [
    "**Extract Summary IDs**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "ea8744a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "summ_ids = summ_df['SummID'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "7eda9e44",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['S1', 'S2', 'S3', ..., 'S777532', 'S777533', 'S777534'],\n",
       "      dtype=object)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "summ_ids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "134500a1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "777534"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(summ_ids)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d84e292e",
   "metadata": {},
   "source": [
    "**Sort Extracted Summary IDs**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "142d1e82",
   "metadata": {},
   "outputs": [],
   "source": [
    "summ_ids = sorted(summ_ids, key=alphanum_key)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "8442e81f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['S1',\n",
       " 'S2',\n",
       " 'S3',\n",
       " 'S4',\n",
       " 'S5',\n",
       " 'S6',\n",
       " 'S7',\n",
       " 'S8',\n",
       " 'S9',\n",
       " 'S10',\n",
       " 'S11',\n",
       " 'S12',\n",
       " 'S13',\n",
       " 'S14',\n",
       " 'S15',\n",
       " 'S16',\n",
       " 'S17',\n",
       " 'S18',\n",
       " 'S19',\n",
       " 'S20',\n",
       " 'S21',\n",
       " 'S22',\n",
       " 'S23',\n",
       " 'S24',\n",
       " 'S25',\n",
       " 'S26',\n",
       " 'S27',\n",
       " 'S28',\n",
       " 'S29',\n",
       " 'S30',\n",
       " 'S31',\n",
       " 'S32',\n",
       " 'S33',\n",
       " 'S34',\n",
       " 'S35',\n",
       " 'S36',\n",
       " 'S37',\n",
       " 'S38',\n",
       " 'S39',\n",
       " 'S40',\n",
       " 'S41',\n",
       " 'S42',\n",
       " 'S43',\n",
       " 'S44',\n",
       " 'S45',\n",
       " 'S46',\n",
       " 'S47',\n",
       " 'S48',\n",
       " 'S49',\n",
       " 'S50',\n",
       " 'S51',\n",
       " 'S52',\n",
       " 'S53',\n",
       " 'S54',\n",
       " 'S55',\n",
       " 'S56',\n",
       " 'S57',\n",
       " 'S58',\n",
       " 'S59',\n",
       " 'S60',\n",
       " 'S61',\n",
       " 'S62',\n",
       " 'S63',\n",
       " 'S64',\n",
       " 'S65',\n",
       " 'S66',\n",
       " 'S67',\n",
       " 'S68',\n",
       " 'S69',\n",
       " 'S70',\n",
       " 'S71',\n",
       " 'S72',\n",
       " 'S73',\n",
       " 'S74',\n",
       " 'S75',\n",
       " 'S76',\n",
       " 'S77',\n",
       " 'S78',\n",
       " 'S79',\n",
       " 'S80',\n",
       " 'S81',\n",
       " 'S82',\n",
       " 'S83',\n",
       " 'S84',\n",
       " 'S85',\n",
       " 'S86',\n",
       " 'S87',\n",
       " 'S88',\n",
       " 'S89',\n",
       " 'S90',\n",
       " 'S91',\n",
       " 'S92',\n",
       " 'S93',\n",
       " 'S94',\n",
       " 'S95',\n",
       " 'S96',\n",
       " 'S97',\n",
       " 'S98',\n",
       " 'S99',\n",
       " 'S100',\n",
       " 'S101',\n",
       " 'S102',\n",
       " 'S103',\n",
       " 'S104',\n",
       " 'S105',\n",
       " 'S106',\n",
       " 'S107',\n",
       " 'S108',\n",
       " 'S109',\n",
       " 'S110',\n",
       " 'S111',\n",
       " 'S112',\n",
       " 'S113',\n",
       " 'S114',\n",
       " 'S115',\n",
       " 'S116',\n",
       " 'S117',\n",
       " 'S118',\n",
       " 'S119',\n",
       " 'S120',\n",
       " 'S121',\n",
       " 'S122',\n",
       " 'S123',\n",
       " 'S124',\n",
       " 'S125',\n",
       " 'S126',\n",
       " 'S127',\n",
       " 'S128',\n",
       " 'S129',\n",
       " 'S130',\n",
       " 'S131',\n",
       " 'S132',\n",
       " 'S133',\n",
       " 'S134',\n",
       " 'S135',\n",
       " 'S136',\n",
       " 'S137',\n",
       " 'S138',\n",
       " 'S139',\n",
       " 'S140',\n",
       " 'S141',\n",
       " 'S142',\n",
       " 'S143',\n",
       " 'S144',\n",
       " 'S145',\n",
       " 'S146',\n",
       " 'S147',\n",
       " 'S148',\n",
       " 'S149',\n",
       " 'S150',\n",
       " 'S151',\n",
       " 'S152',\n",
       " 'S153',\n",
       " 'S154',\n",
       " 'S155',\n",
       " 'S156',\n",
       " 'S157',\n",
       " 'S158',\n",
       " 'S159',\n",
       " 'S160',\n",
       " 'S161',\n",
       " 'S162',\n",
       " 'S163',\n",
       " 'S164',\n",
       " 'S165',\n",
       " 'S166',\n",
       " 'S167',\n",
       " 'S168',\n",
       " 'S169',\n",
       " 'S170',\n",
       " 'S171',\n",
       " 'S172',\n",
       " 'S173',\n",
       " 'S174',\n",
       " 'S175',\n",
       " 'S176',\n",
       " 'S177',\n",
       " 'S178',\n",
       " 'S179',\n",
       " 'S180',\n",
       " 'S181',\n",
       " 'S182',\n",
       " 'S183',\n",
       " 'S184',\n",
       " 'S185',\n",
       " 'S186',\n",
       " 'S187',\n",
       " 'S188',\n",
       " 'S189',\n",
       " 'S190',\n",
       " 'S191',\n",
       " 'S192',\n",
       " 'S193',\n",
       " 'S194',\n",
       " 'S195',\n",
       " 'S196',\n",
       " 'S197',\n",
       " 'S198',\n",
       " 'S199',\n",
       " 'S200',\n",
       " 'S201',\n",
       " 'S202',\n",
       " 'S203',\n",
       " 'S204',\n",
       " 'S205',\n",
       " 'S206',\n",
       " 'S207',\n",
       " 'S208',\n",
       " 'S209',\n",
       " 'S210',\n",
       " 'S211',\n",
       " 'S212',\n",
       " 'S213',\n",
       " 'S214',\n",
       " 'S215',\n",
       " 'S216',\n",
       " 'S217',\n",
       " 'S218',\n",
       " 'S219',\n",
       " 'S220',\n",
       " 'S221',\n",
       " 'S222',\n",
       " 'S223',\n",
       " 'S224',\n",
       " 'S225',\n",
       " 'S226',\n",
       " 'S227',\n",
       " 'S228',\n",
       " 'S229',\n",
       " 'S230',\n",
       " 'S231',\n",
       " 'S232',\n",
       " 'S233',\n",
       " 'S234',\n",
       " 'S235',\n",
       " 'S236',\n",
       " 'S237',\n",
       " 'S238',\n",
       " 'S239',\n",
       " 'S240',\n",
       " 'S241',\n",
       " 'S242',\n",
       " 'S243',\n",
       " 'S244',\n",
       " 'S245',\n",
       " 'S246',\n",
       " 'S247',\n",
       " 'S248',\n",
       " 'S249',\n",
       " 'S250',\n",
       " 'S251',\n",
       " 'S252',\n",
       " 'S253',\n",
       " 'S254',\n",
       " 'S255',\n",
       " 'S256',\n",
       " 'S257',\n",
       " 'S258',\n",
       " 'S259',\n",
       " 'S260',\n",
       " 'S261',\n",
       " 'S262',\n",
       " 'S263',\n",
       " 'S264',\n",
       " 'S265',\n",
       " 'S266',\n",
       " 'S267',\n",
       " 'S268',\n",
       " 'S269',\n",
       " 'S270',\n",
       " 'S271',\n",
       " 'S272',\n",
       " 'S273',\n",
       " 'S274',\n",
       " 'S275',\n",
       " 'S276',\n",
       " 'S277',\n",
       " 'S278',\n",
       " 'S279',\n",
       " 'S280',\n",
       " 'S281',\n",
       " 'S282',\n",
       " 'S283',\n",
       " 'S284',\n",
       " 'S285',\n",
       " 'S286',\n",
       " 'S287',\n",
       " 'S288',\n",
       " 'S289',\n",
       " 'S290',\n",
       " 'S291',\n",
       " 'S292',\n",
       " 'S293',\n",
       " 'S294',\n",
       " 'S295',\n",
       " 'S296',\n",
       " 'S297',\n",
       " 'S298',\n",
       " 'S299',\n",
       " 'S300',\n",
       " 'S301',\n",
       " 'S302',\n",
       " 'S303',\n",
       " 'S304',\n",
       " 'S305',\n",
       " 'S306',\n",
       " 'S307',\n",
       " 'S308',\n",
       " 'S309',\n",
       " 'S310',\n",
       " 'S311',\n",
       " 'S312',\n",
       " 'S313',\n",
       " 'S314',\n",
       " 'S315',\n",
       " 'S316',\n",
       " 'S317',\n",
       " 'S318',\n",
       " 'S319',\n",
       " 'S320',\n",
       " 'S321',\n",
       " 'S322',\n",
       " 'S323',\n",
       " 'S324',\n",
       " 'S325',\n",
       " 'S326',\n",
       " 'S327',\n",
       " 'S328',\n",
       " 'S329',\n",
       " 'S330',\n",
       " 'S331',\n",
       " 'S332',\n",
       " 'S333',\n",
       " 'S334',\n",
       " 'S335',\n",
       " 'S336',\n",
       " 'S337',\n",
       " 'S338',\n",
       " 'S339',\n",
       " 'S340',\n",
       " 'S341',\n",
       " 'S342',\n",
       " 'S343',\n",
       " 'S344',\n",
       " 'S345',\n",
       " 'S346',\n",
       " 'S347',\n",
       " 'S348',\n",
       " 'S349',\n",
       " 'S350',\n",
       " 'S351',\n",
       " 'S352',\n",
       " 'S353',\n",
       " 'S354',\n",
       " 'S355',\n",
       " 'S356',\n",
       " 'S357',\n",
       " 'S358',\n",
       " 'S359',\n",
       " 'S360',\n",
       " 'S361',\n",
       " 'S362',\n",
       " 'S363',\n",
       " 'S364',\n",
       " 'S365',\n",
       " 'S366',\n",
       " 'S367',\n",
       " 'S368',\n",
       " 'S369',\n",
       " 'S370',\n",
       " 'S371',\n",
       " 'S372',\n",
       " 'S373',\n",
       " 'S374',\n",
       " 'S375',\n",
       " 'S376',\n",
       " 'S377',\n",
       " 'S378',\n",
       " 'S379',\n",
       " 'S380',\n",
       " 'S381',\n",
       " 'S382',\n",
       " 'S383',\n",
       " 'S384',\n",
       " 'S385',\n",
       " 'S386',\n",
       " 'S387',\n",
       " 'S388',\n",
       " 'S389',\n",
       " 'S390',\n",
       " 'S391',\n",
       " 'S392',\n",
       " 'S393',\n",
       " 'S394',\n",
       " 'S395',\n",
       " 'S396',\n",
       " 'S397',\n",
       " 'S398',\n",
       " 'S399',\n",
       " 'S400',\n",
       " 'S401',\n",
       " 'S402',\n",
       " 'S403',\n",
       " 'S404',\n",
       " 'S405',\n",
       " 'S406',\n",
       " 'S407',\n",
       " 'S408',\n",
       " 'S409',\n",
       " 'S410',\n",
       " 'S411',\n",
       " 'S412',\n",
       " 'S413',\n",
       " 'S414',\n",
       " 'S415',\n",
       " 'S416',\n",
       " 'S417',\n",
       " 'S418',\n",
       " 'S419',\n",
       " 'S420',\n",
       " 'S421',\n",
       " 'S422',\n",
       " 'S423',\n",
       " 'S424',\n",
       " 'S425',\n",
       " 'S426',\n",
       " 'S427',\n",
       " 'S428',\n",
       " 'S429',\n",
       " 'S430',\n",
       " 'S431',\n",
       " 'S432',\n",
       " 'S433',\n",
       " 'S434',\n",
       " 'S435',\n",
       " 'S436',\n",
       " 'S437',\n",
       " 'S438',\n",
       " 'S439',\n",
       " 'S440',\n",
       " 'S441',\n",
       " 'S442',\n",
       " 'S443',\n",
       " 'S444',\n",
       " 'S445',\n",
       " 'S446',\n",
       " 'S447',\n",
       " 'S448',\n",
       " 'S449',\n",
       " 'S450',\n",
       " 'S451',\n",
       " 'S452',\n",
       " 'S453',\n",
       " 'S454',\n",
       " 'S455',\n",
       " 'S456',\n",
       " 'S457',\n",
       " 'S458',\n",
       " 'S459',\n",
       " 'S460',\n",
       " 'S461',\n",
       " 'S462',\n",
       " 'S463',\n",
       " 'S464',\n",
       " 'S465',\n",
       " 'S466',\n",
       " 'S467',\n",
       " 'S468',\n",
       " 'S469',\n",
       " 'S470',\n",
       " 'S471',\n",
       " 'S472',\n",
       " 'S473',\n",
       " 'S474',\n",
       " 'S475',\n",
       " 'S476',\n",
       " 'S477',\n",
       " 'S478',\n",
       " 'S479',\n",
       " 'S480',\n",
       " 'S481',\n",
       " 'S482',\n",
       " 'S483',\n",
       " 'S484',\n",
       " 'S485',\n",
       " 'S486',\n",
       " 'S487',\n",
       " 'S488',\n",
       " 'S489',\n",
       " 'S490',\n",
       " 'S491',\n",
       " 'S492',\n",
       " 'S493',\n",
       " 'S494',\n",
       " 'S495',\n",
       " 'S496',\n",
       " 'S497',\n",
       " 'S498',\n",
       " 'S499',\n",
       " 'S500',\n",
       " 'S501',\n",
       " 'S502',\n",
       " 'S503',\n",
       " 'S504',\n",
       " 'S505',\n",
       " 'S506',\n",
       " 'S507',\n",
       " 'S508',\n",
       " 'S509',\n",
       " 'S510',\n",
       " 'S511',\n",
       " 'S512',\n",
       " 'S513',\n",
       " 'S514',\n",
       " 'S515',\n",
       " 'S516',\n",
       " 'S517',\n",
       " 'S518',\n",
       " 'S519',\n",
       " 'S520',\n",
       " 'S521',\n",
       " 'S522',\n",
       " 'S523',\n",
       " 'S524',\n",
       " 'S525',\n",
       " 'S526',\n",
       " 'S527',\n",
       " 'S528',\n",
       " 'S529',\n",
       " 'S530',\n",
       " 'S531',\n",
       " 'S532',\n",
       " 'S533',\n",
       " 'S534',\n",
       " 'S535',\n",
       " 'S536',\n",
       " 'S537',\n",
       " 'S538',\n",
       " 'S539',\n",
       " 'S540',\n",
       " 'S541',\n",
       " 'S542',\n",
       " 'S543',\n",
       " 'S544',\n",
       " 'S545',\n",
       " 'S546',\n",
       " 'S547',\n",
       " 'S548',\n",
       " 'S549',\n",
       " 'S550',\n",
       " 'S551',\n",
       " 'S552',\n",
       " 'S553',\n",
       " 'S554',\n",
       " 'S555',\n",
       " 'S556',\n",
       " 'S557',\n",
       " 'S558',\n",
       " 'S559',\n",
       " 'S560',\n",
       " 'S561',\n",
       " 'S562',\n",
       " 'S563',\n",
       " 'S564',\n",
       " 'S565',\n",
       " 'S566',\n",
       " 'S567',\n",
       " 'S568',\n",
       " 'S569',\n",
       " 'S570',\n",
       " 'S571',\n",
       " 'S572',\n",
       " 'S573',\n",
       " 'S574',\n",
       " 'S575',\n",
       " 'S576',\n",
       " 'S577',\n",
       " 'S578',\n",
       " 'S579',\n",
       " 'S580',\n",
       " 'S581',\n",
       " 'S582',\n",
       " 'S583',\n",
       " 'S584',\n",
       " 'S585',\n",
       " 'S586',\n",
       " 'S587',\n",
       " 'S588',\n",
       " 'S589',\n",
       " 'S590',\n",
       " 'S591',\n",
       " 'S592',\n",
       " 'S593',\n",
       " 'S594',\n",
       " 'S595',\n",
       " 'S596',\n",
       " 'S597',\n",
       " 'S598',\n",
       " 'S599',\n",
       " 'S600',\n",
       " 'S601',\n",
       " 'S602',\n",
       " 'S603',\n",
       " 'S604',\n",
       " 'S605',\n",
       " 'S606',\n",
       " 'S607',\n",
       " 'S608',\n",
       " 'S609',\n",
       " 'S610',\n",
       " 'S611',\n",
       " 'S612',\n",
       " 'S613',\n",
       " 'S614',\n",
       " 'S615',\n",
       " 'S616',\n",
       " 'S617',\n",
       " 'S618',\n",
       " 'S619',\n",
       " 'S620',\n",
       " 'S621',\n",
       " 'S622',\n",
       " 'S623',\n",
       " 'S624',\n",
       " 'S625',\n",
       " 'S626',\n",
       " 'S627',\n",
       " 'S628',\n",
       " 'S629',\n",
       " 'S630',\n",
       " 'S631',\n",
       " 'S632',\n",
       " 'S633',\n",
       " 'S634',\n",
       " 'S635',\n",
       " 'S636',\n",
       " 'S637',\n",
       " 'S638',\n",
       " 'S639',\n",
       " 'S640',\n",
       " 'S641',\n",
       " 'S642',\n",
       " 'S643',\n",
       " 'S644',\n",
       " 'S645',\n",
       " 'S646',\n",
       " 'S647',\n",
       " 'S648',\n",
       " 'S649',\n",
       " 'S650',\n",
       " 'S651',\n",
       " 'S652',\n",
       " 'S653',\n",
       " 'S654',\n",
       " 'S655',\n",
       " 'S656',\n",
       " 'S657',\n",
       " 'S658',\n",
       " 'S659',\n",
       " 'S660',\n",
       " 'S661',\n",
       " 'S662',\n",
       " 'S663',\n",
       " 'S664',\n",
       " 'S665',\n",
       " 'S666',\n",
       " 'S667',\n",
       " 'S668',\n",
       " 'S669',\n",
       " 'S670',\n",
       " 'S671',\n",
       " 'S672',\n",
       " 'S673',\n",
       " 'S674',\n",
       " 'S675',\n",
       " 'S676',\n",
       " 'S677',\n",
       " 'S678',\n",
       " 'S679',\n",
       " 'S680',\n",
       " 'S681',\n",
       " 'S682',\n",
       " 'S683',\n",
       " 'S684',\n",
       " 'S685',\n",
       " 'S686',\n",
       " 'S687',\n",
       " 'S688',\n",
       " 'S689',\n",
       " 'S690',\n",
       " 'S691',\n",
       " 'S692',\n",
       " 'S693',\n",
       " 'S694',\n",
       " 'S695',\n",
       " 'S696',\n",
       " 'S697',\n",
       " 'S698',\n",
       " 'S699',\n",
       " 'S700',\n",
       " 'S701',\n",
       " 'S702',\n",
       " 'S703',\n",
       " 'S704',\n",
       " 'S705',\n",
       " 'S706',\n",
       " 'S707',\n",
       " 'S708',\n",
       " 'S709',\n",
       " 'S710',\n",
       " 'S711',\n",
       " 'S712',\n",
       " 'S713',\n",
       " 'S714',\n",
       " 'S715',\n",
       " 'S716',\n",
       " 'S717',\n",
       " 'S718',\n",
       " 'S719',\n",
       " 'S720',\n",
       " 'S721',\n",
       " 'S722',\n",
       " 'S723',\n",
       " 'S724',\n",
       " 'S725',\n",
       " 'S726',\n",
       " 'S727',\n",
       " 'S728',\n",
       " 'S729',\n",
       " 'S730',\n",
       " 'S731',\n",
       " 'S732',\n",
       " 'S733',\n",
       " 'S734',\n",
       " 'S735',\n",
       " 'S736',\n",
       " 'S737',\n",
       " 'S738',\n",
       " 'S739',\n",
       " 'S740',\n",
       " 'S741',\n",
       " 'S742',\n",
       " 'S743',\n",
       " 'S744',\n",
       " 'S745',\n",
       " 'S746',\n",
       " 'S747',\n",
       " 'S748',\n",
       " 'S749',\n",
       " 'S750',\n",
       " 'S751',\n",
       " 'S752',\n",
       " 'S753',\n",
       " 'S754',\n",
       " 'S755',\n",
       " 'S756',\n",
       " 'S757',\n",
       " 'S758',\n",
       " 'S759',\n",
       " 'S760',\n",
       " 'S761',\n",
       " 'S762',\n",
       " 'S763',\n",
       " 'S764',\n",
       " 'S765',\n",
       " 'S766',\n",
       " 'S767',\n",
       " 'S768',\n",
       " 'S769',\n",
       " 'S770',\n",
       " 'S771',\n",
       " 'S772',\n",
       " 'S773',\n",
       " 'S774',\n",
       " 'S775',\n",
       " 'S776',\n",
       " 'S777',\n",
       " 'S778',\n",
       " 'S779',\n",
       " 'S780',\n",
       " 'S781',\n",
       " 'S782',\n",
       " 'S783',\n",
       " 'S784',\n",
       " 'S785',\n",
       " 'S786',\n",
       " 'S787',\n",
       " 'S788',\n",
       " 'S789',\n",
       " 'S790',\n",
       " 'S791',\n",
       " 'S792',\n",
       " 'S793',\n",
       " 'S794',\n",
       " 'S795',\n",
       " 'S796',\n",
       " 'S797',\n",
       " 'S798',\n",
       " 'S799',\n",
       " 'S800',\n",
       " 'S801',\n",
       " 'S802',\n",
       " 'S803',\n",
       " 'S804',\n",
       " 'S805',\n",
       " 'S806',\n",
       " 'S807',\n",
       " 'S808',\n",
       " 'S809',\n",
       " 'S810',\n",
       " 'S811',\n",
       " 'S812',\n",
       " 'S813',\n",
       " 'S814',\n",
       " 'S815',\n",
       " 'S816',\n",
       " 'S817',\n",
       " 'S818',\n",
       " 'S819',\n",
       " 'S820',\n",
       " 'S821',\n",
       " 'S822',\n",
       " 'S823',\n",
       " 'S824',\n",
       " 'S825',\n",
       " 'S826',\n",
       " 'S827',\n",
       " 'S828',\n",
       " 'S829',\n",
       " 'S830',\n",
       " 'S831',\n",
       " 'S832',\n",
       " 'S833',\n",
       " 'S834',\n",
       " 'S835',\n",
       " 'S836',\n",
       " 'S837',\n",
       " 'S838',\n",
       " 'S839',\n",
       " 'S840',\n",
       " 'S841',\n",
       " 'S842',\n",
       " 'S843',\n",
       " 'S844',\n",
       " 'S845',\n",
       " 'S846',\n",
       " 'S847',\n",
       " 'S848',\n",
       " 'S849',\n",
       " 'S850',\n",
       " 'S851',\n",
       " 'S852',\n",
       " 'S853',\n",
       " 'S854',\n",
       " 'S855',\n",
       " 'S856',\n",
       " 'S857',\n",
       " 'S858',\n",
       " 'S859',\n",
       " 'S860',\n",
       " 'S861',\n",
       " 'S862',\n",
       " 'S863',\n",
       " 'S864',\n",
       " 'S865',\n",
       " 'S866',\n",
       " 'S867',\n",
       " 'S868',\n",
       " 'S869',\n",
       " 'S870',\n",
       " 'S871',\n",
       " 'S872',\n",
       " 'S873',\n",
       " 'S874',\n",
       " 'S875',\n",
       " 'S876',\n",
       " 'S877',\n",
       " 'S878',\n",
       " 'S879',\n",
       " 'S880',\n",
       " 'S881',\n",
       " 'S882',\n",
       " 'S883',\n",
       " 'S884',\n",
       " 'S885',\n",
       " 'S886',\n",
       " 'S887',\n",
       " 'S888',\n",
       " 'S889',\n",
       " 'S890',\n",
       " 'S891',\n",
       " 'S892',\n",
       " 'S893',\n",
       " 'S894',\n",
       " 'S895',\n",
       " 'S896',\n",
       " 'S897',\n",
       " 'S898',\n",
       " 'S899',\n",
       " 'S900',\n",
       " 'S901',\n",
       " 'S902',\n",
       " 'S903',\n",
       " 'S904',\n",
       " 'S905',\n",
       " 'S906',\n",
       " 'S907',\n",
       " 'S908',\n",
       " 'S909',\n",
       " 'S910',\n",
       " 'S911',\n",
       " 'S912',\n",
       " 'S913',\n",
       " 'S914',\n",
       " 'S915',\n",
       " 'S916',\n",
       " 'S917',\n",
       " 'S918',\n",
       " 'S919',\n",
       " 'S920',\n",
       " 'S921',\n",
       " 'S922',\n",
       " 'S923',\n",
       " 'S924',\n",
       " 'S925',\n",
       " 'S926',\n",
       " 'S927',\n",
       " 'S928',\n",
       " 'S929',\n",
       " 'S930',\n",
       " 'S931',\n",
       " 'S932',\n",
       " 'S933',\n",
       " 'S934',\n",
       " 'S935',\n",
       " 'S936',\n",
       " 'S937',\n",
       " 'S938',\n",
       " 'S939',\n",
       " 'S940',\n",
       " 'S941',\n",
       " 'S942',\n",
       " 'S943',\n",
       " 'S944',\n",
       " 'S945',\n",
       " 'S946',\n",
       " 'S947',\n",
       " 'S948',\n",
       " 'S949',\n",
       " 'S950',\n",
       " 'S951',\n",
       " 'S952',\n",
       " 'S953',\n",
       " 'S954',\n",
       " 'S955',\n",
       " 'S956',\n",
       " 'S957',\n",
       " 'S958',\n",
       " 'S959',\n",
       " 'S960',\n",
       " 'S961',\n",
       " 'S962',\n",
       " 'S963',\n",
       " 'S964',\n",
       " 'S965',\n",
       " 'S966',\n",
       " 'S967',\n",
       " 'S968',\n",
       " 'S969',\n",
       " 'S970',\n",
       " 'S971',\n",
       " 'S972',\n",
       " 'S973',\n",
       " 'S974',\n",
       " 'S975',\n",
       " 'S976',\n",
       " 'S977',\n",
       " 'S978',\n",
       " 'S979',\n",
       " 'S980',\n",
       " 'S981',\n",
       " 'S982',\n",
       " 'S983',\n",
       " 'S984',\n",
       " 'S985',\n",
       " 'S986',\n",
       " 'S987',\n",
       " 'S988',\n",
       " 'S989',\n",
       " 'S990',\n",
       " 'S991',\n",
       " 'S992',\n",
       " 'S993',\n",
       " 'S994',\n",
       " 'S995',\n",
       " 'S996',\n",
       " 'S997',\n",
       " 'S998',\n",
       " 'S999',\n",
       " 'S1000',\n",
       " ...]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "summ_ids"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6788c8fb",
   "metadata": {},
   "source": [
    "**Concatenating All Possible Nodes(Documents + Summary) of Knowledge Graphs** \\\n",
    "Note: Considering `User` nodes to construct the edge embedding is not useful because relationships(click, skip, gen_summ, gen_summ) is only associating to documents and summary node by user."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "3293c527",
   "metadata": {},
   "outputs": [],
   "source": [
    "nodes = news_ids + summ_ids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "c00e2443",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "891296"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(nodes)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0ee616a2",
   "metadata": {},
   "source": [
    "## Initialize `Context Matrix` in form of DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "f04f5c54",
   "metadata": {},
   "outputs": [],
   "source": [
    "context_matrix_df = pd.DataFrame(nodes, columns=['NodeID'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "f9e0f3b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "context_matrix_df['click'] = 0\n",
    "context_matrix_df['skip'] = 0\n",
    "context_matrix_df['gen_summ'] = 0\n",
    "context_matrix_df['summ_gen'] = 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "77034d69",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NodeID</th>\n",
       "      <th>click</th>\n",
       "      <th>skip</th>\n",
       "      <th>gen_summ</th>\n",
       "      <th>summ_gen</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>N10000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>N10001</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>N10002</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>N10003</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>N10004</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>891291</th>\n",
       "      <td>S777530</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>891292</th>\n",
       "      <td>S777531</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>891293</th>\n",
       "      <td>S777532</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>891294</th>\n",
       "      <td>S777533</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>891295</th>\n",
       "      <td>S777534</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>891296 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         NodeID  click  skip  gen_summ  summ_gen\n",
       "0        N10000      0     0         0         0\n",
       "1        N10001      0     0         0         0\n",
       "2        N10002      0     0         0         0\n",
       "3        N10003      0     0         0         0\n",
       "4        N10004      0     0         0         0\n",
       "...         ...    ...   ...       ...       ...\n",
       "891291  S777530      0     0         0         0\n",
       "891292  S777531      0     0         0         0\n",
       "891293  S777532      0     0         0         0\n",
       "891294  S777533      0     0         0         0\n",
       "891295  S777534      0     0         0         0\n",
       "\n",
       "[891296 rows x 5 columns]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "context_matrix_df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a8ef55a5",
   "metadata": {},
   "source": [
    "## Filling Up `Context Matrix` By Relation Frequency over Node (Document + Summary) using `Augmented Dataset`"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0bef2e9f",
   "metadata": {},
   "source": [
    "### Load Augmented Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "bfefd31b",
   "metadata": {},
   "outputs": [],
   "source": [
    "aug_df = pd.read_csv(\"./Data/synthetic-original-augmented.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "f14fcbbb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>UserID</th>\n",
       "      <th>Docs</th>\n",
       "      <th>Action</th>\n",
       "      <th>Summaries</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>U335175</td>\n",
       "      <td>['N41340', 'N55476', 'N103556', 'N27570', 'N83...</td>\n",
       "      <td>['click', 'click', 'click', 'click', 'click', ...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>U158889</td>\n",
       "      <td>['N84182', 'N72110', 'N15124', 'N85806', 'N114...</td>\n",
       "      <td>['skip', 'skip', 'skip', 'skip', 'skip', 'skip...</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>U22232</td>\n",
       "      <td>['N25386', 'N90820', 'N49011', 'N80134', 'N684...</td>\n",
       "      <td>['click', 'skip', 'skip', 'skip', 'skip', 'ski...</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>U32515</td>\n",
       "      <td>['N55509', 'N111634', 'N14872', 'N35463', 'N10...</td>\n",
       "      <td>['click', 'skip', 'skip', 'skip', 'skip', 'ski...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>U260848</td>\n",
       "      <td>['N113658', 'N58081', 'N99270', 'N72931', 'N91...</td>\n",
       "      <td>['skip', 'click', 'click', 'click', 'gen_summ'...</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>399988</th>\n",
       "      <td>U199504</td>\n",
       "      <td>['N43145', 'N93930', 'N28736', 'N122890', 'N86...</td>\n",
       "      <td>['click', 'skip', 'skip', 'click', 'skip', 'sk...</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>399989</th>\n",
       "      <td>U236246</td>\n",
       "      <td>['N37463', 'N53716', 'N84319', 'N20284', 'N224...</td>\n",
       "      <td>['click', 'click', 'click', 'click', 'skip', '...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>399990</th>\n",
       "      <td>U318251</td>\n",
       "      <td>['N91695', 'N38780', 'N78552', 'N79292', 'N933...</td>\n",
       "      <td>['skip', 'click', 'skip', 'skip', 'skip', 'ski...</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>399991</th>\n",
       "      <td>U215323</td>\n",
       "      <td>['N47207', 'N15248', 'N50002', 'N62450', 'N479...</td>\n",
       "      <td>['click', 'click', 'skip', 'click', 'click', '...</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>399992</th>\n",
       "      <td>U119029</td>\n",
       "      <td>['N79723', 'N74784', 'N67437', 'N53379', 'N123...</td>\n",
       "      <td>['skip', 'skip', 'skip', 'skip', 'skip', 'skip...</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>399993 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         UserID                                               Docs  \\\n",
       "0       U335175  ['N41340', 'N55476', 'N103556', 'N27570', 'N83...   \n",
       "1       U158889  ['N84182', 'N72110', 'N15124', 'N85806', 'N114...   \n",
       "2        U22232  ['N25386', 'N90820', 'N49011', 'N80134', 'N684...   \n",
       "3        U32515  ['N55509', 'N111634', 'N14872', 'N35463', 'N10...   \n",
       "4       U260848  ['N113658', 'N58081', 'N99270', 'N72931', 'N91...   \n",
       "...         ...                                                ...   \n",
       "399988  U199504  ['N43145', 'N93930', 'N28736', 'N122890', 'N86...   \n",
       "399989  U236246  ['N37463', 'N53716', 'N84319', 'N20284', 'N224...   \n",
       "399990  U318251  ['N91695', 'N38780', 'N78552', 'N79292', 'N933...   \n",
       "399991  U215323  ['N47207', 'N15248', 'N50002', 'N62450', 'N479...   \n",
       "399992  U119029  ['N79723', 'N74784', 'N67437', 'N53379', 'N123...   \n",
       "\n",
       "                                                   Action  Summaries  \n",
       "0       ['click', 'click', 'click', 'click', 'click', ...          2  \n",
       "1       ['skip', 'skip', 'skip', 'skip', 'skip', 'skip...          0  \n",
       "2       ['click', 'skip', 'skip', 'skip', 'skip', 'ski...          1  \n",
       "3       ['click', 'skip', 'skip', 'skip', 'skip', 'ski...          2  \n",
       "4       ['skip', 'click', 'click', 'click', 'gen_summ'...          4  \n",
       "...                                                   ...        ...  \n",
       "399988  ['click', 'skip', 'skip', 'click', 'skip', 'sk...          3  \n",
       "399989  ['click', 'click', 'click', 'click', 'skip', '...          2  \n",
       "399990  ['skip', 'click', 'skip', 'skip', 'skip', 'ski...          3  \n",
       "399991  ['click', 'click', 'skip', 'click', 'click', '...          7  \n",
       "399992  ['skip', 'skip', 'skip', 'skip', 'skip', 'skip...          3  \n",
       "\n",
       "[399993 rows x 4 columns]"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "aug_df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5d565087",
   "metadata": {},
   "source": [
    "### Iterate over `Augmented Dataset` to Filling Up the `Context Matrix`"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "af7c54dc",
   "metadata": {},
   "source": [
    "**Due to frequent r/w to `Context Matrix`, `NodeID` is required to be a key**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "483fdab6",
   "metadata": {},
   "outputs": [],
   "source": [
    "context_matrix_df.set_index('NodeID', inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "76e98c2c",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Processing Rows: 100%|██████████| 399993/399993 [44:13<00:00, 150.73it/s]   \n"
     ]
    }
   ],
   "source": [
    "import ast\n",
    "from tqdm import tqdm\n",
    "\n",
    "for idx, row in tqdm(aug_df.iterrows(), total=aug_df.shape[0], desc=\"Processing Rows\"):\n",
    "    doc_list = ast.literal_eval(row['Docs'])\n",
    "    action_list = ast.literal_eval(row['Action'])\n",
    "    assert(len(doc_list) == len(action_list))\n",
    "    for doc_idx, doc in enumerate(doc_list):\n",
    "        if action_list[doc_idx] == 'click':\n",
    "            context_matrix_df.at[doc, 'click'] += 1\n",
    "        elif action_list[doc_idx] == 'skip':\n",
    "            context_matrix_df.at[doc, 'skip'] += 1\n",
    "        elif action_list[doc_idx] == 'gen_summ':\n",
    "            context_matrix_df.at[doc, 'gen_summ'] += 1\n",
    "        elif action_list[doc_idx] == 'summ_gen':\n",
    "            context_matrix_df.at[doc, 'summ_gen'] += 1\n",
    "        else:\n",
    "            print('ERROR !')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "da4367c5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>click</th>\n",
       "      <th>skip</th>\n",
       "      <th>gen_summ</th>\n",
       "      <th>summ_gen</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NodeID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>N10000</th>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>N10001</th>\n",
       "      <td>403</td>\n",
       "      <td>243</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>N10002</th>\n",
       "      <td>12</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>N10003</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>N10004</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>S777530</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>S777531</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>S777532</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>S777533</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>S777534</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>891296 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         click  skip  gen_summ  summ_gen\n",
       "NodeID                                  \n",
       "N10000       4     0         0         0\n",
       "N10001     403   243         0         0\n",
       "N10002      12     0         0         0\n",
       "N10003       2     0         0         0\n",
       "N10004       1     0         0         0\n",
       "...        ...   ...       ...       ...\n",
       "S777530      0     0         0         1\n",
       "S777531      0     0         0         1\n",
       "S777532      0     0         0         1\n",
       "S777533      0     0         0         1\n",
       "S777534      0     0         0         1\n",
       "\n",
       "[891296 rows x 4 columns]"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "context_matrix_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "72cca1c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "context_matrix_df.to_csv('./Data/edge_context_matrix.csv') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d2d801ec",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
