{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "0177800e-b92c-4c46-bd80-d3d454c37405",
   "metadata": {},
   "source": [
    "<hr>\n",
    "\n",
    "***Version: 1001.1792024.qut.cs.tnl***\n",
    "\n",
    "***Sk Tanzir Mehedi, PhD Student, QUT***\n",
    "\n",
    "***Supervisory Team: Prof. Raja Jurdak & Dr Chadni Islam***\n",
    "<hr>"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dc475838-0464-482c-bb05-10b61074a39e",
   "metadata": {},
   "source": [
    "**----Start of Step 7 Analysis----**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "be7af54c-3041-4a8d-8049-ee679c08acb0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(Index(['package', 'downloads', 'version'], dtype='object'),\n",
       " Index(['Benign Package Name', 'Benign Package Version'], dtype='object'))"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "csv_file_path = \"D:/Final Version/Step 6 MostPopularBenignPackages/mostPopularBenignPackages.csv\"\n",
    "csv_df = pd.read_csv(csv_file_path)\n",
    "\n",
    "top_500_csv_df = csv_df.nlargest(500, 'downloads')\n",
    "top_500_csv_df.rename(columns={top_500_csv_df.columns[0]: 'Benign Package Name', top_500_csv_df.columns[2]: 'Benign Package Version'}, inplace=True)\n",
    "top_500_csv_df.drop(columns=['downloads'], inplace=True)\n",
    "\n",
    "excel_file_path = \"D:/Final Version/Step 5 ConsiderableBenignPackages/ConsiderableUniqueBenignPackagesWithVersions.xlsx\"\n",
    "excel_df = pd.read_excel(excel_file_path)\n",
    "\n",
    "csv_columns = csv_df.columns\n",
    "excel_columns = excel_df.columns\n",
    "\n",
    "csv_columns, excel_columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "0471ddf1-f700-4f16-bbc0-9e4c8447c6a1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Benign Package Name</th>\n",
       "      <th>Benign Package Version</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>botocore</td>\n",
       "      <td>1.35.21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>charset-normalizer</td>\n",
       "      <td>3.3.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>idna</td>\n",
       "      <td>3.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>packaging</td>\n",
       "      <td>24.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>grpcio-status</td>\n",
       "      <td>1.66.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>495</th>\n",
       "      <td>azure-mgmt-redis</td>\n",
       "      <td>14.4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>496</th>\n",
       "      <td>opt-einsum</td>\n",
       "      <td>3.3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>497</th>\n",
       "      <td>nvidia-nccl-cu12</td>\n",
       "      <td>2.23.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>498</th>\n",
       "      <td>nh3</td>\n",
       "      <td>0.2.18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>499</th>\n",
       "      <td>apache-airflow-providers-databricks</td>\n",
       "      <td>6.9.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>500 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                     Benign Package Name Benign Package Version\n",
       "0                               botocore                1.35.21\n",
       "1                     charset-normalizer                  3.3.2\n",
       "2                                   idna                    3.1\n",
       "3                              packaging                   24.1\n",
       "4                          grpcio-status                 1.66.1\n",
       "..                                   ...                    ...\n",
       "495                     azure-mgmt-redis                 14.4.0\n",
       "496                           opt-einsum                  3.3.0\n",
       "497                     nvidia-nccl-cu12                 2.23.4\n",
       "498                                  nh3                 0.2.18\n",
       "499  apache-airflow-providers-databricks                  6.9.0\n",
       "\n",
       "[500 rows x 2 columns]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "top_500_csv_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "4d6154f8-e82f-4771-a043-a4bfc123dacb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Benign Package Name</th>\n",
       "      <th>Benign Package Version</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>X11Client</td>\n",
       "      <td>1.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>cent</td>\n",
       "      <td>5.0.0b1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1337x</td>\n",
       "      <td>1.2.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>L1test</td>\n",
       "      <td>3.1.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>sinch</td>\n",
       "      <td>1.0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6895</th>\n",
       "      <td>yelp</td>\n",
       "      <td>1.0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6896</th>\n",
       "      <td>ygame-engine</td>\n",
       "      <td>0.1.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6897</th>\n",
       "      <td>j-ython</td>\n",
       "      <td>0.0.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6898</th>\n",
       "      <td>pytest-zafira</td>\n",
       "      <td>1.0.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6899</th>\n",
       "      <td>zoom</td>\n",
       "      <td>0.0.15</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>6900 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Benign Package Name Benign Package Version\n",
       "0              X11Client                    1.4\n",
       "1                   cent                5.0.0b1\n",
       "2                  1337x                  1.2.6\n",
       "3                 L1test                  3.1.1\n",
       "4                  sinch                  1.0.0\n",
       "...                  ...                    ...\n",
       "6895                yelp                  1.0.2\n",
       "6896        ygame-engine                  0.1.4\n",
       "6897             j-ython                  0.0.1\n",
       "6898       pytest-zafira                  1.0.3\n",
       "6899                zoom                 0.0.15\n",
       "\n",
       "[6900 rows x 2 columns]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "excel_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "3338f8a0-9940-4648-bf42-697350842f66",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Benign Package Name</th>\n",
       "      <th>Benign Package Version</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>X11Client</td>\n",
       "      <td>1.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>cent</td>\n",
       "      <td>5.0.0b1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1337x</td>\n",
       "      <td>1.2.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>L1test</td>\n",
       "      <td>3.1.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>sinch</td>\n",
       "      <td>1.0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7395</th>\n",
       "      <td>azure-mgmt-redis</td>\n",
       "      <td>14.4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7396</th>\n",
       "      <td>opt-einsum</td>\n",
       "      <td>3.3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7397</th>\n",
       "      <td>nvidia-nccl-cu12</td>\n",
       "      <td>2.23.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7398</th>\n",
       "      <td>nh3</td>\n",
       "      <td>0.2.18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7399</th>\n",
       "      <td>apache-airflow-providers-databricks</td>\n",
       "      <td>6.9.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>7400 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                      Benign Package Name Benign Package Version\n",
       "0                               X11Client                    1.4\n",
       "1                                    cent                5.0.0b1\n",
       "2                                   1337x                  1.2.6\n",
       "3                                  L1test                  3.1.1\n",
       "4                                   sinch                  1.0.0\n",
       "...                                   ...                    ...\n",
       "7395                     azure-mgmt-redis                 14.4.0\n",
       "7396                           opt-einsum                  3.3.0\n",
       "7397                     nvidia-nccl-cu12                 2.23.4\n",
       "7398                                  nh3                 0.2.18\n",
       "7399  apache-airflow-providers-databricks                  6.9.0\n",
       "\n",
       "[7400 rows x 2 columns]"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "combined_df = pd.concat([excel_df, top_500_csv_df], ignore_index=True)\n",
    "combined_excel_path = 'FinalSelectedBenignPackages.xlsx'\n",
    "combined_df.to_excel(combined_excel_path, index=False)\n",
    "combined_df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7ade526d-a560-469c-8a52-51cdca1352bb",
   "metadata": {},
   "source": [
    "**----End of Step 7 Analysis----**"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
