{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "2ae2219d-fc26-448c-9332-40bca7cc4a99",
   "metadata": {},
   "source": [
    "<hr>\n",
    "\n",
    "***Version: 1001.1592024.qut.cs.tnl***\n",
    "\n",
    "***Sk Tanzir Mehedi, PhD Student, QUT***\n",
    "\n",
    "***Supervisory Team: Prof. Raja Jurdak & Dr Chadni Islam***\n",
    "<hr>"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c6624531-b17d-4861-8358-5c471b772abb",
   "metadata": {},
   "source": [
    "**----Start of Step 3----**"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e1d969cf-4f51-4a2e-907f-7d72922b1dc8",
   "metadata": {},
   "source": [
    "## Private API (V1) Testing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "25a6727f-34f1-43a5-b985-696ec2de9775",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "  \"ok\": false,\n",
      "  \"issues\": {\n",
      "    \"vulnerabilities\": [\n",
      "      {\n",
      "        \"id\": \"SNYK-PYTHON-RSA-40541\",\n",
      "        \"url\": \"https://snyk.io/vuln/SNYK-PYTHON-RSA-40541\",\n",
      "        \"title\": \"Timing Attack\",\n",
      "        \"type\": \"vuln\",\n",
      "        \"description\": \"## Overview\\n[`rsa`](https://pypi.python.org/pypi/rsa) is a Pure-Python RSA implementation.\\n\\nAffected versions of this package are vulnerable to Timing attacks.\\n\\n## References\\n- [GitHub Issue](https://github.com/sybrenstuvel/python-rsa/issues/19)\\n- [GitHub Commit](https://github.com/sybrenstuvel/python-rsa/commit/2310b34bdb530e0bad793d42f589c9f848ff181b)\\n\",\n",
      "        \"functions\": [],\n",
      "        \"from\": [\n",
      "          \"rsa@3.3\"\n",
      "        ],\n",
      "        \"package\": \"rsa\",\n",
      "        \"version\": \"3.3\",\n",
      "        \"severity\": \"medium\",\n",
      "        \"exploitMaturity\": \"no-known-exploit\",\n",
      "        \"language\": \"python\",\n",
      "        \"packageManager\": \"pip\",\n",
      "        \"semver\": {\n",
      "          \"vulnerable\": [\n",
      "            \"[3.0,3.4.0)\"\n",
      "          ]\n",
      "        },\n",
      "        \"publicationTime\": \"2013-11-15T02:34:45.265000Z\",\n",
      "        \"disclosureTime\": \"2013-11-15T02:34:45.265000Z\",\n",
      "        \"isUpgradable\": false,\n",
      "        \"isPatchable\": false,\n",
      "        \"isPinnable\": true,\n",
      "        \"identifiers\": {\n",
      "          \"CVE\": [],\n",
      "          \"CWE\": [\n",
      "            \"CWE-208\"\n",
      "          ]\n",
      "        },\n",
      "        \"credit\": [\n",
      "          \"Manuel Aude Morales\"\n",
      "        ],\n",
      "        \"CVSSv3\": \"CVSS:3.0/AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:N/A:N\",\n",
      "        \"cvssScore\": 5.3,\n",
      "        \"patches\": [],\n",
      "        \"upgradePath\": []\n",
      "      },\n",
      "      {\n",
      "        \"id\": \"SNYK-PYTHON-RSA-40542\",\n",
      "        \"url\": \"https://snyk.io/vuln/SNYK-PYTHON-RSA-40542\",\n",
      "        \"title\": \"Authentication Bypass\",\n",
      "        \"type\": \"vuln\",\n",
      "        \"description\": \"## Overview\\n[`rsa`](https://pypi.python.org/pypi/rsa) is a Pure-Python RSA implementation.\\n\\nAffected versions of this package are vulnerable to Authentication Bypass due to not implementing authentication encryption or use MACs to validate messages before decrypting public key encrypted messages.\\n\\n## References\\n- [GitHub Issue](https://github.com/sybrenstuvel/python-rsa/issues/13)\\n- [GitHub Commit](https://github.com/sybrenstuvel/python-rsa/commit/1681a0b2f84a4a252c71b87de870a2816de06fdf)\\n\",\n",
      "        \"functions\": [],\n",
      "        \"from\": [\n",
      "          \"rsa@3.3\"\n",
      "        ],\n",
      "        \"package\": \"rsa\",\n",
      "        \"version\": \"3.3\",\n",
      "        \"severity\": \"high\",\n",
      "        \"exploitMaturity\": \"no-known-exploit\",\n",
      "        \"language\": \"python\",\n",
      "        \"packageManager\": \"pip\",\n",
      "        \"semver\": {\n",
      "          \"vulnerable\": [\n",
      "            \"[3.0,3.4)\"\n",
      "          ]\n",
      "        },\n",
      "        \"publicationTime\": \"2012-12-07T03:15:00.052000Z\",\n",
      "        \"disclosureTime\": \"2012-12-07T03:15:00.052000Z\",\n",
      "        \"isUpgradable\": false,\n",
      "        \"isPatchable\": false,\n",
      "        \"isPinnable\": true,\n",
      "        \"identifiers\": {\n",
      "          \"CVE\": [],\n",
      "          \"CWE\": [\n",
      "            \"CWE-287\"\n",
      "          ]\n",
      "        },\n",
      "        \"credit\": [\n",
      "          \"Sergio Lerner\"\n",
      "        ],\n",
      "        \"CVSSv3\": \"CVSS:3.0/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N\",\n",
      "        \"cvssScore\": 7.5,\n",
      "        \"patches\": [],\n",
      "        \"upgradePath\": []\n",
      "      }\n",
      "    ],\n",
      "    \"licenses\": []\n",
      "  },\n",
      "  \"dependencyCount\": 2,\n",
      "  \"org\": {\n",
      "    \"name\": \"gitphill\",\n",
      "    \"id\": \"229b76f3-802c-4553-aa1d-01d4d86f7f61\"\n",
      "  },\n",
      "  \"licensesPolicy\": null,\n",
      "  \"packageManager\": \"pip\"\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "from urllib.request import Request, urlopen\n",
    "\n",
    "headers = {\n",
    "  'Content-Type': 'application/json; charset=utf-8',\n",
    "  'Authorization': 'd7caa64a-7e1b-489f-ba42-a401a375445e'\n",
    "}\n",
    "\n",
    "# Create the request object with the URL and headers\n",
    "request = Request('https://private-32fcf5-snyk.apiary-mock.com/v1/test/pip/rsa/3.3?org=d7caa64a-7e1b-489f-ba42-a401a375445e', headers=headers)\n",
    "\n",
    "# Send the request and read the response\n",
    "response_body = urlopen(request).read()\n",
    "\n",
    "# Convert the response from bytes to a string (optional, depending on what you want to do next)\n",
    "response_body = response_body.decode('utf-8')\n",
    "\n",
    "print(response_body)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b1c01a94-2b87-4b95-a42e-15a661ea0f43",
   "metadata": {},
   "source": [
    "## Customized API (V1) Testing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "39aee954-077e-419c-84eb-be8d01138db4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Error for version 999.0.4: 403\n"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "import json  # Import the json module for formatting\n",
    "\n",
    "#package_name = \"rsa\" # Woking fine\n",
    "#versions = [\"3.3\"] # Working fine\n",
    "\n",
    "package_name = \"10Cent11\" # But, when I insert any malicious package name (e.g., 10Cent11), I didn't get any data. \n",
    "versions = [\"999.0.4\"]  # But, when I insert also version form the specific malicious pacakge version (e.g., 999.0.4), I didn't get any data.\n",
    "\n",
    "for version in versions:\n",
    "    url = f\"https://api.snyk.io/v1/test/pip/{package_name}/{version}?topLevelOnly=true\"\n",
    "    headers = {\n",
    "        \"Authorization\": \"eac5086d-b99f-4fc4-b337-a578806c70b8\",  # Replace with your Snyk API token\n",
    "    }\n",
    "    \n",
    "    response = requests.get(url, headers=headers)\n",
    "    \n",
    "    if response.status_code == 200:\n",
    "        data = response.json()\n",
    "        print(f\"Results for version {version}:\")\n",
    "        print(json.dumps(data, indent=4))  # Pretty-print the JSON response\n",
    "    else:\n",
    "        print(f\"Error for version {version}: {response.status_code}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bbd81cd7-bb87-4d35-ac1a-7042241a809b",
   "metadata": {},
   "source": [
    "## Customized API (REST) Testing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "caf0465a-eda2-462b-8dbd-40c5110eab36",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "REST API Response for ccryptofeed v1.0:\n",
      "{\n",
      "    \"jsonapi\": {\n",
      "        \"version\": \"1.0\"\n",
      "    },\n",
      "    \"data\": [\n",
      "        {\n",
      "            \"id\": \"SNYK-PYTHON-CCRYPTOFEED-6243619\",\n",
      "            \"type\": \"issue\",\n",
      "            \"attributes\": {\n",
      "                \"key\": \"SNYK-PYTHON-CCRYPTOFEED-6243619\",\n",
      "                \"title\": \"Race Condition\",\n",
      "                \"type\": \"package_vulnerability\",\n",
      "                \"created_at\": \"2024-02-13T12:14:05.571744Z\",\n",
      "                \"updated_at\": \"2024-03-06T14:09:44.687641Z\",\n",
      "                \"description\": \"## Overview\\n\\nAffected versions of this package are vulnerable to Race Condition. This issue aroises during the resetting of feeds with multiple connections, leading to potential unauthorized access or data corruption.\\n## Remediation\\nUpgrade `ccryptofeed` to version 2.2.3 or higher.\\n## References\\n- [GitHub Commit](https://github.com/bmoscon/cryptofeed/commit/d6ce63b9a392b42e4ea936007e82da94f7566401)\\n- [GitHub PR](https://github.com/bmoscon/cryptofeed/pull/851)\\n\",\n",
      "                \"problems\": [\n",
      "                    {\n",
      "                        \"id\": \"CWE-362\",\n",
      "                        \"source\": \"CWE\"\n",
      "                    },\n",
      "                    {\n",
      "                        \"id\": \"PVE-2024-63280\",\n",
      "                        \"source\": \"PVE\"\n",
      "                    }\n",
      "                ],\n",
      "                \"coordinates\": [\n",
      "                    {\n",
      "                        \"remedies\": [\n",
      "                            {\n",
      "                                \"type\": \"indeterminate\",\n",
      "                                \"description\": \"Upgrade the package version to 2.2.3 to fix this vulnerability\",\n",
      "                                \"details\": {\n",
      "                                    \"upgrade_package\": \"2.2.3\"\n",
      "                                }\n",
      "                            }\n",
      "                        ],\n",
      "                        \"representation\": [\n",
      "                            \"[,2.2.3)\"\n",
      "                        ]\n",
      "                    }\n",
      "                ],\n",
      "                \"severities\": [\n",
      "                    {\n",
      "                        \"source\": \"Snyk\",\n",
      "                        \"level\": \"medium\",\n",
      "                        \"score\": 6.5,\n",
      "                        \"vector\": \"CVSS:3.1/AV:N/AC:H/PR:N/UI:N/S:U/C:L/I:N/A:H/E:P\"\n",
      "                    }\n",
      "                ],\n",
      "                \"effective_severity_level\": \"medium\",\n",
      "                \"slots\": {\n",
      "                    \"disclosure_time\": \"2024-02-01T06:29:44.189000Z\",\n",
      "                    \"exploit\": \"Proof of Concept\",\n",
      "                    \"publication_time\": \"2024-02-13T12:14:05.728409Z\",\n",
      "                    \"references\": [\n",
      "                        {\n",
      "                            \"url\": \"https://github.com/bmoscon/cryptofeed/commit/d6ce63b9a392b42e4ea936007e82da94f7566401\",\n",
      "                            \"title\": \"GitHub Commit\"\n",
      "                        },\n",
      "                        {\n",
      "                            \"url\": \"https://github.com/bmoscon/cryptofeed/pull/851\",\n",
      "                            \"title\": \"GitHub PR\"\n",
      "                        }\n",
      "                    ]\n",
      "                }\n",
      "            }\n",
      "        },\n",
      "        {\n",
      "            \"id\": \"SNYK-PYTHON-CCRYPTOFEED-3318803\",\n",
      "            \"type\": \"issue\",\n",
      "            \"attributes\": {\n",
      "                \"key\": \"SNYK-PYTHON-CCRYPTOFEED-3318803\",\n",
      "                \"title\": \"Malicious Package\",\n",
      "                \"type\": \"package_vulnerability\",\n",
      "                \"created_at\": \"2023-02-12T15:27:28.460488Z\",\n",
      "                \"updated_at\": \"2024-03-06T13:55:22.503141Z\",\n",
      "                \"description\": \"## Overview\\nccryptofeed is a malicious package. \\nThis is a \\\"dependency confusion\\\" package, which means the package name is based on existing repositories, namespaces, or components. It aims to trick users into downloading the package which contains malicious code.\\n## Remediation\\nAvoid using all malicious instances of the `ccryptofeed` package.\\n## References\\n- [Medium Blog](https://medium.com/checkmarx-security/17-malicious-python-packages-targeting-selenium-users-to-steal-crypto-8d24628ec656)\\n\",\n",
      "                \"problems\": [\n",
      "                    {\n",
      "                        \"id\": \"CWE-506\",\n",
      "                        \"source\": \"CWE\"\n",
      "                    }\n",
      "                ],\n",
      "                \"coordinates\": [\n",
      "                    {\n",
      "                        \"remedies\": [\n",
      "                            {\n",
      "                                \"type\": \"indeterminate\",\n",
      "                                \"description\": \"Upgrade the package version to  to fix this vulnerability\",\n",
      "                                \"details\": {\n",
      "                                    \"upgrade_package\": \"\"\n",
      "                                }\n",
      "                            }\n",
      "                        ],\n",
      "                        \"representation\": [\n",
      "                            \"[0,]\"\n",
      "                        ]\n",
      "                    }\n",
      "                ],\n",
      "                \"severities\": [\n",
      "                    {\n",
      "                        \"source\": \"Snyk\",\n",
      "                        \"level\": \"critical\",\n",
      "                        \"score\": 9.8,\n",
      "                        \"vector\": \"CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H/E:H\"\n",
      "                    }\n",
      "                ],\n",
      "                \"effective_severity_level\": \"critical\",\n",
      "                \"slots\": {\n",
      "                    \"disclosure_time\": \"2023-02-12T15:10:00Z\",\n",
      "                    \"exploit\": \"High\",\n",
      "                    \"publication_time\": \"2023-02-12T15:26:23.512986Z\",\n",
      "                    \"references\": [\n",
      "                        {\n",
      "                            \"url\": \"https://medium.com/checkmarx-security/17-malicious-python-packages-targeting-selenium-users-to-steal-crypto-8d24628ec656\",\n",
      "                            \"title\": \"Medium Blog\"\n",
      "                        }\n",
      "                    ]\n",
      "                }\n",
      "            }\n",
      "        }\n",
      "    ],\n",
      "    \"links\": {\n",
      "        \"self\": \"/orgs/76e80137-b6a6-40c6-923f-098c2c46ad4a/packages/pkg%3Apypi%2Fccryptofeed%401.0/issues?version=2024-09-04&limit=1000&offset=0\"\n",
      "    },\n",
      "    \"meta\": {\n",
      "        \"package\": {\n",
      "            \"name\": \"ccryptofeed\",\n",
      "            \"type\": \"pypi\",\n",
      "            \"url\": \"pkg:pypi/ccryptofeed@1.0\",\n",
      "            \"version\": \"1.0\"\n",
      "        }\n",
      "    }\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "import json\n",
    "\n",
    "# Replace with your actual Snyk API token\n",
    "API_TOKEN = '5e2814d8-8042-4cd9-b570-1ebf28f332db'\n",
    "\n",
    "# Set up the headers for authorization\n",
    "headers = {\n",
    "    'Authorization': f'token {API_TOKEN}',\n",
    "    'Content-Type': 'application/json'\n",
    "}\n",
    "\n",
    "# REST API URL to check package vulnerabilities\n",
    "def check_vulnerabilities_rest(org_id, package_name, version):\n",
    "    url = f\"https://api.snyk.io/rest/orgs/{org_id}/packages/pkg%3Apypi%2F{package_name}%40{version}/issues?version=2024-09-04\"\n",
    "    response = requests.get(url, headers=headers)\n",
    "    \n",
    "    if response.status_code == 200:\n",
    "        print(f\"REST API Response for {package_name} v{version}:\")\n",
    "        data = response.json()\n",
    "        print(json.dumps(data, indent=4))  # Pretty-print the JSON response\n",
    "    else:\n",
    "        print(f\"Failed to fetch data from REST API. Status Code: {response.status_code}\")\n",
    "        print(response.text)\n",
    "\n",
    "\n",
    "# Example usage\n",
    "if __name__ == \"__main__\":\n",
    "    # Example package and version\n",
    "    package_name = \"ccryptofeed\"\n",
    "    version = \"1.0\"\n",
    "    \n",
    "    # Replace with your actual organization ID from Snyk\n",
    "    org_id = \"76e80137-b6a6-40c6-923f-098c2c46ad4a\"\n",
    "    \n",
    "    # Call the REST API\n",
    "    check_vulnerabilities_rest(org_id, package_name, version)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0030c36f-c7c2-4b57-b955-7e9f9dd26472",
   "metadata": {},
   "source": [
    "## Customized API (V1 vs REST) Testing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "59e12ee2-eac0-47f6-8261-8f59e3e0a242",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Failed to fetch data from V1 API. Status Code: 403\n",
      "{\"error\":true,\"message\":\"The org it18612 (30ed544d-6aea-4d2d-aa56-56508978d909) is not entitled for api access. Please upgrade your plan to access this capability\",\"cliMessage\":\"The org it18612 (30ed544d-6aea-4d2d-aa56-56508978d909) is not entitled for api access. Please upgrade your plan to access this capability\",\"userMessage\":\"The org it18612 (30ed544d-6aea-4d2d-aa56-56508978d909) is not entitled for api access. Please upgrade your plan to access this capability\"}\n",
      "-------------------------------------------\n",
      "Failed to fetch data from REST API. Status Code: 403\n",
      "{\"jsonapi\":{\"version\":\"1.0\"},\"errors\":[{\"status\":\"403\",\"detail\":\"Forbidden\",\"id\":\"007cce05-acea-44c8-8697-fb6b6d43e6ba\",\"title\":\"Forbidden\",\"meta\":{\"created\":\"2024-09-04T15:29:34.437851907Z\"}}]}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "import json\n",
    "\n",
    "# Replace with your actual Snyk API token\n",
    "API_TOKEN = '6c213dae-465a-4b54-af34-b03837916962'\n",
    "\n",
    "# Set up the headers for authorization\n",
    "headers = {\n",
    "    'Authorization': f'token {API_TOKEN}',\n",
    "    'Content-Type': 'application/json'\n",
    "}\n",
    "\n",
    "# V1 API URL to check package vulnerabilities (top-level only)\n",
    "def check_vulnerabilities_v1(package_name, version):\n",
    "    url = f\"https://api.snyk.io/api/v1/test/pip/{package_name}/{version}?topLevelOnly=true\"\n",
    "    response = requests.get(url, headers=headers)\n",
    "    \n",
    "    if response.status_code == 200:\n",
    "        print(f\"V1 API Response for {package_name} v{version}:\")\n",
    "        data = response.json()\n",
    "        print(json.dumps(data, indent=4))  # Pretty-print the JSON response\n",
    "    else:\n",
    "        print(f\"Failed to fetch data from V1 API. Status Code: {response.status_code}\")\n",
    "        print(response.text)\n",
    "\n",
    "\n",
    "# REST API URL to check package vulnerabilities\n",
    "def check_vulnerabilities_rest(org_id, package_name, version):\n",
    "    url = f\"https://api.snyk.io/rest/orgs/{org_id}/packages/pkg%3Apypi%2F{package_name}%40{version}/issues?version=2024-09-04\"\n",
    "    response = requests.get(url, headers=headers)\n",
    "    \n",
    "    if response.status_code == 200:\n",
    "        print(f\"REST API Response for {package_name} v{version}:\")\n",
    "        data = response.json()\n",
    "        print(json.dumps(data, indent=4))  # Pretty-print the JSON response\n",
    "    else:\n",
    "        print(f\"Failed to fetch data from REST API. Status Code: {response.status_code}\")\n",
    "        print(response.text)\n",
    "\n",
    "\n",
    "# Example usage\n",
    "if __name__ == \"__main__\":\n",
    "    # Example package and version\n",
    "    package_name = \"10Cent11\"\n",
    "    version = \"999.0.4\"\n",
    "    \n",
    "    # Replace with your actual organization ID from Snyk\n",
    "    org_id = \"30ed544d-6aea-4d2d-aa56-56508978d909\"\n",
    "    \n",
    "    # Call the V1 API\n",
    "    check_vulnerabilities_v1(package_name, version)\n",
    "\n",
    "    print(f\"-------------------------------------------\")\n",
    "    \n",
    "    # Call the REST API\n",
    "    check_vulnerabilities_rest(org_id, package_name, version)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ad84c8a0-5a67-487d-9b31-41f6cc26a262",
   "metadata": {},
   "source": [
    "## Implementation with API (V1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "97fcbda3-6e53-48c8-bfeb-1f0f09be77b1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processing: 100.00% Complete\n",
      "Malicious Package Detected: 10Cent10 version 999.0.4\n",
      "\n",
      "Malicious Package Detected: 11cent version 999.0.0\n",
      "\n",
      "Processing Complete.\n"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "import csv\n",
    "import pandas as pd\n",
    "from concurrent.futures import ThreadPoolExecutor, as_completed\n",
    "import sys\n",
    "\n",
    "def fetch_vulnerability_data(package_name, version):\n",
    "    url = f\"https://api.snyk.io/v1/test/pip/{package_name}/{version}?topLevelOnly=true\"\n",
    "    \n",
    "    headers = {\n",
    "        'Content-Type': 'application/json',\n",
    "        'Authorization': '5e2814d8-8042-4cd9-b570-1ebf28f332db'\n",
    "    }\n",
    "    \n",
    "    response = requests.get(url, headers=headers)\n",
    "    if response.status_code == 200:\n",
    "        return package_name, version, response.json()\n",
    "    else:\n",
    "        return package_name, version, None\n",
    "\n",
    "def gather_assigners(data_list):\n",
    "    assigners = set()\n",
    "    for data in data_list:\n",
    "        if data:\n",
    "            for vulnerability in data['issues']['vulnerabilities']:\n",
    "                for cvss_detail in vulnerability.get('cvssDetails', []):\n",
    "                    assigners.add(cvss_detail.get('assigner', ''))\n",
    "    return assigners\n",
    "\n",
    "def process_vulnerability_data(package_name, version, data, assigners, cvss_vector_index, writer):\n",
    "    if data and not data.get('ok', True):\n",
    "        # Print package name and version if it has vulnerabilities\n",
    "        print(f\"\\nMalicious Package Detected: {package_name} version {version}\")\n",
    "        \n",
    "        for vulnerability in data['issues']['vulnerabilities']:\n",
    "            row = [\n",
    "                package_name,\n",
    "                version,\n",
    "                vulnerability.get('language', ''),\n",
    "                data.get('packageManager', ''),\n",
    "                ', '.join(vulnerability.get('semver', {}).get('vulnerable', [])),\n",
    "                vulnerability.get('id', ''),\n",
    "                vulnerability.get('title', ''),\n",
    "                vulnerability.get('severity', ''),\n",
    "                vulnerability.get('cvssScore', ''),\n",
    "                vulnerability.get('CVSSv3', ''),\n",
    "                data.get('dependencyCount', ''),\n",
    "                ', '.join(vulnerability.get('identifiers', {}).get('CVE', [])),\n",
    "                ', '.join(vulnerability.get('identifiers', {}).get('CWE', [])),\n",
    "                vulnerability.get('exploitMaturity', ''),\n",
    "                vulnerability.get('disclosureTime', ''),\n",
    "                vulnerability.get('publicationTime', ''),\n",
    "                vulnerability.get('isUpgradable', ''),\n",
    "                vulnerability.get('isPatchable', ''),\n",
    "                vulnerability.get('isPinnable', ''),\n",
    "                ', '.join(vulnerability.get('credit', [])),\n",
    "                ', '.join(data.get('licenses', [])),\n",
    "                data.get('licensesPolicy', {}),\n",
    "                vulnerability.get('description', '').replace('\\n', ' ').replace('\\r', ''),\n",
    "                vulnerability.get('url', ''),\n",
    "                data.get('org', {}).get('id', ''),\n",
    "                data.get('org', {}).get('name', '')\n",
    "            ]\n",
    "            \n",
    "            assigner_data = []\n",
    "            for assigner in assigners:\n",
    "                cvss_detail = next((detail for detail in vulnerability.get('cvssDetails', []) if detail.get('assigner') == assigner), {})\n",
    "                assigner_data.extend([\n",
    "                    cvss_detail.get('assigner', ''),\n",
    "                    cvss_detail.get('severity', ''),\n",
    "                    cvss_detail.get('cvssV3BaseScore', ''),\n",
    "                    cvss_detail.get('modificationTime', ''),\n",
    "                    cvss_detail.get('cvssV3Vector', '')\n",
    "                ])\n",
    "            row[cvss_vector_index:cvss_vector_index] = assigner_data\n",
    "            writer.writerow(row)\n",
    "\n",
    "def process_packages_from_excel(input_excel, output_csv):\n",
    "    df = pd.read_excel(input_excel)\n",
    "\n",
    "    # Prepare headers once\n",
    "    headers_list = [\n",
    "        \"Package Name\", \"Version\", \"Language\", \"Package Manager\", \"Vulnerable SemVer Range\", \n",
    "        \"Vulnerability ID\", \"Vulnerability\", \"Severity\", \"CVSS Score\", \"CVSSv3 Vector\", \n",
    "        \"Dependency Count\", \"CVE\", \"CWE\", \"Exploit Maturity\", \"Disclosure Time\", \"Publication Time\", \n",
    "        \"Is Upgradable\", \"Is Patchable\", \"Is Pinnable\", \"Credit\", \"Licenses\", \n",
    "        \"Licenses Policy\", \"Description\", \"URL\", \"Organization ID\", \"Organization Name\"\n",
    "    ]\n",
    "    cvss_vector_index = headers_list.index(\"CVSSv3 Vector\") + 1\n",
    "\n",
    "    data_list = []\n",
    "\n",
    "    total_packages = len(df)\n",
    "    progress = 0\n",
    "\n",
    "    # Fetch data in parallel but maintain order\n",
    "    with ThreadPoolExecutor(max_workers=10) as executor:\n",
    "        futures = [\n",
    "            executor.submit(fetch_vulnerability_data, row['Package Name'], row['Package Version']) \n",
    "            for index, row in df.iterrows()\n",
    "        ]\n",
    "\n",
    "        for future in as_completed(futures):\n",
    "            package_name, version, data = future.result()\n",
    "            data_list.append((package_name, version, data))\n",
    "            \n",
    "            # Update and print progress\n",
    "            progress += 1\n",
    "            percentage = (progress / total_packages) * 100\n",
    "            sys.stdout.write(f\"\\rProcessing: {percentage:.2f}% Complete\")\n",
    "            sys.stdout.flush()\n",
    "\n",
    "    # Ensure data_list is in the original order\n",
    "    data_list.sort(key=lambda x: df[(df['Package Name'] == x[0]) & (df['Package Version'] == x[1])].index[0])\n",
    "\n",
    "    # Gather all assigners from the collected data\n",
    "    assigners = gather_assigners([data for _, _, data in data_list])\n",
    "\n",
    "    # Add assigner-specific columns after \"CVSSv3 Vector\"\n",
    "    assigner_columns = []\n",
    "    for assigner in assigners:\n",
    "        assigner_columns.extend([\n",
    "            f\"CVSS Assigner - {assigner}\", \n",
    "            f\"CVSS Severity - {assigner}\", \n",
    "            f\"CVSS V3 Base Score - {assigner}\", \n",
    "            f\"CVSS Modification Time - {assigner}\",\n",
    "            f\"CVSS V3 Vector - {assigner}\"\n",
    "        ])\n",
    "    headers_list[cvss_vector_index:cvss_vector_index] = assigner_columns\n",
    "\n",
    "    # Write the headers once\n",
    "    with open(output_csv, mode='w', newline='', encoding='utf-8') as file:\n",
    "        writer = csv.writer(file)\n",
    "        writer.writerow(headers_list)\n",
    "\n",
    "        # Process each package and write data\n",
    "        for package_name, version, data in data_list:\n",
    "            process_vulnerability_data(package_name, version, data, assigners, cvss_vector_index, writer)\n",
    "\n",
    "    # Print completion message\n",
    "    print(\"\\nProcessing Complete.\")\n",
    "\n",
    "# Example usage:\n",
    "input_excel = \"D:/Final Version/Step 1 MaliciousPackagesNameAndVersion/MaliciousPackageNameAndVersion.xlsx\"\n",
    "output_csv = \"MaliciousPackagesDetailsFromDifferentWebsites_V1.csv\"\n",
    "process_packages_from_excel(input_excel, output_csv)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a628d8cb-65f9-4a0e-b485-ab562179299b",
   "metadata": {},
   "source": [
    "## Implementation with API (REST)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "1623dc97-57cc-4c09-8f3b-5ff883f8f395",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done\n"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "import csv\n",
    "import pandas as pd\n",
    "from concurrent.futures import ThreadPoolExecutor\n",
    "import sys\n",
    "import logging\n",
    "\n",
    "# Setup logging to log into a file\n",
    "logging.basicConfig(filename='malicious_package_processing_REST.log', level=logging.INFO, \n",
    "                    format='%(asctime)s - %(levelname)s - %(message)s')\n",
    "\n",
    "# Mapping for CVSS components with full names\n",
    "CVSSv3_MAPPING_FULL = {\n",
    "    'AV': {'N': 'Network', 'A': 'Adjacent', 'L': 'Local', 'P': 'Physical'},\n",
    "    'AC': {'L': 'Low', 'H': 'High'},\n",
    "    'PR': {'N': 'None', 'L': 'Low', 'H': 'High'},\n",
    "    'UI': {'N': 'None', 'R': 'Required'},\n",
    "    'S': {'U': 'Unchanged', 'C': 'Changed'},\n",
    "    'C': {'H': 'High', 'L': 'Low', 'N': 'None'},\n",
    "    'I': {'H': 'High', 'L': 'Low', 'N': 'None'},\n",
    "    'A': {'H': 'High', 'L': 'Low', 'N': 'None'}\n",
    "}\n",
    "\n",
    "# Full names for the CVSS keys\n",
    "CVSSv3_KEY_NAMES = {\n",
    "    'AV': 'Attack Vector',\n",
    "    'AC': 'Attack Complexity',\n",
    "    'PR': 'Privileges Required',\n",
    "    'UI': 'User Interaction',\n",
    "    'S': 'Scope',\n",
    "    'C': 'Confidentiality',\n",
    "    'I': 'Integrity',\n",
    "    'A': 'Availability'\n",
    "}\n",
    "\n",
    "# Function to parse CVSS version and vector into individual components\n",
    "def parse_cvss_vector(cvss_vector):\n",
    "    if not cvss_vector:\n",
    "        return 'N/A', {}\n",
    "\n",
    "    components = cvss_vector.split('/')\n",
    "    version = components[0]  # Extract the version part (e.g., \"CVSS:3.1\")\n",
    "    parsed_components = {\n",
    "        'Attack Vector': 'N/A',\n",
    "        'Attack Complexity': 'N/A',\n",
    "        'Privileges Required': 'N/A',\n",
    "        'User Interaction': 'N/A',\n",
    "        'Scope': 'N/A',\n",
    "        'Confidentiality': 'N/A',\n",
    "        'Integrity': 'N/A',\n",
    "        'Availability': 'N/A'\n",
    "    }\n",
    "\n",
    "    for component in components[1:]:  # Skip the version part\n",
    "        key, value = component.split(':')\n",
    "        if key in CVSSv3_MAPPING_FULL and value in CVSSv3_MAPPING_FULL[key]:\n",
    "            parsed_components[CVSSv3_KEY_NAMES.get(key, key)] = CVSSv3_MAPPING_FULL[key][value]\n",
    "\n",
    "    return version, parsed_components\n",
    "\n",
    "def fetch_vulnerability_data_rest(org_id, package_name, version):\n",
    "    url = f\"https://api.snyk.io/rest/orgs/{org_id}/packages/pkg%3Apypi%2F{package_name}%40{version}/issues?version=2024-09-04\"\n",
    "    \n",
    "    headers = {\n",
    "        'Content-Type': 'application/json',\n",
    "        'Authorization': '6c213dae-465a-4b54-af34-b03837916962'  # Replace with your actual API token\n",
    "    }\n",
    "    \n",
    "    response = requests.get(url, headers=headers)\n",
    "    if response.status_code == 200:\n",
    "        return package_name, version, response.json()\n",
    "    else:\n",
    "        logging.error(f\"Failed to fetch data for package: {package_name}, version: {version}\")\n",
    "        return package_name, version, None\n",
    "\n",
    "def process_vulnerability_data_rest(package_name, version, data):\n",
    "    if data and 'data' in data and len(data['data']) > 0:\n",
    "        rows = []\n",
    "        first_occurrence = False\n",
    "        for issue in data['data']:\n",
    "            attributes = issue.get('attributes', {})\n",
    "            title = attributes.get('title', 'N/A')\n",
    "\n",
    "            # Only process if the title is exactly \"Malicious Package\" and only consider the first occurrence\n",
    "            if title == \"Malicious Package\" and not first_occurrence:\n",
    "                first_occurrence = True\n",
    "                logging.info(f\"Processing first occurrence of Malicious Package for package: {package_name}, version: {version}\")\n",
    "                description = attributes.get('description', '').replace('\\n', ' ').replace('\\r', '')\n",
    "                \n",
    "                severities = attributes.get('severities', [{}])\n",
    "                severity_level = severities[0].get('level', 'N/A')\n",
    "                severity_score = severities[0].get('score', 'N/A')\n",
    "                cvss_vector = severities[0].get('vector', 'N/A')\n",
    "\n",
    "                # Parse the CVSS version and individual components into separate columns\n",
    "                cvss_version, parsed_components = parse_cvss_vector(cvss_vector)\n",
    "\n",
    "                created_at = attributes.get('created_at', 'N/A')\n",
    "                updated_at = attributes.get('updated_at', 'N/A')\n",
    "                publication_time = attributes.get('slots', {}).get('publication_time', 'N/A')\n",
    "                disclosure_time = attributes.get('slots', {}).get('disclosure_time', 'N/A')\n",
    "                exploit_maturity = attributes.get('slots', {}).get('exploit', 'N/A')\n",
    "\n",
    "                # Extracting Reference URLs and Titles\n",
    "                reference_urls = ', '.join([ref.get('url', '') for ref in attributes.get('slots', {}).get('references', [])])\n",
    "                reference_titles = ', '.join([ref.get('title', 'N/A') for ref in attributes.get('slots', {}).get('references', [])])\n",
    "                \n",
    "                # Additional fields from the response\n",
    "                vulnerability_id = issue.get('id', 'N/A')\n",
    "                vulnerability_type = attributes.get('type', 'N/A')  # Type of vulnerability\n",
    "                effective_severity_level = attributes.get('effective_severity_level', 'N/A')  # Effective severity level\n",
    "                \n",
    "                problems = ', '.join([problem.get('id', 'N/A') for problem in attributes.get('problems', [])])\n",
    "                \n",
    "                # Vulnerable SemVer Range (coordinates) and Remedy Details\n",
    "                coordinates = ', '.join([repr.get('representation', ['N/A'])[0] for repr in attributes.get('coordinates', [])])\n",
    "                \n",
    "                remedy_info = attributes.get('coordinates', [{}])[0].get('remedies', [{}])[0]\n",
    "                remedy_type = remedy_info.get('type', 'N/A')\n",
    "                remedy_description = remedy_info.get('description', 'N/A')\n",
    "                upgrade_package = remedy_info.get('details', {}).get('upgrade_package', 'N/A')\n",
    "                \n",
    "                # CVE and CWE identifiers\n",
    "                cve_ids = ', '.join([problem.get('id', 'N/A') for problem in attributes.get('problems', []) if problem.get('source') == 'CVE'])\n",
    "                cwe_ids = ', '.join([problem.get('id', 'N/A') for problem in attributes.get('problems', []) if problem.get('source') == 'CWE'])\n",
    "\n",
    "                # Row data for CSV, with \"Data Found\" indicator and separate columns for CVSS components\n",
    "                row = [\n",
    "                    package_name,\n",
    "                    version,\n",
    "                    attributes.get('language', 'Python'),\n",
    "                    data.get('meta', {}).get('package', {}).get('type', 'N/A'),  # Package Manager\n",
    "                    coordinates,  # Vulnerable SemVer Range\n",
    "                    vulnerability_id,\n",
    "                    title,\n",
    "                    vulnerability_type,  # Type of vulnerability\n",
    "                    effective_severity_level,  # Effective severity level\n",
    "                    severity_level,\n",
    "                    severity_score,\n",
    "                    cvss_version,  # CVSS Version\n",
    "                    parsed_components['Attack Vector'],  # Individual CVSS components\n",
    "                    parsed_components['Attack Complexity'],\n",
    "                    parsed_components['Privileges Required'],\n",
    "                    parsed_components['User Interaction'],\n",
    "                    parsed_components['Scope'],\n",
    "                    parsed_components['Confidentiality'],\n",
    "                    parsed_components['Integrity'],\n",
    "                    parsed_components['Availability'],\n",
    "                    cve_ids,\n",
    "                    cwe_ids,\n",
    "                    exploit_maturity,\n",
    "                    created_at,\n",
    "                    updated_at,\n",
    "                    disclosure_time,\n",
    "                    publication_time,\n",
    "                    remedy_type,  # Remedy Type\n",
    "                    remedy_description,  # Remedy Description\n",
    "                    upgrade_package,  # Upgrade Package\n",
    "                    reference_titles,  # Reference Titles\n",
    "                    description,\n",
    "                    reference_urls,\n",
    "                    problems,\n",
    "                    \"Data found\"  # New column indicating data found\n",
    "                ]\n",
    "                rows.append(row)\n",
    "                break  # Stop after the first occurrence is processed\n",
    "\n",
    "        return rows, True  # True indicates data was found\n",
    "    else:\n",
    "        logging.info(f\"No malicious package found for package: {package_name}, version: {version}\")\n",
    "        # Return only the package name and version when data is not found, and fill remaining columns with 'N/A' and a reason\n",
    "        return [[package_name, version] + ['N/A'] * 31 + [\"No malicious package data found\"]], False  # False indicates no data was found\n",
    "\n",
    "def process_packages_from_excel_rest(input_excel, output_csv, org_id):\n",
    "    df = pd.read_excel(input_excel)\n",
    "\n",
    "    # Prepare headers for CSV, adding the \"Data Found\" or \"Reason for Not Found\" column\n",
    "    headers_list = [\n",
    "        \"Malicious Package Name\", \"Malicious Package Version\", \"Language\", \"Package Manager\", \"Vulnerable SemVer Range\", \n",
    "        \"Vulnerability ID\", \"Title\", \"Vulnerability Type\", \"Effective Severity Level\", \n",
    "        \"Severity\", \"CVSS Score\", \"CVSS Version\", \"Attack Vector\", \"Attack Complexity\", \"Privileges Required\", \n",
    "        \"User Interaction\", \"Scope\", \"Confidentiality\", \"Integrity\", \"Availability\", \"CVE\", \"CWE\", \n",
    "        \"Exploit Maturity\", \"Created At\", \"Updated At\", \"Disclosure Time\", \"Publication Time\", \n",
    "        \"Remedy Type\", \"Remedy Description\", \"Upgrade Package\", \"Reference Titles\", \n",
    "        \"Description\", \"Reference URLs\", \"Problems\", \"Data Found/Reason\"\n",
    "    ]\n",
    "\n",
    "    total_packages = len(df)\n",
    "    progress = 0\n",
    "    data_found_count = 0\n",
    "    data_not_found_count = 0\n",
    "\n",
    "    # Store results in a list to preserve input order\n",
    "    all_rows = []\n",
    "\n",
    "    with ThreadPoolExecutor(max_workers=10) as executor:\n",
    "        futures = [\n",
    "            executor.submit(fetch_vulnerability_data_rest, org_id, row['Malicious Package Name'], row['Malicious Package Version']) \n",
    "            for index, row in df.iterrows()\n",
    "        ]\n",
    "\n",
    "        # Process results sequentially to keep the same order as input\n",
    "        for i, future in enumerate(futures):\n",
    "            package_name, version, data = future.result()\n",
    "            rows, data_found = process_vulnerability_data_rest(package_name, version, data)\n",
    "            all_rows.extend(rows)\n",
    "\n",
    "            # Update counts\n",
    "            if data_found:\n",
    "                data_found_count += 1\n",
    "            else:\n",
    "                data_not_found_count += 1\n",
    "            \n",
    "            # Update and log progress\n",
    "            progress += 1\n",
    "            percentage = (progress / total_packages) * 100\n",
    "            logging.info(f\"Processing: {percentage:.2f}% Complete\")\n",
    "\n",
    "    # Write the results to the CSV\n",
    "    with open(output_csv, mode='w', newline='', encoding='utf-8') as file:\n",
    "        writer = csv.writer(file)\n",
    "        writer.writerow(headers_list)\n",
    "        writer.writerows(all_rows)\n",
    "\n",
    "    # Log final counts\n",
    "    logging.info(f\"Total data found: {data_found_count}\")\n",
    "    logging.info(f\"Total data not found: {data_not_found_count}\")\n",
    "    logging.info(\"Processing Complete.\")\n",
    "\n",
    "# Example usage:\n",
    "input_excel = \"D:/Final Version/Step 1 MaliciousPackagesNameAndVersion/MaliciousPackageNameAndVersion.xlsx\"\n",
    "output_csv = \"MaliciousPackagesDetailsFromDifferentWebsites_REST.csv\"\n",
    "org_id = \"30ed544d-6aea-4d2d-aa56-56508978d909\"  # Replace with your actual organization ID from Snyk\n",
    "process_packages_from_excel_rest(input_excel, output_csv, org_id)\n",
    "print(f\"Done\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e055b58c-e01c-4151-b48e-96d6d329f989",
   "metadata": {},
   "source": [
    "**----End of Step 3----**"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
