{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "initial_id",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-22T07:41:03.562265Z",
     "start_time": "2024-05-22T07:41:03.559003Z"
    }
   },
   "outputs": [],
   "source": [
    "# Third Party Imports\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.model_selection import train_test_split, RandomizedSearchCV\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.preprocessing import MinMaxScaler"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "644fbcb1-a948-4036-8fec-6f18369ec046",
   "metadata": {},
   "source": [
    "#### Data Preparation"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bc9ca94d-fd7a-46c5-932d-01ca644994c9",
   "metadata": {},
   "source": [
    "**Reading the Car Dataset**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "77cb6a91a07ed68d",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-22T07:41:03.731947Z",
     "start_time": "2024-05-22T07:41:03.723709Z"
    }
   },
   "outputs": [],
   "source": [
    "# Reading the Car Dataset\n",
    "# NOTE(review): read_csv runs with default options, so the first line of car.data is\n",
    "# taken as the header row; later cells index columns by name (e.g. 'class', 'buying'),\n",
    "# so the file presumably carries a header line - confirm.\n",
    "car = pd.read_csv(\"./../../../datasets/car/car.data\")\n",
    "# head(-5) returns every row except the last five (not only the first five).\n",
    "car.head(-5)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "317ecda9-87df-40b3-918d-19b7ccd05777",
   "metadata": {},
   "source": [
    "**Print Info and Missing Values**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1b167864-ced8-46fd-89d8-74266115ab90",
   "metadata": {},
   "outputs": [],
   "source": [
    "# DataFrame.info() writes its report to stdout and returns None, so it is called\n",
    "# directly; wrapping it in print() would append a stray \"None\" line to the output.\n",
    "car.info()\n",
    "print(\"\\n\\n\")\n",
    "print(\"Missing values: \", car.isnull().values.any())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3657b1f4-3508-4c6c-8379-61e51f980375",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Helper Function\n",
    "def label_encoder(feature, label_to_int=None):\n",
    "    if not label_to_int:\n",
    "        unique_labels = pd.unique(feature)\n",
    "        label_to_int = {label: idx for idx, label in enumerate(unique_labels)}\n",
    "    print(label_to_int)\n",
    "    transformed_feature = np.array([label_to_int[label] for label in feature])\n",
    "    return transformed_feature"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0570daa8-d121-4f1f-bf0a-70e67e126988",
   "metadata": {},
   "source": [
    "**Converting Categorical Features to Numerical**"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f22ab6c3-1891-4453-b10b-5ee0e16f74db",
   "metadata": {},
   "source": [
    "**class (Target Feature)**\n",
    "\n",
    "Categorical: Numerical<br>\n",
    "unacc: 0 <br>\n",
    "acc: 1 <br>\n",
    "good: 2 <br>\n",
    "vgood: 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e4f93701-fc07-4747-9067-d74accc38c5a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ordinal codes for the target column: unacc < acc < good < vgood.\n",
    "class_label_to_int = dict(unacc=0, acc=1, good=2, vgood=3)\n",
    "car[\"class\"] = label_encoder(car[\"class\"], label_to_int=class_label_to_int)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8264a407-749e-4658-a1f1-ea2e078812bf",
   "metadata": {},
   "source": [
    "**buying (attribute)**\n",
    "\n",
    "Categorical: Numerical <br>\n",
    "vhigh: 3 <br>\n",
    "high: 2 <br>\n",
    "med: 1 <br>\n",
    "low: 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "19838281-04ad-46f9-a9fd-cc41529354a1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ordinal codes for the buying price: low < med < high < vhigh.\n",
    "buying_label_to_int = dict(vhigh=3, high=2, med=1, low=0)\n",
    "car[\"buying\"] = label_encoder(car[\"buying\"], label_to_int=buying_label_to_int)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bc1d7e15-fcc8-4ec4-9657-daef1b751a99",
   "metadata": {},
   "source": [
    "**maint (attribute)**\n",
    "\n",
    "Categorical: Numerical<br>\n",
    "vhigh: 3<br>\n",
    "high: 2<br>\n",
    "med: 1<br>\n",
    "low: 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7d5a4112-d79e-4b61-be60-b79ea40cb025",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ordinal codes for the maintenance cost: low < med < high < vhigh.\n",
    "maint_label_to_int = dict(vhigh=3, high=2, med=1, low=0)\n",
    "car[\"maint\"] = label_encoder(car[\"maint\"], label_to_int=maint_label_to_int)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "368d75f3-361c-4be0-9f25-17804a89c416",
   "metadata": {},
   "source": [
    "**doors (attribute)**\n",
    "\n",
    "Categorical: Numerical<br>\n",
    "2: 2<br>\n",
    "3: 3<br>\n",
    "4: 4<br>\n",
    "5more: 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "42be2829-8cfb-40ef-9d34-e45dbb188d2c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Door counts keep their numeric value; the open-ended \"5more\" bucket becomes 5.\n",
    "door_label_to_int = {str(count): count for count in (2, 3, 4)}\n",
    "door_label_to_int[\"5more\"] = 5\n",
    "car[\"doors\"] = label_encoder(car[\"doors\"], label_to_int=door_label_to_int)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eeb45047-ce0d-46c4-a2be-8588b3f726ec",
   "metadata": {},
   "source": [
    "**persons (attribute)**\n",
    "\n",
    "Categorical: Numerical<br>\n",
    "2: 2<br>\n",
    "4: 4<br>\n",
    "more: 5<br>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e4a45bf5-e127-4b86-963a-10a5f1eec62d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seat counts keep their numeric value; the open-ended \"more\" bucket becomes 5.\n",
    "persons_label_to_int = dict(zip((\"2\", \"4\", \"more\"), (2, 4, 5)))\n",
    "car['persons'] = label_encoder(car['persons'], label_to_int=persons_label_to_int)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "888548b9-af17-49f9-8063-819bfb1b3c13",
   "metadata": {},
   "source": [
    "**lug_boot (attribute)**\n",
    "\n",
    "Categorical: Numerical<br>\n",
    "small: 0<br>\n",
    "med: 1<br>\n",
    "big: 2<br>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6de2d21c-7399-4070-b0e4-5872e3547ccf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ordinal codes for the luggage boot size: small < med < big.\n",
    "lug_boot_label_to_int = dict(small=0, med=1, big=2)\n",
    "car['lug_boot'] = label_encoder(car['lug_boot'], label_to_int=lug_boot_label_to_int)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ee641d39-e8e1-49f0-a0e9-1da6a90a8cbf",
   "metadata": {},
   "source": [
    "**safety (attribute)**\n",
    "\n",
    "Categorical: Numerical<br>\n",
    "low: 0<br>\n",
    "med: 1<br>\n",
    "high: 2<br>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9fd1eada-db4b-4fd9-b66f-2b125a2c3e40",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ordinal codes for the safety rating: low < med < high.\n",
    "safety_label_to_int = dict(low=0, med=1, high=2)\n",
    "car['safety'] = label_encoder(car['safety'], label_to_int=safety_label_to_int)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d921143a-172f-4f29-80c6-5a877643d067",
   "metadata": {},
   "source": [
    "**Variance Check**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "948672e3-370b-466e-97ac-be927c47d05f",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Variance of each column of the encoded frame, rounded to 3 decimals.\n",
    "car.var().round(3)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "039d6a5a-4cb2-43ea-b8c5-0c2488444861",
   "metadata": {},
   "source": [
    "Very different variance levels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a9aaeed7-a7e6-4fac-83fb-213f6b0c3774",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview of the first rows after all columns have been encoded.\n",
    "car.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "06776d06-aac3-4cd8-b1e8-b8498381d713",
   "metadata": {},
   "source": [
    "**Checking the distribution of target values**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "339d9bbe-8754-4572-87e1-3365bed7af5e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Share of rows per target class (proportions rather than counts), rounded to 3 decimals.\n",
    "car[\"class\"].value_counts(normalize=True).round(3)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0ce86e3c-d237-459e-805b-83a02b971354",
   "metadata": {},
   "source": [
    "**Train-Test Split**"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "852899ac-5378-4914-a9e7-c206ded4d0de",
   "metadata": {},
   "source": [
    "Train - Test Split &nbsp;&nbsp;&nbsp;&nbsp; 85%-15%<br>\n",
    "The test set is further split into Verb and Gen sets<br>\n",
    "Verb - Gen Split   &nbsp;&nbsp;&nbsp;&nbsp; 50%-50%"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bf7ff54f-f3f4-40e6-8bc4-424d99ae8af8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 85/15 train/test split, stratified on the target so class proportions carry over.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    car.drop(columns=\"class\"),\n",
    "    car[\"class\"],\n",
    "    test_size=0.15,\n",
    "    random_state=400,\n",
    "    stratify=car[\"class\"],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5cc4deea-e333-4ce0-890d-ad1078aac260",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Halve the test set into the Verb and Gen sets, again stratified on the target.\n",
    "X_test_verb, X_test_gen, y_test_verb, y_test_gen = train_test_split(\n",
    "    X_test,\n",
    "    y_test,\n",
    "    test_size=0.5,\n",
    "    random_state=400,\n",
    "    stratify=y_test,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bda17883-3302-4fc9-8b3b-46c6c669a070",
   "metadata": {},
   "source": [
    "**Checking the distribution of target values after the split**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f79613d4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Class proportions per split, printed in the order train / verb / gen.\n",
    "for split_labels in (y_train, y_test_verb, y_test_gen):\n",
    "    print(split_labels.value_counts(normalize=True).round(3))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ccf6c676-335d-4133-afa2-5161bd932f2a",
   "metadata": {},
   "source": [
    "**Variance Check**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9bf865bb-067e-40f8-bfb8-03afba7c71a1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-feature variance on the training split only, rounded to 3 decimals.\n",
    "X_train.var().round(3)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0a7a2750-30fc-45f5-97d9-9bd0863cab60",
   "metadata": {},
   "source": [
    "**Standardization** - Skipping this because the resulting dataset has very high precision float values (that may not be good for the LLM)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ebbccb2f-d31e-451c-83a4-ba3b35025796",
   "metadata": {},
   "outputs": [],
   "source": [
    "# scaler = MinMaxScaler(feature_range=(0, 50))\n",
    "# X_train_scaled = scaler.fit_transform(X_train)\n",
    "# X_test_gen_scaled = scaler.transform(X_test_gen)\n",
    "# X_test_verb_scaled = scaler.transform(X_test_verb)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "99cb1c0e-8239-4489-a757-51307bfe7e8a",
   "metadata": {},
   "source": [
    "**Converting them back to dataframes** - No need for this now as there's no standardization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "563a7aff-a958-42bf-a740-bfe04b397aee",
   "metadata": {},
   "outputs": [],
   "source": [
    "# X_train_scaled_df = pd.DataFrame(X_train_scaled, columns=X_train.columns)\n",
    "# X_test_gen_scaled_df = pd.DataFrame(X_test_gen_scaled, columns=X_test.columns)\n",
    "# X_test_verb_scaled_df = pd.DataFrame(X_test_verb_scaled, columns=X_test.columns)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "97fe3ae9-5bdc-488c-9339-af497d6999ee",
   "metadata": {},
   "source": [
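    "From here onwards the Verb split (`X_test_verb`) is referred to as `X_test_scaled_df`, and `y_test` is rebound to `y_test_verb`. The Gen split is kept as `X_test_gen_scaled_df`; it is not used for selecting models, only for the later generation-data comparison and sample export."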
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "61841898",
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'X_train' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m X_train_scaled_df \u001b[38;5;241m=\u001b[39m \u001b[43mX_train\u001b[49m\n\u001b[1;32m      2\u001b[0m X_test_scaled_df \u001b[38;5;241m=\u001b[39m X_test_verb\n\u001b[1;32m      3\u001b[0m X_test_gen_scaled_df \u001b[38;5;241m=\u001b[39m X_test_gen\n",
      "\u001b[0;31mNameError\u001b[0m: name 'X_train' is not defined"
     ]
    }
   ],
   "source": [
    "# The scaling cells above are commented out, so the *_scaled_df names are plain\n",
    "# aliases of the unscaled splits (no copy is made).\n",
    "X_train_scaled_df = X_train\n",
    "X_test_scaled_df = X_test_verb\n",
    "X_test_gen_scaled_df = X_test_gen\n",
    "# y_test is rebound to the verb labels so that it lines up with X_test_scaled_df.\n",
    "y_test = y_test_verb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1141106f-6dfb-4e50-8437-53c90d7a1715",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same variance check, through the alias that the modelling cells use.\n",
    "X_train_scaled_df.var().round(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e0f01c93-046d-4d34-b50c-400c16319fdb",
   "metadata": {},
   "outputs": [],
   "source": [
    "def nearest_neighbours_plot(knn, X_train, y_train, X_test, test_index):\n",
    "    \"\"\"Scatter two training features and highlight one test row and its neighbours.\n",
    "\n",
    "    Args:\n",
    "        knn: Fitted estimator exposing `kneighbors`.\n",
    "        X_train: DataFrame of training features; the neighbour indices returned by\n",
    "            `knn.kneighbors` are used as positions into it.\n",
    "        y_train: Training labels, used to colour the training points.\n",
    "        X_test: DataFrame holding the row to inspect.\n",
    "        test_index: Positional index of that row in `X_test`.\n",
    "\n",
    "    The figure is shown with `plt.show()`; nothing is returned.\n",
    "    \"\"\"\n",
    "    # Keep the query as a one-row DataFrame so its column names reach the estimator.\n",
    "    query = X_test.iloc[[test_index]]\n",
    "    test_instance = query.iloc[0]\n",
    "\n",
    "    # Find the nearest neighbors (the distances are not needed for the plot)\n",
    "    _, indices = knn.kneighbors(query)\n",
    "\n",
    "    # Positional indices of the two features drawn on the x and y axes\n",
    "    feature_index_1 = 1\n",
    "    feature_index_2 = 2\n",
    "\n",
    "    # Plot the training data\n",
    "    plt.figure(figsize=(10, 6))\n",
    "    plt.scatter(X_train.iloc[:, feature_index_1], X_train.iloc[:, feature_index_2], c=y_train, cmap='viridis', edgecolor='k')\n",
    "\n",
    "    # Plot the test instance; .iloc makes the positional lookup explicit\n",
    "    plt.scatter(test_instance.iloc[feature_index_1], test_instance.iloc[feature_index_2], c='r', s=100, marker='*', label='Test Instance')\n",
    "\n",
    "    # Plot the nearest neighbors\n",
    "    nearest_neighbors = X_train.iloc[indices[0]]\n",
    "    plt.scatter(nearest_neighbors.iloc[:, feature_index_1], nearest_neighbors.iloc[:, feature_index_2], c='g', s=100, label='k Nearest Neighbors')\n",
    "\n",
    "    # Label the axes with the columns that are actually plotted\n",
    "    plt.xlabel(X_train.columns[feature_index_1])\n",
    "    plt.ylabel(X_train.columns[feature_index_2])\n",
    "    plt.title('Nearest Neighbors Plot')\n",
    "    plt.legend()\n",
    "\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c89636ce-0600-405c-ad76-9f9edb475f76",
   "metadata": {},
   "source": [
    "#### Model Variations"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "98c6a1a3",
   "metadata": {},
   "source": [
    "**Base Model** - Through Randomized Search CV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d0698840-a387-47f3-8b76-37a875fdcbdc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seed NumPy's global RNG; the search below is additionally given its own random_state.\n",
    "np.random.seed(123)\n",
    "    \n",
    "# Candidate values for each KNeighborsClassifier hyper-parameter.\n",
    "param_distributions = {\n",
    "    'n_neighbors': range(1, 10),\n",
    "    'weights': ['uniform', 'distance'],\n",
    "    'algorithm': ['auto', 'brute'],\n",
    "    'leaf_size': range(10, 51),\n",
    "    'p': [1, 2, np.inf],\n",
    "    'metric': ['minkowski', 'cosine']\n",
    "}\n",
    "\n",
    "# 100 sampled configurations, each scored by 5-fold cross-validated accuracy.\n",
    "knn = KNeighborsClassifier()\n",
    "random_search = RandomizedSearchCV(\n",
    "    estimator=knn,\n",
    "    param_distributions=param_distributions,\n",
    "    n_iter=100,\n",
    "    cv=5,\n",
    "    n_jobs=-1,\n",
    "    scoring='accuracy',\n",
    "    random_state=21\n",
    ")\n",
    "\n",
    "# Only the training split is used for the search.\n",
    "random_search.fit(X_train_scaled_df, y_train)\n",
    "best_params = random_search.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9edee19a-00a1-424f-9d79-dfb01d96dc51",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hyper-parameters of the best configuration found by the randomized search.\n",
    "best_params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9c1d7336",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train the base model\n",
    "def train_base_model(X_train, y_train):\n",
    "    \"\"\"Fit and return a KNeighborsClassifier built from the notebook-level `best_params`.\"\"\"\n",
    "    # best_knn = KNeighborsClassifier(metric=\"cosine\", n_neighbors=3, p=np.inf)\n",
    "    tuned_knn = KNeighborsClassifier(**best_params)\n",
    "    tuned_knn.fit(X_train, y_train)\n",
    "    return tuned_knn\n",
    "\n",
    "# Accuracy of the tuned model on the verb split\n",
    "base_model = train_base_model(X_train_scaled_df, y_train)\n",
    "base_pred = base_model.predict(X_test_scaled_df)\n",
    "accuracy_score(y_true=y_test, y_pred=base_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "45174ae8-aa72-4fe6-bebd-67bb8fee0635",
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_random_params():\n",
    "    metric = np.random.choice(['minkowski', 'cosine'])\n",
    "    \n",
    "    if metric == 'minkowski':\n",
    "        p_choices = [1, 2, 3, 4, np.inf]\n",
    "        p = np.random.choice(p_choices)\n",
    "        params = {\n",
    "            'n_neighbors': np.random.randint(1, 20),\n",
    "            'weights': np.random.choice(['uniform', 'distance']),\n",
    "            'algorithm': np.random.choice(['auto', 'ball_tree', 'kd_tree', 'brute']),\n",
    "            'leaf_size': np.random.randint(10, 51),\n",
    "            'metric': 'minkowski',\n",
    "            'p': p\n",
    "        }\n",
    "    else:  # cosine similarity\n",
    "        params = {\n",
    "            'n_neighbors': np.random.randint(1, 20),\n",
    "            'weights': np.random.choice(['uniform', 'distance']),\n",
    "            'algorithm': 'brute',\n",
    "            'metric': 'cosine'\n",
    "        }\n",
    "    \n",
    "    return params\n",
    "\n",
    "def compare_models(model1, model2, X):\n",
    "    pred1 = model1.predict(X)\n",
    "    pred2 = model2.predict(X)\n",
    "    diff_percentage = np.mean(pred1 != pred2)\n",
    "    return diff_percentage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "906a2344-d155-45f2-a5af-91a927ea0a7d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Comparison with Base Model\n",
    "def generate_models_with_diff(base_model, X_train, y_train, X_test, max_attempts=10, min_diff=0.25, max_diff=0.30):\n",
    "    \"\"\"Fit randomly configured KNN models and track the one that best disagrees with `base_model`.\n",
    "\n",
    "    Args:\n",
    "        base_model: Fitted reference model.\n",
    "        X_train, y_train: Data every candidate is fitted on.\n",
    "        X_test: Data on which predictions are compared.\n",
    "        max_attempts: Number of random candidates to fit.\n",
    "        min_diff, max_diff: Inclusive band that the disagreement rate, rounded to\n",
    "            2 decimals, must fall into for a candidate to be accepted.\n",
    "\n",
    "    Returns:\n",
    "        (best_pair, best_diff, all_models): best_pair is (base_model, candidate) for the\n",
    "        accepted candidate with the largest disagreement, or None when no candidate\n",
    "        landed in the band (best_diff is then 0); all_models lists every fitted candidate.\n",
    "    \"\"\"\n",
    "    best_diff = 0\n",
    "    best_pair = None\n",
    "    all_models = []\n",
    "\n",
    "    for i in range(max_attempts):\n",
    "        random_params = generate_random_params()\n",
    "        new_model = KNeighborsClassifier(**random_params)\n",
    "        new_model.fit(X_train, y_train)\n",
    "        all_models.append(new_model)\n",
    "\n",
    "        diff = compare_models(base_model, new_model, X_test)\n",
    "        print(f\"Attempt Number {i}. Diff - {diff}\")\n",
    "\n",
    "        # The band test uses the rounded value; the raw value is what gets stored\n",
    "        if diff > best_diff and min_diff <= round(diff, 2) <= max_diff:\n",
    "            best_diff = diff\n",
    "            best_pair = (base_model, new_model)\n",
    "            print(f\"Best diff till now {best_diff}.\")\n",
    "    return best_pair, best_diff, all_models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ab8a043e-e419-4356-b728-bbcddcb19e01",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seeds tried while looking for a suitable model pair; only the uncommented one is active.\n",
    "# np.random.seed(128)\n",
    "# np.random.seed(236)\n",
    "np.random.seed(264)\n",
    "# np.random.seed(130)\n",
    "# np.random.seed(131)\n",
    "# np.random.seed(125)\n",
    "# Fit 50 random candidates and compare each against the base model on the verb split.\n",
    "max_attempts = 50\n",
    "best_pair, best_diff, all_knns = generate_models_with_diff(base_model, X_train_scaled_df, y_train, X_test_scaled_df, max_attempts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "57ef69f8-2739-4ffa-8730-4d1b71efcf2c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_best_model_pair(all_models, X_test):\n",
    "    best_pair = None\n",
    "    best_diff = 0\n",
    "    n = len(all_models)\n",
    "    \n",
    "    for i in range(n):\n",
    "        print(f\"ith value is - {i}\")\n",
    "        for j in range(i+1, n):\n",
    "            \n",
    "            model_i = all_models[i]\n",
    "            model_j = all_models[j]\n",
    "            \n",
    "            diff = compare_models(model_i, model_j, X_test)\n",
    "            print(f\"Comparing Model {i} and Model {j}. Diff: {diff:.4f}\")\n",
    "            \n",
    "            if diff > best_diff and (0.25 <= diff and diff <= 0.29):\n",
    "                best_diff = diff\n",
    "                best_pair = (model_i, model_j)\n",
    "                print(f\"New best pair found: Models {i} and {j} with diff {diff:.4f}\")\n",
    "               \n",
    "    return best_pair, best_diff"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a7a7368f-ad6e-4b28-a63d-9a649dad58c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Search all pairs among the sampled models on the verb split.\n",
    "# This rebinds best_pair and best_diff from the base-model comparison above.\n",
    "best_pair, best_diff = find_best_model_pair(all_knns, X_test_scaled_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7ac60978-03ed-4a08-9d81-7d38dddafc0b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): the next cell rebinds model1/model2 in the opposite order, so on a\n",
    "# top-to-bottom run this assignment is immediately overwritten.\n",
    "model1, model2 = best_pair"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "64072148-60c3-4fb8-b5c5-15f7086742e3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Unpack in swapped order: the pair's second model becomes model1, the first becomes model2.\n",
    "model2, model1 = best_pair"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "78a3fa94-7957-4288-8406-f95435777b05",
   "metadata": {},
   "outputs": [],
   "source": [
    "# let's check the difference on the generation data as well\n",
    "compare_models(model1, model2, X_test_gen_scaled_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c00e22b6-0bf6-47cd-9d32-90f80bd46deb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predictions and accuracy of each selected model on the verb split\n",
    "y_pred_1 = model1.predict(X_test_scaled_df)\n",
    "accuracy1 = accuracy_score(y_test, y_pred_1)\n",
    "\n",
    "y_pred_2 = model2.predict(X_test_scaled_df)\n",
    "accuracy2 = accuracy_score(y_test, y_pred_2)\n",
    "\n",
    "# Report\n",
    "print(\"Model 1\")\n",
    "print(\"Model 1 accuracy:\", accuracy1)\n",
    "print(\"\\nModel 2:\")\n",
    "print(\"Model 2 accuracy:\", accuracy2)\n",
    "print(\"\\nAccuracy difference:\", abs(accuracy1 - accuracy2))\n",
    "print(f\"\\nPercentage of different outputs: {best_diff:.2%}\")\n",
    "\n",
    "# Full hyper-parameter sets of both models\n",
    "print(\"\\n\\n\")\n",
    "print(model1.get_params())\n",
    "print(model2.get_params())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "37d4ee5a-9806-40de-b6eb-ed031a0f4290",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Neighbours that model1 retrieves for the verb-split row at position 23.\n",
    "nearest_neighbours_plot(model1, X_train_scaled_df, y_train, X_test_scaled_df, 23)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f754714c-78f2-4752-a04b-e4a42bd4271a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same test row (position 23), this time with model2's neighbours.\n",
    "nearest_neighbours_plot(model2, X_train_scaled_df, y_train, X_test_scaled_df, 23)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "03eebeee-fb0c-48b2-949a-1e653015057c",
   "metadata": {},
   "source": [
    "#### Compare Model Boundaries"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6e2ff0ed-4a61-49ee-b168-b0c48eea1cc9",
   "metadata": {},
   "source": [
    "**Plot Model Decision Boundaries**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cadd96e0-0bd9-4e6d-aa41-91ca70c17623",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to plot decision boundary\n",
    "def plot_decision_boundary(X_test, y_pred, feature_1, feature_2, class_names=(\"unacc\", \"acc\", \"good\", \"vgood\")):\n",
    "    \"\"\"Scatter two features of `X_test`, coloured by the predicted class.\n",
    "\n",
    "    Args:\n",
    "        X_test: DataFrame containing the columns `feature_1` and `feature_2`.\n",
    "        y_pred: Predicted integer class codes, one per row of `X_test`.\n",
    "        feature_1: Column drawn on the x axis.\n",
    "        feature_2: Column drawn on the y axis.\n",
    "        class_names: Display name of each class code, in code order (0, 1, 2, ...).\n",
    "            The default matches this notebook's target encoding.\n",
    "\n",
    "    The figure is shown with `plt.show()`; nothing is returned.\n",
    "    \"\"\"\n",
    "    class_codes = list(range(len(class_names)))\n",
    "\n",
    "    # Stretch the colour scale over every class code so that no two classes share a colour\n",
    "    plt.figure(figsize=(8, 6))\n",
    "    plt.scatter(X_test[feature_1], X_test[feature_2], c=y_pred, cmap='rainbow', edgecolor='black', s=20, vmin=class_codes[0], vmax=class_codes[-1])\n",
    "\n",
    "    # Add labels and title\n",
    "    plt.xlabel(feature_1)\n",
    "    plt.ylabel(feature_2)\n",
    "    plt.title('KNN Decision Boundary')\n",
    "\n",
    "    # Add a colorbar with one tick per predicted class\n",
    "    cbar = plt.colorbar()\n",
    "    cbar.set_ticks(class_codes)\n",
    "    cbar.set_ticklabels(list(class_names))\n",
    "\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "da0d95b4-9abe-4a27-9507-8f9a6d3959e0",
   "metadata": {},
   "source": [
    "**Model 1 Decision Boundary**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "416e4659-cceb-4203-bb78-8acbefc10f7d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# model1's verb-split predictions over the buying / maint plane.\n",
    "plot_decision_boundary(X_test_scaled_df, y_pred_1, \"buying\", \"maint\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "18dcb720-32f5-469a-8d4f-f2e02255e3a4",
   "metadata": {},
   "source": [
    "**Model 2 Decision Boundary**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dd71e87d-f272-4d25-bc7a-286fde38544a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# model2's verb-split predictions over the buying / maint plane.\n",
    "plot_decision_boundary(X_test_scaled_df, y_pred_2, \"buying\", \"maint\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ee51c7f6-1fb8-4eb1-bc09-a708ccb67e95",
   "metadata": {},
   "source": [
    "**Preparing Detailed Data For Ablation Study**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "692c41f6-892e-49a0-b4aa-ffc9916933cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_knn_details(knn_model, X_train, y_train, X_test, for_gen=False):\n",
    "    if not hasattr(knn_model, 'classes_'):\n",
    "        raise ValueError(\"The KNN model is not fitted. Please fit the model before using this function.\")\n",
    "    \n",
    "    # Get the number of neighbors (k) from the model\n",
    "    k = knn_model.n_neighbors\n",
    "    \n",
    "    result = []\n",
    "    \n",
    "    # Loop through each row in the testing dataset\n",
    "    for _, test_row in X_test.iterrows():\n",
    "        distances, indices = knn_model.kneighbors([test_row], n_neighbors=k)\n",
    "        predicted_output = knn_model.predict([test_row])[0]\n",
    "        \n",
    "        nearest_neighbors = []\n",
    "        for distance, index in zip(distances[0], indices[0]):\n",
    "            neighbor_details = {\n",
    "                \"feature_values\": X_train.iloc[index].tolist(),\n",
    "                \"assigned_class\": int(y_train.iloc[index]),\n",
    "                \"distance\": float(distance)\n",
    "            }\n",
    "            nearest_neighbors.append(neighbor_details)\n",
    "        \n",
    "        # Add k random data points if for_gen is True\n",
    "        if for_gen:\n",
    "            random_indices = np.random.choice(len(X_train), k, replace=False)\n",
    "            for index in random_indices:\n",
    "                random_point_details = {\n",
    "                    \"feature_values\": X_train.iloc[index].tolist(),\n",
    "                    \"assigned_class\": int(y_train.iloc[index]),\n",
    "                    \"distance\": None  # Random data points don't have a distance\n",
    "                }\n",
    "                nearest_neighbors.append(random_point_details)\n",
    "        \n",
    "        # Add the details to the result dictionary\n",
    "        result.append({\n",
    "            \"input\": test_row.tolist(),\n",
    "            \"output\": int(predicted_output),\n",
    "            \"nearest_neighbors\": nearest_neighbors\n",
    "        })\n",
    "        \n",
    "    return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a1e1a95f-a57b-4aa4-81fd-25e3b3018b6a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-row neighbour details of model1 on the verb split (true neighbours only).\n",
    "model1_details_verb = get_knn_details(model1, X_train_scaled_df, y_train, X_test_scaled_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "046741ea-5d38-43b2-b795-82bdbfb4e1bd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-row neighbour details of model2 on the verb split (true neighbours only).\n",
    "model2_details_verb = get_knn_details(model2, X_train_scaled_df, y_train, X_test_scaled_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a0997155-310f-4d41-879b-e40fd06334fa",
   "metadata": {},
   "outputs": [],
   "source": [
    "# model1 on the gen split; for_gen=True also appends k random training points per row.\n",
    "model1_details_gen = get_knn_details(model1, X_train_scaled_df, y_train, X_test_gen_scaled_df, for_gen=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "662ece5c-1c6d-4486-ad27-49dc5d5a99b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# model2 on the gen split; for_gen=True also appends k random training points per row.\n",
    "model2_details_gen = get_knn_details(model2, X_train_scaled_df, y_train, X_test_gen_scaled_df, for_gen=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b5edabca-57ab-4348-995a-cf5516484f32",
   "metadata": {},
   "outputs": [],
   "source": [
    "def round_values(data, precision=3):\n",
    "    if isinstance(data, list):\n",
    "        return [round_values(item, precision) for item in data]\n",
    "    elif isinstance(data, dict):\n",
    "        return {key: round_values(value, precision) for key, value in data.items()}\n",
    "    elif isinstance(data, (int, float)):\n",
    "        return round(data, precision)\n",
    "    return data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "15ad59a4-ed8e-4ca0-b2d7-10a3b9c7382c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_combined_details(model1_details, model2_details):\n",
    "    \"\"\"Merge two per-sample detail lists into one record per sample.\n",
    "\n",
    "    The lists are paired positionally. A pair is kept only when both entries carry\n",
    "    an equal \"input\"; mismatching pairs are skipped. Inputs and neighbour details\n",
    "    are passed through `round_values`; outputs are copied as they are.\n",
    "    \"\"\"\n",
    "    merged = []\n",
    "    for first, second in zip(model1_details, model2_details):\n",
    "        if first['input'] != second['input']:\n",
    "            continue\n",
    "        merged.append({\n",
    "            \"input\": round_values(first['input']),\n",
    "            \"model1\": {\n",
    "                \"output\": first['output'],\n",
    "                \"nearest_neighbors\": round_values(first['nearest_neighbors'])\n",
    "            },\n",
    "            \"model2\": {\n",
    "                \"output\": second['output'],\n",
    "                \"nearest_neighbors\": round_values(second['nearest_neighbors'])\n",
    "            }\n",
    "        })\n",
    "    return merged"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e070df5d-b5b1-4023-9ead-99b5ac3ad9d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One merged record per verb-split sample, holding both models' outputs and neighbours.\n",
    "combined_details_verb = get_combined_details(model1_details_verb, model2_details_verb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cc8a1330-856d-49d4-bd7a-9aff3e1889a7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One merged record per gen-split sample, holding both models' outputs and neighbours.\n",
    "combined_details_gen = get_combined_details(model1_details_gen, model2_details_gen)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0c623014-8324-4e9d-b5e9-95b95d720c90",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "7628520f-4e37-41df-ba75-e8da5c362ebe",
   "metadata": {},
   "source": [
    "#### Sample Data Creation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "30773f9b-4f47-48c3-a107-e516c89e25b1",
   "metadata": {},
   "outputs": [],
   "source": [
    "def write_data(data, file_name, varname):\n",
    "    datastr = f\"\\n{varname} = {data}\"\n",
    "    \n",
    "    # Write this string to the file\n",
    "    with open(file_name, 'a') as file:\n",
    "        file.write(datastr)\n",
    "\n",
    "# This stays constant for this iPython file\n",
    "FILE_NAME = \"./../samples/car/level_1.py\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5f2f63eb-f80c-40a3-8607-54a3c18d29ea",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count the verb-split rows on which the two models disagree, then show their positions.\n",
    "print(f\"Number of mismatched samples: {np.sum(y_pred_1 != y_pred_2)}\")\n",
    "np.where(y_pred_1 != y_pred_2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e5d4c843-b579-4aa1-a2e8-dd1a5af0a627",
   "metadata": {},
   "outputs": [],
   "source": [
    "verb_data = []\n",
    "for idx in range(len(X_test_scaled_df)):\n",
    "    data_point = {\n",
    "        \"input\": round_values(X_test_scaled_df.iloc[idx].to_list()),\n",
    "        \"output\": {\n",
    "            \"model1\": int(y_pred_1[idx]),\n",
    "            \"model2\": int(y_pred_2[idx])\n",
    "        }\n",
    "    }\n",
    "    verb_data.append(data_point)\n",
    "\n",
    "print(f\"Number of samples in verb_data: {len(verb_data)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a27f55c0-1226-4f65-bbb0-a5a4a40cfcfe",
   "metadata": {},
   "outputs": [],
   "source": [
    "verb_data_second = [{\"input\": i['input'], \"output\": {\"model1\": i[\"model1\"][\"output\"], \"model2\": i[\"model2\"][\"output\"]}} for i in combined_details_verb]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "378d0082",
   "metadata": {},
   "outputs": [],
   "source": [
    "verb_data == verb_data_second"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6d059089-5c61-40a1-abd6-c98c19da8e35",
   "metadata": {},
   "outputs": [],
   "source": [
    "varname = \"verb_data\"\n",
    "data = verb_data\n",
    "write_data(data, FILE_NAME, varname)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "843ca357-e622-4c21-b922-d071e13ae7f7",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_gen_pred_1 = model1.predict(X_test_gen_scaled_df)\n",
    "y_gen_pred_2 = model2.predict(X_test_gen_scaled_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5a7848dc-de08-4103-af8f-b7626dd94e0d",
   "metadata": {},
   "outputs": [],
   "source": [
    "gen_data = []\n",
    "for idx in range(len(X_test_gen_scaled_df)):\n",
    "    data_point = {\n",
    "        \"input\": round_values(X_test_gen_scaled_df.iloc[idx].to_list()),\n",
    "        \"output\": {\n",
    "            \"model1\": int(y_gen_pred_1[idx]),\n",
    "            \"model2\": int(y_gen_pred_2[idx])\n",
    "        }\n",
    "    }\n",
    "    gen_data.append(data_point)\n",
    "\n",
    "print(f\"Number of samples in gen_data: {len(gen_data)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b519819d-41f5-43c8-99a0-10eddb54e9bb",
   "metadata": {},
   "outputs": [],
   "source": [
    "varname = \"gen_data\"\n",
    "data = gen_data\n",
    "write_data(data, FILE_NAME, varname)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3c5d2828-0b8c-4a64-864d-4eba1d8dc85c",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5fbb5d95-194c-42e4-8459-d697d666ff4b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def prune_data(gen_data):\n",
    "    return [{\"input\": inst[\"input\"], \"output\": {\"model1\": inst[\"output\"][\"model1\"]}} for inst in gen_data]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a5fcf8e3-28c4-4c5b-8c38-0901990bf833",
   "metadata": {},
   "outputs": [],
   "source": [
    "varname = \"gen_data_pruned\"\n",
    "data = prune_data(gen_data)\n",
    "write_data(data, FILE_NAME, varname)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e478aea3-bb2b-4e57-bd19-8933496cc0c5",
   "metadata": {},
   "outputs": [],
   "source": [
    "def write_structures(structure_text, file_name, varname):\n",
    "    datastr = f\"\\n{varname} = {structure_text}\"\n",
    "    \n",
    "    # write this string to the file\n",
    "    with open(file_name, 'a') as file:\n",
    "        file.write(datastr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3929be6a-6a5e-45ff-b787-881630c16194",
   "metadata": {},
   "outputs": [],
   "source": [
    "STRUCTURES_FILE_NAME = \"./../structures/car/level_1.py\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "25aec849-b94f-449b-a79c-415dc5dc4b60",
   "metadata": {},
   "outputs": [],
   "source": [
    "write_structures(combined_details_verb, STRUCTURES_FILE_NAME, \"combined_details_verb\")\n",
    "write_structures(combined_details_gen, STRUCTURES_FILE_NAME, \"combined_details_gen\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b23107e6-13a7-4fab-8e7e-a1ab924fb94e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_accuracy(a, b):\n",
    "    correct = 0\n",
    "    total = len(a)\n",
    "    \n",
    "    for i in range(len(a)):\n",
    "        if(a[i]['input'] == b[i]['input']):\n",
    "            if(a[i]['output']['model2'] == b[i]['output']['model2']):\n",
    "                correct += 1\n",
    "        else:\n",
    "            print(\"Mismatch\")\n",
    "            print(a[i])\n",
    "            print(b[i])\n",
    "            print(\"\\n\\n\\n\")\n",
    "    \n",
    "    print(correct)\n",
    "    print(correct/total)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "60b3feb5-f253-4760-9c56-369591db901a",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a3b01d64-0f6b-4057-9a8d-c808b9a9171d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def prediction_zero(data):\n",
    "    prediction0_1 = [i for i in range(len(data)) if data[i][\"output\"][\"model1\"] == 0]\n",
    "    prediction0_2 = [i for i in range(len(data)) if data[i][\"output\"][\"model2\"] == 0]\n",
    "    return prediction0_1, prediction0_2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "01e716b4-bf5a-4724-aa01-5b6537739aa0",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(f\"Model 1: {len(prediction_zero(verb_data)[0])}\")\n",
    "print(f\"Model 2: {len(prediction_zero(verb_data)[1])}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cb05145f-549f-4080-94dc-3f35f1c8501d",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(f\"Model 1: {len(prediction_zero(gen_data)[0])}\")\n",
    "print(f\"Model 2: {len(prediction_zero(gen_data)[1])}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9226555f-b347-44df-ac4e-dfd405308b97",
   "metadata": {},
   "outputs": [],
   "source": [
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a537816d-aa82-49a7-b4a4-219e92569403",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2db23ae0-8e63-4b2a-a2dd-7a3acacec8d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to round data and analyze variance\n",
    "def analyze_precision(df, column, decimal_places):\n",
    "    results = []\n",
    "    for decimals in decimal_places:\n",
    "        df_rounded = df.copy()\n",
    "        df_rounded[column] = df_rounded[column].round(decimals)\n",
    "        variance = df_rounded[column].var()\n",
    "        results.append((decimals, variance))\n",
    "    return pd.DataFrame(results, columns=['Decimals', 'Variance'])\n",
    "\n",
    "# Analyze precision from 0 to 5 decimal places\n",
    "# precision_analysis_1 = analyze_precision(X_train_scaled_df, 'Recency (months)', range(7))\n",
    "# precision_analysis_2 = analyze_precision(X_train_scaled_df, 'Frequency (times)', range(7))\n",
    "# precision_analysis_3 = analyze_precision(X_train_scaled_df, 'Monetary (c.c. blood)', range(7))\n",
    "# precision_analysis_4 = analyze_precision(X_train_scaled_df, 'Time (months)', range(7))\n",
    "\n",
    "# print(precision_analysis_1)\n",
    "# print(precision_analysis_2)\n",
    "# print(precision_analysis_3)\n",
    "# print(precision_analysis_4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cc6639ff-2b5e-4862-9fd2-66a2d03e777a",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cd5b02be-8711-4267-a611-6e30d32ef53b",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "20aa3d4e-434d-4bd6-8d0c-d6588720b303",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "af8c9ac1-ec25-41db-8540-f5231046c447",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "677306bc-c8fe-414d-8b1c-a9e611300b5d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6c5d1631-a2bc-47f6-bee4-b0d0e29c8897",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "713f34a5-c0f5-4497-a6d8-dac83fdc1c19",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "30cebf14-074d-49aa-a87b-8b821a5107fc",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0bee44c4-4076-4b75-af66-0075a2bec0ef",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "306dee43-26a4-4d5c-94db-4b5497a1e329",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ee9455c4-d50e-4a6d-bf55-b9559cdddc9b",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
