{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d77e3e2d-0706-4fde-a71e-1ac3b74e8346",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from scipy.spatial.distance import cdist\n",
    "import random\n",
    "from sklearn.cluster import KMeans\n",
    "import sys\n",
    "sys.path.append('../')\n",
    "from icfesl import *\n",
    "from utility_functions import *\n",
    "from xgboost import XGBRegressor\n",
    "from pytorch_tabnet.tab_model import TabNetRegressor\n",
    "import time\n",
    "from sklearn.model_selection import train_test_split\n",
    "import matplotlib.pyplot as plt\n",
    "from scipy.stats import norm\n",
    "from catboost import CatBoostRegressor, Pool"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "dd181542-b52a-4555-afdf-ba920573f9b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "n_samples = 50000\n",
    "sigma = 0.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b6513970-d79a-4beb-9517-dea61573012e",
   "metadata": {},
   "outputs": [],
   "source": [
    "n_variables = 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "59c4ca71-57f4-4828-b297-248dc0e07b66",
   "metadata": {},
   "outputs": [],
   "source": [
    "n_levels = 1000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "664151bb-fc8a-4545-9345-0ee0f1ba41c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "cat_vars = ['var1','var2']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "524eb3b6-1960-4635-b0d9-a90eed58524b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from scipy.stats import norm\n",
    "\n",
    "def generate_correlated_categorical_variables(num_samples, num_variables, sigma, categories_per_variable):\n",
    "\n",
    "    correlation_matrix = np.array([[1.0, sigma],\n",
    "                                   [sigma, 1.0]])\n",
    "\n",
    "    \n",
    "    L = np.linalg.cholesky(correlation_matrix)\n",
    "    uncorrelated_normals = np.random.normal(size=(num_samples, num_variables))\n",
    "    correlated_normals = uncorrelated_normals @ L.T\n",
    "\n",
    "    categorical_data = np.zeros_like(correlated_normals, dtype=int)\n",
    "\n",
    "    for i in range(num_variables):\n",
    "        num_categories = categories_per_variable[i]\n",
    "        quantiles = np.linspace(0, 1, num_categories + 1)[1:-1]\n",
    "        thresholds = norm.ppf(quantiles)\n",
    "\n",
    "        for j in range(num_categories):\n",
    "            if j == 0:\n",
    "                categorical_data[:, i][correlated_normals[:, i] <= thresholds[j]] = j\n",
    "            elif j == num_categories - 1:\n",
    "                categorical_data[:, i][correlated_normals[:, i] > thresholds[j-1]] = j\n",
    "            else:\n",
    "                categorical_data[:, i][(correlated_normals[:, i] > thresholds[j-1]) & (correlated_normals[:, i] <= thresholds[j])] = j\n",
    "\n",
    "    res = pd.DataFrame(categorical_data, columns=[f\"var{i+1}\" for i in range(num_variables)])\n",
    "                       \n",
    "    return res\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "e82673dc-d14a-4b93-9c8f-613f27855b1a",
   "metadata": {},
   "outputs": [],
   "source": [
    "X = generate_correlated_categorical_variables(n_samples, n_variables, sigma, [n_levels, n_levels])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "3049f72f-fc0b-47cd-9341-269490a4a249",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Size of simulated data: 0.7630958557128906 MB\n"
     ]
    }
   ],
   "source": [
    "simulated_data_size = sys.getsizeof(X)\n",
    "print(f\"Size of simulated data: {simulated_data_size/1024**2} MB\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3fb395f5-06e5-4b29-886a-d3d22ea99556",
   "metadata": {},
   "source": [
    "## Simulated study for regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "493bc876-7031-4fd8-a2f6-0d4636372984",
   "metadata": {},
   "outputs": [],
   "source": [
    "beta_var1 = 10\n",
    "beta_var2 = -10\n",
    "intercept = 5\n",
    "\n",
    "error = np.random.normal(loc=0, scale=200, size=n_samples)\n",
    "\n",
    "y = (intercept +\n",
    "     beta_var1 * X['var1'] +\n",
    "     beta_var2 * X['var2'] +\n",
    "     error)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "445831a3-03db-4b65-9414-2c93aab2ee2b",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.concat([X, y], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "d6c95d06-0640-4944-9adc-fbee3c7cade1",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = data.rename(columns={0:\"y\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "e2b61f15-2c83-4de2-bb9e-7acfbd8531a4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjEAAAGdCAYAAADjWSL8AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAALL5JREFUeJzt3X9YlXWe//HXURSBgTMiczgdBWMbMibMdqxB3JnR8vcVMV3ujpUjX7vWrCw1Nr3acWsvqS1p3B11F1czx0stcqjrmpxpdyYK23TG8Fe0XKGRSxOJpgc6LR4wCEw+3z+6vLcjap4jCB94Pq7rvq7O535/Dp/7w528uLk/53YZY4wAAAAsM6CnBwAAABAJQgwAALASIQYAAFiJEAMAAKxEiAEAAFYixAAAACsRYgAAgJUIMQAAwEpRPT2A7tLR0aHjx48rPj5eLperp4cDAAAugTFGzc3N8vl8GjDg4tda+myIOX78uFJSUnp6GAAAIAJHjx7ViBEjLlrTZ0NMfHy8pK8mISEhoYdHAwAALkVTU5NSUlKcn+MX02dDzNk/ISUkJBBiAACwzKXcCsKNvQAAwEqEGAAAYCVCDAAAsBIhBgAAWIkQAwAArESIAQAAViLEAAAAKxFiAACAlQgxAADASoQYAABgJUIMAACwEiEGAABYiRADAACs1GefYg3gyqirq1MgEIiob1JSklJTU7t4RAD6C0IMgIjV1dXpuowMtba0RNQ/JjZWH1RXE2QARIQQAyBigUBArS0tmvXUennS0sPq21Bbo5cfX6BAIECIARARQgyAy+ZJS9fwjDE9PQwA/Qw39gIAACsRYgAAgJUIMQAAwEqEGAAAYCVCDAAAsBIhBgAAWIkl1gB6VHV1dUT9+LRfAIQYAD2iOVAv14ABmjNnTkT9+bRfAIQYAD2itblJpqODT/sFEDFCDIAexaf9AogUN/YCAAArEWIAAICVCDEAAMBKhBgAAGAlQgwAALASIQYAAFiJEAMAAKxEiAEAAFYixAAAACsRYgAAgJUIMQAAwEqEGAAAYCUeAAnAWtXV1RH1S0pK4unXQB9AiAFgneZAvVwDBmjOnDkR9Y+JjdUH1dUEGcByhBgA1mltbpLp6NCsp9bLk5YeVt+G2hq9/PgCBQIBQgxgOUIMAGt50tI1PGNMTw8DQA/hxl4AAGAlQgwAALASIQYAAFiJEAMAAKxEiAEAAFZidRIA1dXVKRAIhN0v0g+bA4CuQIgB+rm6ujpdl5Gh1paWnh4KAISFEAP0c4FAQK0tLRF9cNzht99U2brCbhoZAFwcIQaApMg+OK6htqabRgMA3yysG3sLCgrkcrlCNq/X6+w3xqigoEA+n08xMTGaOHGiDh06FPIebW1tWrRokZKSkhQXF6fc3FwdO3YspKaxsVF5eXlyu91yu93Ky8vTyZMnIz9KAADQ54S9Oun666/XiRMnnK2qqsrZt3LlSq1atUpr167VgQMH5PV6NWXKFDU3Nzs1+fn52r59u0pKSrR7926dOnVKOTk5OnPmjFMze/ZsVVZWqrS0VKWlpaqsrFReXt5lHioAAOhLwv5zUlRUVMjVl7OMMVqzZo0ee+wxzZw5U5K0detWJScna9u2bbr//vsVDAa1adMmvfDCC5o8ebIkqbi4WCkpKdqxY4emTZum6upqlZaWau/evcrKypIkbdy4UdnZ2Tp8+LBGjRp1OccLAAD6iLCvxNTU1Mjn8yktLU133XWXPvroI0lSbW2t/H6/pk6d6tRGR0drwoQJKi8vlyRVVFTo9OnTITU+n0+ZmZlOzZ49e+R2u50AI0njxo2T2+12as6nra1NTU1NIRsAAOi7wgoxWVlZev755/X6669r48aN8vv9Gj9+vD777DP5/X5JUnJyckif5ORkZ5/f79fgwYM1dOjQi9Z4PJ5OX9vj8Tg151NYWOjcQ+N2u5WSkhLOoQEAAMuEFWJmzJihv/7rv9bo0aM1
efJk/f73v5f01Z+NznK5XCF9jDGd2s51bs356r/pfZYtW6ZgMOhsR48evaRjAgAAdrqsxw7ExcVp9OjRqqmpce6TOfdqSUNDg3N1xuv1qr29XY2NjRetqa+v7/S1Pv30005Xeb4uOjpaCQkJIRsAAOi7LivEtLW1qbq6WldddZXS0tLk9XpVVlbm7G9vb9euXbs0fvx4SdLYsWM1aNCgkJoTJ07o4MGDTk12draCwaD279/v1Ozbt0/BYNCpAQAACGt10tKlS3X77bcrNTVVDQ0Neuqpp9TU1KS5c+fK5XIpPz9fK1asUHp6utLT07VixQrFxsZq9uzZkiS326158+ZpyZIlGjZsmBITE7V06VLnz1OSlJGRoenTp2v+/PnasGGDJOm+++5TTk4OK5MAAIAjrBBz7Ngx3X333QoEAvrOd76jcePGae/evRo5cqQk6dFHH1Vra6sefPBBNTY2KisrS2+88Ybi4+Od91i9erWioqI0a9Ystba2atKkSdqyZYsGDhzo1Lz44otavHixs4opNzdXa9eu7YrjBQAAfURYIaakpOSi+10ulwoKClRQUHDBmiFDhqioqEhFRUUXrElMTFRxcXE4QwMAAP3MZd0TAwAA0FMIMQAAwEqEGAAAYCVCDAAAsBIhBgAAWIkQAwAArESIAQAAViLEAAAAKxFiAACAlQgxAADASoQYAABgJUIMAACwEiEGAABYiRADAACsRIgBAABWIsQAAAArEWIAAICVCDEAAMBKhBgAAGAlQgwAALASIQYAAFiJEAMAAKxEiAEAAFYixAAAACtF9fQAAKAnVFdXR9QvKSlJqampXTwaAJEgxADoV5oD9XINGKA5c+ZE1D8mNlYfVFcTZIBegBADoF9pbW6S6ejQrKfWy5OWHlbfhtoavfz4AgUCAUIM0AsQYgD0S560dA3PGNPTwwBwGbixFwAAWIkQAwAArESIAQAAViLEAAAAK3FjL9BH1NXVKRAIhN0v0s9LAYCeRogB+oC6ujpdl5Gh1paWnh4KAFwxhBigDwgEAmptaYnos08Ov/2mytYVdtPIAKD7EGKAPiSSzz5pqK3pptEAQPfixl4AAGAlQgwAALASIQYAAFiJEAMAAKxEiAEAAFYixAAAACsRYgAAgJUIMQAAwEqEGAAAYCVCDAAAsBIhBgAAWIkQAwAArESIAQAAViLEAAAAKxFiAACAlS4rxBQWFsrlcik/P99pM8aooKBAPp9PMTExmjhxog4dOhTSr62tTYsWLVJSUpLi4uKUm5urY8eOhdQ0NjYqLy9PbrdbbrdbeXl5Onny5OUMFwAA9CERh5gDBw7oueee0w033BDSvnLlSq1atUpr167VgQMH5PV6NWXKFDU3Nzs1+fn52r59u0pKSrR7926dOnVKOTk5OnPmjFMze/ZsVVZWqrS0VKWlpaqsrFReXl6kwwUAAH1MRCHm1KlT+tnPfqaNGzdq6NChTrsxRmvWrNFjjz2mmTNnKjMzU1u3blVLS4u2bdsmSQoGg9q0aZN++ctfavLkyfrLv/xLFRcXq6qqSjt27JAkVVdXq7S0VL/61a+UnZ2t7Oxsbdy4Uf/5n/+pw4cPd8FhAwAA20UUYh566CHddtttmjx5ckh7bW2t/H6/pk6d6rRFR0drwoQJKi8vlyRVVFTo9OnTITU+n0+ZmZlOzZ49e+R2u5WVleXUjBs3Tm6326k5V1tbm5qamkI2AADQd0WF26GkpETvvvuuDhw40Gmf3++XJCUnJ4e0Jycn68iRI07N4MGDQ67gnK0529/v98vj8XR6f4/H49Scq7CwUE888US4hwMAACwV1pWYo0eP6uGHH1ZxcbGGDBlywTqXyxXy2hjTqe1c59acr/5i77Ns2TIFg0FnO3r06EW/HgAAsFtYIaaiokINDQ0aO3asoqKiFBUVpV27dunf/u3fFBUV5VyBOfdqSUNDg7PP6/Wqvb1djY2NF62pr6/v9PU//fTTTld5zoqOjlZC
QkLIBgAA+q6wQsykSZNUVVWlyspKZ7vpppv0s5/9TJWVlfqLv/gLeb1elZWVOX3a29u1a9cujR8/XpI0duxYDRo0KKTmxIkTOnjwoFOTnZ2tYDCo/fv3OzX79u1TMBh0agAAQP8W1j0x8fHxyszMDGmLi4vTsGHDnPb8/HytWLFC6enpSk9P14oVKxQbG6vZs2dLktxut+bNm6clS5Zo2LBhSkxM1NKlSzV69GjnRuGMjAxNnz5d8+fP14YNGyRJ9913n3JycjRq1KjLPmgAAGC/sG/s/SaPPvqoWltb9eCDD6qxsVFZWVl64403FB8f79SsXr1aUVFRmjVrllpbWzVp0iRt2bJFAwcOdGpefPFFLV682FnFlJubq7Vr13b1cAEAgKUuO8Ts3Lkz5LXL5VJBQYEKCgou2GfIkCEqKipSUVHRBWsSExNVXFx8ucMDAAB9FM9OAgAAViLEAAAAKxFiAACAlQgxAADASoQYAABgJUIMAACwEiEGAABYiRADAACsRIgBAABWIsQAAAArEWIAAICVCDEAAMBKhBgAAGAlQgwAALASIQYAAFiJEAMAAKxEiAEAAFYixAAAACsRYgAAgJUIMQAAwEqEGAAAYCVCDAAAsBIhBgAAWIkQAwAArESIAQAAVorq6QEAgG2qq6sj6peUlKTU1NQuHg3QfxFiAOASNQfq5RowQHPmzImof0xsrD6oribIAF2EEAP0InV1dQoEAmH3i/TKAMLT2twk09GhWU+tlyctPay+DbU1evnxBQoEAoQYoIsQYoBeoq6uTtdlZKi1paWnh4Jv4ElL1/CMMT09DKDfI8QAvUQgEFBrS0tEv+UffvtNla0r7KaRAUDvRIgBeplIfstvqK3pptEAQO/FEmsAAGAlQgwAALASIQYAAFiJEAMAAKxEiAEAAFYixAAAACsRYgAAgJUIMQAAwEqEGAAAYCVCDAAAsBIhBgAAWIkQAwAArESIAQAAViLEAAAAKxFiAACAlQgxAADASoQYAABgJUIMAACwEiEGAABYiRADAACsFFaIWb9+vW644QYlJCQoISFB2dnZeu2115z9xhgVFBTI5/MpJiZGEydO1KFDh0Leo62tTYsWLVJSUpLi4uKUm5urY8eOhdQ0NjYqLy9PbrdbbrdbeXl5OnnyZORHCQAA+pywQsyIESP0zDPP6J133tE777yjW2+9VT/5yU+coLJy5UqtWrVKa9eu1YEDB+T1ejVlyhQ1Nzc775Gfn6/t27erpKREu3fv1qlTp5STk6MzZ844NbNnz1ZlZaVKS0tVWlqqyspK5eXlddEhAwCAviAqnOLbb7895PXTTz+t9evXa+/evfre976nNWvW6LHHHtPMmTMlSVu3blVycrK2bdum+++/X8FgUJs2bdILL7ygyZMnS5KKi4uVkpKiHTt2aNq0aaqurlZpaan27t2rrKwsSdLGjRuVnZ2tw4cPa9SoUV1x3AAAwHIR3xNz5swZlZSU6PPPP1d2drZqa2vl9/s1depUpyY6OloTJkxQeXm5JKmiokKnT58OqfH5fMrMzHRq9uzZI7fb7QQYSRo3bpzcbrdTcz5tbW1qamoK2QAAQN8VdoipqqrSt771LUVHR+uBBx7Q9u3b9b3vfU9+v1+SlJycHFKfnJzs7PP7/Ro8eLCGDh160RqPx9Pp63o8HqfmfAoLC517aNxut1JSUsI9NAAAYJGwQ8yoUaNUWVmpvXv3asGCBZo7d67ef/99Z7/L5QqpN8Z0ajvXuTXnq/+m91m2bJmCwaCzHT169FIPCQAAWCjsEDN48GB997vf1U033aTCwkKNGTNG//qv/yqv1ytJna6WNDQ0OFdnvF6v2tvb1djYeNGa+vr6Tl/3008/7XSV5+uio6OdVVNnNwAA0Hdd9ufEGGPU1tamtLQ0eb1elZWVOfva29u1a9cujR8/XpI0duxYDRo0KKTmxIkTOnjwoFOTnZ2tYDCo/fv3OzX79u1TMBh0agAAAMJa
nfQP//APmjFjhlJSUtTc3KySkhLt3LlTpaWlcrlcys/P14oVK5Senq709HStWLFCsbGxmj17tiTJ7XZr3rx5WrJkiYYNG6bExEQtXbpUo0ePdlYrZWRkaPr06Zo/f742bNggSbrvvvuUk5PDyiQAAOAIK8TU19crLy9PJ06ckNvt1g033KDS0lJNmTJFkvToo4+qtbVVDz74oBobG5WVlaU33nhD8fHxznusXr1aUVFRmjVrllpbWzVp0iRt2bJFAwcOdGpefPFFLV682FnFlJubq7Vr13bF8QIAgD4irBCzadOmi+53uVwqKChQQUHBBWuGDBmioqIiFRUVXbAmMTFRxcXF4QwNAAD0Mzw7CQAAWIkQAwAArESIAQAAViLEAAAAKxFiAACAlQgxAADASoQYAABgJUIMAACwEiEGAABYiRADAACsRIgBAABWIsQAAAArEWIAAICVCDEAAMBKhBgAAGAlQgwAALASIQYAAFiJEAMAAKxEiAEAAFYixAAAACtF9fQAAKA/qa6ujqhfUlKSUlNTu3g0gN0IMQBwBTQH6uUaMEBz5syJqH9MbKw+qK4myABfQ4gBgCugtblJpqNDs55aL09aelh9G2pr9PLjCxQIBAgxwNcQYgDgCvKkpWt4xpieHgbQJ3BjLwAAsBIhBgAAWIk/JwFdrK6uToFAIOx+ka5aAYD+ihADdKG6ujpdl5Gh1paWnh4KAPR5hBigCwUCAbW2tES0AuXw22+qbF1hN40MAPoeQgzQDSJZgdJQW9NNowGAvokbewEAgJUIMQAAwEqEGAAAYCVCDAAAsBIhBgAAWIkQAwAArESIAQAAViLEAAAAKxFiAACAlQgxAADASoQYAABgJUIMAACwEiEGAABYiRADAACsRIgBAABWIsQAAAArEWIAAICVCDEAAMBKhBgAAGAlQgwAALASIQYAAFgprBBTWFiom2++WfHx8fJ4PLrjjjt0+PDhkBpjjAoKCuTz+RQTE6OJEyfq0KFDITVtbW1atGiRkpKSFBcXp9zcXB07diykprGxUXl5eXK73XK73crLy9PJkycjO0oAANDnhBVidu3apYceekh79+5VWVmZvvzyS02dOlWff/65U7Ny5UqtWrVKa9eu1YEDB+T1ejVlyhQ1Nzc7Nfn5+dq+fbtKSkq0e/dunTp1Sjk5OTpz5oxTM3v2bFVWVqq0tFSlpaWqrKxUXl5eFxwyAADoC6LCKS4tLQ15vXnzZnk8HlVUVOjHP/6xjDFas2aNHnvsMc2cOVOStHXrViUnJ2vbtm26//77FQwGtWnTJr3wwguaPHmyJKm4uFgpKSnasWOHpk2bpurqapWWlmrv3r3KysqSJG3cuFHZ2dk6fPiwRo0a1RXHDgAALHZZ98QEg0FJUmJioiSptrZWfr9fU6dOdWqio6M1YcIElZeXS5IqKip0+vTpkBqfz6fMzEynZs+ePXK73U6AkaRx48bJ7XY7Nedqa2tTU1NTyAYAAPquiEOMMUaPPPKIfvjDHyozM1OS5Pf7JUnJyckhtcnJyc4+v9+vwYMHa+jQoRet8Xg8nb6mx+Nxas5VWFjo3D/jdruVkpIS6aEBAAALRBxiFi5cqPfee0+//vWvO+1zuVwhr40xndrOdW7N+eov9j7Lli1TMBh0tqNHj17KYQAAAEtFFGIWLVqkV199VW+99ZZGjBjhtHu9XknqdLWkoaHBuTrj9XrV3t6uxsbGi9bU19d3+rqffvppp6s8Z0VHRyshISFkAwAAfVdYIcYYo4ULF+qVV17Rf/3XfyktLS1kf1pamrxer8rKypy29vZ27dq1S+PHj5ckjR07VoMGDQqpOXHihA4ePOjUZGdnKxgMav/+/U7Nvn37FAwGnRoAANC/hbU66aGHHtK2bdv0u9/9TvHx8c4VF7fbrZiYGLlcLuXn52vFihVKT09Xenq6VqxYodjYWM2ePdupnTdvnpYsWaJhw4YpMTFRS5cu1ejRo53VShkZGZo+
fbrmz5+vDRs2SJLuu+8+5eTksDIJAABICjPErF+/XpI0ceLEkPbNmzfrnnvukSQ9+uijam1t1YMPPqjGxkZlZWXpjTfeUHx8vFO/evVqRUVFadasWWptbdWkSZO0ZcsWDRw40Kl58cUXtXjxYmcVU25urtauXRvJMQIAgD4orBBjjPnGGpfLpYKCAhUUFFywZsiQISoqKlJRUdEFaxITE1VcXBzO8AAAQD/Cs5MAAICVCDEAAMBKhBgAAGAlQgwAALASIQYAAFiJEAMAAKxEiAEAAFYixAAAACsRYgAAgJUIMQAAwEqEGAAAYCVCDAAAsFJYD4AEAPSc6urqiPolJSUpNTW1i0cD9DxCDHAedXV1CgQCYfeL9IcMcDHNgXq5BgzQnDlzIuofExurD6qrCTLocwgxwDnq6up0XUaGWltaenoogCSptblJpqNDs55aL09aelh9G2pr9PLjCxQIBAgx6HMIMcA5AoGAWltaIvqBcfjtN1W2rrCbRob+zpOWruEZY3p6GECvQYgBLiCSHxgNtTXdNBoAwLlYnQQAAKxEiAEAAFYixAAAACsRYgAAgJUIMQAAwEqEGAAAYCVCDAAAsBIhBgAAWIkQAwAArESIAQAAViLEAAAAKxFiAACAlQgxAADASoQYAABgJUIMAACwEiEGAABYiRADAACsRIgBAABWIsQAAAArEWIAAICVCDEAAMBKhBgAAGAlQgwAALASIQYAAFiJEAMAAKxEiAEAAFYixAAAACsRYgAAgJUIMQAAwEqEGAAAYCVCDAAAsBIhBgAAWIkQAwAArESIAQAAVgo7xPzxj3/U7bffLp/PJ5fLpd/+9rch+40xKigokM/nU0xMjCZOnKhDhw6F1LS1tWnRokVKSkpSXFyccnNzdezYsZCaxsZG5eXlye12y+12Ky8vTydPngz7AAEAQN8Udoj5/PPPNWbMGK1du/a8+1euXKlVq1Zp7dq1OnDggLxer6ZMmaLm5manJj8/X9u3b1dJSYl2796tU6dOKScnR2fOnHFqZs+ercrKSpWWlqq0tFSVlZXKy8uL4BABAEBfFBVuhxkzZmjGjBnn3WeM0Zo1a/TYY49p5syZkqStW7cqOTlZ27Zt0/33369gMKhNmzbphRde0OTJkyVJxcXFSklJ0Y4dOzRt2jRVV1ertLRUe/fuVVZWliRp48aNys7O1uHDhzVq1KhIjxcA+qXq6uqI+iUlJSk1NbWLRwN0jbBDzMXU1tbK7/dr6tSpTlt0dLQmTJig8vJy3X///aqoqNDp06dDanw+nzIzM1VeXq5p06Zpz549crvdToCRpHHjxsntdqu8vPy8IaatrU1tbW3O66ampq48NFiorq5OgUAg7H6R/mMP9EbNgXq5BgzQnDlzIuofExurD6qrCTLolbo0xPj9fklScnJySHtycrKOHDni1AwePFhDhw7tVHO2v9/vl8fj6fT+Ho/HqTlXYWGhnnjiics+BvQNdXV1ui4jQ60tLT09FKBHtTY3yXR0aNZT6+VJSw+rb0NtjV5+fIECgQAhBr1Sl4aYs1wuV8hrY0yntnOdW3O++ou9z7Jly/TII484r5uampSSkhLOsNGHBAIBtba0RPQP9+G331TZusJuGhnQMzxp6RqeMaanhwF0qS4NMV6vV9JXV1Kuuuoqp72hocG5OuP1etXe3q7GxsaQqzENDQ0aP368U1NfX9/p/T/99NNOV3nOio6OVnR0dJcdC/qGSP7hbqit6abRAAC6Upd+TkxaWpq8Xq/Kysqctvb2du3atcsJKGPHjtWgQYNCak6cOKGDBw86NdnZ2QoGg9q/f79Ts2/fPgWDQacGAAD0b2FfiTl16pQ+/PBD53Vtba0qKyuVmJio1NRU5efna8WKFUpPT1d6erpWrFih2NhYzZ49W5Lkdrs1b948LVmyRMOGDVNiYqKWLl2q0aNHO6uVMjIyNH36dM2fP18bNmyQJN13333KyclhZRIAAJAUQYh55513
dMsttzivz96HMnfuXG3ZskWPPvqoWltb9eCDD6qxsVFZWVl64403FB8f7/RZvXq1oqKiNGvWLLW2tmrSpEnasmWLBg4c6NS8+OKLWrx4sbOKKTc394KfTQMAAPqfsEPMxIkTZYy54H6Xy6WCggIVFBRcsGbIkCEqKipSUVHRBWsSExNVXFwc7vAAAEA/wbOTAACAlQgxAADASoQYAABgJUIMAACwEiEGAABYiRADAACsRIgBAABWIsQAAAArEWIAAICVCDEAAMBKhBgAAGAlQgwAALASIQYAAFiJEAMAAKxEiAEAAFYixAAAACtF9fQAgIupq6tTIBAIu191dXU3jAYA0JsQYtBr1dXV6bqMDLW2tPT0UIB+LdJfCpKSkpSamtrFowH+DyEGvVYgEFBrS4tmPbVenrT0sPoefvtNla0r7KaRAf1Dc6BergEDNGfOnIj6x8TG6oPqaoIMug0hBr2eJy1dwzPGhNWnobamm0YD9B+tzU0yHR0R/SLRUFujlx9foEAgQIhBtyHEAAAuKpJfJIArgdVJAADASoQYAABgJUIMAACwEiEGAABYiRADAACsRIgBAABWIsQAAAArEWIAAICVCDEAAMBKhBgAAGAlQgwAALASIQYAAFiJB0ACALpNdXV1RP2SkpJ4+jW+ESEGANDlmgP1cg0YoDlz5kTUPyY2Vh9UVxNkcFGEGABAl2ttbpLp6NCsp9bLk5YeVt+G2hq9/PgCBQIBQgwuihCDbldXV6dAIBB2v0gvQwPoPTxp6RqeMaanh4E+ihCDblVXV6frMjLU2tLS00MBAPQxhBh0q0AgoNaWloguKR9++02VrSvsppEBAGxHiMEVEckl5Ybamm4aDQCgL+BzYgAAgJUIMQAAwEqEGAAAYCVCDAAAsBIhBgAAWInVSQCAXonnLuGbEGJwSfjUXQBXCs9dwqUixOAb8am7AK4knruES0WIwTfiU3cB9ASeu4RvQojBJeNTdwEAvQkhph/hvhYAQF/S60PMunXr9M///M86ceKErr/+eq1Zs0Y/+tGPenpY1uG+FgD9yeX88sXqJnv06hDz0ksvKT8/X+vWrdNf/dVfacOGDZoxY4bef/99TrAwcV8LgP7gclc2SaxuskmvDjGrVq3SvHnzdO+990qS1qxZo9dff13r169XYWH//KF6uX8S4r4WAH3Z5axskv5vddOf/vQnZWRkhN2fqzhXVq8NMe3t7aqoqNDPf/7zkPapU6eqvLy8U31bW5va2tqc18FgUJLU1NTULePz+/3y+/0R9R0wYIA6OjrC7ldfX6+8//f/1PbFFxF9XUn6pPo9tbd8HlafTz+uoS996Utfq/qe/qI17L6SdLL+uORyRXwlJ3rIEL3w/PNKTk4Ou2+kPxt6sq/X65XX642o74Wc/bltjPnmYtNLffLJJ0aSefvtt0Pan376aXPttdd2ql++fLmRxMbGxsbGxtYHtqNHj35jVui1V2LOcrlcIa+NMZ3aJGnZsmV65JFHnNcdHR363//9Xw0bNuy89VdaU1OTUlJSdPToUSUkJPT0cHoM8/AV5oE5OIt5+Arz8BXm4auf883NzfL5fN9Y22tDTFJSkgYOHNjpTzYNDQ3nvUwXHR2t6OjokLZvf/vb3TnEiCQkJPTbE/PrmIevMA/MwVnMw1eYh6/093lwu92XVNdrn2I9ePBgjR07VmVlZSHtZWVlGj9+fA+NCgAA9Ba99kqMJD3yyCPKy8vTTTfdpOzsbD333HOqq6vTAw880NNDAwAAPaxXh5g777xTn332mZ588kmdOHFCmZmZ+sMf/qCRI0f29NDCFh0dreXLl3f6k1d/wzx8hXlgDs5iHr7CPHyFeQiPy5hLWcMEAADQu/Tae2IAAAAuhhADAACsRIgBAABWIsQAAAArEWK60M6dO+Vyuc67HThwwKk73/5nn3025L2qqqo0YcIExcTEaPjw4XryyScv7TkSvcTVV1/d6RjPfQ5WXV2dbr/9dsXF
xSkpKUmLFy9We3t7SI3N8/Dxxx9r3rx5SktLU0xMjK655hotX7680zH2h/PhfNatW6e0tDQNGTJEY8eO1Z/+9KeeHlKXKSws1M0336z4+Hh5PB7dcccdOnz4cEjNPffc0+n7Pm7cuJCatrY2LVq0SElJSYqLi1Nubq6OHTt2JQ/lshQUFHQ6xq8/Z8cYo4KCAvl8PsXExGjixIk6dOhQyHvYPgfS+f89dLlceuihhyT1j3Oh21z2Q47gaGtrMydOnAjZ7r33XnP11Vebjo4Op06S2bx5c0hdS0uLsz8YDJrk5GRz1113maqqKvOb3/zGxMfHm3/5l3/picOKyMiRI82TTz4ZcozNzc3O/i+//NJkZmaaW265xbz77rumrKzM+Hw+s3DhQqfG9nl47bXXzD333GNef/118+c//9n87ne/Mx6PxyxZsiSkrj+cD+cqKSkxgwYNMhs3bjTvv/++efjhh01cXJw5cuRITw+tS0ybNs1s3rzZHDx40FRWVprbbrvNpKammlOnTjk1c+fONdOnTw/5vn/22Wch7/PAAw+Y4cOHm7KyMvPuu++aW265xYwZM8Z8+eWXV/qQIrJ8+XJz/fXXhxxjQ0ODs/+ZZ54x8fHx5je/+Y2pqqoyd955p7nqqqtMU1OTU2P7HBhjTENDQ8gclJWVGUnmrbfeMsb0j3OhuxBiulF7e7vxeDzmySefDGmXZLZv337BfuvWrTNut9t88cUXTlthYaHx+XwhYag3GzlypFm9evUF9//hD38wAwYMMJ988onT9utf/9pER0ebYDBojOkb83CulStXmrS0tJC2/nA+nOsHP/iBeeCBB0LarrvuOvPzn/+8h0bUvRoaGowks2vXLqdt7ty55ic/+ckF+5w8edIMGjTIlJSUOG2ffPKJGTBggCktLe3O4XaZ5cuXmzFjxpx3X0dHh/F6veaZZ55x2r744gvjdrvNs88+a4zpG3NwPg8//LC55pprnP9/+8O50F34c1I3evXVVxUIBHTPPfd02rdw4UIlJSXp5ptv1rPPPhvyGPQ9e/ZowoQJIR92NG3aNB0/flwff/zxFRh51/jFL36hYcOG6cYbb9TTTz8d8meUPXv2KDMzM+QBX9OmTVNbW5sqKiqcmr4wD18XDAaVmJjYqb0/nA9ntbe3q6KiQlOnTg1pnzp1qsrLy3toVN0rGAxKUqfv/c6dO+XxeHTttddq/vz5amhocPZVVFTo9OnTIfPk8/mUmZlp1TzV1NTI5/MpLS1Nd911lz766CNJUm1trfx+f8jxRUdHa8KECc7x9ZU5+Lr29nYVFxfrb//2b0MeTtwfzoXu0Ks/sdd2mzZt0rRp05SSkhLS/k//9E+aNGmSYmJi9Oabb2rJkiUKBAJ6/PHHJUl+v19XX311SJ+zD730+/1KS0u7IuO/HA8//LC+//3va+jQodq/f7+WLVum2tpa/epXv5L01XGc+yDPoUOHavDgwc5DP/vCPHzdn//8ZxUVFemXv/xlSHt/OB++LhAI6MyZM52+/8nJyZ0e+NoXGGP0yCOP6Ic//KEyMzOd9hkzZuinP/2pRo4cqdraWv3jP/6jbr31VlVUVCg6Olp+v1+DBw/W0KFDQ97PpnnKysrS888/r2uvvVb19fV66qmnNH78eB06dMg5hvOdB0eOHJGkPjEH5/rtb3+rkydPhvxy2x/Ohe5CiLkEBQUFeuKJJy5ac+DAAd10003O62PHjun111/Xyy+/3Kn27A8nSbrxxhslSU8++WRI+9cTuiTnJs5z26+kcObh7/7u75y2G264QUOHDtXf/M3fOFdnpPMfizEmpN32eTjr+PHjmj59un7605/q3nvvDam19Xy4XOc7JpuP50IWLlyo9957T7t37w5pv/POO53/zszM1E033aSRI0fq97//vWbOnHnB97NpnmbMmOH89+jRo5Wdna1rrrlGW7dudW5cjeQ8sGkOzrVp0ybNmDEj5Cp0fzgXugsh5hIsXLhQ
d91110Vrzv1NefPmzRo2bJhyc3O/8f3HjRunpqYm1dfXKzk5WV6vt1O6Pntp8dzfWq6kSObhrLP/YH344YcaNmyYvF6v9u3bF1LT2Nio06dPO8fYV+bh+PHjuuWWW5yHmH4TW86HSCUlJWngwIHnPSYbj+diFi1apFdffVV//OMfNWLEiIvWXnXVVRo5cqRqamokfXX+t7e3q7GxMeQ38IaGBo0fP75bx91d4uLiNHr0aNXU1OiOO+6Q9NXVlquuusqp+fp50Nfm4MiRI9qxY4deeeWVi9b1h3Ohy/TUzTh9WUdHh0lLS+u0CuVCioqKzJAhQ5wbN9etW2e+/e1vm7a2NqfmmWeesfpGzv/4j/8wkpzVJ2dv7D1+/LhTU1JS0unGXtvn4dixYyY9Pd3cddddl7yKoD+cDz/4wQ/MggULQtoyMjL6zI29HR0d5qGHHjI+n8/8z//8zyX1CQQCJjo62mzdutUY8383c7700ktOzfHjx62+mfOLL74ww4cPN0888YRzY+8vfvELZ39bW9t5b+ztK3OwfPly4/V6zenTpy9a1x/Oha5CiOkGO3bsMJLM+++/32nfq6++ap577jlTVVVlPvzwQ7Nx40aTkJBgFi9e7NScPHnSJCcnm7vvvttUVVWZV155xSQkJFizpLa8vNysWrXK/Pd//7f56KOPzEsvvWR8Pp/Jzc11as4usZ40aZJ59913zY4dO8yIESNClljbPg+ffPKJ+e53v2tuvfVWc+zYsZDlk2f1h/PhfM4usd60aZN5//33TX5+vomLizMff/xxTw+tSyxYsMC43W6zc+fO8y6db25uNkuWLDHl5eWmtrbWvPXWWyY7O9sMHz680/LiESNGmB07dph3333X3HrrrVYtq12yZInZuXOn+eijj8zevXtNTk6OiY+Pd77PzzzzjHG73eaVV14xVVVV5u677z7vEmub5+CsM2fOmNTUVPP3f//3Ie395VzoLoSYbnD33Xeb8ePHn3ffa6+9Zm688UbzrW99y8TGxprMzEyzZs2aTsn8vffeMz/60Y9MdHS08Xq9pqCgwJrfuisqKkxWVpZxu91myJAhZtSoUWb58uXm888/D6k7cuSIue2220xMTIxJTEw0CxcuDFlGbIzd87B582Yj6bzbWf3hfLiQf//3fzcjR440gwcPNt///vdDlh/b7kLf982bNxtjjGlpaTFTp0413/nOd8ygQYNMamqqmTt3rqmrqwt5n9bWVrNw4UKTmJhoYmJiTE5OTqea3uzs574MGjTI+Hw+M3PmTHPo0CFnf0dHh3N1Ijo62vz4xz82VVVVIe9h+xyc9frrrxtJ5vDhwyHt/eVc6C4uYyz/2E8AANAv8TkxAADASoQYAABgJUIMAACwEiEGAABYiRADAACsRIgBAABWIsQAAAArEWIAAICVCDEAAMBKhBgAAGAlQgwAALASIQYAAFjp/wM5X7SK2mV4/AAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "_ = plt.hist(error, bins=30, color='skyblue', edgecolor='black')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "7d8a5ba5-57a8-415b-a1fb-8d97d2b2a787",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Axes: >"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjoAAAGdCAYAAAAbudkLAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAMctJREFUeJzt3X9YlXWe//EXIh7BgZPowpEipdYxCytXE6FmtFTQlZi5vHbMbBm7xszG1EhdJ9eZEW1FZSb1umD6oeulbmi2s2bjTobgTto44I9INn+N1WSWKWKFoGmHE3y+f/Tl3o6gop3Dj0/Px3Vx6fnc7/O5P29uOLy4Ofc5IcYYIwAAAAt1aO0FAAAABAtBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgrY6tvYDWVF9frxMnTigyMlIhISGtvRwAANAMxhidPXtWcXFx6tDh8udsvtNB58SJE4qPj2/tZQAAgGvw8ccf64YbbrhszXc66ERGRkr6+hMVFRUV0Ll9Pp+KioqUmpqqsLCwgM7dFtjcn829SXb3Z3Nvkt392dybZHd/rdFbTU2N4uPjnZ/jl/OdDjoNf66KiooKStCJiIhQVFSUdV/Ukt392dybZHd/Nvcm2d2fzb1JdvfXmr0152knPBkZAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFodW3sBANqHXk+91tpLuCJXqFHuICkxe6u8dSH6cPHo1l4SgFbGGR0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLV4HR0A1moPr/1zMV77BwgszugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALDWVQedN998U/fff7/i4uIUEhKiV1991W+7MUbZ2dmKi4tTeHi4hg4dqoMHD/rVeL1eTZs2Td27d1eXLl2UkZGh48eP+9VUVVUpMzNTbrdbbrdbmZmZOnPmjF/NRx99pPvvv19dunRR9+7dNX36dNXW1l5tSwAAwFJXHXS++OIL3XHHHcrPz29ye25urpYuXar8/Hzt3btXHo9HI0aM0NmzZ52arKwsbdq0SRs2bNDOnTt17tw5paenq66uzqkZP368ysvLVVhYqMLCQpWXlyszM9PZXldXp9GjR+uLL77Qzp07tWHDBm3cuFEzZ8682pYAAIClrvp1dEaNGqVRo0Y1uc0Yo+XLl2vu3LkaM2aMJGnt2rWKjY3V+vXrNXnyZFVXV2vVqlV68cUXNXz4cElSQUGB4uPjtW3bNqWlpenw4cMqLCzUrl27lJSUJElauXKlkpOTdeTIEfXp00dFRUU6dOiQPv74Y8XFxUmSnnnmGT388MNauHChoqKirukTAgAA7BHQFww8evSoKioqlJqa6oy5XC4NGTJEJSUlmjx5ssrKyuTz+fxq4uLilJiYqJKSEqWlpam0tFRut9sJOZI0ePBgud1ulZSUqE+fPiotLVViYqITciQpLS1NXq9XZWVluvfeexutz+v1yuv1OrdramokST6fTz6fL5CfCme+QM/bVtjcn829SdfenyvUBGM5AeXqYPz+bY8ud1xs/tq0uTfJ7v5ao7er2VdAg05FRYUkKTY21m88NjZWx44dc2o6deqkrl27NqppuH9FRYViYmIazR8TE+NXc/F+unbtqk6dOjk1F1u0aJHmz5/faLyoqEgRERHNafGqFRcXB2XetsLm/mzuTbr6/nIHBWkhQfD0wPrWXsI127JlyxVrbP7atLk3ye7+WrK38+fPN7s2KG8BERIS4nfbGNNo7GIX1zRVfy013zRnzhzNmDHDuV1TU6P4+HilpqYG/E9dPp9PxcXFGjFihMLCwgI6d1tgc3829yZde3+J2VuDuKrAcHUwenpgvX71Vgd56y//
mNNWHchOu+Q2m782be5Nsru/1uit4S8yzRHQoOPxeCR9fbalR48eznhlZaVz9sXj8ai2tlZVVVV+Z3UqKyuVkpLi1Jw6darR/KdPn/abZ/fu3X7bq6qq5PP5Gp3paeByueRyuRqNh4WFBe3gBHPutsDm/mzuTbr6/rx17Sc4eOtD2tV6v6k5x8Tmr02be5Ps7q8le7ua/QT0dXQSEhLk8Xj8Tl/V1tZqx44dTogZMGCAwsLC/GpOnjypAwcOODXJycmqrq7Wnj17nJrdu3erurrar+bAgQM6efKkU1NUVCSXy6UBAwYEsi0AANBOXfUZnXPnzun99993bh89elTl5eWKjo7WjTfeqKysLOXk5Kh3797q3bu3cnJyFBERofHjx0uS3G63Jk6cqJkzZ6pbt26Kjo7WrFmz1K9fP+cqrL59+2rkyJGaNGmSXnjhBUnSo48+qvT0dPXp00eSlJqaqltvvVWZmZn6zW9+o88//1yzZs3SpEmTuOIKAABIuoag89Zbb/ld0dTwnJcJEyZozZo1mj17ti5cuKApU6aoqqpKSUlJKioqUmRkpHOfZcuWqWPHjho7dqwuXLigYcOGac2aNQoNDXVq1q1bp+nTpztXZ2VkZPi9dk9oaKhee+01TZkyRXfffbfCw8M1fvx4/fa3v736zwIAALDSVQedoUOHyphLX7oZEhKi7OxsZWdnX7Kmc+fOysvLU15e3iVroqOjVVBQcNm13HjjjfrjH/94xTUDAIDvJt7rCgAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgrYAHna+++kq//OUvlZCQoPDwcN10001asGCB6uvrnRpjjLKzsxUXF6fw8HANHTpUBw8e9JvH6/Vq2rRp6t69u7p06aKMjAwdP37cr6aqqkqZmZlyu91yu93KzMzUmTNnAt0SAABopwIedJYsWaLnn39e+fn5Onz4sHJzc/Wb3/xGeXl5Tk1ubq6WLl2q/Px87d27Vx6PRyNGjNDZs2edmqysLG3atEkbNmzQzp07de7cOaWnp6uurs6pGT9+vMrLy1VYWKjCwkKVl5crMzMz0C0BAIB2qmOgJywtLdWPfvQjjR49WpLUq1cvvfTSS3rrrbckfX02Z/ny5Zo7d67GjBkjSVq7dq1iY2O1fv16TZ48WdXV1Vq1apVefPFFDR8+XJJUUFCg+Ph4bdu2TWlpaTp8+LAKCwu1a9cuJSUlSZJWrlyp5ORkHTlyRH369Al0awAAoJ0J+Bmde+65R//zP/+jd999V5L0v//7v9q5c6f+8R//UZJ09OhRVVRUKDU11bmPy+XSkCFDVFJSIkkqKyuTz+fzq4mLi1NiYqJTU1paKrfb7YQcSRo8eLDcbrdTAwAAvtsCfkbnF7/4haqrq3XLLbcoNDRUdXV1WrhwoR588EFJUkVFhSQpNjbW736xsbE6duyYU9OpUyd17dq1UU3D/SsqKhQTE9No/zExMU7Nxbxer7xer3O7pqZGkuTz+eTz+a6l3UtqmC/Q87YVNvdnc2/StffnCjXBWE5AuToYv3/bo8sdF5u/Nm3uTbK7v9bo7Wr2FfCg8/LLL6ugoEDr16/XbbfdpvLycmVlZSku
Lk4TJkxw6kJCQvzuZ4xpNHaxi2uaqr/cPIsWLdL8+fMbjRcVFSkiIuKy+75WxcXFQZm3rbC5P5t7k66+v9xBQVpIEDw9sP7KRW3Uli1brlhj89emzb1JdvfXkr2dP3++2bUBDzr/8i//oqeeekrjxo2TJPXr10/Hjh3TokWLNGHCBHk8Hklfn5Hp0aOHc7/KykrnLI/H41Ftba2qqqr8zupUVlYqJSXFqTl16lSj/Z8+fbrR2aIGc+bM0YwZM5zbNTU1io+PV2pqqqKior5l5/58Pp+Ki4s1YsQIhYWFBXTutsDm/mzuTbr2/hKztwZxVYHh6mD09MB6/eqtDvLWX/4Xp7bqQHbaJbfZ/LVpc2+S3f21Rm8Nf5FpjoAHnfPnz6tDB/+n/oSGhjqXlyckJMjj8ai4uFj9+/eXJNXW1mrHjh1asmSJJGnAgAEKCwtTcXGxxo4dK0k6efKkDhw4oNzcXElScnKyqqurtWfPHg0a9PWvmrt371Z1dbUThi7mcrnkcrkajYeFhQXt4ARz7rbA5v5s7k26+v68de0nOHjrQ9rVer+pOcfE5q9Nm3uT7O6vJXu7mv0EPOjcf//9WrhwoW688Ubddttt2rdvn5YuXaqf/exnkr7+c1NWVpZycnLUu3dv9e7dWzk5OYqIiND48eMlSW63WxMnTtTMmTPVrVs3RUdHa9asWerXr59zFVbfvn01cuRITZo0SS+88IIk6dFHH1V6ejpXXAEAAElBCDp5eXn61a9+pSlTpqiyslJxcXGaPHmyfv3rXzs1s2fP1oULFzRlyhRVVVUpKSlJRUVFioyMdGqWLVumjh07auzYsbpw4YKGDRumNWvWKDQ01KlZt26dpk+f7lydlZGRofz8/EC3BAAA2qmAB53IyEgtX75cy5cvv2RNSEiIsrOzlZ2dfcmazp07Ky8vz++FBi8WHR2tgoKCb7FaAABgM97rCgAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWCvh7XQG4sl5PvdZq+3aFGuUOkhKzt8pbF9Jq6wCAlsAZHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFm8BAQBtyOXeHqStvn3Hh4tHt/YSgEvijA4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsFZQgs4nn3yif/7nf1a3bt0UERGhO++8U2VlZc52Y4yys7MVFxen8PBwDR06VAcPHvSbw+v1atq0aerevbu6dOmijIwMHT9+3K+mqqpKmZmZcrvdcrvdyszM1JkzZ4LREgAAaIcCHnSqqqp09913KywsTK+//roOHTqkZ555Rtddd51Tk5ubq6VLlyo/P1979+6Vx+PRiBEjdPbsWacmKytLmzZt0oYNG7Rz506dO3dO6enpqqurc2rGjx+v8vJyFRYWqrCwUOXl5crMzAx0SwAAoJ3qGOgJlyxZovj4eK1evdoZ69Wrl/N/Y4yWL1+uuXPnasyYMZKktWvXKjY2VuvXr9fkyZNVXV2tVatW6cUXX9Tw4cMlSQUFBYqPj9e2bduUlpamw4cPq7CwULt27VJSUpIkaeXKlUpOTtaRI0fUp0+fQLcGAADamYCf0dm8ebMGDhyon/zkJ4qJiVH//v21cuVKZ/vRo0dVUVGh1NRUZ8zlcmnIkCEqKSmRJJWVlcnn8/nVxMXFKTEx0akpLS2V2+12Qo4k
DR48WG6326kBAADfbQE/o/PBBx/oueee04wZM/Sv//qv2rNnj6ZPny6Xy6Wf/vSnqqiokCTFxsb63S82NlbHjh2TJFVUVKhTp07q2rVro5qG+1dUVCgmJqbR/mNiYpyai3m9Xnm9Xud2TU2NJMnn88nn811jx01rmC/Q87YVNvfXEr25Qk3Q5r7ivjsYv39tYnNvUtvtLxDfKzY/pkh299cavV3NvgIedOrr6zVw4EDl5ORIkvr376+DBw/queee009/+lOnLiQkxO9+xphGYxe7uKap+svNs2jRIs2fP7/ReFFRkSIiIi6772tVXFwclHnbCpv7C2ZvuYOCNnWzPT2wvrWXEDQ29ya1vf62bNkSsLlsfkyR7O6vJXs7f/58s2sDHnR69OihW2+91W+sb9++2rhxoyTJ4/FI+vqMTI8ePZyayspK5yyPx+NRbW2tqqqq/M7qVFZWKiUlxak5depUo/2fPn260dmiBnPmzNGMGTOc2zU1NYqPj1dqaqqioqKupd1L8vl8Ki4u1ogRIxQWFhbQudsCm/trid4Ss7cGZd7mcHUwenpgvX71Vgd56y//y0V7Y3NvUtvt70B22reew+bHFMnu/lqjt4a/yDRHwIPO3XffrSNHjviNvfvuu+rZs6ckKSEhQR6PR8XFxerfv78kqba2Vjt27NCSJUskSQMGDFBYWJiKi4s1duxYSdLJkyd14MAB5ebmSpKSk5NVXV2tPXv2aNCgr3893r17t6qrq50wdDGXyyWXy9VoPCwsLGgHJ5hztwU29xfM3rx1rf9Dylsf0ibWEQw29ya1vf4C+X1i82OKZHd/Ldnb1ewn4EHnySefVEpKinJycjR27Fjt2bNHK1as0IoVKyR9/eemrKws5eTkqHfv3urdu7dycnIUERGh8ePHS5LcbrcmTpyomTNnqlu3boqOjtasWbPUr18/5yqsvn37auTIkZo0aZJeeOEFSdKjjz6q9PR0rrgCAACSghB07rrrLm3atElz5szRggULlJCQoOXLl+uhhx5yambPnq0LFy5oypQpqqqqUlJSkoqKihQZGenULFu2TB07dtTYsWN14cIFDRs2TGvWrFFoaKhTs27dOk2fPt25OisjI0P5+fmBbgkAALRTAQ86kpSenq709PRLbg8JCVF2drays7MvWdO5c2fl5eUpLy/vkjXR0dEqKCj4NksFAAAW472uAACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALBWx9ZeAPBt9XrqtYDO5wo1yh0kJWZvlbcuJKBzAwBaFmd0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1eGVkAMC3EohXJ2/pVyT/cPHooO8DbQNndAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtYIedBYtWqSQkBBlZWU5Y8YYZWdnKy4uTuHh4Ro6dKgOHjzodz+v16tp06ape/fu6tKlizIyMnT8+HG/mqqqKmVmZsrtdsvtdiszM1NnzpwJdksAAKCdCGrQ2bt3r1asWKHbb7/dbzw3N1dLly5Vfn6+9u7dK4/HoxEjRujs2bNOTVZWljZt2qQNGzZo586dOnfunNLT01VXV+fUjB8/XuXl5SosLFRhYaHKy8uVmZkZzJYAAEA7ErSgc+7cOT300ENauXKlunbt6owbY7R8+XLNnTtXY8aMUWJiotauXavz
589r/fr1kqTq6mqtWrVKzzzzjIYPH67+/furoKBA+/fv17Zt2yRJhw8fVmFhof793/9dycnJSk5O1sqVK/XHP/5RR44cCVZbAACgHQnam3o+/vjjGj16tIYPH65/+7d/c8aPHj2qiooKpaamOmMul0tDhgxRSUmJJk+erLKyMvl8Pr+auLg4JSYmqqSkRGlpaSotLZXb7VZSUpJTM3jwYLndbpWUlKhPnz6N1uT1euX1ep3bNTU1kiSfzyefzxfQ/hvmC/S8bUVb6s8VagI7Xwfj969tbO7P5t4ku/tr6d5a+rGrLT1mBlpr9HY1+wpK0NmwYYPefvtt7d27t9G2iooKSVJsbKzfeGxsrI4dO+bUdOrUye9MUENNw/0rKioUExPTaP6YmBin5mKLFi3S/PnzG40XFRUpIiKiGZ1dveLi4qDM21a0hf5yBwVn3qcH1gdn4jbC5v5s7k2yu7+W6m3Lli0tsp+LtYXHzGBpyd7Onz/f7NqAB52PP/5YTzzxhIqKitS5c+dL1oWEhPjdNsY0GrvYxTVN1V9unjlz5mjGjBnO7ZqaGsXHxys1NVVRUVGX3ffV8vl8Ki4u1ogRIxQWFhbQuduCttRfYvbWgM7n6mD09MB6/eqtDvLWX/5rsj2yuT+be5Ps7q+lezuQnRb0fXxTW3rMDLTW6K3hLzLNEfCgU1ZWpsrKSg0YMMAZq6ur05tvvqn8/Hzn+TMVFRXq0aOHU1NZWemc5fF4PKqtrVVVVZXfWZ3KykqlpKQ4NadOnWq0/9OnTzc6W9TA5XLJ5XI1Gg8LCwvawQnm3G1BW+jPWxecB0VvfUjQ5m4LbO7P5t4ku/trqd5a63GrLTxmBktL9nY1+wn4k5GHDRum/fv3q7y83PkYOHCgHnroIZWXl+umm26Sx+PxO8VVW1urHTt2OCFmwIABCgsL86s5efKkDhw44NQkJyerurpae/bscWp2796t6upqpwYAAHy3BfyMTmRkpBITE/3GunTpom7dujnjWVlZysnJUe/evdW7d2/l5OQoIiJC48ePlyS53W5NnDhRM2fOVLdu3RQdHa1Zs2apX79+Gj58uCSpb9++GjlypCZNmqQXXnhBkvToo48qPT29ySciAwCA756gXXV1ObNnz9aFCxc0ZcoUVVVVKSkpSUVFRYqMjHRqli1bpo4dO2rs2LG6cOGChg0bpjVr1ig0NNSpWbdunaZPn+5cnZWRkaH8/PwW7wcAALRNLRJ0tm/f7nc7JCRE2dnZys7OvuR9OnfurLy8POXl5V2yJjo6WgUFBQFaJQAAsA3vdQUAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKzVsbUXgLal11OvNavOFWqUO0hKzN4qb11IkFcFAMC14YwOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1An55+aJFi/TKK6/or3/9q8LDw5WSkqIlS5aoT58+To0xRvPnz9eKFStUVVWlpKQk/e53v9Ntt93m1Hi9Xs2aNUsvvfSSLly4oGHDhunZZ5/VDTfc4NRUVVVp+vTp2rx5syQpIyNDeXl5uu666wLdFgDAIs19KY1ACcRLcny4eHSAV/XdEPAzOjt27NDjjz+uXbt2qbi4WF999ZVSU1P1xRdfODW5ubla
unSp8vPztXfvXnk8Ho0YMUJnz551arKysrRp0yZt2LBBO3fu1Llz55Senq66ujqnZvz48SovL1dhYaEKCwtVXl6uzMzMQLcEAADaqYCf0SksLPS7vXr1asXExKisrEw//OEPZYzR8uXLNXfuXI0ZM0aStHbtWsXGxmr9+vWaPHmyqqurtWrVKr344osaPny4JKmgoEDx8fHatm2b0tLSdPjwYRUWFmrXrl1KSkqSJK1cuVLJyck6cuSI3xkkAADw3RT0V0aurq6WJEVHR0uSjh49qoqKCqWmpjo1LpdLQ4YMUUlJiSZPnqyysjL5fD6/mri4OCUmJqqkpERpaWkqLS2V2+12Qo4kDR48WG63WyUlJU0GHa/XK6/X69yuqamRJPl8Pvl8voD23TBfoOcNNleoaV5dB+P3r01s7k2yuz+be5Ps7s/m3qTA9NdWf560xs+7q9lXUIOOMUYzZszQPffco8TERElSRUWFJCk2NtavNjY2VseOHXNqOnXqpK5duzaqabh/RUWFYmJiGu0zJibGqbnYokWLNH/+/EbjRUVFioiIuMrumqe4uDgo8wZL7qCrq396YH1wFtIG2NybZHd/Nvcm2d2fzb1J366/LVu2BHAlgdeSP+/Onz/f7NqgBp2pU6fqnXfe0c6dOxttCwnxfzKWMabR2MUurmmq/nLzzJkzRzNmzHBu19TUKD4+XqmpqYqKirrsvq+Wz+dTcXGxRowYobCwsIDOHUyJ2VubVefqYPT0wHr96q0O8tbb9V5XNvcm2d2fzb1Jdvdnc29SYPo7kJ0W4FUFRmv8vGv4i0xzBC3oTJs2TZs3b9abb77pd6WUx+OR9PUZmR49ejjjlZWVzlkej8ej2tpaVVVV+Z3VqaysVEpKilNz6tSpRvs9ffp0o7NFDVwul1wuV6PxsLCwoB2cYM4dDFd7NYC3PsTaN/W0uTfJ7v5s7k2yuz+be5O+XX9t/WdJS/68u5r9BPyqK2OMpk6dqldeeUV/+tOflJCQ4Lc9ISFBHo/H7xRXbW2tduzY4YSYAQMGKCwszK/m5MmTOnDggFOTnJys6upq7dmzx6nZvXu3qqurnRoAAPDdFvAzOo8//rjWr1+vP/zhD4qMjHSeL+N2uxUeHq6QkBBlZWUpJydHvXv3Vu/evZWTk6OIiAiNHz/eqZ04caJmzpypbt26KTo6WrNmzVK/fv2cq7D69u2rkSNHatKkSXrhhRckSY8++qjS09O54goAAEgKQtB57rnnJElDhw71G1+9erUefvhhSdLs2bN14cIFTZkyxXnBwKKiIkVGRjr1y5YtU8eOHTV27FjnBQPXrFmj0NBQp2bdunWaPn26c3VWRkaG8vPzA90SAABopwIedIy58qVzISEhys7OVnZ29iVrOnfurLy8POXl5V2yJjo6WgUFBdeyTAAA8B3Ae10BAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtTq29gJsl5i9Vd66kNZeBgAA30mc0QEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2uugIAoB3o9dRrrb2EJrlCjXIHXfoq4w8Xj26FVf0fzugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKzV7oPOs88+q4SEBHXu3FkDBgzQn//859ZeEgAAaCPaddB5+eWXlZWVpblz52rfvn36wQ9+oFGjRumj
jz5q7aUBAIA2oF0HnaVLl2rixIl65JFH1LdvXy1fvlzx8fF67rnnWntpAACgDWi3bwFRW1ursrIyPfXUU37jqampKikpafI+Xq9XXq/XuV1dXS1J+vzzz+Xz+QK6Pp/Pp/Pnz6ujr4Pq6hu/JHZ717He6Pz5eiv7s7k3ye7+bO5Nsrs/m3uT7O7vSr199tlnAd/n2bNnJUnGmCsXm3bqk08+MZLMX/7yF7/xhQsXmu9///tN3mfevHlGEh988MEHH3zwYcHHxx9/fMW80G7P6DQICfFPj8aYRmMN5syZoxkzZji36+vr9fnnn6tbt26XvM+1qqmpUXx8vD7++GNFRUUFdO62wOb+bO5Nsrs/m3uT7O7P5t4ku/trjd6MMTp79qzi4uKuWNtug0737t0VGhqqiooKv/HKykrFxsY2eR+XyyWXy+U3dt111wVriZKkqKgo676ov8nm/mzuTbK7P5t7k+zuz+beJLv7a+ne3G53s+ra7ZORO3XqpAEDBqi4uNhvvLi4WCkpKa20KgAA0Ja02zM6kjRjxgxlZmZq4MCBSk5O1ooVK/TRRx/psccea+2lAQCANqBdB50HHnhAn332mRYsWKCTJ08qMTFRW7ZsUc+ePVt7aXK5XJo3b16jP5XZwub+bO5Nsrs/m3uT7O7P5t4ku/tr672FGNOca7MAAADan3b7HB0AAIArIegAAABrEXQAAIC1CDoAAMBaBJ0rWLhwoVJSUhQREXHJFxf86KOPdP/996tLly7q3r27pk+frtraWr+a/fv3a8iQIQoPD9f111+vBQsWNHqPjh07dmjAgAHq3LmzbrrpJj3//PON9rVx40bdeuutcrlcuvXWW7Vp06aA9bp9+3aFhIQ0+bF3716nrqntF681UP0GUq9evRqt++L3SmvJYxlIH374oSZOnKiEhASFh4fr5ptv1rx58xqtvb0eu+Z69tlnlZCQoM6dO2vAgAH685//3NpL8rNo0SLdddddioyMVExMjH784x/ryJEjfjUPP/xwo2M0ePBgvxqv16tp06ape/fu6tKlizIyMnT8+HG/mqqqKmVmZsrtdsvtdiszM1NnzpwJWm/Z2dmN1u3xeJztxhhlZ2crLi5O4eHhGjp0qA4ePNjm+2rQ1ONHSEiIHn/8cUnt67i9+eabuv/++xUXF6eQkBC9+uqrfttb8lg15zH3W/sWbzf1nfDrX//aLF261MyYMcO43e5G27/66iuTmJho7r33XvP222+b4uJiExcXZ6ZOnerUVFdXm9jYWDNu3Dizf/9+s3HjRhMZGWl++9vfOjUffPCBiYiIME888YQ5dOiQWblypQkLCzP/9V//5dSUlJSY0NBQk5OTYw4fPmxycnJMx44dza5duwLSq9frNSdPnvT7eOSRR0yvXr1MfX29UyfJrF692q/u/PnzAe830Hr27GkWLFjgt+6zZ88621vyWAba66+/bh5++GGzdetW87e//c384Q9/MDExMWbmzJl+de312DXHhg0bTFhYmFm5cqU5dOiQeeKJJ0yXLl3MsWPHWnVd35SWlmZWr15tDhw4YMrLy83o0aPNjTfeaM6dO+fUTJgwwYwcOdLvGH322Wd+8zz22GPm+uuvN8XFxebtt9829957r7njjjvMV1995dSMHDnSJCYmmpKSElNSUmISExNNenp60HqbN2+eue222/zWXVlZ6WxfvHixiYyMNBs3bjT79+83DzzwgOnRo4epqalp0301qKys9OutuLjYSDJvvPGGMaZ9HbctW7aYuXPnmo0bNxpJZtOmTX7bW+pYNecxNxAIOs20evXqJoPOli1bTIcOHcwnn3zijL300kvG5XKZ6upqY4wxzz77rHG73ebLL790ahYtWmTi4uKcADF79mxzyy23+M09efJkM3jwYOf22LFjzciRI/1q0tLSzLhx4751f02pra01MTExZsGCBX7jTX1jfFOg+g20nj17mmXLll1ye0sey5aQm5trEhIS/Mba
67FrjkGDBpnHHnvMb+yWW24xTz31VCut6MoqKyuNJLNjxw5nbMKECeZHP/rRJe9z5swZExYWZjZs2OCMffLJJ6ZDhw6msLDQGGPMoUOHjCS/X4JKS0uNJPPXv/418I2Yr4POHXfc0eS2+vp64/F4zOLFi52xL7/80rjdbvP8888bY9puX5fyxBNPmJtvvtn5vmivx+3ix4SWPFbNecwNBP509S2VlpYqMTHR743F0tLS5PV6VVZW5tQMGTLE78WU0tLSdOLECX344YdOTWpqqt/caWlpeuutt+Tz+S5bU1JSEozWtHnzZn366ad6+OGHG22bOnWqunfvrrvuukvPP/+86uvrnW2B6jcYlixZom7duunOO+/UwoUL/U6RtuSxbAnV1dWKjo5uNN5ej93l1NbWqqysrNG6UlNTg/b9EQjV1dWS1Og4bd++XTExMfr+97+vSZMmqbKy0tlWVlYmn8/n12tcXJwSExOdXktLS+V2u5WUlOTUDB48WG63O6ifj/fee09xcXFKSEjQuHHj9MEHH0iSjh49qoqKCr81u1wuDRkyxFlPW+7rYrW1tSooKNDPfvYzvzeEbq/H7Zta8lg15zE3EAg631JFRUWjNxHt2rWrOnXq5LzhaFM1DbevVPPVV1/p008/vWzNxW9sGiirVq1SWlqa4uPj/caffvpp/f73v9e2bds0btw4zZw5Uzk5Oc72QPUbaE888YQ2bNigN954Q1OnTtXy5cs1ZcqUy647WMcy2P72t78pLy+v0duhtNdjdyWffvqp6urqWvT749syxmjGjBm65557lJiY6IyPGjVK69at05/+9Cc988wz2rt3r+677z55vV5JX3/+O3XqpK5du/rN981eKyoqFBMT02ifMTExQft8JCUl6T/+4z+0detWrVy5UhUVFUpJSdFnn33m7PNyx6et9tWUV199VWfOnPH7JbC9HreLteSxas5jbiC067eAuFbZ2dmaP3/+ZWv27t2rgQMHNmu+byb6BsYYv/GLa8z/f3JnIGqa2v83XUu/x48f19atW/Wf//mfjWp/+ctfOv+/8847JUkLFizwGw9Uv1dyNb09+eSTztjtt9+url276p/+6Z+cszyX2newjmVzXMuxO3HihEaOHKmf/OQneuSRR/xq29KxC4Zr+f5oLVOnTtU777yjnTt3+o0/8MADzv8TExM1cOBA9ezZU6+99prGjBlzyfmu9HXaVE0gjRo1yvl/v379lJycrJtvvllr1651npR7LcentftqyqpVqzRq1Ci/MxHt9bhdSksdq5bo9zsZdKZOnapx48ZdtqZXr17Nmsvj8Wj37t1+Y1VVVfL5fE5S9Xg8jdJpwynNK9V07NjR+SF8qZqLE/HFrqXf1atXq1u3bsrIyLjs/aSvT0fW1NTo1KlTio2NDVi/zfFtjmXDg+/777+vbt26teixbK6r7e/EiRO69957nTe5vZLWPHaB1L17d4WGhl7T90drmDZtmjZv3qw333xTN9xww2Vre/TooZ49e+q9996T9PXnv7a2VlVVVX6/UVdWViolJcWpOXXqVKO5Tp8+3WKfjy5duqhfv35677339OMf/1jS17/B9+jRw2/N3/y6ag99HTt2TNu2bdMrr7xy2br2etwarpRriWPVnMfcgAjYs30sd6UnI584ccIZ27BhQ6MnsF533XXG6/U6NYsXL270BM++ffv6zf3YY481ejLyqFGj/GpGjhwZ8Ccj19fXm4SEhEZX7FxKXl6e6dy5s/ME1kD1G2z//d//bSQ5V+W05LEMhuPHj5vevXubcePG+V35cDnt9dg1ZdCgQebnP/+531jfvn3b1JOR6+vrzeOPP27i4uLMu+++26z7fPrpp8blcpm1a9caY/7viaAvv/yyU3PixIkmnwi6e/dup2bXrl0t+qTdL7/80lx//fVm/vz5zhNclyxZ4mz3er1NPsG1rfc1b9484/F4jM/nu2xdezluusSTkVviWDXnMTcgPQZs
JksdO3bM7Nu3z8yfP99873vfM/v27TP79u1zLktuuDxu2LBh5u233zbbtm0zN9xwg9/lcWfOnDGxsbHmwQcfNPv37zevvPKKiYqKavKS3SeffNIcOnTIrFq1qtElu3/5y19MaGioWbx4sTl8+LBZvHhxQC8vb7Bt2zYjyRw6dKjRts2bN5sVK1aY/fv3m/fff9+sXLnSREVFmenTpwe830AqKSkxS5cuNfv27TMffPCBefnll01cXJzJyMhwalryWAbaJ598Yv7+7//e3Hfffeb48eN+l7g2aK/HrrkaLi9ftWqVOXTokMnKyjJdunQxH374Yauu65t+/vOfG7fbbbZv397kJf5nz541M2fONCUlJebo0aPmjTfeMMnJyeb6669vdGnvDTfcYLZt22befvttc9999zV5ae/tt99uSktLTWlpqenXr19QL8OeOXOm2b59u/nggw/Mrl27THp6uomMjHQ+/4sXLzZut9u88sorZv/+/ebBBx9s8pLlttbXN9XV1Zkbb7zR/OIXv/Abb2/H7ezZs87PMknOY2PDL30tdaya85gbCASdK5gwYYKR1Oij4bUTjPk6DI0ePdqEh4eb6OhoM3XqVL/Lc40x5p133jE/+MEPjMvlMh6Px2RnZ/u9No0xxmzfvt3079/fdOrUyfTq1cs899xzjdbz+9//3vTp08eEhYWZW265xWzcuDHgPT/44IMmJSWlyW2vv/66ufPOO833vvc9ExERYRITE83y5csb/XYTqH4DpayszCQlJRm32206d+5s+vTpY+bNm2e++OILv7qWPJaBtHr16ia/Tr950ra9Hrur8bvf/c707NnTdOrUyfzDP/yD32XbbcGljtHq1auNMcacP3/epKammr/7u78zYWFh5sYbbzQTJkwwH330kd88Fy5cMFOnTjXR0dEmPDzcpKenN6r57LPPzEMPPWQiIyNNZGSkeeihh0xVVVXQemt4rZWwsDATFxdnxowZYw4ePOhsr6+vd86GuFwu88Mf/tDs37+/zff1TVu3bjWSzJEjR/zG29txe+ONN5r8OpwwYYIxpmWPVXMec7+tEGMueslTAAAAS3B5OQAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADW+n/gJqumYAXLtAAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "y.hist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "3848fd5d-177a-4d03-a12b-f4301b1bd3ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = data.sort_values('var1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "8d8483bd-5b4d-4087-8d13-92cdfc662ed3",
   "metadata": {},
   "outputs": [],
   "source": [
    "data_unobserved = data.loc[data['var1']==n_levels-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "2ef433a9-a3f7-4f79-94ce-a99265858fdf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Observed pool = every row whose var1 level is strictly below the held-out level (n_levels-1).\n",
    "# Comparing against n_levels instead also keeps the held-out rows (n_levels-1 < n_levels), which\n",
    "# lets that level leak into training and duplicates its rows once data_unobserved is appended to the test set.\n",
    "data_observed = data.loc[data['var1'] < n_levels - 1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "38ca1c00-b163-462c-9fa2-b91354b44429",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hold out 30% of the observed rows for testing; the fixed random_state keeps the partition reproducible.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    data_observed[cat_vars],\n",
    "    data_observed['y'],\n",
    "    test_size=0.3,\n",
    "    random_state=35,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "21495b2e-2742-4490-9986-a47f4265d8d9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Append the rows of data_unobserved to the test features and test targets (row-wise concatenation).\n",
    "unobserved_X, unobserved_y = data_unobserved[cat_vars], data_unobserved['y']\n",
    "X_test = pd.concat([X_test, unobserved_X])\n",
    "y_test = pd.concat([y_test, unobserved_y])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "e3528929-b2f8-462d-bb96-636052fcd93d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Discard the index labels left over from splitting/concatenating so every split is indexed 0..n-1.\n",
    "X_train, y_train = X_train.reset_index(drop=True), y_train.reset_index(drop=True)\n",
    "X_test, y_test = X_test.reset_index(drop=True), y_test.reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "0360b9a9-ee20-4089-87c9-fab640ff5e21",
   "metadata": {},
   "outputs": [],
   "source": [
    "X2, encoder = icfesl.f_get_dummies(X_train, ['var1','var2'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "924c7ed4-5b53-440b-b103-77ad85033a86",
   "metadata": {},
   "outputs": [],
   "source": [
    "X2_test = icfesl.f_get_dummies(X_test, ['var1','var2'], encoder)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "5fcaa6b4-8007-4e01-a0e9-4dcb8728a4a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "min_child_weight = np.floor(X2.shape[0]/X2.shape[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "9d51c73a-70d3-475e-9376-6c8c5b6cf498",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "np.float64(17.0)"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "min_child_weight"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "7c6871b4-c5c6-4b76-856a-cfcb3c4a7873",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Size of X2 data: 533.5237159729004 MB\n"
     ]
    }
   ],
   "source": [
    "X2_data_size = sys.getsizeof(X2)\n",
    "print(f\"Size of X2 data: {X2_data_size/1024**2} MB\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "59924c30-72c8-41e4-ac5c-9f63911b271c",
   "metadata": {},
   "source": [
    "### CatBoost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "a64d20c9-b58a-4edc-b9c2-44c6f4721c3b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# CatBoost baseline: 100 iterations, RMSE loss, fixed seed, no training log.\n",
    "catboost_params = dict(iterations=100, loss_function='RMSE', random_seed=42, verbose=False)\n",
    "model = CatBoostRegressor(**catboost_params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "2cd0335b-e6d5-409f-b32d-9086c345f403",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.244\n"
     ]
    }
   ],
   "source": [
    "# Time the fit with perf_counter: a monotonic, high-resolution clock meant for measuring intervals\n",
    "# (time.time() is wall-clock time and can jump if the system clock is adjusted).\n",
    "start = time.perf_counter()\n",
    "model.fit(X_train, y_train, cat_features=cat_vars)\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "e940226e-b8a7-4285-a743-0684b8fd6abd",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "c26341be-4a74-483b-9810-c7f36596adb9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training RMSE: 1086.5091064370956\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "599550aa-a1d9-4164-ae75-4785ed139d29",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "ffd15889-01a3-4d87-8d81-1892ef32eb98",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing RMSE: 1144.312262201615\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a14325db-bbee-43d1-976c-176f0018bddb",
   "metadata": {},
   "source": [
    "### TabNet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "377e0174-52ab-44b0-8fbc-92597f657406",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column positions of the categorical features inside the array handed to TabNet.\n",
    "cat_idxs = [X_train.columns.get_loc(col) for col in cat_vars]\n",
    "# Each embedding table needs more rows than the largest code it will be asked to look up.\n",
    "# nunique() on the training split undercounts whenever a level is absent from training\n",
    "# (for example a level that only shows up at test time), so size from the largest code in either split.\n",
    "# NOTE(review): assumes levels are non-negative integer codes - confirm against the data generator.\n",
    "cat_dims = [int(max(X_train[col].max(), X_test[col].max())) + 1 for col in cat_vars]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "bf5c1e81-c149-454d-a677-7048b07eecd7",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = TabNetRegressor(verbose=0, seed=200, cat_idxs=cat_idxs, cat_dims=cat_dims)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "ff2493dd-9bbe-42ac-b8e0-c8799070b26b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Early stopping occurred at epoch 43 with best_epoch = 33 and best_train_mse = 123653.9223\n",
      "22.0875\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.13/site-packages/pytorch_tabnet/callbacks.py:172: UserWarning: Best weights from best epoch are automatically used!\n",
      "  warnings.warn(wrn_msg)\n"
     ]
    }
   ],
   "source": [
    "# Convert to NumPy once (the target is reshaped to a single column) instead of once per argument,\n",
    "# and do it outside the timed region so only the fit itself is measured.\n",
    "X_train_np = X_train.to_numpy()\n",
    "y_train_np = y_train.to_numpy().reshape(-1, 1)\n",
    "# NOTE: the only eval set is the training data itself, so early stopping tracks the training metric.\n",
    "# perf_counter is a monotonic, high-resolution clock meant for measuring intervals.\n",
    "start = time.perf_counter()\n",
    "model.fit(X_train=X_train_np, y_train=y_train_np, eval_set=[(X_train_np, y_train_np)], eval_name=['train'], max_epochs=50)\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "994cbfd4-36d0-4879-b0be-5ab2eef8cf06",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_train.to_numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "2441371f-d15c-40d4-ad9d-590b03df2a0a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training RMSE:351.6445965695799\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training RMSE:{rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "c35b05c9-a34b-4fda-ae6e-f5bf8c8e9de2",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_test.to_numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "72114801-717a-4ba8-83a0-8cd697671770",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing RMSE:356.689939703504\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing RMSE:{rmse}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "04f66670-54f6-4c90-96e9-d65a096663b0",
   "metadata": {},
   "source": [
    "### One hot encoding"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9407c138-1151-49a5-b15f-067c12f1fee9",
   "metadata": {},
   "source": [
    "#### 1. OLS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "5876a562-9ad5-4796-a0b3-e6a250d0ad2d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "13.3648\n"
     ]
    }
   ],
   "source": [
    "# Time the OLS fit on the one-hot design matrix (constant added unless one is already present).\n",
    "# perf_counter is a monotonic, high-resolution clock meant for measuring intervals.\n",
    "start = time.perf_counter()\n",
    "model = sm.OLS(y_train, sm.add_constant(X2, has_constant='skip')).fit(disp=False)\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "4cad784d-cd85-42a1-b674-2281c411ccd1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DF:1998.0; R2: 0.9972135413826585\n"
     ]
    }
   ],
   "source": [
    "print(f'DF:{model.df_model}; R2: {model.rsquared}') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "9d98efc3-7dd7-4684-8bb1-e6b18d630451",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(sm.add_constant(X2, has_constant='skip'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "d522aafa-4a4f-49af-b35c-252568e8752a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training RMSE: 193.90128743141534\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "dd3b6924-3608-4e29-9017-d332b8acc6dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(sm.add_constant(X2_test, has_constant='skip'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "77159d20-1353-4815-9365-506dd9985242",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing RMSE: 206.81639185021726\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c888aa95-d9f6-4694-80ac-05b2bbe8b61c",
   "metadata": {},
   "source": [
    "#### 2. xgboost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "ed98a66f-01db-4ea3-a8b3-a78e955be1f6",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = XGBRegressor(n_estimators=100, random_state=200)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "df42399f-ca2b-4525-8198-fe1fa9e3c0d6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.9079\n"
     ]
    }
   ],
   "source": [
    "# Time the XGBoost fit on the one-hot features.\n",
    "# perf_counter is a monotonic, high-resolution clock meant for measuring intervals.\n",
    "start = time.perf_counter()\n",
    "model.fit(X2, y_train)\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "ae1430b7-aceb-4742-b934-a1eed4e907a8",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "40224e28-b679-452a-9534-678ae93c1867",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training RMSE: 3066.2962550020156\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "16ffb892-5e5e-4601-a5a5-653e1b86997a",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X2_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "040f7a6c-00dd-45e3-9c25-60e92992708a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing RMSE: 3169.3148015022907\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "27a2a623-5281-47c0-acbe-0247e59a4334",
   "metadata": {},
   "source": [
    "### Target Encoding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "586c16d4-4a09-4b8d-8dbc-76a35d822258",
   "metadata": {},
   "outputs": [],
   "source": [
    "from category_encoders import TargetEncoder\n",
    "\n",
    "# Fit the encoder on the training split only, then transform both splits with it.\n",
    "enc_auto = TargetEncoder(cols=cat_vars)\n",
    "enc_auto.fit(X_train, y_train)\n",
    "X_t = enc_auto.transform(X_train)\n",
    "X_t_test = enc_auto.transform(X_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4e0a8ae9-bd27-4cfa-9c5d-dfd479d121e3",
   "metadata": {},
   "source": [
    "#### 1. OLS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "d2131938-e931-4f91-b99b-82c8f3d74226",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.0079\n"
     ]
    }
   ],
   "source": [
    "# Time the OLS fit on the target-encoded features (constant added unless one is already present).\n",
    "# This fit takes milliseconds, so use perf_counter: a monotonic, high-resolution interval clock.\n",
    "start = time.perf_counter()\n",
    "model = sm.OLS(y_train, sm.add_constant(X_t, has_constant='skip')).fit(disp=False)\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "a3053246-108a-46e7-98ae-89ee1ac35bbc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DF:2.0; R2: 0.9409140080570968\n"
     ]
    }
   ],
   "source": [
    "print(f'DF:{model.df_model}; R2: {model.rsquared}') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "5ccab110-ab74-4e63-9d09-0174472a644b",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(sm.add_constant(X_t, has_constant='skip'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "34ff4e5f-8b9a-41d3-94b7-79d64c7bf241",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training RMSE: 892.8873277419076\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "2ac968e0-4bfd-423d-944b-73c5710f7f69",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(sm.add_constant(X_t_test, has_constant='skip'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "82ae9a2a-4a84-4ae3-8abd-5520a9b0f785",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing RMSE: 978.429727760193\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0fb66c13-ae7e-4dbd-b1ab-6564363315e4",
   "metadata": {},
   "source": [
    "#### 2. xgboost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "cc923abd-b206-440d-a64e-b1177cb8642a",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = XGBRegressor(n_estimators=100, random_state=200)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "647e7142-4e25-47d3-a749-b8b17a11da96",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.21\n"
     ]
    }
   ],
   "source": [
    "# Time the XGBoost fit on the target-encoded features.\n",
    "# perf_counter is a monotonic, high-resolution clock meant for measuring intervals.\n",
    "start = time.perf_counter()\n",
    "model.fit(X_t, y_train)\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "bd191fae-0925-4ace-8690-42b41085b2f3",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_t)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "a75ecae2-9d64-47e0-9206-044b02010fc8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training RMSE: 727.2757653718004\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "af3cddbb-aa89-4942-9fce-f791b0270580",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_t_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "49e89411-0ebb-4dce-b2c9-1e8d6b27dc2e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing RMSE: 861.1622711043852\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4f9efe23-76f4-49e5-9130-053c9680dd64",
   "metadata": {},
   "source": [
    "### ICFESL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "95eed9eb-f300-40a6-b9d4-dd749b7e3769",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m2025-11-29 14:32:12.269\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m397\u001b[0m - \u001b[1mrunning algorithm with L2 regularization factor = 0.01 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:32:20.366\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m453\u001b[0m - \u001b[1mRunning OLS with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:32:20.422\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m471\u001b[0m - \u001b[1mRunning xgbRegressor with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:32:20.733\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m485\u001b[0m - \u001b[1mCompleted: running algorithm with L2 regularization factor = 0.01 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:32:20.734\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m397\u001b[0m - \u001b[1mrunning algorithm with L2 regularization factor = 0.05 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:32:35.570\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m453\u001b[0m - \u001b[1mRunning OLS with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:32:35.604\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m471\u001b[0m - \u001b[1mRunning xgbRegressor with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:32:35.893\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m485\u001b[0m - \u001b[1mCompleted: running algorithm with L2 regularization factor = 0.05 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:32:35.894\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m397\u001b[0m - \u001b[1mrunning algorithm with L2 regularization factor = 0.1 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:32:53.255\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m453\u001b[0m - \u001b[1mRunning OLS with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:32:53.306\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m471\u001b[0m - \u001b[1mRunning xgbRegressor with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:32:53.612\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m485\u001b[0m - \u001b[1mCompleted: running algorithm with L2 regularization factor = 0.1 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:32:53.612\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m397\u001b[0m - \u001b[1mrunning algorithm with L2 regularization factor = 0.2 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:33:12.942\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m453\u001b[0m - \u001b[1mRunning OLS with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:33:12.994\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m471\u001b[0m - \u001b[1mRunning xgbRegressor with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:33:13.302\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m485\u001b[0m - \u001b[1mCompleted: running algorithm with L2 regularization factor = 0.2 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:33:13.302\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m397\u001b[0m - \u001b[1mrunning algorithm with L2 regularization factor = 0.5 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:33:34.331\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m453\u001b[0m - \u001b[1mRunning OLS with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:33:34.383\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m471\u001b[0m - \u001b[1mRunning xgbRegressor with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:33:34.696\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m485\u001b[0m - \u001b[1mCompleted: running algorithm with L2 regularization factor = 0.5 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:33:34.696\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m397\u001b[0m - \u001b[1mrunning algorithm with L2 regularization factor = 1 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:33:56.350\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m453\u001b[0m - \u001b[1mRunning OLS with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:33:56.400\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m471\u001b[0m - \u001b[1mRunning xgbRegressor with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:33:56.707\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m485\u001b[0m - \u001b[1mCompleted: running algorithm with L2 regularization factor = 1 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:33:56.708\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m397\u001b[0m - \u001b[1mrunning algorithm with L2 regularization factor = 5 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:34:18.269\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m453\u001b[0m - \u001b[1mRunning OLS with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:34:18.322\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m471\u001b[0m - \u001b[1mRunning xgbRegressor with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:34:18.632\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m485\u001b[0m - \u001b[1mCompleted: running algorithm with L2 regularization factor = 5 ------>\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "# Run the ICFESL search once per L2 regularization factor; per the run log, each factor\n",
    "# is followed by an OLS fit and an xgbRegressor fit on the ICFESL encoding.\n",
    "l2_alphas = [0.01, 0.05, 0.1, 0.2, 0.5, 1, 5]\n",
    "search_outputs = icfesl.regularized_search_algorun(\n",
    "    X2, y_train, X2_test, y_test, cat_vars, 'regression',\n",
    "    alphas=l2_alphas,\n",
    "    cbine_column=False,\n",
    "    distance_threshold=0.002,\n",
    "    figure=True,\n",
    ")\n",
    "fit_info_panel, best_index, fit_figs, cluster_groups, criterions, inertias, gap_statss = search_outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "9139ee6e-1f9f-4834-961e-fa45bf6fe245",
   "metadata": {},
   "outputs": [],
   "source": [
    "decision_plot, summary_plot = fit_figs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "79c0959a-b17f-4c3e-a1aa-477505a69d25",
   "metadata": {},
   "outputs": [],
   "source": [
    "fit_info_panel.to_excel(\"simulation_regression_fit_info_case3.xlsx\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "8bb5be76-c3f2-48d7-88fa-1c5f8e07566c",
   "metadata": {},
   "outputs": [],
   "source": [
    "decision_plot.savefig('decision_plot_simulation_regression_case3.png')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "ffdb9c09-7e59-49c6-9d4d-b8eda8992216",
   "metadata": {},
   "outputs": [],
   "source": [
    "summary_plot.savefig('summary_plot_simulation_regression_case3.png')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "0c583112-9148-4c78-8d6c-2b581c772dd9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Experiment</th>\n",
       "      <th>dof</th>\n",
       "      <th>reg_fit_time</th>\n",
       "      <th>reg_training_rmse</th>\n",
       "      <th>reg_testing_rmse</th>\n",
       "      <th>xgb_fit_time</th>\n",
       "      <th>xgb_training_rmse</th>\n",
       "      <th>xgb_testing_rmse</th>\n",
       "      <th>var_inf</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0.0412</td>\n",
       "      <td>391.007621</td>\n",
       "      <td>398.721522</td>\n",
       "      <td>0.2929</td>\n",
       "      <td>439.286908</td>\n",
       "      <td>447.246540</td>\n",
       "      <td>2.787762e-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>34.0</td>\n",
       "      <td>0.0255</td>\n",
       "      <td>326.048353</td>\n",
       "      <td>337.752900</td>\n",
       "      <td>0.2700</td>\n",
       "      <td>379.537481</td>\n",
       "      <td>393.058939</td>\n",
       "      <td>2.159637e-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>38.0</td>\n",
       "      <td>0.0413</td>\n",
       "      <td>303.546056</td>\n",
       "      <td>312.415367</td>\n",
       "      <td>0.2878</td>\n",
       "      <td>369.044803</td>\n",
       "      <td>378.684440</td>\n",
       "      <td>1.916290e-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>40.0</td>\n",
       "      <td>0.0396</td>\n",
       "      <td>294.215832</td>\n",
       "      <td>305.097912</td>\n",
       "      <td>0.2881</td>\n",
       "      <td>358.025595</td>\n",
       "      <td>369.963768</td>\n",
       "      <td>1.824632e-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>41.0</td>\n",
       "      <td>0.0329</td>\n",
       "      <td>289.707310</td>\n",
       "      <td>300.720860</td>\n",
       "      <td>0.2945</td>\n",
       "      <td>363.484972</td>\n",
       "      <td>374.421643</td>\n",
       "      <td>1.809008e-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>42.0</td>\n",
       "      <td>0.0331</td>\n",
       "      <td>286.541987</td>\n",
       "      <td>298.567852</td>\n",
       "      <td>0.2874</td>\n",
       "      <td>353.738301</td>\n",
       "      <td>368.271196</td>\n",
       "      <td>1.814050e-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>42.0</td>\n",
       "      <td>0.0343</td>\n",
       "      <td>313.455555</td>\n",
       "      <td>330.885766</td>\n",
       "      <td>0.2902</td>\n",
       "      <td>374.606304</td>\n",
       "      <td>393.505487</td>\n",
       "      <td>2.273081e-13</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Experiment   dof  reg_fit_time  reg_training_rmse  reg_testing_rmse  \\\n",
       "0           0  26.0        0.0412         391.007621        398.721522   \n",
       "1           1  34.0        0.0255         326.048353        337.752900   \n",
       "2           2  38.0        0.0413         303.546056        312.415367   \n",
       "3           3  40.0        0.0396         294.215832        305.097912   \n",
       "4           4  41.0        0.0329         289.707310        300.720860   \n",
       "5           5  42.0        0.0331         286.541987        298.567852   \n",
       "6           6  42.0        0.0343         313.455555        330.885766   \n",
       "\n",
       "   xgb_fit_time  xgb_training_rmse  xgb_testing_rmse       var_inf  \n",
       "0        0.2929         439.286908        447.246540  2.787762e-13  \n",
       "1        0.2700         379.537481        393.058939  2.159637e-13  \n",
       "2        0.2878         369.044803        378.684440  1.916290e-13  \n",
       "3        0.2881         358.025595        369.963768  1.824632e-13  \n",
       "4        0.2945         363.484972        374.421643  1.809008e-13  \n",
       "5        0.2874         353.738301        368.271196  1.814050e-13  \n",
       "6        0.2902         374.606304        393.505487  2.273081e-13  "
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fit_info_panel"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9ba26263-8b9d-4e35-880e-7b077e7e9159",
   "metadata": {},
   "source": [
    "## CBind"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "ca2a9cd3-2da2-4fd4-b673-7d62c346e657",
   "metadata": {},
   "outputs": [],
   "source": [
    "cgrouping = icfesl.group_categorical_features(X2, X2.columns.tolist(), distance_threshold=0.002)\n",
    "X4 = icfesl.combine_features(X2, cgrouping)\n",
    "X4_test = icfesl.combine_features(X2_test, cgrouping)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "id": "b055c561-7dce-4048-8ad6-0ca5c2c4ca68",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(15052, 861)"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X4_test.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d7db5b54-7a14-401e-be2f-2670f66a5bf6",
   "metadata": {},
   "source": [
    "#### 1.OLS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "11ace45f-93b7-4611-b2ba-4f1c186f2ecc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3.0391\n"
     ]
    }
   ],
   "source": [
    "start = time.time()\n",
    "model = sm.OLS(y_train, sm.add_constant(X4, has_constant='skip')).fit(disp=False)\n",
    "end = time.time()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "6cd36506-6cb2-4002-bde7-75be46c788df",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DF:861.0; R2: 0.43765323737774897\n"
     ]
    }
   ],
   "source": [
    "print(f'DF:{model.df_model}; R2: {model.rsquared}') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "889e61bb-0c16-4dcf-ba9f-4117720d6bd6",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(sm.add_constant(X4, has_constant='skip'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "b0f172ff-c995-4354-a9bd-c8490faafcba",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training RMSE: 2754.586999679324\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "5a226b37-6efc-42f2-bdf5-473a1844b96e",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(sm.add_constant(X4_test, has_constant='skip'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "189d19cb-6c55-4e55-9cdf-7c3652999a78",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing RMSE: 2946.3546464911465\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0f83b17f-caa2-48e7-a00c-fb108a9c74bb",
   "metadata": {},
   "source": [
    "#### 2. xgboost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "d5752ddb-6128-4743-a7b9-f6617ac74036",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = XGBRegressor(n_estimators=100, random_state=200)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "d9f35ec7-a09b-497a-b2a5-a6ff2b8a9479",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8763\n"
     ]
    }
   ],
   "source": [
    "start = time.time()\n",
    "model.fit(X4, y_train)\n",
    "end = time.time()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "859f3057-694d-4d9b-b4c7-afd9900985fe",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "642b22c0-8f68-4ebf-8b8d-37e9096a591b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training rmse: 3179.0328015859236\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training rmse: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "id": "49110501-218a-44b5-901e-35765607adaf",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X4_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "d581ff33-8ae3-4b70-b709-6570723daa99",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing rmse: 3301.038732606505\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing rmse: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c29881f4-a3d9-4ef4-84a7-ceb656bd7acf",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ab84783b-284f-4d36-9806-2f619cb9412c",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3c7f16a8-e38e-4ee5-8daa-f660b9a76a8e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
