{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d77e3e2d-0706-4fde-a71e-1ac3b74e8346",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from scipy.spatial.distance import cdist\n",
    "import random\n",
    "from sklearn.cluster import KMeans\n",
    "import sys\n",
    "sys.path.append('../')\n",
    "from icfesl import *\n",
    "from utility_functions import *\n",
    "from xgboost import XGBRegressor\n",
    "from pytorch_tabnet.tab_model import TabNetRegressor\n",
    "import time\n",
    "from sklearn.model_selection import train_test_split\n",
    "import matplotlib.pyplot as plt\n",
    "from scipy.stats import norm\n",
    "from catboost import CatBoostRegressor, Pool"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "dd181542-b52a-4555-afdf-ba920573f9b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "n_samples = 50000\n",
    "sigma = 0.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b6513970-d79a-4beb-9517-dea61573012e",
   "metadata": {},
   "outputs": [],
   "source": [
    "n_variables = 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "59c4ca71-57f4-4828-b297-248dc0e07b66",
   "metadata": {},
   "outputs": [],
   "source": [
    "n_levels = 1000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "664151bb-fc8a-4545-9345-0ee0f1ba41c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "cat_vars = ['var1','var2']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "524eb3b6-1960-4635-b0d9-a90eed58524b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from scipy.stats import norm\n",
    "\n",
    "def generate_correlated_categorical_variables(num_samples, num_variables, sigma, categories_per_variable):\n",
    "\n",
    "    correlation_matrix = np.array([[1.0, sigma],\n",
    "                                   [sigma, 1.0]])\n",
    "\n",
    "    \n",
    "    L = np.linalg.cholesky(correlation_matrix)\n",
    "    uncorrelated_normals = np.random.normal(size=(num_samples, num_variables))\n",
    "    correlated_normals = uncorrelated_normals @ L.T\n",
    "\n",
    "    categorical_data = np.zeros_like(correlated_normals, dtype=int)\n",
    "\n",
    "    for i in range(num_variables):\n",
    "        num_categories = categories_per_variable[i]\n",
    "        quantiles = np.linspace(0, 1, num_categories + 1)[1:-1]\n",
    "        thresholds = norm.ppf(quantiles)\n",
    "\n",
    "        for j in range(num_categories):\n",
    "            if j == 0:\n",
    "                categorical_data[:, i][correlated_normals[:, i] <= thresholds[j]] = j\n",
    "            elif j == num_categories - 1:\n",
    "                categorical_data[:, i][correlated_normals[:, i] > thresholds[j-1]] = j\n",
    "            else:\n",
    "                categorical_data[:, i][(correlated_normals[:, i] > thresholds[j-1]) & (correlated_normals[:, i] <= thresholds[j])] = j\n",
    "\n",
    "    res = pd.DataFrame(categorical_data, columns=[f\"var{i+1}\" for i in range(num_variables)])\n",
    "                       \n",
    "    return res\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "e82673dc-d14a-4b93-9c8f-613f27855b1a",
   "metadata": {},
   "outputs": [],
   "source": [
    "X = generate_correlated_categorical_variables(n_samples, n_variables, sigma, [n_levels, n_levels])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "3049f72f-fc0b-47cd-9341-269490a4a249",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Size of simulated data: 0.7630958557128906 MB\n"
     ]
    }
   ],
   "source": [
    "simulated_data_size = sys.getsizeof(X)\n",
    "print(f\"Size of simulated data: {simulated_data_size/1024**2} MB\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3fb395f5-06e5-4b29-886a-d3d22ea99556",
   "metadata": {},
   "source": [
    "## Simulated study for regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "493bc876-7031-4fd8-a2f6-0d4636372984",
   "metadata": {},
   "outputs": [],
   "source": [
    "beta_var1 = 10\n",
    "beta_var2 = -10\n",
    "intercept = 5\n",
    "\n",
    "error = np.random.normal(loc=0, scale=50, size=n_samples)\n",
    "\n",
    "y = (intercept +\n",
    "     beta_var1 * X['var1'] +\n",
    "     beta_var2 * X['var2'] +\n",
    "     error)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "445831a3-03db-4b65-9414-2c93aab2ee2b",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.concat([X, y], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "d6c95d06-0640-4944-9adc-fbee3c7cade1",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = data.rename(columns={0:\"y\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "e2b61f15-2c83-4de2-bb9e-7acfbd8531a4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjEAAAGdCAYAAADjWSL8AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAKPxJREFUeJzt3X9QVXd+//HXRQSBhbsihZsbMaVdatjFOK3ZRexmdaOiTpDu2FmbsFI746rxZxl1nHXNTMhOAqndqi1U17WOukFDMtO43Wm3rJgmZo0/Q2Si5obalng1csG7xQvKDRg53z/y9TRXFHOvIHwuz8fMmcn9nPfn8jl8ktzXPXzOOQ7LsiwBAAAYJmawBwAAABAJQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEixgz2AgdLT06PLly8rOTlZDodjsIcDAAC+BMuy1NHRIbfbrZiYvs+1RG2IuXz5sjIzMwd7GAAAIAIXL17U2LFj+6yJ2hCTnJws6fNfQkpKyiCPBgAAfBnt7e3KzMy0P8f7ErUh5tafkFJSUggxAAAY5sssBWFhLwAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkQgxAADASIQYAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjRe1TrAE8GF6vV36/P6K+aWlpGjduXD+PCMBwQYgBEDGv16tHc3IU7OyMqH9CYqI+8ngIMgAiQogBEDG/369gZ6fmv7hd6VnZYfVtbTqv159bJr/fT4gBEBFCDID7lp6VrYdzJg72MAAMMyzsBQAARiLEAAAAIxFiAACAkQgxAADASIQYAABgJEIMAAAwEpdYAxhUHo8non7c7RcAIQbAoOjwt8gRE6MFCxZE1J+7/QIgxAAYFMGOdlk9PdztF0DECDEABhV3+wUQKRb2AgAAIxFiAACAkQgxAADASIQYAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkHgAJwFgejyeifmlpaTz9GogChBgAxunwt8gRE6MFCxZE1D8hMVEfeTwEGcBwhBgAxgl2tMvq6dH8F7crPSs7rL6tTef1+nPL5Pf7CTGA4QgxAIyVnpWth3MmDvYwAAwSFvYCAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASVycBkNfrld/vD7tfpDebA4D+QIgBhjmv16tHc3IU7Owc7KEAQFgIMcAw5/f7FezsjOjGcY3vvqm6bRUDNDIA6BshBoCkyG4c19p0foBGAwD3FtbC3rKyMjkcjpDN5XLZ+y3LUllZmdxutxISEjRt2jSdO3cu5D26urq0atUqpaWlKSkpSUVFRbp06VJITVtbm0pKSuR0OuV0OlVSUqKrV69GfpQAACDqhH110je+8Q01Nzfb25kzZ+x9mzZt0ubNm1VVVaVTp07J5XJp5syZ6ujosGtKS0t14MAB1dTU6MiRI7p27ZoKCwt18+ZNu6a4uFgNDQ2qra1VbW2tGhoaVFJScp+HCgAAoknYf06KjY0NOftyi2VZ2rp1qzZu3Kh58+ZJkvbu3auMjAzt379fS5cuVSAQ0K5du/TKK69oxowZkqTq6mplZmbq0KFDmjVrljwej2pra3X8+HHl5eVJknbu3Kn8/Hw1NjZq/Pjx93O8AAAgSoR9Jub8+fNyu93KysrS008/rf/5n/+RJDU1Ncnn86mgoMCujY+P19SpU3X06FFJUn19vW7cuBFS43a7lZuba9ccO3ZMTqfTDjCSNHnyZDmdTrvmTrq6utTe3h6yAQCA6BVWiMnLy9MvfvEL/eY3v9HOnTvl8/k0ZcoU/e53v5PP55MkZWRkhPTJyMiw9/l8PsXFxWn06NF91qSnp/f62enp6XbNnVRUVNhraJxOpzIzM8M5NAAAYJiwQsycOXP053/+55owYYJm
zJihf/u3f5P0+Z+NbnE4HCF9LMvq1Xa722vuVH+v99mwYYMCgYC9Xbx48UsdEwAAMNN9PXYgKSlJEyZM0Pnz5+11MrefLWltbbXPzrhcLnV3d6utra3PmpaWll4/68qVK73O8nxRfHy8UlJSQjYAABC97ivEdHV1yePx6KGHHlJWVpZcLpfq6urs/d3d3Tp8+LCmTJkiSZo0aZJGjhwZUtPc3KyzZ8/aNfn5+QoEAjp58qRdc+LECQUCAbsGAAAgrKuT1q1bp7lz52rcuHFqbW3Viy++qPb2di1cuFAOh0OlpaUqLy9Xdna2srOzVV5ersTERBUXF0uSnE6nFi1apLVr12rMmDFKTU3VunXr7D9PSVJOTo5mz56txYsXa8eOHZKkJUuWqLCwkCuTAACALawQc+nSJT3zzDPy+/36vd/7PU2ePFnHjx/XI488Iklav369gsGgli9frra2NuXl5engwYNKTk6232PLli2KjY3V/PnzFQwGNX36dO3Zs0cjRoywa/bt26fVq1fbVzEVFRWpqqqqP44XAABEibBCTE1NTZ/7HQ6HysrKVFZWdteaUaNGqbKyUpWVlXetSU1NVXV1dThDAwAAw8x9rYkBAAAYLIQYAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkWIHewAAMBg8Hk9E/dLS0jRu3Lh+Hg2ASBBiAAwrHf4WOWJitGDBgoj6JyQm6iOPhyADDAGEGADDSrCjXVZPj+a/uF3pWdlh9W1tOq/Xn1smv99PiAGGAEIMgGEpPStbD+dMHOxhALgPLOwFAABGIsQAAAAjEWIAAICRCDEAAMBILOwFooTX65Xf7w+7X6T3SwGAwUaIAaKA1+vVozk5CnZ2DvZQAOCBIcQAUcDv9yvY2RnRvU8a331TddsqBmhkADBwCDFAFInk3ietTecHaDQAMLBY2AsAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkQgxAADASIQYAABgJEIMAAAw0n2FmIqKCjkcDpWWltptlmWprKxMbrdbCQkJmjZtms6dOxfSr6urS6tWrVJaWpqSkpJUVFSkS5cuhdS0tbWppKRETqdTTqdTJSUlunr16v0MFwAARJGIQ8ypU6f085//XI899lhI+6ZNm7R582ZVVVXp1KlTcrlcmjlzpjo6Ouya0tJSHThwQDU1NTpy5IiuXbumwsJC3bx5064pLi5WQ0ODamtrVVtbq4aGBpWUlEQ6XAAAEGUiCjHXrl3TD37wA+3cuVOjR4+22y3L0tatW7Vx40bNmzdPubm52rt3rzo7O7V//35JUiAQ0K5du/R3f/d3mjFjhv74j/9Y1dXVOnPmjA4dOiRJ8ng8qq2t1T/90z8pPz9f+fn52rlzp/71X/9VjY2N/XDYAADAdBGFmBUrVuipp57SjBkzQtqbmprk8/lUUFBgt8XHx2vq1Kk6evSoJKm+vl43btwIqXG73crNzbVrjh07JqfTqby8PLtm8uTJcjqdds3turq61N7eHrIBAIDoFRtuh5qaGr3//vs6depUr30+n0+SlJGREdKekZGhCxcu2DVxcXEhZ3Bu1dzq7/P5lJ6e3uv909PT7ZrbVVRU6IUXXgj3cAAAgKHCOhNz8eJF/fVf/7Wqq6s1atSou9Y5HI6Q15Zl9Wq73e01d6rv6302bNigQCBgbxcvXuzz5wEAALOFFWLq6+vV2tqqSZMmKTY2VrGxsTp8+LD+4R/+QbGxsfYZmNvPlrS2ttr7XC6Xuru71dbW1mdNS0tLr59/5cqVXmd5bomPj1dKSkrIBgAA
oldYIWb69Ok6c+aMGhoa7O3xxx/XD37wAzU0NOgP/uAP5HK5VFdXZ/fp7u7W4cOHNWXKFEnSpEmTNHLkyJCa5uZmnT171q7Jz89XIBDQyZMn7ZoTJ04oEAjYNQAAYHgLa01McnKycnNzQ9qSkpI0ZswYu720tFTl5eXKzs5Wdna2ysvLlZiYqOLiYkmS0+nUokWLtHbtWo0ZM0apqalat26dJkyYYC8UzsnJ0ezZs7V48WLt2LFDkrRkyRIVFhZq/Pjx933QAADAfGEv7L2X9evXKxgMavny5Wpra1NeXp4OHjyo5ORku2bLli2KjY3V/PnzFQwGNX36dO3Zs0cjRoywa/bt26fVq1fbVzEVFRWpqqqqv4cLAAAMdd8h5u233w557XA4VFZWprKysrv2GTVqlCorK1VZWXnXmtTUVFVXV9/v8AAAQJTi2UkAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkQgxAADASIQYAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRCDEAAMBIsYM9AAAwjcfjiahfWlqaxo0b18+jAYYvQgwAfEkd/hY5YmK0YMGCiPonJCbqI4+HIAP0E0IMMIR4vV75/f6w+0V6ZgDhCXa0y+rp0fwXtys9Kzusvq1N5/X6c8vk9/sJMUA/IcQAQ4TX69WjOTkKdnYO9lBwD+lZ2Xo4Z+JgDwMY9ggxwBDh9/sV7OyM6Ft+47tvqm5bxQCNDACGJkIMMMRE8i2/ten8AI0GAIYuLrEGAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkQgxAADASGGFmO3bt+uxxx5TSkqKUlJSlJ+fr3//93+391uWpbKyMrndbiUkJGjatGk6d+5cyHt0dXVp1apVSktLU1JSkoqKinTp0qWQmra2NpWUlMjpdMrpdKqkpERXr16N/CgBAEDUCSvEjB07Vi+//LLee+89vffee3ryySf1Z3/2Z3ZQ2bRpkzZv3qyqqiqdOnVKLpdLM2fOVEdHh/0epaWlOnDggGpqanTkyBFdu3ZNhYWFunnzpl1TXFyshoYG1dbWqra2Vg0NDSopKemnQwYAANEgNpziuXPnhrx+6aWXtH37dh0/flxf//rXtXXrVm3cuFHz5s2TJO3du1cZGRnav3+/li5dqkAgoF27dumVV17RjBkzJEnV1dXKzMzUoUOHNGvWLHk8HtXW1ur48ePKy8uTJO3cuVP5+flqbGzU+PHj++O4AQCA4SJeE3Pz5k3V1NTo+vXrys/PV1NTk3w+nwoKCuya+Ph4TZ06VUePHpUk1dfX68aNGyE1brdbubm5ds2xY8fkdDrtACNJkydPltPptGvupKurS+3t7SEbAACIXmGHmDNnzugrX/mK4uPj9eyzz+rAgQP6+te/Lp/PJ0nKyMgIqc/IyLD3+Xw+xcXFafTo0X3WpKen9/q56enpds2dVFRU2GtonE6nMjMzwz00AABgkLBDzPjx49XQ0KDjx49r2bJlWrhwoT788EN7v8PhCKm3LKtX2+1ur7lT/b3eZ8OGDQoEAvZ28eLFL3tIAADAQGGHmLi4OH3ta1/T448/roqKCk2cOFF///d/L5fLJUm9zpa0trbaZ2dcLpe6u7vV1tbWZ01LS0uvn3vlypVeZ3m+KD4+3r5q6tYGAACi133fJ8ayLHV1dSkrK0sul0t1dXX2vu7ubh0+fFhTpkyRJE2aNEkjR44MqWlubtbZs2ftmvz8fAUCAZ08edKuOXHihAKBgF0DAAAQ1tVJP/7xjzVnzhxlZmaq
o6NDNTU1evvtt1VbWyuHw6HS0lKVl5crOztb2dnZKi8vV2JiooqLiyVJTqdTixYt0tq1azVmzBilpqZq3bp1mjBhgn21Uk5OjmbPnq3Fixdrx44dkqQlS5aosLCQK5MAAIAtrBDT0tKikpISNTc3y+l06rHHHlNtba1mzpwpSVq/fr2CwaCWL1+utrY25eXl6eDBg0pOTrbfY8uWLYqNjdX8+fMVDAY1ffp07dmzRyNGjLBr9u3bp9WrV9tXMRUVFamqqqo/jhcAAESJsELMrl27+tzvcDhUVlamsrKyu9aMGjVKlZWVqqysvGtNamqqqqurwxkaAAAYZnh2EgAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkQgxAADASIQYAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEaKHewBAMBw4vF4IuqXlpamcePG9fNoALMRYgDgAejwt8gRE6MFCxZE1D8hMVEfeTwEGeALCDEA8AAEO9pl9fRo/ovblZ6VHVbf1qbzev25ZfL7/YQY4AsIMQDwAKVnZevhnImDPQwgKrCwFwAAGIkQAwAAjMSfk4B+5vV65ff7w+4X6VUrADBcEWKAfuT1evVoTo6CnZ2DPRQAiHqEGKAf+f1+BTs7I7oCpfHdN1W3rWKARgYA0YcQAwyASK5AaW06P0CjAYDoxMJeAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARgorxFRUVOib3/ymkpOTlZ6eru9973tqbGwMqbEsS2VlZXK73UpISNC0adN07ty5kJquri6tWrVKaWlpSkpKUlFRkS5duhRS09bWppKSEjmdTjmdTpWUlOjq1auRHSUAAIg6YYWYw4cPa8WKFTp+/Ljq6ur02WefqaCgQNevX7drNm3apM2bN6uqqkqnTp2Sy+XSzJkz1dHRYdeUlpbqwIEDqqmp0ZEjR3Tt2jUVFhbq5s2bdk1xcbEaGhpUW1ur2tpaNTQ0qKSkpB8OGQAARIPYcIpra2tDXu/evVvp6emqr6/Xd77zHVmWpa1bt2rjxo2aN2+eJGnv3r3KyMjQ/v37tXTpUgUCAe3atUuvvPKKZsyYIUmqrq5WZmamDh06pFmzZsnj8ai2tlbHjx9XXl6eJGnnzp3Kz89XY2Ojxo8f3x/HDgAADHZfa2ICgYAkKTU1VZLU1NQkn8+ngoICuyY+Pl5Tp07V0aNHJUn19fW6ceNGSI3b7VZubq5dc+zYMTmdTjvASNLkyZPldDrtmtt1dXWpvb09ZAMAANEr4hBjWZbWrFmjb3/728rNzZUk+Xw+SVJGRkZIbUZGhr3P5/MpLi5Oo0eP7rMmPT29189MT0+3a25XUVFhr59xOp3KzMyM9NAAAIABIg4xK1eu1AcffKBXX3211z6HwxHy2rKsXm23u73mTvV9vc+GDRsUCATs7eLFi1/mMAAAgKEiCjGrVq3Sr371K7311lsaO3as3e5yuSSp19mS1tZW++yMy+VSd3e32tra+qxpaWnp9XOvXLnS6yzPLfHx8UpJSQnZAABA9AorxFiWpZUrV+qNN97Qf/zHfygrKytkf1ZWllwul+rq6uy27u5uHT58WFOmTJEkTZo0SSNHjgypaW5u1tmzZ+2a/Px8BQIBnTx50q45ceKEAoGAXQMAAIa3sK5OWrFihfbv369/+Zd/UXJysn3Gxel0KiEhQQ6HQ6WlpSovL1d2drays7NVXl6uxMREFRcX27WLFi3S2rVrNWbMGKWmpmrdunWaMGGCfbVSTk6OZs+ercWLF2vHjh2SpCVLlqiwsJArkwAA
gKQwQ8z27dslSdOmTQtp3717t/7qr/5KkrR+/XoFg0EtX75cbW1tysvL08GDB5WcnGzXb9myRbGxsZo/f76CwaCmT5+uPXv2aMSIEXbNvn37tHr1avsqpqKiIlVVVUVyjAAAIAqFFWIsy7pnjcPhUFlZmcrKyu5aM2rUKFVWVqqysvKuNampqaqurg5neAAAYBjh2UkAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGCmsB0ACAAaPx+OJqF9aWprGjRvXz6MBBh8hBrgDr9crv98fdr9IP2SAvnT4W+SIidGCBQsi6p+QmKiPPB6CDKIOIQa4jdfr1aM5OQp2dg72UABJUrCjXVZPj+a/uF3pWdlh9W1tOq/Xn1smv99PiEHUIcQAt/H7/Qp2dkb0gdH47puq21YxQCPDcJeela2HcyYO9jCAIYMQA9xFJB8YrU3nB2g0AIDbcXUSAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkQgxAADASIQYAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABgp7BDzzjvvaO7cuXK73XI4HPrlL38Zst+yLJWVlcntdishIUHTpk3TuXPnQmq6urq0atUqpaWlKSkpSUVFRbp06VJITVtbm0pKSuR0OuV0OlVSUqKrV6+GfYAAACA6hR1irl+/rokTJ6qqquqO+zdt2qTNmzerqqpKp06dksvl0syZM9XR0WHXlJaW6sCBA6qpqdGRI0d07do1FRYW6ubNm3ZNcXGxGhoaVFtbq9raWjU0NKikpCSCQwQAANEoNtwOc+bM0Zw5c+64z7Isbd26VRs3btS8efMkSXv37lVGRob279+vpUuXKhAIaNeuXXrllVc0Y8YMSVJ1dbUyMzN16NAhzZo1Sx6PR7W1tTp+/Ljy8vIkSTt37lR+fr4aGxs1fvz4SI8XAIYlj8cTUb+0tDSNGzeun0cD9I+wQ0xfmpqa5PP5VFBQYLfFx8dr6tSpOnr0qJYuXar6+nrduHEjpMbtdis3N1dHjx7VrFmzdOzYMTmdTjvASNLkyZPldDp19OjRO4aYrq4udXV12a/b29v789BgIK/XK7/fH3a/SP9nDwxFHf4WOWJitGDBgoj6JyQm6iOPhyCDIalfQ4zP55MkZWRkhLRnZGTowoULdk1cXJxGjx7dq+ZWf5/Pp/T09F7vn56ebtfcrqKiQi+88MJ9HwOig9fr1aM5OQp2dg72UIBBFexol9XTo/kvbld6VnZYfVubzuv155bJ7/cTYjAk9WuIucXhcIS8tiyrV9vtbq+5U31f77NhwwatWbPGft3e3q7MzMxwho0o4vf7FezsjOh/3I3vvqm6bRUDNDJgcKRnZevhnImDPQygX/VriHG5XJI+P5Py0EMP2e2tra322RmXy6Xu7m61tbWFnI1pbW3VlClT7JqWlpZe73/lypVeZ3luiY+PV3x8fL8dC6JDJP/jbm06P0CjAQD0p369T0xWVpZcLpfq6urstu7ubh0+fNgOKJMmTdLIkSNDapqbm3X27Fm7Jj8/X4FAQCdPnrRrTpw4oUAgYNcAAIDhLewzMdeuXdN//dd/2a+bmprU0NCg1NRUjRs3TqWlpSovL1d2drays7NVXl6uxMREFRcXS5KcTqcWLVqktWvXasyYMUpNTdW6des0YcIE+2qlnJwczZ49W4sXL9aOHTskSUuWLFFhYSFXJgEAAEkRhJj33ntP3/3ud+3Xt9ahLFy4UHv27NH69esVDAa1fPlytbW1
KS8vTwcPHlRycrLdZ8uWLYqNjdX8+fMVDAY1ffp07dmzRyNGjLBr9u3bp9WrV9tXMRUVFd313jQAAGD4CTvETJs2TZZl3XW/w+FQWVmZysrK7lozatQoVVZWqrKy8q41qampqq6uDnd4AABgmODZSQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkQgxAADASIQYAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRYgd7AEBfvF6v/H5/2P08Hs8AjAYAMJQQYjBkeb1ePZqTo2Bn52APBRjWIv1SkJaWpnHjxvXzaID/Q4jBkOX3+xXs7NT8F7crPSs7rL6N776pum0VAzQyYHjo8LfIEROjBQsWRNQ/ITFRH3k8BBkMGEIMhrz0rGw9nDMxrD6tTecHaDTA8BHsaJfV0xPRF4nWpvN6/bll8vv9hBgMGEIMAKBPkXyRAB4Erk4CAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEg8ABIAMGA8Hk9E/dLS0nj6Ne6JEAMA6Hcd/hY5YmK0YMGCiPonJCbqI4+HIIM+EWIAAP0u2NEuq6dH81/crvSs7LD6tjad1+vPLZPf7yfEoE+EGAw4r9crv98fdr9IT0MDGDrSs7L1cM7EwR4GohQhBgPK6/Xq0ZwcBTs7B3soAIAoQ4jBgPL7/Qp2dkZ0Srnx3TdVt61igEYGADAdIQYPRCSnlFubzg/QaAAA0YD7xAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBJXJwEAhiSeu4R7IcTgS+GuuwAeFJ67hC+LEIN74q67AB4knruEL4sQg3virrsABgPPXcK9EGLwpXHXXQDAUEKIGUZY1wIAiCZDPsRs27ZNf/u3f6vm5mZ94xvf0NatW/XEE08M9rCMw7oWAMPJ/Xz54uomcwzpEPPaa6+ptLRU27Zt05/+6Z9qx44dmjNnjj788EP+BQsT61oADAf3e2WTxNVNJhnSIWbz5s1atGiRfvjDH0qStm7dqt/85jfavn27KiqG54fq/f5JiHUtAKLZ/VzZJP3f1U2//e1vlZOTE3Z/zuI8WEM2xHR3d6u+vl4/+tGPQtoLCgp09OjRXvVdXV3q6uqyXwcCAUlSe3v7gIzP5/PJ5/NF1DcmJkY9PT1h92tpaVHJX/6luj79NKKfK0mfeD5Qd+f1sPpc+fg8felLX/oa1ffGp8Gw+0rS1ZbLksMR8Zmc+FGj9MovfqGMjIyw+0b62TCYfV0ul1wuV0R97+bW57ZlWfcutoaoTz75xJJkvfvuuyHtL730kvVHf/RHveqff/55SxIbGxsbGxtbFGwXL168Z1YYsmdibnE4HCGvLcvq1SZJGzZs0Jo1a+zXPT09+t///V+NGTPmjvX3o729XZmZmbp48aJSUlL69b3RP5ijoY85MgPzNPRF2xxZlqWOjg653e571g7ZEJOWlqYRI0b0+pNNa2vrHU/TxcfHKz4+PqTtq1/96kAOUSkpKVHxL0w0Y46GPubIDMzT0BdNc+R0Or9U3ZB9inVcXJwmTZqkurq6kPa6ujpNmTJlkEYFAACGiiF7JkaS1qxZo5KSEj3++OPKz8/Xz3/+c3m9Xj377LODPTQAADDIhnSI+Yu/+Av97ne/009+8hM1NzcrNzdXv/71r/XII48M6rji4+P1/PPP9/rzFYYO5mjoY47MwDwNfcN5jhyW9WWuYQIAABhahuyaGAAAgL4QYgAAgJEIMQAAwEiEGAAAYCRCzF18/PHHWrRokbKyspSQkKA//MM/1PPPP6/u7u6QOq/Xq7lz5yopKUlpaWlavXp1r5ozZ85o6tSpSkhI0MMPP6yf/OQnX+6ZELinl156SVOmTFFiYuJdb27IHA1N27ZtU1ZWlkaNGqVJkybpt7/97WAP
adh45513NHfuXLndbjkcDv3yl78M2W9ZlsrKyuR2u5WQkKBp06bp3LlzITVdXV1atWqV0tLSlJSUpKKiIl26dOkBHkV0q6io0De/+U0lJycrPT1d3/ve99TY2BhSwzwRYu7qo48+Uk9Pj3bs2KFz585py5Yt+tnPfqYf//jHds3Nmzf11FNP6fr16zpy5Ihqamr0z//8z1q7dq1d097erpkzZ8rtduvUqVOqrKzUT3/6U23evHkwDivqdHd36/vf/76WLVt2x/3M0dD02muvqbS0VBs3btTp06f1xBNPaM6cOfJ6vYM9tGHh+vXrmjhxoqqqqu64f9OmTdq8ebOqqqp06tQpuVwuzZw5Ux0dHXZNaWmpDhw4oJqaGh05ckTXrl1TYWGhbt68+aAOI6odPnxYK1as0PHjx1VXV6fPPvtMBQUFun79/x5qyTxJQ/YBkEPRpk2brKysLPv1r3/9aysmJsb65JNP7LZXX33Vio+PtwKBgGVZlrVt2zbL6XRan376qV1TUVFhud1uq6en58ENPsrt3r3bcjqdvdqZo6HpW9/6lvXss8+GtD366KPWj370o0Ea0fAlyTpw4ID9uqenx3K5XNbLL79st3366aeW0+m0fvazn1mWZVlXr161Ro4cadXU1Ng1n3zyiRUTE2PV1tY+sLEPJ62trZYk6/Dhw5ZlMU+3cCYmDIFAQKmpqfbrY8eOKTc3N+QhVbNmzVJXV5fq6+vtmqlTp4bchGjWrFm6fPmyPv744wc29uGKORp6uru7VV9fr4KCgpD2goICHT16dJBGhVuamprk8/lC5ic+Pl5Tp06156e+vl43btwIqXG73crNzWUOB0ggEJAk+zOIefocIeZL+u///m9VVlaGPPLA5/P1ehjl6NGjFRcXZz+48k41t17f/nBL9D/maOjx+/26efPmHX/n/L4H36056Gt+fD6f4uLiNHr06LvWoP9YlqU1a9bo29/+tnJzcyUxT7cMuxBTVlYmh8PR5/bee++F9Ll8+bJmz56t73//+/rhD38Yss/hcPT6GZZlhbTfXmP9/wWjd+qLyOaoL8zR0HSn3zm/76EjkvlhDgfGypUr9cEHH+jVV1/ttW+4z9OQfnbSQFi5cqWefvrpPmt+//d/3/7ny5cv67vf/a79AMovcrlcOnHiREhbW1ubbty4Yadjl8vVK/G2trZK6p2g8blw56gvzNHQk5aWphEjRtzxd87ve/C5XC5Jn3+Lf+ihh+z2L86Py+VSd3e32traQr7lt7a2asqUKQ92wFFu1apV+tWvfqV33nlHY8eOtduZp/9vsBbjmODSpUtWdna29fTTT1ufffZZr/23Fo1evnzZbqupqem1aPSrX/2q1dXVZde8/PLLLBrtZ/da2MscDS3f+ta3rGXLloW05eTksLB3EOguC3v/5m/+xm7r6uq644LR1157za65fPlyVC0YHWw9PT3WihUrLLfbbf3nf/7nHfczT5ZFiLmLTz75xPra175mPfnkk9alS5es5uZme7vls88+s3Jzc63p06db77//vnXo0CFr7Nix1sqVK+2aq1evWhkZGdYzzzxjnTlzxnrjjTeslJQU66c//elgHFbUuXDhgnX69GnrhRdesL7yla9Yp0+ftk6fPm11dHRYlsUcDVU1NTXWyJEjrV27dlkffvihVVpaaiUlJVkff/zxYA9tWOjo6LD/W5Fkbd682Tp9+rR14cIFy7I+D/FOp9N64403rDNnzljPPPOM9dBDD1nt7e32ezz77LPW2LFjrUOHDlnvv/++9eSTT1oTJ0684xc+hG/ZsmWW0+m03n777ZDPn87OTruGeSLE3NXu3bstSXfcvujChQvWU089ZSUkJFipqanWypUrQy7VtSzL+uCDD6wnnnjCio+Pt1wul1VWVsY3/H6ycOHCO87RW2+9ZdcwR0PTP/7jP1qPPPKIFRcXZ/3Jn/yJfekoBt5bb711x/9uFi5caFnW59/yn3/+ecvlclnx
8fHWd77zHevMmTMh7xEMBq2VK1daqampVkJCglVYWGh5vd5BOJrodLfPn927d9s1zJNlOSyL25ICAADzDLurkwAAQHQgxAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkQgxAADASP8P2kPQxab/e08AAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "_ = plt.hist(error, bins=30, color='skyblue', edgecolor='black')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "7d8a5ba5-57a8-415b-a1fb-8d97d2b2a787",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Axes: >"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkEAAAGdCAYAAAAVEKdkAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAANiFJREFUeJzt3X10lPWd//9XCJMhoWEksMkwGm7aZRENVRckBNsFCySwxNTDaSnGTfGUIhYBU2CtLLUGXAJiBc4J6x3LAdZAcbtI6yoNCavisuHOYLbcLWpFECEENSQgOBmTz+8Pf7m+DpMEApOEzOf5OIcD87ne13V93rlmhleumWsmyhhjBAAAYJlO7T0BAACA9kAIAgAAViIEAQAAKxGCAACAlQhBAADASoQgAABgJUIQAACwEiEIAABYqXN7T6A91dfX6+TJk4qPj1dUVFR7TwcAAFwBY4zOnTsnn8+nTp2u/nyO1SHo5MmTSk5Obu9pAACAq/Dxxx/rpptuuur1rQ5B8fHxkr7+IXbr1i1keSAQUHFxsdLT0+Vyudp6em3Kpl4lu/ql18hEr5GJXq9MTU2NkpOTnf/Hr5bVIajhJbBu3bo1GYLi4uLUrVs3K+6MtvQq2dUvvUYmeo1M9Noy1/pWFt4YDQAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASoQgAABgJUIQAACwEiEIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKhCAAAGClzu09AQAdQ9/HXm/vKVyWO9po6VApJW+r/HVR+mjJ+PaeEoDrGGeCAACAlQhBAADASoQgAABgJUIQAACwEiEIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKhCAAAGAlQhAAALASIQgAAFiJEAQAAKxECAIAAFYiBAEAACsRggAAgJUIQQAAwEqEIAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASp1busLbb7+tp59+WmVlZTp16pQ2b96se++9V5IUCAT061//Wlu2bNGHH34oj8ej0aNHa8mSJfL5fM42/H6/5s6dq9/97ne6ePGiRo0apWeffVY33XSTU1NVVaVZs2bp1VdflSRlZWWpoKBAN9xwg1Nz/PhxPfzww3rjjTcUGxur7Oxs/fa3v1VMTMxV/jgARJK+j73e3lNosY+WjG/vKQDWaPGZoC+++EK33XabVq5cGbLswoUL2rdvnx5//HHt27dPr7zyit577z1lZWUF1eXm5mrz5s3auHGjduzYofPnzyszM1N1dXVOTXZ2tsrLy1VUVKSioiKVl5crJyfHWV5XV6fx48friy++0I4dO7Rx40Zt2rRJc+bMaWlLAADAQi0+EzRu3DiNGzeu0WUej0clJSVBYwUFBRo6dKiOHz+u3r17q7q6WqtXr9ZLL72k0aNHS5IKCwuVnJysbdu2KSMjQ4cPH1ZRUZF27dql1NRUSdKqVauUlpamI0eOaMCAASouLtahQ4f08ccfO2eZnnnmGT3wwANatGiRunXr1tLWAACARVr9PUHV1dWKiopyXsYqKytTIBBQenq6U+Pz+ZSSkqLS0lJJ0s6dO+XxeJwAJEnDhg2Tx+MJqklJSQl6mS0jI0N+v19lZWWt3RYAAOjgWnwmqCW+/PJLPfbYY8rOznbOzFRUVCgmJkbdu3cPqk1KSlJFRYVTk5iYGLK9xMTEoJqkpKSg5d27d1dMTIxTcym/3y+/3+/crqmpkfT1e5kCgUBIfcNYY8sijU29Snb1G65e3dEmHNNpVe5OJujvjuhKjxP34chEry1b91q1WggKBAKaNGmS6uvr9eyzz1623hijqKgo5/Y3/30tNd+0ePFiLViwIGS8uLhYcXFxTc7t0pf4IplNvUp29XutvS4dGqaJtIEnh9S39xSu2pYtW1pUz304MtFr8y5cuBCWfbdKCAoEApo4
caKOHj2qN954I+j9OV6vV7W1taqqqgo6G1RZWanhw4c7NadPnw7Z7pkzZ5yzP16vV7t37w5aXlVVpUAgEHKGqMG8efM0e/Zs53ZNTY2Sk5OVnp7e6HuIAoGASkpKNGbMGLlcrhb8BDoem3qV7Oo3XL2m5G0N46xah7uT0ZND6vX4O53kr2/8l6Hr3YG8jCuq4z4cmej1yjS8knOtwh6CGgLQ+++/rzfffFM9evQIWj548GC5XC6VlJRo4sSJkqRTp07pwIEDWrp0qSQpLS1N1dXV2rNnj4YO/frXz927d6u6utoJSmlpaVq0aJFOnTqlXr16Sfr6jI7b7dbgwYMbnZvb7Zbb7Q4Zd7lczR6Ayy2PJDb1KtnV77X26q/rOKHCXx/Voeb7TS09RtyHIxO9Xn6dcGhxCDp//rw++OAD5/bRo0dVXl6uhIQE+Xw+/ehHP9K+ffv02muvqa6uznl/TkJCgmJiYuTxeDRlyhTNmTNHPXr0UEJCgubOnatBgwY5V4sNHDhQY8eO1dSpU/XCCy9Ikh588EFlZmZqwIABkqT09HTdcsstysnJ0dNPP63PP/9cc+fO1dSpU7kyDAAAXFaLQ9A777yju+++27nd8PLS5MmTlZeX53y44e233x603ptvvqmRI0dKkpYvX67OnTtr4sSJzoclrl27VtHR0U79+vXrNWvWLOcqsqysrKDPJoqOjtbrr7+u6dOn66677gr6sEQAAIDLaXEIGjlypIxp+sqL5pY16NKliwoKClRQUNBkTUJCggoLC5vdTu/evfXaa69ddn8AAACX4rvDAACAlQhBAADASoQgAABgJUIQAACwEiEIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKhCAAAGAlQhAAALASIQgAAFiJEAQAAKxECAIAAFYiBAEAACsRggAAgJUIQQAAwEqEIAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASoQgAABgJUIQAACwEiEIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKhCAAAGAlQhAAALASIQgAAFiJEAQAAKxECAIAAFYiBAEAACsRggAAgJUIQQAAwEqEIAAAYKUWh6C3335b99xzj3w+n6KiovSHP/whaLkxRnl5efL5fIqNjdXIkSN18ODBoBq/36+ZM2eqZ8+e6tq1q7KysnTixImgmqqqKuXk5Mjj8cjj8SgnJ0dnz54Nqjl+/Ljuuecede3aVT179tSsWbNUW1vb0pYAAICFWhyCvvjiC912221auXJlo8uXLl2qZcuWaeXKldq7d6+8Xq/GjBmjc+fOOTW5ubnavHmzNm7cqB07duj8+fPKzMxUXV2dU5Odna3y8nIVFRWpqKhI5eXlysnJcZbX1dVp/Pjx+uKLL7Rjxw5t3LhRmzZt0pw5c1raEgAAsFDnlq4wbtw4jRs3rtFlxhitWLFC8+fP14QJEyRJ69atU1JSkjZs2KBp06apurpaq1ev1ksvvaTRo0dLkgoLC5WcnKxt27YpIyNDhw8fVlFRkXbt2qXU1FRJ0qpVq5SWlqYjR45owIABKi4u1qFDh/Txxx/L5/NJkp555hk98MADWrRokbp163ZVPxAAAGCHFoeg5hw9elQVFRVKT093xtxut0aMGKHS0lJNmzZNZWVlCgQCQTU+n08pKSkqLS1VRkaGdu7cKY/H4wQgSRo2bJg8Ho9KS0s1YMAA7dy5UykpKU4AkqSMjAz5/X6VlZXp7rvvDpmf3++X3+93btfU1EiSAoGAAoFASH3DWGPLIo1NvUp29RuuXt3RJhzTaVXuTibo747oSo8T9+HIRK8tW/dahTUEVVRUSJKSkpKCxpOSknTs2DGnJiYmRt27dw+paVi/oqJCiYmJIdtPTEwMqrl0P927d1dMTIxTc6nFixdrwYIFIePFxcWKi4trsq+SkpIml0Uam3qV7Or3WntdOjRM
E2kDTw6pb+8pXLUtW7a0qJ77cGSi1+ZduHAhLPsOawhqEBUVFXTbGBMydqlLaxqrv5qab5o3b55mz57t3K6pqVFycrLS09MbffksEAiopKREY8aMkcvlanb+HZ1NvUp29RuuXlPytoZxVq3D3cnoySH1evydTvLXN/+cc706kJdxRXXchyMTvV6ZhldyrlVYQ5DX65X09VmaXr16OeOVlZXOWRuv16va2lpVVVUFnQ2qrKzU8OHDnZrTp0+HbP/MmTNB29m9e3fQ8qqqKgUCgZAzRA3cbrfcbnfIuMvlavYAXG55JLGpV8mufq+1V39dxwkV/vqoDjXfb2rpMeI+HJno9fLrhENYPyeoX79+8nq9Qae2amtrtX37difgDB48WC6XK6jm1KlTOnDggFOTlpam6upq7dmzx6nZvXu3qqurg2oOHDigU6dOOTXFxcVyu90aPHhwONsCAAARqMVngs6fP68PPvjAuX306FGVl5crISFBvXv3Vm5urvLz89W/f3/1799f+fn5iouLU3Z2tiTJ4/FoypQpmjNnjnr06KGEhATNnTtXgwYNcq4WGzhwoMaOHaupU6fqhRdekCQ9+OCDyszM1IABAyRJ6enpuuWWW5STk6Onn35an3/+uebOnaupU6dyZRgAALisFoegd955J+jKq4b32EyePFlr167Vo48+qosXL2r69OmqqqpSamqqiouLFR8f76yzfPlyde7cWRMnTtTFixc1atQorV27VtHR0U7N+vXrNWvWLOcqsqysrKDPJoqOjtbrr7+u6dOn66677lJsbKyys7P129/+tuU/BQAAYJ0Wh6CRI0fKmKYvP42KilJeXp7y8vKarOnSpYsKCgpUUFDQZE1CQoIKCwubnUvv3r312muvXXbOAAAAl+K7wwAAgJUIQQAAwEqEIAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASoQgAABgJUIQAACwEiEIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKhCAAAGAlQhAAALASIQgAAFiJEAQAAKxECAIAAFYiBAEAACsRggAAgJUIQQAAwEqEIAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASoQgAABgJUIQAACwEiEIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKYQ9BX331lX7961+rX79+io2N1be//W0tXLhQ9fX1To0xRnl5efL5fIqNjdXIkSN18ODBoO34/X7NnDlTPXv2VNeuXZWVlaUTJ04E1VRVVSknJ0cej0cej0c5OTk6e/ZsuFsCAAARKOwh6KmnntLzzz+vlStX6vDhw1q6dKmefvppFRQUODVLly7VsmXLtHLlSu3du1der1djxozRuXPnnJrc3Fxt3rxZGzdu1I4dO3T+/HllZmaqrq7OqcnOzlZ5ebmKiopUVFSk8vJy5eTkhLslAAAQgTqHe4M7d+7UD3/4Q40fP16S1LdvX/3ud7/TO++8I+nrs0ArVqzQ/PnzNWHCBEnSunXrlJSUpA0bNmjatGmqrq7W6tWr9dJLL2n06NGSpMLCQiUnJ2vbtm3KyMjQ4cOHVVRUpF27dik1NVWStGrVKqWlpenIkSMaMGBAuFsDAAARJOxngr73ve/pv/7rv/Tee+9Jkv73f/9XO3bs0N///d9Lko4ePaqKigqlp6c767jdbo0YMUKlpaWSpLKyMgUCgaAan8+nlJQUp2bnzp3yeDxOAJKkYcOGyePxODUAAABNCfuZoF/96leqrq7WzTffrOjoaNXV1WnRokW67777JEkVFRWSpKSkpKD1kpKSdOzYMacmJiZG3bt3D6lpWL+iokKJiYkh+09MTHRqLuX3++X3+53bNTU1kqRAIKBAIBBS3zDW2LJIY1Ovkl39hqtXd7QJx3RalbuTCfq7I7rS48R9ODLRa8vWvVZh
D0Evv/yyCgsLtWHDBt16660qLy9Xbm6ufD6fJk+e7NRFRUUFrWeMCRm71KU1jdU3t53FixdrwYIFIePFxcWKi4trcr8lJSXNziuS2NSrZFe/19rr0qFhmkgbeHJI/eWLrlNbtmxpUT334chEr827cOFCWPYd9hD0j//4j3rsscc0adIkSdKgQYN07NgxLV68WJMnT5bX65X09ZmcXr16OetVVlY6Z4e8Xq9qa2tVVVUVdDaosrJSw4cPd2pOnz4dsv8zZ86EnGVqMG/ePM2ePdu5XVNTo+TkZKWnp6tbt24h9YFAQCUlJRozZoxcLldLfxQdik29Snb1G65eU/K2hnFWrcPdyejJIfV6/J1O8tc3/0vV9epAXsYV1XEfjkz0emUaXsm5VmEPQRcuXFCnTsFvNYqOjnYuke/Xr5+8Xq9KSkp0xx13SJJqa2u1fft2PfXUU5KkwYMHy+VyqaSkRBMnTpQknTp1SgcOHNDSpUslSWlpaaqurtaePXs0dOjXv6Lu3r1b1dXVTlC6lNvtltvtDhl3uVzNHoDLLY8kNvUq2dXvtfbqr+s4ocJfH9Wh5vtNLT1G3IcjE71efp1wCHsIuueee7Ro0SL17t1bt956q959910tW7ZMP/vZzyR9/RJWbm6u8vPz1b9/f/Xv31/5+fmKi4tTdna2JMnj8WjKlCmaM2eOevTooYSEBM2dO1eDBg1yrhYbOHCgxo4dq6lTp+qFF16QJD344IPKzMzkyjAAAHBZYQ9BBQUFevzxxzV9+nRVVlbK5/Np2rRp+s1vfuPUPProo7p48aKmT5+uqqoqpaamqri4WPHx8U7N8uXL1blzZ02cOFEXL17UqFGjtHbtWkVHRzs169ev16xZs5yryLKysrRy5cpwtwQAACJQ2ENQfHy8VqxYoRUrVjRZExUVpby8POXl5TVZ06VLFxUUFAR9yOKlEhISVFhYeA2zBQAAtuK7wwAAgJUIQQAAwEqEIAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlcL+idEALq/vY6+32b7c0UZLh379LfAd9UtFAaA1cCYIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKhCAAAGAlQhAAALASIQgAAFiJEAQAAKxECAIAAFbiu8MA4Dpypd8rdz19J9xHS8a36/6Bq8WZIAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASoQgAABgJUIQAACwEiEIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKhCAAAGAlQhAAALASIQgAAFiJEAQAAKxECAIAAFZqlRD0ySef6B/+4R/Uo0cPxcXF6fbbb1dZWZmz3BijvLw8+Xw+xcbGauTIkTp48GDQNvx+v2bOnKmePXuqa9euysrK0okTJ4JqqqqqlJOTI4/HI4/Ho5ycHJ09e7Y1WgIAABEm7CGoqqpKd911l1wul/70pz/p0KFDeuaZZ3TDDTc4NUuXLtWyZcu0cuVK7d27V16vV2PGjNG5c+ecmtzcXG3evFkbN27Ujh07dP78eWVmZqqurs6pyc7OVnl5uYqKilRUVKTy8nLl5OSEuyUAABCBOod7g0899ZSSk5O1Zs0aZ6xv377Ov40xWrFihebPn68JEyZIktatW6ekpCRt2LBB06ZNU3V1tVavXq2XXnpJo0ePliQVFhYqOTlZ27ZtU0ZGhg4fPqyioiLt2rVLqampkqRVq1YpLS1NR44c0YABA8LdGgAAiCBhD0GvvvqqMjIy9OMf/1jbt2/XjTfeqOnTp2vq1KmSpKNHj6qiokLp6enOOm63WyNGjFBpaammTZumsrIyBQKBoBqfz6eUlBSVlpYqIyNDO3fulMfjcQKQJA0bNkwej0elpaWNhiC/3y+/3+/crqmpkSQFAgEFAoGQ+oaxxpZFGpt6ldq/X3e0abt9dTJBf0cyem0f
rf04au/Ha1ui15ate63CHoI+/PBDPffcc5o9e7b+6Z/+SXv27NGsWbPkdrv105/+VBUVFZKkpKSkoPWSkpJ07NgxSVJFRYViYmLUvXv3kJqG9SsqKpSYmBiy/8TERKfmUosXL9aCBQtCxouLixUXF9dkTyUlJc10HFls6lVqv36XDm37fT45pL7td9pO6LVtbdmypU32Y9PzE70278KFC2HZd9hDUH19vYYMGaL8/HxJ0h133KGDBw/queee009/+lOnLioqKmg9Y0zI2KUurWmsvrntzJs3T7Nnz3Zu19TUKDk5Wenp6erWrVtIfSAQUElJicaMGSOXy9Xs3Do6m3qV2r/flLytbbYvdyejJ4fU6/F3Oslf3/xjrKOj1/ZxIC+jVbff3o/XtkSvV6bhlZxrFfYQ1KtXL91yyy1BYwMHDtSmTZskSV6vV9LXZ3J69erl1FRWVjpnh7xer2pra1VVVRV0NqiyslLDhw93ak6fPh2y/zNnzoScZWrgdrvldrtDxl0uV7MH4HLLI4lNvUrt16+/ru3/0/LXR7XLftsDvbattnoM2fT8RK+XXyccwn512F133aUjR44Ejb333nvq06ePJKlfv37yer1Bp79qa2u1fft2J+AMHjxYLpcrqObUqVM6cOCAU5OWlqbq6mrt2bPHqdm9e7eqq6udGgAAgKaE/UzQL3/5Sw0fPlz5+fmaOHGi9uzZoxdffFEvvviipK9fwsrNzVV+fr769++v/v37Kz8/X3FxccrOzpYkeTweTZkyRXPmzFGPHj2UkJCguXPnatCgQc7VYgMHDtTYsWM1depUvfDCC5KkBx98UJmZmVwZBgAALivsIejOO+/U5s2bNW/ePC1cuFD9+vXTihUrdP/99zs1jz76qC5evKjp06erqqpKqampKi4uVnx8vFOzfPlyde7cWRMnTtTFixc1atQorV27VtHR0U7N+vXrNWvWLOcqsqysLK1cuTLcLQEAgAgU9hAkSZmZmcrMzGxyeVRUlPLy8pSXl9dkTZcuXVRQUKCCgoImaxISElRYWHgtUwUAAJbiu8MAAICVCEEAAMBKhCAAAGAlQhAAALASIQgAAFiJEAQAAKxECAIAAFYiBAEAACsRggAAgJUIQQAAwEqEIAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASoQgAABgJUIQAACwEiEIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKhCAAAGAlQhAAALASIQgAAFiJEAQAAKxECAIAAFYiBAEAACsRggAAgJUIQQAAwEqEIAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASq0eghYvXqyoqCjl5uY6Y8YY5eXlyefzKTY2ViNHjtTBgweD1vP7/Zo5c6Z69uyprl27KisrSydOnAiqqaqqUk5Ojjwejzwej3JycnT27NnWbgkAAESAVg1Be/fu1Ysvvqjvfve7QeNLly7VsmXLtHLlSu3du1der1djxozRuXPnnJrc3Fxt3rxZGzdu1I4dO3T+/HllZmaqrq7OqcnOzlZ5ebmKiopUVFSk8vJy5eTktGZLAAAgQrRaCDp//rzuv/9+rVq1St27d3fGjTFasWKF5s+frwkTJiglJUXr1q3ThQsXtGHDBklSdXW1Vq9erWeeeUajR4/WHXfcocLCQu3fv1/btm2TJB0+fFhFRUX613/9V6WlpSktLU2rVq3Sa6+9piNHjrRWWwAAIEJ0bq0NP/zwwxo/frxGjx6tf/7nf3bGjx49qoqKCqWnpztjbrdbI0aMUGlpqaZNm6aysjIFAoGgGp/Pp5SUFJWWliojI0M7d+6Ux+NRamqqUzNs2DB5PB6VlpZqwIABIXPy+/3y+/3O7ZqaGklSIBBQIBAIqW8Ya2xZpLGpV6n9+3VHm7bbVycT9Hcko9f20dqPo/Z+vLYlem3Zuteq
VULQxo0btW/fPu3duzdkWUVFhSQpKSkpaDwpKUnHjh1zamJiYoLOIDXUNKxfUVGhxMTEkO0nJiY6NZdavHixFixYEDJeXFysuLi4JvspKSlpclmksalXqf36XTq07ff55JD6tt9pO6HXtrVly5Y22Y9Nz0/02rwLFy6EZd9hD0Eff/yxHnnkERUXF6tLly5N1kVFRQXdNsaEjF3q0prG6pvbzrx58zR79mzndk1NjZKTk5Wenq5u3bqF1AcCAZWUlGjMmDFyuVzNzq2js6lXqf37Tcnb2mb7cncyenJIvR5/p5P89c0/xjo6em0fB/IyWnX77f14bUv0emUaXsm5VmEPQWVlZaqsrNTgwYOdsbq6Or399ttauXKl836diooK9erVy6mprKx0zg55vV7V1taqqqoq6GxQZWWlhg8f7tScPn06ZP9nzpwJOcvUwO12y+12h4y7XK5mD8DllkcSm3qV2q9ff13b/6flr49ql/22B3ptW231GLLp+YleL79OOIT9jdGjRo3S/v37VV5e7vwZMmSI7r//fpWXl+vb3/62vF5v0Omv2tpabd++3Qk4gwcPlsvlCqo5deqUDhw44NSkpaWpurpae/bscWp2796t6upqpwYAAKApYT8TFB8fr5SUlKCxrl27qkePHs54bm6u8vPz1b9/f/Xv31/5+fmKi4tTdna2JMnj8WjKlCmaM2eOevTooYSEBM2dO1eDBg3S6NGjJUkDBw7U2LFjNXXqVL3wwguSpAcffFCZmZmNvikakavvY6+3eB13tNHSoV+/LNXev0UDANpHq10d1pxHH31UFy9e1PTp01VVVaXU1FQVFxcrPj7eqVm+fLk6d+6siRMn6uLFixo1apTWrl2r6Ohop2b9+vWaNWuWcxVZVlaWVq5c2eb9AACAjqdNQtBbb70VdDsqKkp5eXnKy8trcp0uXbqooKBABQUFTdYkJCSosLAwTLMEAAA24bvDAACAlQhBAADASoQgAABgJUIQAACwEiEIAABYiRAEAACsRAgCAABWIgQBAAArtcsnRgMAIsfVfHVNS7TG19x8tGR8WLaDjo0zQQAAwEqEIAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASoQgAABgJUIQAACwEiEIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKhCAAAGAlQhAAALASIQgAAFiJEAQAAKxECAIAAFYiBAEAACsRggAAgJUIQQAAwEqEIAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASoQgAABgpbCHoMWLF+vOO+9UfHy8EhMTde+99+rIkSNBNcYY5eXlyefzKTY2ViNHjtTBgweDavx+v2bOnKmePXuqa9euysrK0okTJ4JqqqqqlJOTI4/HI4/Ho5ycHJ09ezbcLQEAgAgU9hC0fft2Pfzww9q1a5dKSkr01VdfKT09XV988YVTs3TpUi1btkwrV67U3r175fV6NWbMGJ07d86pyc3N1ebNm7Vx40bt2LFD58+fV2Zmpurq6pya7OxslZeXq6ioSEVFRSovL1dOTk64WwIAABGoc7g3WFRUFHR7zZo1SkxMVFlZmf7u7/5OxhitWLFC8+fP14QJEyRJ69atU1JSkjZs2KBp06apurpaq1ev1ksvvaTRo0dLkgoLC5WcnKxt27YpIyNDhw8fVlFRkXbt2qXU1FRJ0qpVq5SWlqYjR45owIAB4W4NAABEkLCHoEtVV1dLkhISEiRJR48eVUVFhdLT050at9utESNGqLS0VNOmTVNZWZkCgUBQjc/nU0pKikpLS5WRkaGdO3fK4/E4AUiShg0bJo/Ho9LS0kZDkN/vl9/vd27X1NRIkgKBgAKBQEh9w1hjyyJNR+7VHW1avk4nE/R3JKPXyESv1+Z6fa7ryM/FLXUtvYbr59OqIcgYo9mzZ+t73/ueUlJSJEkV
FRWSpKSkpKDapKQkHTt2zKmJiYlR9+7dQ2oa1q+oqFBiYmLIPhMTE52aSy1evFgLFiwIGS8uLlZcXFyTfZSUlDS5LNJ0xF6XDr36dZ8cUh++iVzn6DUy0evV2bJlS9i21Ro64nPx1bqaXi9cuBCWfbdqCJoxY4b+/Oc/a8eOHSHLoqKigm4bY0LGLnVpTWP1zW1n3rx5mj17tnO7pqZGycnJSk9PV7du3ULqA4GASkpKNGbMGLlcrmbn1tF15F5T8ra2eB13J6Mnh9Tr8Xc6yV/f/P2uo6PXyESv1+ZAXkZYthNuHfm5uKWupdeGV3KuVauFoJkzZ+rVV1/V22+/rZtuuskZ93q9kr4+k9OrVy9nvLKy0jk75PV6VVtbq6qqqqCzQZWVlRo+fLhTc/r06ZD9njlzJuQsUwO32y232x0y7nK5mj0Al1seSTpir/66q39S9NdHXdP6HQm9RiZ6vTrX+/NcR3wuvlpX02u4fjZhvzrMGKMZM2bolVde0RtvvKF+/foFLe/Xr5+8Xm/Q6a/a2lpt377dCTiDBw+Wy+UKqjl16pQOHDjg1KSlpam6ulp79uxxanbv3q3q6mqnBgAAoClhPxP08MMPa8OGDfrjH/+o+Ph45/05Ho9HsbGxioqKUm5urvLz89W/f3/1799f+fn5iouLU3Z2tlM7ZcoUzZkzRz169FBCQoLmzp2rQYMGOVeLDRw4UGPHjtXUqVP1wgsvSJIefPBBZWZmcmUYAAC4rLCHoOeee06SNHLkyKDxNWvW6IEHHpAkPfroo7p48aKmT5+uqqoqpaamqri4WPHx8U798uXL1blzZ02cOFEXL17UqFGjtHbtWkVHRzs169ev16xZs5yryLKysrRy5cpwtwQAACJQ2EOQMZe/hDEqKkp5eXnKy8trsqZLly4qKChQQUFBkzUJCQkqLCy8mmkCAADL8d1hAADASoQgAABgJUIQAACwEiEIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKrfYt8uiYUvK2WvON1AAAu3EmCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlbg6DABgnb6Pvd7eU2iUO9po6dDGr9T9aMn4dppV5OJMEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASoQgAABgJUIQAACwEiEIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKhCAAAGAlQhAAALASIQgAAFiJEAQAAKxECAIAAFYiBAEAACsRggAAgJUIQQAAwEqEIAAAYCVCEAAAsFLn9p5AJOv72OvtPYUr5o42Wjq0vWcBAEDb6fAh6Nlnn9XTTz+tU6dO6dZbb9WKFSv0/e9/v72nBQBAWHWkX6y/6aMl49t7Ck3q0C+Hvfzyy8rNzdX8+fP17rvv6vvf/77GjRun48ePt/fUAADAda5Dh6Bly5ZpypQp+vnPf66BAwdqxYoVSk5O1nPPPdfeUwMAANe5DvtyWG1trcrKyvTYY48Fjaenp6u0tLTRdfx+v/x+v3O7urpakvT5558rEAiE1AcCAV24cEGfffaZXC5Xi+fY+asvWrxOe+lcb3ThQr06Bzqprj6qvafT6mzql14jE71Gpkjs9bPPPmt0/Fr+jz137pwkyRhzTXPrsCHo008/VV1dnZKSkoLGk5KSVFFR0eg6ixcv1oIFC0LG+/Xr1ypz7Giy23sCbcymfuk1MtFrZIq0Xns+03rbPnfunDwez1Wv32FDUIOoqOCkbIwJGWswb948zZ4927ldX1+vzz//XD169Gh0nZqaGiUnJ+vjjz9Wt27dwjvx64xNvUp29UuvkYleIxO9XhljjM6dOyefz3dNc+iwIahnz56Kjo4OOetTWVkZcnaogdvtltvtDhq74YYbLruvbt26RfydsYFNvUp29UuvkYleIxO9Xt61nAFq0GHfGB0TE6PBgwerpKQkaLykpETDhw9vp1kB
AICOosOeCZKk2bNnKycnR0OGDFFaWppefPFFHT9+XA899FB7Tw0AAFznOnQI+slPfqLPPvtMCxcu1KlTp5SSkqItW7aoT58+Ydm+2+3WE088EfISWiSyqVfJrn7pNTLRa2Si17YVZa71+jIAAIAOqMO+JwgAAOBaEIIAAICVCEEAAMBKhCAAAGAlK0LQokWLNHz4cMXFxTX54YjHjx/XPffco65du6pnz56aNWuWamtrg2r279+vESNGKDY2VjfeeKMWLlwY8r0l27dv1+DBg9WlSxd9+9vf1vPPPx+yr02bNumWW26R2+3WLbfcos2bN4et12966623FBUV1eifvXv3OnWNLb903uHqvbX17ds3pJdLv1+uLY91a/noo480ZcoU9evXT7GxsfrOd76jJ554IqSPSDq2l/Pss8+qX79+6tKliwYPHqz//u//bu8pNWvx4sW68847FR8fr8TERN177706cuRIUM0DDzwQcvyGDRsWVOP3+zVz5kz17NlTXbt2VVZWlk6cOBFUU1VVpZycHHk8Hnk8HuXk5Ojs2bOt3aIjLy8vpA+v1+ssN8YoLy9PPp9PsbGxGjlypA4ePBi0jY7QZ4PGnoeioqL08MMPS+rYx/Xtt9/WPffcI5/Pp6ioKP3hD38IWt6Wx/JKnssvy1jgN7/5jVm2bJmZPXu28Xg8Icu/+uork5KSYu6++26zb98+U1JSYnw+n5kxY4ZTU11dbZKSksykSZPM/v37zaZNm0x8fLz57W9/69R8+OGHJi4uzjzyyCPm0KFDZtWqVcblcpn/+I//cGpKS0tNdHS0yc/PN4cPHzb5+fmmc+fOZteuXWHv2+/3m1OnTgX9+fnPf2769u1r6uvrnTpJZs2aNUF1Fy5cCHvvbaFPnz5m4cKFQb2cO3fOWd6Wx7o1/elPfzIPPPCA2bp1q/nLX/5i/vjHP5rExEQzZ86coLpIOrbN2bhxo3G5XGbVqlXm0KFD5pFHHjFdu3Y1x44da++pNSkjI8OsWbPGHDhwwJSXl5vx48eb3r17m/Pnzzs1kydPNmPHjg06fp999lnQdh566CFz4403mpKSErNv3z5z9913m9tuu8189dVXTs3YsWNNSkqKKS0tNaWlpSYlJcVkZma2Wa9PPPGEufXWW4P6qKysdJYvWbLExMfHm02bNpn9+/ebn/zkJ6ZXr16mpqamQ/XZoLKyMqjXkpISI8m8+eabxpiOfVy3bNli5s+fbzZt2mQkmc2bNwctb6tjeSXP5VfCihDUYM2aNY2GoC1btphOnTqZTz75xBn73e9+Z9xut6murjbGGPPss88aj8djvvzyS6dm8eLFxufzOYHi0UcfNTfffHPQtqdNm2aGDRvm3J44caIZO3ZsUE1GRoaZNGnSNfd3ObW1tSYxMdEsXLgwaLyxO/I3hav3ttCnTx+zfPnyJpe35bFua0uXLjX9+vULGoukY9ucoUOHmoceeiho7OabbzaPPfZYO82o5SorK40ks337dmds8uTJ5oc//GGT65w9e9a4XC6zceNGZ+yTTz4xnTp1MkVFRcYYYw4dOmQkBf2itXPnTiPJ/N///V/4G2nEE088YW677bZGl9XX1xuv12uWLFnijH355ZfG4/GY559/3hjTcfpsyiOPPGK+853vOI+pSDmulz6/tOWxvJLn8ithxcthl7Nz506lpKQEfRFbRkaG/H6/ysrKnJoRI0YEfahTRkaGTp48qY8++sipSU9PD9p2RkaG3nnnHQUCgWZrSktLW6O1IK+++qo+/fRTPfDAAyHLZsyYoZ49e+rOO+/U888/r/r6emdZuHpvK0899ZR69Oih22+/XYsWLQo6PdqWx7qtVVdXKyEhIWQ8ko5tY2pra1VWVhYyx/T09DZ5XIVLdXW1JIUcw7feekuJiYn6m7/5G02dOlWVlZXOsrKyMgUCgaDefT6fUlJSnN537twpj8ej1NRUp2bYsGHyeDxt+vN5//335fP51K9fP02a
NEkffvihJOno0aOqqKgI6sHtdmvEiBHO/DpSn5eqra1VYWGhfvaznwV9UXekHNdvastjeSXP5VeCECSpoqIi5EtXu3fvrpiYGOcLWhurabh9uZqvvvpKn376abM1l34RbGtYvXq1MjIylJycHDT+5JNP6ve//722bdumSZMmac6cOcrPz3eWh6v3tvDII49o48aNevPNNzVjxgytWLFC06dPd5a35bFuS3/5y19UUFAQ8pUxkXRsm/Lpp5+qrq6u3R5X4WCM0ezZs/W9731PKSkpzvi4ceO0fv16vfHGG3rmmWe0d+9e/eAHP5Df75f09bGJiYlR9+7dg7b3zd4rKiqUmJgYss/ExMQ2+/mkpqbq3/7t37R161atWrVKFRUVGj58uD777DNnDs0dv47SZ2P+8Ic/6OzZs0G/fEbKcb1UWx7LK3kuvxId9msz8vLytGDBgmZr9u7dqyFDhlzR9r6Z0BsYY4LGL60x//+bR8NR09j+m3I1vZ84cUJbt27Vv//7v4fU/vrXv3b+ffvtt0uSFi5cGDQert6vRkv6/eUvf+mMffe731X37t31ox/9yDk71NR8WutYt9TVHNuTJ09q7Nix+vGPf6yf//znQbXX+7ENp2t9XLWnGTNm6M9//rN27NgRNP6Tn/zE+XdKSoqGDBmiPn366PXXX9eECROa3N7l7s+N1bSmcePGOf8eNGiQ0tLS9J3vfEfr1q1z3hB8NcfveuuzMatXr9a4ceOCzlhEynFtSlsdy3D032FD0IwZMzRp0qRma/r27XtF2/J6vdq9e3fQWFVVlQKBgJM0vV5vSLpsOH15uZrOnTs7/wE3VXNpom3O1fS+Zs0a9ejRQ1lZWZfd/rBhw1RTU6PTp08rKSkpbL1frWs51g1PsB988IF69OjRpsf6arS015MnT+ruu+92vkD4cq63YxsOPXv2VHR09DU/rtrLzJkz9eqrr+rtt9/WTTfd1Gxtr1691KdPH73//vuSvj42tbW1qqqqCvrNurKyUsOHD3dqTp8+HbKtM2fOtNvPp2vXrho0aJDef/993XvvvZK+/s2+V69eTs03j19H7fPYsWPatm2bXnnllWbrIuW4Nlzx1xbH8kqey6/IFb97KAJc7o3RJ0+edMY2btwY8mbZG264wfj9fqdmyZIlIW8gHThwYNC2H3rooZA3Ro8bNy6oZuzYsa36xuj6+nrTr1+/kCuHmlJQUGC6dOnivFk2XL23h//8z/80kpyrhNryWLe2EydOmP79+5tJkyYFXVXRnEg6tt80dOhQ84tf/CJobODAgdf1G6Pr6+vNww8/bHw+n3nvvfeuaJ1PP/3UuN1us27dOmPM/3uT6csvv+zUnDx5stE3me7evdup2bVrV7u+YfjLL780N954o1mwYIHzZtqnnnrKWe73+xt9M21H6/OJJ54wXq/XBAKBZus66nFVE2+MbotjeSXP5VfUQ8ta7piOHTtm3n33XbNgwQLzrW99y7z77rvm3XffdS6dbrjUbtSoUWbfvn1m27Zt5qabbgq61O7s2bMmKSnJ3HfffWb//v3mlVdeMd26dWv0UuJf/vKX5tChQ2b16tUhlxL/z//8j4mOjjZLliwxhw8fNkuWLGm1S+QbbNu2zUgyhw4dCln26quvmhdffNHs37/ffPDBB2bVqlWmW7duZtasWWHvvbWVlpaaZcuWmXfffdd8+OGH5uWXXzY+n89kZWU5NW15rFvTJ598Yv76r//a/OAHPzAnTpwIutS2QSQd28tpuER+9erV5tChQyY3N9d07drVfPTRR+09tSb94he/MB6Px7z11luNfoTBuXPnzJw5c0xpaak5evSoefPNN01aWpq58cYbQy43vummm8y2bdvMvn37zA9+8INGLzf+7ne/a3bu3Gl27txpBg0a1KaXjs+ZM8e89dZb5sMPPzS7du0ymZmZJj4+3jk+S5YsMR6Px7zyyitm//795r777mv0surrvc9vqqurM7179za/
+tWvgsY7+nE9d+6c83+oJOc5t+EXzbY6llfyXH4lrAhBkydPNpJC/jR8ZoMxXwel8ePHm9jYWJOQkGBmzJgRdNmwMcb8+c9/Nt///veN2+02Xq/X5OXlBX3ejjHGvPXWW+aOO+4wMTExpm/fvua5554Lmc/vf/97M2DAAONyuczNN99sNm3a1Cp9N7jvvvvM8OHDG132pz/9ydx+++3mW9/6lomLizMpKSlmxYoVIb+5hKv31lRWVmZSU1ONx+MxXbp0MQMGDDBPPPGE+eKLL4Lq2vJYt5Y1a9Y0ep/+5sndSDq2V+Jf/uVfTJ8+fUxMTIz527/926BLza9HTR2/NWvWGGOMuXDhgklPTzd/9Vd/ZVwul+ndu7eZPHmyOX78eNB2Ll68aGbMmGESEhJMbGysyczMDKn57LPPzP3332/i4+NNfHy8uf/++01VVVUbdWqcz4pxuVzG5/OZCRMmmIMHDzrL6+vrnbMmbrfb/N3f/Z3Zv39/0DY6Qp/ftHXrViPJHDlyJGi8ox/XN998s9H77eTJk40xbXssr+S5/HKijLnko2EBAAAswCXyAADASoQgAABgJUIQAACwEiEIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKhCAAAGAlQhAAALASIQgAAFjp/wMp93/T7Ls4kwAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "y.hist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "3848fd5d-177a-4d03-a12b-f4301b1bd3ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = data.sort_values('var1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "8d8483bd-5b4d-4087-8d13-92cdfc662ed3",
   "metadata": {},
   "outputs": [],
   "source": [
    "data_unobserved = data.loc[data['var1']==n_levels-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "2ef433a9-a3f7-4f79-94ce-a99265858fdf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only rows whose var1 level lies strictly below the held-out level (n_levels-1),\n",
    "# so the level selected as data_unobserved never enters the train/test split of observed data.\n",
    "data_observed = data.loc[data['var1'] < n_levels - 1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "38ca1c00-b163-462c-9fa2-b91354b44429",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(data_observed[cat_vars], data_observed['y'], test_size=0.3, random_state=35)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "21495b2e-2742-4490-9986-a47f4265d8d9",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_test = pd.concat([X_test, data_unobserved[cat_vars]], axis=0)\n",
    "y_test = pd.concat([y_test, data_unobserved['y']], axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "e3528929-b2f8-462d-bb96-636052fcd93d",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train = X_train.reset_index(drop=True)\n",
    "X_test = X_test.reset_index(drop=True)\n",
    "y_train = y_train.reset_index(drop=True)\n",
    "y_test = y_test.reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "0360b9a9-ee20-4089-87c9-fab640ff5e21",
   "metadata": {},
   "outputs": [],
   "source": [
    "X2, encoder = icfesl.f_get_dummies(X_train, ['var1','var2'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "924c7ed4-5b53-440b-b103-77ad85033a86",
   "metadata": {},
   "outputs": [],
   "source": [
    "X2_test = icfesl.f_get_dummies(X_test, ['var1','var2'], encoder)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "5fcaa6b4-8007-4e01-a0e9-4dcb8728a4a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "min_child_weight = np.floor(X2.shape[0]/X2.shape[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "9d51c73a-70d3-475e-9376-6c8c5b6cf498",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "np.float64(17.0)"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "min_child_weight"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "7c6871b4-c5c6-4b76-856a-cfcb3c4a7873",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Size of X2 data: 533.5237159729004 MB\n"
     ]
    }
   ],
   "source": [
    "X2_data_size = sys.getsizeof(X2)\n",
    "print(f\"Size of X2 data: {X2_data_size/1024**2} MB\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "59924c30-72c8-41e4-ac5c-9f63911b271c",
   "metadata": {},
   "source": [
    "### CatBoost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "a64d20c9-b58a-4edc-b9c2-44c6f4721c3b",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = CatBoostRegressor(\n",
    "    iterations=100,  \n",
    "    loss_function='RMSE', \n",
    "    random_seed=42,  \n",
    "    verbose=False\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "2cd0335b-e6d5-409f-b32d-9086c345f403",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.2551\n"
     ]
    }
   ],
   "source": [
    "# perf_counter is a monotonic, high-resolution clock intended for measuring elapsed durations.\n",
    "start = time.perf_counter()\n",
    "model.fit(X_train, y_train, cat_features=cat_vars)\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "e940226e-b8a7-4285-a743-0684b8fd6abd",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "c26341be-4a74-483b-9810-c7f36596adb9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training RMSE: 1334.2665664953404\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "599550aa-a1d9-4164-ae75-4785ed139d29",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "ffd15889-01a3-4d87-8d81-1892ef32eb98",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing RMSE: 1451.514196107443\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a14325db-bbee-43d1-976c-176f0018bddb",
   "metadata": {},
   "source": [
    "### TabNet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "377e0174-52ab-44b0-8fbc-92597f657406",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column positions of the categorical features inside the array handed to TabNet.\n",
    "cat_idxs = [X_train.columns.get_loc(col) for col in cat_vars]\n",
    "# Each embedding table must be larger than the highest integer code it is asked to look up,\n",
    "# in training or at prediction time; counting distinct training levels under-sizes the table\n",
    "# whenever a level is absent from X_train. Assumes levels are non-negative integer codes.\n",
    "cat_dims = [int(max(X_train[col].max(), X_test[col].max())) + 1 for col in cat_vars]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "bf5c1e81-c149-454d-a677-7048b07eecd7",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = TabNetRegressor(verbose=0, seed=200, cat_idxs=cat_idxs, cat_dims=cat_dims)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "ff2493dd-9bbe-42ac-b8e0-c8799070b26b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Stop training because you reached max_epochs = 50 with best_epoch = 47 and best_train_mse = 164730.91031\n",
      "22.9998\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.13/site-packages/pytorch_tabnet/callbacks.py:172: UserWarning: Best weights from best epoch are automatically used!\n",
      "  warnings.warn(wrn_msg)\n"
     ]
    }
   ],
   "source": [
    "# perf_counter is a monotonic, high-resolution clock intended for measuring elapsed durations.\n",
    "start = time.perf_counter()\n",
    "model.fit(X_train=X_train.to_numpy(), y_train=y_train.to_numpy().reshape(-1,1), eval_set=[(X_train.to_numpy(), y_train.to_numpy().reshape(-1,1))], eval_name=['train'], max_epochs=50)\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "994cbfd4-36d0-4879-b0be-5ab2eef8cf06",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_train.to_numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "2441371f-d15c-40d4-ad9d-590b03df2a0a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training RMSE:405.87055856840135\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training RMSE:{rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "c35b05c9-a34b-4fda-ae6e-f5bf8c8e9de2",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_test.to_numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "72114801-717a-4ba8-83a0-8cd697671770",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing RMSE:417.0671545141546\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing RMSE:{rmse}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "04f66670-54f6-4c90-96e9-d65a096663b0",
   "metadata": {},
   "source": [
    "### One hot encoding"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9407c138-1151-49a5-b15f-067c12f1fee9",
   "metadata": {},
   "source": [
    "#### 1. OLS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "5876a562-9ad5-4796-a0b3-e6a250d0ad2d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "14.7922\n"
     ]
    }
   ],
   "source": [
    "# perf_counter is a monotonic, high-resolution clock intended for measuring elapsed durations.\n",
    "start = time.perf_counter()\n",
    "model = sm.OLS(y_train, sm.add_constant(X2, has_constant='skip')).fit(disp=False)\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "4cad784d-cd85-42a1-b674-2281c411ccd1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DF:1998.0; R2: 0.9997250156293348\n"
     ]
    }
   ],
   "source": [
    "print(f'DF:{model.df_model}; R2: {model.rsquared}') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "9d98efc3-7dd7-4684-8bb1-e6b18d630451",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(sm.add_constant(X2, has_constant='skip'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "d522aafa-4a4f-49af-b35c-252568e8752a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training RMSE: 48.57986719609181\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "dd3b6924-3608-4e29-9017-d332b8acc6dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(sm.add_constant(X2_test, has_constant='skip'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "77159d20-1353-4815-9365-506dd9985242",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing RMSE: 51.48401113055416\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c888aa95-d9f6-4694-80ac-05b2bbe8b61c",
   "metadata": {},
   "source": [
    "#### 2. xgboost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "ed98a66f-01db-4ea3-a8b3-a78e955be1f6",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = XGBRegressor(n_estimators=100, random_state=200)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "df42399f-ca2b-4525-8198-fe1fa9e3c0d6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.8217\n"
     ]
    }
   ],
   "source": [
    "# perf_counter is a monotonic, high-resolution clock intended for measuring elapsed durations.\n",
    "start = time.perf_counter()\n",
    "model.fit(X2, y_train)\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "ae1430b7-aceb-4742-b934-a1eed4e907a8",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "40224e28-b679-452a-9534-678ae93c1867",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training RMSE: 2595.4994232900476\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "16ffb892-5e5e-4601-a5a5-653e1b86997a",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X2_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "040f7a6c-00dd-45e3-9c25-60e92992708a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing RMSE: 2686.029772192453\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "27a2a623-5281-47c0-acbe-0247e59a4334",
   "metadata": {},
   "source": [
    "### Target Encoding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "586c16d4-4a09-4b8d-8dbc-76a35d822258",
   "metadata": {},
   "outputs": [],
   "source": [
    "from category_encoders import TargetEncoder\n",
    "enc_auto = TargetEncoder(cols=cat_vars).fit(X_train, y_train)\n",
    "X_t = enc_auto.transform(X_train)\n",
    "X_t_test = enc_auto.transform(X_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4e0a8ae9-bd27-4cfa-9c5d-dfd479d121e3",
   "metadata": {},
   "source": [
    "#### 1. OLS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "d2131938-e931-4f91-b99b-82c8f3d74226",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.0113\n"
     ]
    }
   ],
   "source": [
    "# perf_counter is a monotonic, high-resolution clock; this fit takes ~10 ms, where wall-clock jitter matters.\n",
    "start = time.perf_counter()\n",
    "model = sm.OLS(y_train, sm.add_constant(X_t, has_constant='skip')).fit(disp=False)\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "a3053246-108a-46e7-98ae-89ee1ac35bbc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DF:2.0; R2: 0.8532493213860327\n"
     ]
    }
   ],
   "source": [
    "print(f'DF:{model.df_model}; R2: {model.rsquared}') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "5ccab110-ab74-4e63-9d09-0174472a644b",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(sm.add_constant(X_t, has_constant='skip'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "34ff4e5f-8b9a-41d3-94b7-79d64c7bf241",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training RMSE: 1122.2571399141525\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "2ac968e0-4bfd-423d-944b-73c5710f7f69",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(sm.add_constant(X_t_test, has_constant='skip'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "82ae9a2a-4a84-4ae3-8abd-5520a9b0f785",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing RMSE: 1231.7800791801512\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0fb66c13-ae7e-4dbd-b1ab-6564363315e4",
   "metadata": {},
   "source": [
    "#### 2. xgboost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "cc923abd-b206-440d-a64e-b1177cb8642a",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = XGBRegressor(n_estimators=100, random_state=200)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "647e7142-4e25-47d3-a749-b8b17a11da96",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.1695\n"
     ]
    }
   ],
   "source": [
    "# perf_counter is a monotonic, high-resolution clock intended for measuring elapsed durations.\n",
    "start = time.perf_counter()\n",
    "model.fit(X_t, y_train)\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "bd191fae-0925-4ace-8690-42b41085b2f3",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_t)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "a75ecae2-9d64-47e0-9206-044b02010fc8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training RMSE: 897.640573193459\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "af3cddbb-aa89-4942-9fce-f791b0270580",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_t_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "49e89411-0ebb-4dce-b2c9-1e8d6b27dc2e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing RMSE: 1070.8616438775177\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4f9efe23-76f4-49e5-9130-053c9680dd64",
   "metadata": {},
   "source": [
    "### ICFESL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "95eed9eb-f300-40a6-b9d4-dd749b7e3769",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m2025-11-29 14:03:27.112\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m397\u001b[0m - \u001b[1mrunning algorithm with L2 regularization factor = 0.01 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:03:35.734\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m453\u001b[0m - \u001b[1mRunning OLS with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:03:35.779\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m471\u001b[0m - \u001b[1mRunning xgbRegressor with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:03:36.092\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m485\u001b[0m - \u001b[1mCompleted: running algorithm with L2 regularization factor = 0.01 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:03:36.092\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m397\u001b[0m - \u001b[1mrunning algorithm with L2 regularization factor = 0.05 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:03:51.392\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m453\u001b[0m - \u001b[1mRunning OLS with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:03:51.468\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m471\u001b[0m - \u001b[1mRunning xgbRegressor with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:03:51.779\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m485\u001b[0m - \u001b[1mCompleted: running algorithm with L2 regularization factor = 0.05 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:03:51.779\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m397\u001b[0m - \u001b[1mrunning algorithm with L2 regularization factor = 0.1 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:04:09.469\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m453\u001b[0m - \u001b[1mRunning OLS with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:04:09.531\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m471\u001b[0m - \u001b[1mRunning xgbRegressor with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:04:09.829\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m485\u001b[0m - \u001b[1mCompleted: running algorithm with L2 regularization factor = 0.1 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:04:09.830\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m397\u001b[0m - \u001b[1mrunning algorithm with L2 regularization factor = 0.2 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:04:29.929\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m453\u001b[0m - \u001b[1mRunning OLS with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:04:30.005\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m471\u001b[0m - \u001b[1mRunning xgbRegressor with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:04:30.320\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m485\u001b[0m - \u001b[1mCompleted: running algorithm with L2 regularization factor = 0.2 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:04:30.320\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m397\u001b[0m - \u001b[1mrunning algorithm with L2 regularization factor = 0.5 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:04:51.700\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m453\u001b[0m - \u001b[1mRunning OLS with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:04:51.778\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m471\u001b[0m - \u001b[1mRunning xgbRegressor with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:04:52.084\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m485\u001b[0m - \u001b[1mCompleted: running algorithm with L2 regularization factor = 0.5 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:04:52.084\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m397\u001b[0m - \u001b[1mrunning algorithm with L2 regularization factor = 1 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:05:12.937\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m453\u001b[0m - \u001b[1mRunning OLS with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:05:13.024\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m471\u001b[0m - \u001b[1mRunning xgbRegressor with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:05:13.336\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m485\u001b[0m - \u001b[1mCompleted: running algorithm with L2 regularization factor = 1 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:05:13.336\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m397\u001b[0m - \u001b[1mrunning algorithm with L2 regularization factor = 5 ------>\u001b[0m\n",
      "\u001b[32m2025-11-29 14:05:34.779\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m453\u001b[0m - \u001b[1mRunning OLS with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:05:34.853\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m471\u001b[0m - \u001b[1mRunning xgbRegressor with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-11-29 14:05:35.156\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m485\u001b[0m - \u001b[1mCompleted: running algorithm with L2 regularization factor = 5 ------>\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "fit_info_panel, best_index, fit_figs, cluster_groups, criterions, inertias, gap_statss = icfesl.regularized_search_algorun(\n",
    "    X2, y_train, X2_test, y_test, cat_vars, 'regression', alphas = [0.01, 0.05, 0.1, 0.2, 0.5, 1, 5], cbine_column=False,\n",
    "    distance_threshold=0.002, figure=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "9139ee6e-1f9f-4834-961e-fa45bf6fe245",
   "metadata": {},
   "outputs": [],
   "source": [
    "decision_plot, summary_plot = fit_figs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "79c0959a-b17f-4c3e-a1aa-477505a69d25",
   "metadata": {},
   "outputs": [],
   "source": [
    "fit_info_panel.to_excel(\"simulation_regression_fit_info_case2.xlsx\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "8bb5be76-c3f2-48d7-88fa-1c5f8e07566c",
   "metadata": {},
   "outputs": [],
   "source": [
    "decision_plot.savefig('decision_plot_simulation_regression_case2.png')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "ffdb9c09-7e59-49c6-9d4d-b8eda8992216",
   "metadata": {},
   "outputs": [],
   "source": [
    "summary_plot.savefig('summary_plot_simulation_regression_case2.png')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "0c583112-9148-4c78-8d6c-2b581c772dd9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Experiment</th>\n",
       "      <th>dof</th>\n",
       "      <th>reg_fit_time</th>\n",
       "      <th>reg_training_rmse</th>\n",
       "      <th>reg_testing_rmse</th>\n",
       "      <th>xgb_fit_time</th>\n",
       "      <th>xgb_training_rmse</th>\n",
       "      <th>xgb_testing_rmse</th>\n",
       "      <th>var_inf</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0.0363</td>\n",
       "      <td>342.636872</td>\n",
       "      <td>345.281527</td>\n",
       "      <td>0.2946</td>\n",
       "      <td>383.865414</td>\n",
       "      <td>385.847885</td>\n",
       "      <td>5.506954e-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>36.0</td>\n",
       "      <td>0.0654</td>\n",
       "      <td>252.774990</td>\n",
       "      <td>254.657525</td>\n",
       "      <td>0.2922</td>\n",
       "      <td>297.729666</td>\n",
       "      <td>297.969864</td>\n",
       "      <td>4.028506e-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>38.0</td>\n",
       "      <td>0.0460</td>\n",
       "      <td>234.240201</td>\n",
       "      <td>234.872942</td>\n",
       "      <td>0.2806</td>\n",
       "      <td>285.877574</td>\n",
       "      <td>285.103354</td>\n",
       "      <td>3.708324e-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>40.0</td>\n",
       "      <td>0.0606</td>\n",
       "      <td>226.706201</td>\n",
       "      <td>226.664331</td>\n",
       "      <td>0.2965</td>\n",
       "      <td>284.248595</td>\n",
       "      <td>284.368151</td>\n",
       "      <td>3.602048e-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>40.0</td>\n",
       "      <td>0.0581</td>\n",
       "      <td>225.622304</td>\n",
       "      <td>226.277826</td>\n",
       "      <td>0.2875</td>\n",
       "      <td>281.080740</td>\n",
       "      <td>281.834647</td>\n",
       "      <td>3.662330e-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>41.0</td>\n",
       "      <td>0.0665</td>\n",
       "      <td>222.471947</td>\n",
       "      <td>223.062859</td>\n",
       "      <td>0.2938</td>\n",
       "      <td>283.953477</td>\n",
       "      <td>283.915738</td>\n",
       "      <td>3.710333e-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>42.0</td>\n",
       "      <td>0.0545</td>\n",
       "      <td>273.334582</td>\n",
       "      <td>285.460174</td>\n",
       "      <td>0.2841</td>\n",
       "      <td>324.165257</td>\n",
       "      <td>336.142670</td>\n",
       "      <td>4.931813e-13</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Experiment   dof  reg_fit_time  reg_training_rmse  reg_testing_rmse  \\\n",
       "0           0  26.0        0.0363         342.636872        345.281527   \n",
       "1           1  36.0        0.0654         252.774990        254.657525   \n",
       "2           2  38.0        0.0460         234.240201        234.872942   \n",
       "3           3  40.0        0.0606         226.706201        226.664331   \n",
       "4           4  40.0        0.0581         225.622304        226.277826   \n",
       "5           5  41.0        0.0665         222.471947        223.062859   \n",
       "6           6  42.0        0.0545         273.334582        285.460174   \n",
       "\n",
       "   xgb_fit_time  xgb_training_rmse  xgb_testing_rmse       var_inf  \n",
       "0        0.2946         383.865414        385.847885  5.506954e-13  \n",
       "1        0.2922         297.729666        297.969864  4.028506e-13  \n",
       "2        0.2806         285.877574        285.103354  3.708324e-13  \n",
       "3        0.2965         284.248595        284.368151  3.602048e-13  \n",
       "4        0.2875         281.080740        281.834647  3.662330e-13  \n",
       "5        0.2938         283.953477        283.915738  3.710333e-13  \n",
       "6        0.2841         324.165257        336.142670  4.931813e-13  "
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fit_info_panel"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9ba26263-8b9d-4e35-880e-7b077e7e9159",
   "metadata": {},
   "source": [
    "## CBind"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "ca2a9cd3-2da2-4fd4-b673-7d62c346e657",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Threshold handed to group_categorical_features; named so it can be tuned in one place.\n",
    "CBIND_DISTANCE_THRESHOLD = 0.002\n",
    "\n",
    "# The grouping is computed from X2 only and then re-applied unchanged to X2_test.\n",
    "cgrouping = icfesl.group_categorical_features(\n",
    "    X2, X2.columns.tolist(), distance_threshold=CBIND_DISTANCE_THRESHOLD\n",
    ")\n",
    "X4 = icfesl.combine_features(X2, cgrouping)\n",
    "X4_test = icfesl.combine_features(X2_test, cgrouping)\n",
    "\n",
    "# Fail fast here rather than inside a later model.predict() call if the two\n",
    "# combined matrices ever end up with a different number of columns.\n",
    "assert X4.shape[1] == X4_test.shape[1], 'train/test feature count mismatch after combine_features'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "id": "b055c561-7dce-4048-8ad6-0ca5c2c4ca68",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(15052, 870)"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X4_test.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d7db5b54-7a14-401e-be2f-2670f66a5bf6",
   "metadata": {},
   "source": [
    "#### 1. OLS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "11ace45f-93b7-4611-b2ba-4f1c186f2ecc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3.1505\n"
     ]
    }
   ],
   "source": [
    "# Time the OLS fit on the combined features. perf_counter() is a monotonic,\n",
    "# high-resolution clock, so the difference is not affected by wall-clock adjustments.\n",
    "# Note: the timed region also includes building the design matrix with add_constant.\n",
    "start = time.perf_counter()\n",
    "# OLS.fit() defines no `disp` option (there is no iterative optimizer to silence),\n",
    "# so no such keyword is passed.\n",
    "model = sm.OLS(y_train, sm.add_constant(X4, has_constant='skip')).fit()\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "6cd36506-6cb2-4002-bde7-75be46c788df",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DF:870.0; R2: 0.32956516462861163\n"
     ]
    }
   ],
   "source": [
    "print(f'DF:{model.df_model}; R2: {model.rsquared}') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "889e61bb-0c16-4dcf-ba9f-4117720d6bd6",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(sm.add_constant(X4, has_constant='skip'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "b0f172ff-c995-4354-a9bd-c8490faafcba",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training RMSE: 2398.725786933654\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "5a226b37-6efc-42f2-bdf5-473a1844b96e",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(sm.add_constant(X4_test, has_constant='skip'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "189d19cb-6c55-4e55-9cdf-7c3652999a78",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing RMSE: 2559.5924241017656\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0f83b17f-caa2-48e7-a00c-fb108a9c74bb",
   "metadata": {},
   "source": [
    "#### 2. XGBoost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "d5752ddb-6128-4743-a7b9-f6617ac74036",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = XGBRegressor(n_estimators=100, random_state=200)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "d9f35ec7-a09b-497a-b2a5-a6ff2b8a9479",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.766\n"
     ]
    }
   ],
   "source": [
    "# Time the XGBoost fit on the combined features. perf_counter() is a monotonic,\n",
    "# high-resolution clock, so the difference is not affected by wall-clock adjustments.\n",
    "start = time.perf_counter()\n",
    "model.fit(X4, y_train)\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "859f3057-694d-4d9b-b4c7-afd9900985fe",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "642b22c0-8f68-4ebf-8b8d-37e9096a591b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training rmse: 2661.061227894369\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training rmse: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "id": "49110501-218a-44b5-901e-35765607adaf",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X4_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "d581ff33-8ae3-4b70-b709-6570723daa99",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing rmse: 2757.147206095897\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing rmse: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c29881f4-a3d9-4ef4-84a7-ceb656bd7acf",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ab84783b-284f-4d36-9806-2f619cb9412c",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3c7f16a8-e38e-4ee5-8daa-f660b9a76a8e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
