{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d77e3e2d-0706-4fde-a71e-1ac3b74e8346",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from scipy.spatial.distance import cdist\n",
    "import random\n",
    "from sklearn.cluster import KMeans\n",
    "import sys\n",
    "sys.path.append('../')\n",
    "from icfesl import *\n",
    "from utility_functions import *\n",
    "from xgboost import XGBRegressor\n",
    "from pytorch_tabnet.tab_model import TabNetRegressor\n",
    "import time\n",
    "from sklearn.model_selection import train_test_split\n",
    "import matplotlib.pyplot as plt\n",
    "from scipy.stats import norm\n",
    "from catboost import CatBoostRegressor, Pool"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "dd181542-b52a-4555-afdf-ba920573f9b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seed NumPy's global RNG so the np.random draws in the cells below are reproducible.\n",
    "np.random.seed(42)\n",
    "# Number of simulated rows.\n",
    "n_samples = 50000\n",
    "# Correlation between the latent normals in the generator (despite the name, not a standard deviation).\n",
    "sigma = 0.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b6513970-d79a-4beb-9517-dea61573012e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of categorical variables to simulate (passed as num_variables to the generator).\n",
    "n_variables = 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "59c4ca71-57f4-4828-b297-248dc0e07b66",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of levels per categorical variable (used for every entry of categories_per_variable).\n",
    "n_levels = 1000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "664151bb-fc8a-4545-9345-0ee0f1ba41c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column names of the categorical features. Built from n_variables so the list always matches\n",
    "# the \"var1\", \"var2\", ... columns produced by generate_correlated_categorical_variables.\n",
    "cat_vars = [f\"var{i + 1}\" for i in range(n_variables)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "524eb3b6-1960-4635-b0d9-a90eed58524b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from scipy.stats import norm\n",
    "\n",
    "def generate_correlated_categorical_variables(num_samples, num_variables, sigma, categories_per_variable):\n",
    "\n",
    "    correlation_matrix = np.array([[1.0, sigma],\n",
    "                                   [sigma, 1.0]])\n",
    "\n",
    "    \n",
    "    L = np.linalg.cholesky(correlation_matrix)\n",
    "    uncorrelated_normals = np.random.normal(size=(num_samples, num_variables))\n",
    "    correlated_normals = uncorrelated_normals @ L.T\n",
    "\n",
    "    categorical_data = np.zeros_like(correlated_normals, dtype=int)\n",
    "\n",
    "    for i in range(num_variables):\n",
    "        num_categories = categories_per_variable[i]\n",
    "        quantiles = np.linspace(0, 1, num_categories + 1)[1:-1]\n",
    "        thresholds = norm.ppf(quantiles)\n",
    "\n",
    "        for j in range(num_categories):\n",
    "            if j == 0:\n",
    "                categorical_data[:, i][correlated_normals[:, i] <= thresholds[j]] = j\n",
    "            elif j == num_categories - 1:\n",
    "                categorical_data[:, i][correlated_normals[:, i] > thresholds[j-1]] = j\n",
    "            else:\n",
    "                categorical_data[:, i][(correlated_normals[:, i] > thresholds[j-1]) & (correlated_normals[:, i] <= thresholds[j])] = j\n",
    "\n",
    "    res = pd.DataFrame(categorical_data, columns=[f\"var{i+1}\" for i in range(num_variables)])\n",
    "                       \n",
    "    return res\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "e82673dc-d14a-4b93-9c8f-613f27855b1a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Every variable gets n_levels categories; sizing the list by n_variables keeps the two settings in sync.\n",
    "X = generate_correlated_categorical_variables(n_samples, n_variables, sigma, [n_levels] * n_variables)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "3049f72f-fc0b-47cd-9341-269490a4a249",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Size of simulated data: 0.7630958557128906 MB\n"
     ]
    }
   ],
   "source": [
    "# Rough in-memory footprint of the simulated frame, converted from bytes to MiB (1024**2).\n",
    "# NOTE(review): sys.getsizeof defers to the object's __sizeof__; presumably pandas reports the\n",
    "# column data there, but X.memory_usage(deep=True) is the documented way to measure it - confirm.\n",
    "simulated_data_size = sys.getsizeof(X)\n",
    "print(f\"Size of simulated data: {simulated_data_size/1024**2} MB\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3fb395f5-06e5-4b29-886a-d3d22ea99556",
   "metadata": {},
   "source": [
    "## Simulation study for regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "493bc876-7031-4fd8-a2f6-0d4636372984",
   "metadata": {},
   "outputs": [],
   "source": [
    "# True coefficients of the data-generating linear model.\n",
    "beta_var1 = 10\n",
    "beta_var2 = -10\n",
    "intercept = 5\n",
    "\n",
    "# Gaussian noise with mean 0 and standard deviation 50, one draw per simulated row.\n",
    "error = np.random.normal(loc=0, scale=50, size=n_samples)\n",
    "\n",
    "# The integer category codes enter the model as plain numbers, so y is linear in the codes.\n",
    "y = (intercept +\n",
    "     beta_var1 * X['var1'] +\n",
    "     beta_var2 * X['var2'] +\n",
    "     error)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "445831a3-03db-4b65-9414-2c93aab2ee2b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Put the features and the response side by side, one column per variable plus one for y.\n",
    "data = pd.concat([X, y], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "d6c95d06-0640-4944-9adc-fbee3c7cade1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The response column coming out of the concat carries the label 0; give it the readable name \"y\".\n",
    "data = data.rename(columns={0:\"y\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "e2b61f15-2c83-4de2-bb9e-7acfbd8531a4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjEAAAGdCAYAAADjWSL8AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAKPxJREFUeJzt3X9QVXd+//HXRQSBhbsihZsbMaVdatjFOK3ZRexmdaOiTpDu2FmbsFI746rxZxl1nHXNTMhOAqndqi1U17WOukFDMtO43Wm3rJgmZo0/Q2Si5obalng1csG7xQvKDRg53z/y9TRXFHOvIHwuz8fMmcn9nPfn8jl8ktzXPXzOOQ7LsiwBAAAYJmawBwAAABAJQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEixgz2AgdLT06PLly8rOTlZDodjsIcDAAC+BMuy1NHRIbfbrZiYvs+1RG2IuXz5sjIzMwd7GAAAIAIXL17U2LFj+6yJ2hCTnJws6fNfQkpKyiCPBgAAfBnt7e3KzMy0P8f7ErUh5tafkFJSUggxAAAY5sssBWFhLwAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkQgxAADASIQYAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjRe1TrAE8GF6vV36/P6K+aWlpGjduXD+PCMBwQYgBEDGv16tHc3IU7OyMqH9CYqI+8ngIMgAiQogBEDG/369gZ6fmv7hd6VnZYfVtbTqv159bJr/fT4gBEBFCDID7lp6VrYdzJg72MAAMMyzsBQAARiLEAAAAIxFiAACAkQgxAADASIQYAABgJEIMAAAwEpdYAxhUHo8non7c7RcAIQbAoOjwt8gRE6MFCxZE1J+7/QIgxAAYFMGOdlk9PdztF0DECDEABhV3+wUQKRb2AgAAIxFiAACAkQgxAADASIQYAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkHgAJwFgejyeifmlpaTz9GogChBgAxunwt8gRE6MFCxZE1D8hMVEfeTwEGcBwhBgAxgl2tMvq6dH8F7crPSs7rL6tTef1+nPL5Pf7CTGA4QgxAIyVnpWth3MmDvYwAAwSFvYCAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASVycBkNfrld/vD7tfpDebA4D+QIgBhjmv16tHc3IU7Owc7KEAQFgIMcAw5/f7FezsjOjGcY3vvqm6bRUDNDIA6BshBoCkyG4c19p0foBGAwD3FtbC3rKyMjkcjpDN5XLZ+y3LUllZmdxutxISEjRt2jSdO3cu5D26urq0atUqpaWlKSkpSUVFRbp06VJITVtbm0pKSuR0OuV0OlVSUqKrV69GfpQAACDqhH110je+8Q01Nzfb25kzZ+x9mzZt0ubNm1VVVaVTp07J5XJp5syZ6ujosGtKS0t14MAB1dTU6MiRI7p27ZoKCwt18+ZNu6a4uFgNDQ2qra1VbW2tGhoaVFJScp+HCgAAoknYf06KjY0NOftyi2VZ2rp1qzZu3Kh58+ZJkvbu3auMjAzt379fS5cuVSAQ0K5du/TKK69oxowZkqTq6mplZmbq0KFDmjVrljwej2pra3X8+HHl5eVJknbu3Kn8/Hw1NjZq/Pjx93O8AAAgSoR9Jub8+fNyu93KysrS008/rf/5n/+RJDU1Ncnn86mgoMCujY+P19SpU3X06FFJUn19vW7cuBFS43a7lZuba9ccO3ZMTqfTDjCSNHnyZDmdTrvmTrq6utTe3h6yAQCA6BVWiMnLy9MvfvEL/eY3v9HOnTvl8/k0ZcoU/e53v5PP55MkZWRkhPTJyMiw9/l8PsXFxWn06NF91qSnp/f62enp6XbNnVRUVNhraJxOpzIzM8M5NAAAYJiwQsycOXP053/+55owYYJm
zJihf/u3f5P0+Z+NbnE4HCF9LMvq1Xa722vuVH+v99mwYYMCgYC9Xbx48UsdEwAAMNN9PXYgKSlJEyZM0Pnz5+11MrefLWltbbXPzrhcLnV3d6utra3PmpaWll4/68qVK73O8nxRfHy8UlJSQjYAABC97ivEdHV1yePx6KGHHlJWVpZcLpfq6urs/d3d3Tp8+LCmTJkiSZo0aZJGjhwZUtPc3KyzZ8/aNfn5+QoEAjp58qRdc+LECQUCAbsGAAAgrKuT1q1bp7lz52rcuHFqbW3Viy++qPb2di1cuFAOh0OlpaUqLy9Xdna2srOzVV5ersTERBUXF0uSnE6nFi1apLVr12rMmDFKTU3VunXr7D9PSVJOTo5mz56txYsXa8eOHZKkJUuWqLCwkCuTAACALawQc+nSJT3zzDPy+/36vd/7PU2ePFnHjx/XI488Iklav369gsGgli9frra2NuXl5engwYNKTk6232PLli2KjY3V/PnzFQwGNX36dO3Zs0cjRoywa/bt26fVq1fbVzEVFRWpqqqqP44XAABEibBCTE1NTZ/7HQ6HysrKVFZWdteaUaNGqbKyUpWVlXetSU1NVXV1dThDAwAAw8x9rYkBAAAYLIQYAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkWIHewAAMBg8Hk9E/dLS0jRu3Lh+Hg2ASBBiAAwrHf4WOWJitGDBgoj6JyQm6iOPhyADDAGEGADDSrCjXVZPj+a/uF3pWdlh9W1tOq/Xn1smv99PiAGGAEIMgGEpPStbD+dMHOxhALgPLOwFAABGIsQAAAAjEWIAAICRCDEAAMBILOwFooTX65Xf7w+7X6T3SwGAwUaIAaKA1+vVozk5CnZ2DvZQAOCBIcQAUcDv9yvY2RnRvU8a331TddsqBmhkADBwCDFAFInk3ietTecHaDQAMLBY2AsAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkQgxAADASIQYAABgJEIMAAAw0n2FmIqKCjkcDpWWltptlmWprKxMbrdbCQkJmjZtms6dOxfSr6urS6tWrVJaWpqSkpJUVFSkS5cuhdS0tbWppKRETqdTTqdTJSUlunr16v0MFwAARJGIQ8ypU6f085//XI899lhI+6ZNm7R582ZVVVXp1KlTcrlcmjlzpjo6Ouya0tJSHThwQDU1NTpy5IiuXbumwsJC3bx5064pLi5WQ0ODamtrVVtbq4aGBpWUlEQ6XAAAEGUiCjHXrl3TD37wA+3cuVOjR4+22y3L0tatW7Vx40bNmzdPubm52rt3rzo7O7V//35JUiAQ0K5du/R3f/d3mjFjhv74j/9Y1dXVOnPmjA4dOiRJ8ng8qq2t1T/90z8pPz9f+fn52rlzp/71X/9VjY2N/XDYAADAdBGFmBUrVuipp57SjBkzQtqbmprk8/lUUFBgt8XHx2vq1Kk6evSoJKm+vl43btwIqXG73crNzbVrjh07JqfTqby8PLtm8uTJcjqdds3turq61N7eHrIBAIDoFRtuh5qaGr3//vs6depUr30+n0+SlJGREdKekZGhCxcu2DVxcXEhZ3Bu1dzq7/P5lJ6e3uv909PT7ZrbVVRU6IUXXgj3cAAAgKHCOhNz8eJF/fVf/7Wqq6s1atSou9Y5HI6Q15Zl9Wq73e01d6rv6302bNigQCBgbxcvXuzz5wEAALOFFWLq6+vV2tqqSZMmKTY2VrGxsTp8+LD+4R/+QbGxsfYZmNvPlrS2ttr7XC6Xuru71dbW1mdNS0tLr59/5cqVXmd5bomPj1dKSkrIBgAA
oldYIWb69Ok6c+aMGhoa7O3xxx/XD37wAzU0NOgP/uAP5HK5VFdXZ/fp7u7W4cOHNWXKFEnSpEmTNHLkyJCa5uZmnT171q7Jz89XIBDQyZMn7ZoTJ04oEAjYNQAAYHgLa01McnKycnNzQ9qSkpI0ZswYu720tFTl5eXKzs5Wdna2ysvLlZiYqOLiYkmS0+nUokWLtHbtWo0ZM0apqalat26dJkyYYC8UzsnJ0ezZs7V48WLt2LFDkrRkyRIVFhZq/Pjx933QAADAfGEv7L2X9evXKxgMavny5Wpra1NeXp4OHjyo5ORku2bLli2KjY3V/PnzFQwGNX36dO3Zs0cjRoywa/bt26fVq1fbVzEVFRWpqqqqv4cLAAAMdd8h5u233w557XA4VFZWprKysrv2GTVqlCorK1VZWXnXmtTUVFVXV9/v8AAAQJTi2UkAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkQgxAADASIQYAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRCDEAAMBIsYM9AAAwjcfjiahfWlqaxo0b18+jAYYvQgwAfEkd/hY5YmK0YMGCiPonJCbqI4+HIAP0E0IMMIR4vV75/f6w+0V6ZgDhCXa0y+rp0fwXtys9Kzusvq1N5/X6c8vk9/sJMUA/IcQAQ4TX69WjOTkKdnYO9lBwD+lZ2Xo4Z+JgDwMY9ggxwBDh9/sV7OyM6Ft+47tvqm5bxQCNDACGJkIMMMRE8i2/ten8AI0GAIYuLrEGAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkQgxAADASGGFmO3bt+uxxx5TSkqKUlJSlJ+fr3//93+391uWpbKyMrndbiUkJGjatGk6d+5cyHt0dXVp1apVSktLU1JSkoqKinTp0qWQmra2NpWUlMjpdMrpdKqkpERXr16N/CgBAEDUCSvEjB07Vi+//LLee+89vffee3ryySf1Z3/2Z3ZQ2bRpkzZv3qyqqiqdOnVKLpdLM2fOVEdHh/0epaWlOnDggGpqanTkyBFdu3ZNhYWFunnzpl1TXFyshoYG1dbWqra2Vg0NDSopKemnQwYAANEgNpziuXPnhrx+6aWXtH37dh0/flxf//rXtXXrVm3cuFHz5s2TJO3du1cZGRnav3+/li5dqkAgoF27dumVV17RjBkzJEnV1dXKzMzUoUOHNGvWLHk8HtXW1ur48ePKy8uTJO3cuVP5+flqbGzU+PHj++O4AQCA4SJeE3Pz5k3V1NTo+vXrys/PV1NTk3w+nwoKCuya+Ph4TZ06VUePHpUk1dfX68aNGyE1brdbubm5ds2xY8fkdDrtACNJkydPltPptGvupKurS+3t7SEbAACIXmGHmDNnzugrX/mK4uPj9eyzz+rAgQP6+te/Lp/PJ0nKyMgIqc/IyLD3+Xw+xcXFafTo0X3WpKen9/q56enpds2dVFRU2GtonE6nMjMzwz00AABgkLBDzPjx49XQ0KDjx49r2bJlWrhwoT788EN7v8PhCKm3LKtX2+1ur7lT/b3eZ8OGDQoEAvZ28eLFL3tIAADAQGGHmLi4OH3ta1/T448/roqKCk2cOFF///d/L5fLJUm9zpa0trbaZ2dcLpe6u7vV1tbWZ01LS0uvn3vlypVeZ3m+KD4+3r5q6tYGAACi133fJ8ayLHV1dSkrK0sul0t1dXX2vu7ubh0+fFhTpkyRJE2aNEkjR44MqWlubtbZs2ftmvz8fAUCAZ08edKuOXHihAKBgF0DAAAQ1tVJP/7xjzVnzhxlZmaq
o6NDNTU1evvtt1VbWyuHw6HS0lKVl5crOztb2dnZKi8vV2JiooqLiyVJTqdTixYt0tq1azVmzBilpqZq3bp1mjBhgn21Uk5OjmbPnq3Fixdrx44dkqQlS5aosLCQK5MAAIAtrBDT0tKikpISNTc3y+l06rHHHlNtba1mzpwpSVq/fr2CwaCWL1+utrY25eXl6eDBg0pOTrbfY8uWLYqNjdX8+fMVDAY1ffp07dmzRyNGjLBr9u3bp9WrV9tXMRUVFamqqqo/jhcAAESJsELMrl27+tzvcDhUVlamsrKyu9aMGjVKlZWVqqysvGtNamqqqqurwxkaAAAYZnh2EgAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkQgxAADASIQYAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEaKHewBAMBw4vF4IuqXlpamcePG9fNoALMRYgDgAejwt8gRE6MFCxZE1D8hMVEfeTwEGeALCDEA8AAEO9pl9fRo/ovblZ6VHVbf1qbzev25ZfL7/YQY4AsIMQDwAKVnZevhnImDPQwgKrCwFwAAGIkQAwAAjMSfk4B+5vV65ff7w+4X6VUrADBcEWKAfuT1evVoTo6CnZ2DPRQAiHqEGKAf+f1+BTs7I7oCpfHdN1W3rWKARgYA0YcQAwyASK5AaW06P0CjAYDoxMJeAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARgorxFRUVOib3/ymkpOTlZ6eru9973tqbGwMqbEsS2VlZXK73UpISNC0adN07ty5kJquri6tWrVKaWlpSkpKUlFRkS5duhRS09bWppKSEjmdTjmdTpWUlOjq1auRHSUAAIg6YYWYw4cPa8WKFTp+/Ljq6ur02WefqaCgQNevX7drNm3apM2bN6uqqkqnTp2Sy+XSzJkz1dHRYdeUlpbqwIEDqqmp0ZEjR3Tt2jUVFhbq5s2bdk1xcbEaGhpUW1ur2tpaNTQ0qKSkpB8OGQAARIPYcIpra2tDXu/evVvp6emqr6/Xd77zHVmWpa1bt2rjxo2aN2+eJGnv3r3KyMjQ/v37tXTpUgUCAe3atUuvvPKKZsyYIUmqrq5WZmamDh06pFmzZsnj8ai2tlbHjx9XXl6eJGnnzp3Kz89XY2Ojxo8f3x/HDgAADHZfa2ICgYAkKTU1VZLU1NQkn8+ngoICuyY+Pl5Tp07V0aNHJUn19fW6ceNGSI3b7VZubq5dc+zYMTmdTjvASNLkyZPldDrtmtt1dXWpvb09ZAMAANEr4hBjWZbWrFmjb3/728rNzZUk+Xw+SVJGRkZIbUZGhr3P5/MpLi5Oo0eP7rMmPT29189MT0+3a25XUVFhr59xOp3KzMyM9NAAAIABIg4xK1eu1AcffKBXX3211z6HwxHy2rKsXm23u73mTvV9vc+GDRsUCATs7eLFi1/mMAAAgKEiCjGrVq3Sr371K7311lsaO3as3e5yuSSp19mS1tZW++yMy+VSd3e32tra+qxpaWnp9XOvXLnS6yzPLfHx8UpJSQnZAABA9AorxFiWpZUrV+qNN97Qf/zHfygrKytkf1ZWllwul+rq6uy27u5uHT58WFOmTJEkTZo0SSNHjgypaW5u1tmzZ+2a/Px8BQIBnTx50q45ceKEAoGAXQMAAIa3sK5OWrFihfbv369/+Zd/UXJysn3Gxel0KiEhQQ6HQ6WlpSovL1d2drays7NVXl6uxMREFRcX27WLFi3S2rVrNWbMGKWmpmrdunWaMGGCfbVSTk6OZs+ercWLF2vHjh2SpCVLlqiwsJArkwAA
gKQwQ8z27dslSdOmTQtp3717t/7qr/5KkrR+/XoFg0EtX75cbW1tysvL08GDB5WcnGzXb9myRbGxsZo/f76CwaCmT5+uPXv2aMSIEXbNvn37tHr1avsqpqKiIlVVVUVyjAAAIAqFFWIsy7pnjcPhUFlZmcrKyu5aM2rUKFVWVqqysvKuNampqaqurg5neAAAYBjh2UkAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGCmsB0ACAAaPx+OJqF9aWprGjRvXz6MBBh8hBrgDr9crv98fdr9IP2SAvnT4W+SIidGCBQsi6p+QmKiPPB6CDKIOIQa4jdfr1aM5OQp2dg72UABJUrCjXVZPj+a/uF3pWdlh9W1tOq/Xn1smv99PiEHUIcQAt/H7/Qp2dkb0gdH47puq21YxQCPDcJeela2HcyYO9jCAIYMQA9xFJB8YrU3nB2g0AIDbcXUSAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkQgxAADASIQYAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABgp7BDzzjvvaO7cuXK73XI4HPrlL38Zst+yLJWVlcntdishIUHTpk3TuXPnQmq6urq0atUqpaWlKSkpSUVFRbp06VJITVtbm0pKSuR0OuV0OlVSUqKrV6+GfYAAACA6hR1irl+/rokTJ6qqquqO+zdt2qTNmzerqqpKp06dksvl0syZM9XR0WHXlJaW6sCBA6qpqdGRI0d07do1FRYW6ubNm3ZNcXGxGhoaVFtbq9raWjU0NKikpCSCQwQAANEoNtwOc+bM0Zw5c+64z7Isbd26VRs3btS8efMkSXv37lVGRob279+vpUuXKhAIaNeuXXrllVc0Y8YMSVJ1dbUyMzN16NAhzZo1Sx6PR7W1tTp+/Ljy8vIkSTt37lR+fr4aGxs1fvz4SI8XAIYlj8cTUb+0tDSNGzeun0cD9I+wQ0xfmpqa5PP5VFBQYLfFx8dr6tSpOnr0qJYuXar6+nrduHEjpMbtdis3N1dHjx7VrFmzdOzYMTmdTjvASNLkyZPldDp19OjRO4aYrq4udXV12a/b29v789BgIK/XK7/fH3a/SP9nDwxFHf4WOWJitGDBgoj6JyQm6iOPhyCDIalfQ4zP55MkZWRkhLRnZGTowoULdk1cXJxGjx7dq+ZWf5/Pp/T09F7vn56ebtfcrqKiQi+88MJ9HwOig9fr1aM5OQp2dg72UIBBFexol9XTo/kvbld6VnZYfVubzuv155bJ7/cTYjAk9WuIucXhcIS8tiyrV9vtbq+5U31f77NhwwatWbPGft3e3q7MzMxwho0o4vf7FezsjOh/3I3vvqm6bRUDNDJgcKRnZevhnImDPQygX/VriHG5XJI+P5Py0EMP2e2tra322RmXy6Xu7m61tbWFnI1pbW3VlClT7JqWlpZe73/lypVeZ3luiY+PV3x8fL8dC6JDJP/jbm06P0CjAQD0p369T0xWVpZcLpfq6urstu7ubh0+fNgOKJMmTdLIkSNDapqbm3X27Fm7Jj8/X4FAQCdPnrRrTpw4oUAgYNcAAIDhLewzMdeuXdN//dd/2a+bmprU0NCg1NRUjRs3TqWlpSovL1d2drays7NVXl6uxMREFRcXS5KcTqcWLVqktWvXasyYMUpNTdW6des0YcIE+2qlnJwczZ49W4sXL9aOHTskSUuWLFFhYSFXJgEAAEkRhJj33ntP3/3ud+3Xt9ahLFy4UHv27NH69esVDAa1fPlytbW1
KS8vTwcPHlRycrLdZ8uWLYqNjdX8+fMVDAY1ffp07dmzRyNGjLBr9u3bp9WrV9tXMRUVFd313jQAAGD4CTvETJs2TZZl3XW/w+FQWVmZysrK7lozatQoVVZWqrKy8q41qampqq6uDnd4AABgmODZSQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkQgxAADASIQYAABgJEIMAAAwEiEGAAAYiRADAACMRIgBAABGIsQAAAAjEWIAAICRYgd7AEBfvF6v/H5/2P08Hs8AjAYAMJQQYjBkeb1ePZqTo2Bn52APBRjWIv1SkJaWpnHjxvXzaID/Q4jBkOX3+xXs7NT8F7crPSs7rL6N776pum0VAzQyYHjo8LfIEROjBQsWRNQ/ITFRH3k8BBkMGEIMhrz0rGw9nDMxrD6tTecHaDTA8BHsaJfV0xPRF4nWpvN6/bll8vv9hBgMGEIMAKBPkXyRAB4Erk4CAABGIsQAAAAjEWIAAICRCDEAAMBIhBgAAGAkQgwAADASIQYAABiJEAMAAIxEiAEAAEYixAAAACMRYgAAgJEIMQAAwEg8ABIAMGA8Hk9E/dLS0nj6Ne6JEAMA6Hcd/hY5YmK0YMGCiPonJCbqI4+HIIM+EWIAAP0u2NEuq6dH81/crvSs7LD6tjad1+vPLZPf7yfEoE+EGAw4r9crv98fdr9IT0MDGDrSs7L1cM7EwR4GohQhBgPK6/Xq0ZwcBTs7B3soAIAoQ4jBgPL7/Qp2dkZ0Srnx3TdVt61igEYGADAdIQYPRCSnlFubzg/QaAAA0YD7xAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBJXJwEAhiSeu4R7IcTgS+GuuwAeFJ67hC+LEIN74q67AB4knruEL4sQg3virrsABgPPXcK9EGLwpXHXXQDAUEKIGUZY1wIAiCZDPsRs27ZNf/u3f6vm5mZ94xvf0NatW/XEE08M9rCMw7oWAMPJ/Xz54uomcwzpEPPaa6+ptLRU27Zt05/+6Z9qx44dmjNnjj788EP+BQsT61oADAf3e2WTxNVNJhnSIWbz5s1atGiRfvjDH0qStm7dqt/85jfavn27KiqG54fq/f5JiHUtAKLZ/VzZJP3f1U2//e1vlZOTE3Z/zuI8WEM2xHR3d6u+vl4/+tGPQtoLCgp09OjRXvVdXV3q6uqyXwcCAUlSe3v7gIzP5/PJ5/NF1DcmJkY9PT1h92tpaVHJX/6luj79NKKfK0mfeD5Qd+f1sPpc+fg8felLX/oa1ffGp8Gw+0rS1ZbLksMR8Zmc+FGj9MovfqGMjIyw+0b62TCYfV0ul1wuV0R97+bW57ZlWfcutoaoTz75xJJkvfvuuyHtL730kvVHf/RHveqff/55SxIbGxsbGxtbFGwXL168Z1YYsmdibnE4HCGvLcvq1SZJGzZs0Jo1a+zXPT09+t///V+NGTPmjvX3o729XZmZmbp48aJSUlL69b3RP5ijoY85MgPzNPRF2xxZlqWOjg653e571g7ZEJOWlqYRI0b0+pNNa2vrHU/TxcfHKz4+PqTtq1/96kAOUSkpKVHxL0w0Y46GPubIDMzT0BdNc+R0Or9U3ZB9inVcXJwmTZqkurq6kPa6ujpNmTJlkEYFAACGiiF7JkaS1qxZo5KSEj3++OPKz8/Xz3/+c3m9Xj377LODPTQAADDIhnSI+Yu/+Av97ne/009+8hM1NzcrNzdXv/71r/XII48M6rji4+P1/PPP9/rzFYYO5mjoY47MwDwNfcN5jhyW9WWuYQIAABhahuyaGAAAgL4QYgAAgJEIMQAAwEiEGAAAYCRCzF18/PHHWrRokbKyspSQkKA//MM/1PPPP6/u7u6QOq/Xq7lz5yopKUlpaWlavXp1r5ozZ85o6tSpSkhI0MMPP6yf/OQnX+6ZELinl156SVOmTFFiYuJdb27IHA1N27ZtU1ZWlkaNGqVJkybpt7/97WAP
adh45513NHfuXLndbjkcDv3yl78M2W9ZlsrKyuR2u5WQkKBp06bp3LlzITVdXV1atWqV0tLSlJSUpKKiIl26dOkBHkV0q6io0De/+U0lJycrPT1d3/ve99TY2BhSwzwRYu7qo48+Uk9Pj3bs2KFz585py5Yt+tnPfqYf//jHds3Nmzf11FNP6fr16zpy5Ihqamr0z//8z1q7dq1d097erpkzZ8rtduvUqVOqrKzUT3/6U23evHkwDivqdHd36/vf/76WLVt2x/3M0dD02muvqbS0VBs3btTp06f1xBNPaM6cOfJ6vYM9tGHh+vXrmjhxoqqqqu64f9OmTdq8ebOqqqp06tQpuVwuzZw5Ux0dHXZNaWmpDhw4oJqaGh05ckTXrl1TYWGhbt68+aAOI6odPnxYK1as0PHjx1VXV6fPPvtMBQUFun79/x5qyTxJQ/YBkEPRpk2brKysLPv1r3/9aysmJsb65JNP7LZXX33Vio+PtwKBgGVZlrVt2zbL6XRan376qV1TUVFhud1uq6en58ENPsrt3r3bcjqdvdqZo6HpW9/6lvXss8+GtD366KPWj370o0Ea0fAlyTpw4ID9uqenx3K5XNbLL79st3366aeW0+m0fvazn1mWZVlXr161Ro4cadXU1Ng1n3zyiRUTE2PV1tY+sLEPJ62trZYk6/Dhw5ZlMU+3cCYmDIFAQKmpqfbrY8eOKTc3N+QhVbNmzVJXV5fq6+vtmqlTp4bchGjWrFm6fPmyPv744wc29uGKORp6uru7VV9fr4KCgpD2goICHT16dJBGhVuamprk8/lC5ic+Pl5Tp06156e+vl43btwIqXG73crNzWUOB0ggEJAk+zOIefocIeZL+u///m9VVlaGPPLA5/P1ehjl6NGjFRcXZz+48k41t17f/nBL9D/maOjx+/26efPmHX/n/L4H36056Gt+fD6f4uLiNHr06LvWoP9YlqU1a9bo29/+tnJzcyUxT7cMuxBTVlYmh8PR5/bee++F9Ll8+bJmz56t73//+/rhD38Yss/hcPT6GZZlhbTfXmP9/wWjd+qLyOaoL8zR0HSn3zm/76EjkvlhDgfGypUr9cEHH+jVV1/ttW+4z9OQfnbSQFi5cqWefvrpPmt+//d/3/7ny5cv67vf/a79AMovcrlcOnHiREhbW1ubbty4Yadjl8vVK/G2trZK6p2g8blw56gvzNHQk5aWphEjRtzxd87ve/C5XC5Jn3+Lf+ihh+z2L86Py+VSd3e32traQr7lt7a2asqUKQ92wFFu1apV+tWvfqV33nlHY8eOtduZp/9vsBbjmODSpUtWdna29fTTT1ufffZZr/23Fo1evnzZbqupqem1aPSrX/2q1dXVZde8/PLLLBrtZ/da2MscDS3f+ta3rGXLloW05eTksLB3EOguC3v/5m/+xm7r6uq644LR1157za65fPlyVC0YHWw9PT3WihUrLLfbbf3nf/7nHfczT5ZFiLmLTz75xPra175mPfnkk9alS5es5uZme7vls88+s3Jzc63p06db77//vnXo0CFr7Nix1sqVK+2aq1evWhkZGdYzzzxjnTlzxnrjjTeslJQU66c//elgHFbUuXDhgnX69GnrhRdesL7yla9Yp0+ftk6fPm11dHRYlsUcDVU1NTXWyJEjrV27dlkffvihVVpaaiUlJVkff/zxYA9tWOjo6LD/W5Fkbd682Tp9+rR14cIFy7I+D/FOp9N64403rDNnzljPPPOM9dBDD1nt7e32ezz77LPW2LFjrUOHDlnvv/++9eSTT1oTJ0684xc+hG/ZsmWW0+m03n777ZDPn87OTruGeSLE3NXu3bstSXfcvujChQvWU089ZSUkJFipqanWypUrQy7VtSzL+uCDD6wnnnjCio+Pt1wul1VWVsY3/H6ycOHCO87RW2+9ZdcwR0PTP/7jP1qPPPKIFRcXZ/3Jn/yJfekoBt5bb711x/9uFi5caFnW59/yn3/+ecvlclnx
8fHWd77zHevMmTMh7xEMBq2VK1daqampVkJCglVYWGh5vd5BOJrodLfPn927d9s1zJNlOSyL25ICAADzDLurkwAAQHQgxAAAACMRYgAAgJEIMQAAwEiEGAAAYCRCDAAAMBIhBgAAGIkQAwAAjESIAQAARiLEAAAAIxFiAACAkQgxAADASP8P2kPQxab/e08AAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Histogram of the simulated noise term; binding the result to `_` keeps plt.hist's return value out of the cell output.\n",
    "_ = plt.hist(error, bins=30, color='skyblue', edgecolor='black')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "7d8a5ba5-57a8-415b-a1fb-8d97d2b2a787",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Axes: >"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjoAAAGdCAYAAAAbudkLAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAMPFJREFUeJzt3X10lPWd//9XCMOQ0DAS2GQYDRBdimjwZkFDsC0okMAhph7OFhE3xVNELAJGYKksbQm4BqQVOCdZ71gOsAaK20WsWzEknAqWDXdGsnK3qBVRhBDUkIDgZEw+3z/85fo5TIAEZnLz8fk4hwNzXe/5zOfNZzJ55cp1zUQZY4wAAAAs1KG1JwAAABApBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLU6tvYEWlN9fb2OHz+uuLg4RUVFtfZ0AABAExhjdObMGfl8PnXocOljNt/roHP8+HElJSW19jQAAMAV+PTTT3XdddddsuZ7HXTi4uIkffsf1bVr16saKxAIqLi4WOnp6XK5XOGYXptDj3agRzvQox1s7zFS/dXU1CgpKcn5Pn4p3+ug0/Drqq5du4Yl6MTGxqpr165WPlklerQFPdqBHu1ge4+R7q8pp51wMjIAALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtTq29gQAtA99nnyjtafQZO5ooyV3tvYsALQFHNEBAADWIugAAABr8asrANZKyd0sf11Ua0+jWT5ePKa1pwBYhSM6AADAWgQdAABgrWYHnbffflv33nuvfD6foqKi9NprrwXtN8YoNzdXPp9PMTExGjZsmA4cOBBU4/f7NX36dPXo0UNdunRRVlaWjh07FlRTVVWl7OxseTweeTweZWdn6/Tp00E1n3zyie6991516dJFPXr00IwZM1RbW9vclgAAgKWaHXS++uor3XrrrSooKGh0/5IlS7R06VIVFBRoz5498nq9GjlypM6cOePU5OTkaOPGjVq/fr22b9+us2fPKjMzU3V1dU7NhAkTVF5erqKiIhUVFam8vFzZ2dnO/rq6Oo0ZM0ZfffWVtm/frvXr12vDhg2aNWtWc1sCAACWavbJyKNHj9bo0aMb3WeM0fLlyzVv3jyNHTtWkrRmzRolJiZq3bp1mjJliqqrq7Vy5Uq9/PLLGjFihCSpsLBQSUlJ2rJlizIyMnTo0CEVFRVp586dSk1NlSStWLFCaWlpOnz4sPr166fi4mIdPHhQn376qXw+nyTp2Wef1UMPPaSnn35aXbt2vaL/EAAAYI+wXnV15MgRVVRUKD093dnmdrs1dOhQlZaWasqUKSorK1MgEAiq8fl8SklJUWlpqTIyMrRjxw55PB4n5EjS4MGD5fF4VFpaqn79+mnHjh1KSUlxQo4kZWRkyO/3q6ysTHfffXfI/Px+v/x+v3O7pqZGkhQIBBQIBK6q94b7X+04bRk92uFKe3RHm0hMJyLcHUzQ3+1JU9eF56odbO8xUv01Z7ywBp2KigpJUmJiYtD2xMREHT161Knp1KmTunXrFlLTcP+KigolJCSEjJ+QkBBUc+HjdOvWTZ06dXJqLrRo0SItWLAgZHtxcbFiY2Ob0uJllZSUhGWctowe7dDcHtvjOw0/Nai+tafQbJs2bWpWPc9VO9jeY7j7O3fuXJNrI/I+OlFRwe9bYYwJ2XahC2saq7+Smu+aO3euZs6c6dyuqalRUlKS0tPTr/pXXYFAQCUlJRo5cqRcLtdVjdVW0aMdrrTHlNzNEZxVeLk7GD01qF6/eaeD/PXt63109udmNKmO56odbO8xUv01/EamKcIadLxer6Rvj7b07NnT2V5ZWekcffF6vaqtrVVVVVXQUZ3KykoNGTLEqTl58mTI+KdOnQoaZ9euXUH7q6qq
FAgEQo70NHC73XK73SHbXS5X2BYgnGO1VfRoh+b22N7eeE+S/PVR7W7ezX3e8Vy1g+09hru/5owV1vfRSU5OltfrDTpEVVtbq23btjkhZuDAgXK5XEE1J06c0P79+52atLQ0VVdXa/fu3U7Nrl27VF1dHVSzf/9+nThxwqkpLi6W2+3WwIEDw9kWAABop5p9ROfs2bP68MMPndtHjhxReXm54uPj1atXL+Xk5CgvL099+/ZV3759lZeXp9jYWE2YMEGS5PF4NGnSJM2aNUvdu3dXfHy8Zs+erQEDBjhXYfXv31+jRo3S5MmT9eKLL0qSHnnkEWVmZqpfv36SpPT0dN10003Kzs7W7373O3355ZeaPXu2Jk+ezBVXAABA0hUEnXfeeSfoiqaGc14mTpyo1atXa86cOTp//rymTp2qqqoqpaamqri4WHFxcc59li1bpo4dO2rcuHE6f/68hg8frtWrVys6OtqpWbt2rWbMmOFcnZWVlRX03j3R0dF64403NHXqVN11112KiYnRhAkT9Pvf/775/wsAAMBKzQ46w4YNkzEXv2QzKipKubm5ys3NvWhN586dlZ+fr/z8/IvWxMfHq7Cw8JJz6dWrl/785z9fds4AAOD7ic+6AgAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWCnvQ+eabb/TrX/9aycnJiomJ0fXXX6+FCxeqvr7eqTHGKDc3Vz6fTzExMRo2bJgOHDgQNI7f79f06dPVo0cPdenSRVlZWTp27FhQTVVVlbKzs+XxeOTxeJSdna3Tp0+HuyUAANBOhT3oPPPMM3rhhRdUUFCgQ4cOacmSJfrd736n/Px8p2bJkiVaunSpCgoKtGfPHnm9Xo0cOVJnzpxxanJycrRx40atX79e27dv19mzZ5WZmam6ujqnZsKECSovL1dRUZGKiopUXl6u7OzscLcEAADaqY7hHnDHjh366U9/qjFjxkiS+vTpoz/84Q965513JH17NGf58uWaN2+exo4dK0las2aNEhMTtW7dOk2ZMkXV1dVauXKlXn75ZY0YMUKSVFhYqKSkJG3ZskUZGRk6dOiQioqKtHPnTqWmpkqSVqxYobS0NB0+fFj9+vULd2sAAKCdCXvQ+dGPfqQXXnhB77//vn74wx/qf//3f7V9+3YtX75cknTkyBFVVFQoPT3duY/b7dbQoUNVWlqqKVOmqKysTIFAIKjG5/MpJSVFpaWlysjI0I4dO+TxeJyQI0mDBw+Wx+NRaWlpo0HH7/fL7/c7t2tqaiRJgUBAgUDgqvpuuP/VjtOW0aMdrrRHd7SJxHQiwt3BBP3dnjR1XXiu2sH2HiPVX3PGC3vQ+dWvfqXq6mrdeOONio6OVl1dnZ5++mk98MADkqSKigpJUmJiYtD9EhMTdfToUaemU6dO6tatW0hNw/0rKiqUkJAQ8vgJCQlOzYUWLVqkBQsWhGwvLi5WbGxsMzttXElJSVjGacvo0Q7N7XHJnRGaSAQ9Naj+8kVtzKZNm5pVz3PVDrb3GO7+zp071+TasAedV155RYWFhVq3bp1uvvlmlZeXKycnRz6fTxMnTnTqoqKigu5njAnZ
dqELaxqrv9Q4c+fO1cyZM53bNTU1SkpKUnp6urp27dqk/i4mEAiopKREI0eOlMvluqqx2ip6tMOV9piSuzmCswovdwejpwbV6zfvdJC//tKvK23N/tyMJtXxXLWD7T1Gqr+G38g0RdiDzj//8z/rySef1Pjx4yVJAwYM0NGjR7Vo0SJNnDhRXq9X0rdHZHr27Oncr7Ky0jnK4/V6VVtbq6qqqqCjOpWVlRoyZIhTc/LkyZDHP3XqVMjRogZut1tutztku8vlCtsChHOstooe7dDcHv117SswSJK/Pqrdzbu5zzueq3awvcdw99ecscJ+1dW5c+fUoUPwsNHR0c7l5cnJyfJ6vUGHsWpra7Vt2zYnxAwcOFAulyuo5sSJE9q/f79Tk5aWpurqau3evdup2bVrl6qrq50aAADw/Rb2Izr33nuvnn76afXq1Us333yz9u7dq6VLl+oXv/iFpG9/3ZSTk6O8vDz17dtXffv2VV5enmJjYzVhwgRJksfj0aRJkzRr1ix1795d8fHxmj17tgYMGOBchdW/f3+NGjVKkydP1osvvihJeuSRR5SZmckVVwAAQFIEgk5+fr5+85vfaOrUqaqsrJTP59OUKVP029/+1qmZM2eOzp8/r6lTp6qqqkqpqakqLi5WXFycU7Ns2TJ17NhR48aN0/nz5zV8+HCtXr1a0dHRTs3atWs1Y8YM5+qsrKwsFRQUhLslAADQToU96MTFxWn58uXO5eSNiYqKUm5urnJzcy9a07lzZ+Xn5we90eCF4uPjVVhYeBWzBQAANuOzrgAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaHVt7AsD3UZ8n32i1x3ZHGy25U0rJ3Sx/XVSrzQMAWgJHdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBYnIwNAG9LUE9Xb0knlHy8e06qPD1wKR3QAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoRCTqfffaZ/umf/kndu3dXbGysbrvtNpWVlTn7jTHKzc2Vz+dTTEyMhg0bpgMHDgSN4ff7NX36dPXo0UNdunRRVlaWjh07FlRTVVWl7OxseTweeTweZWdn6/Tp05FoCQAAtENhDzpVVVW666675HK59Oabb+rgwYN69tlndc011zg1S5Ys0dKlS1VQUKA9e/bI6/Vq5MiROnPmjFOTk5OjjRs3av369dq+fbvOnj2rzMxM1dXVOTUTJkxQeXm5ioqKVFRUpPLycmVnZ4e7JQAA0E51DPeAzzzzjJKSkrRq1SpnW58+fZx/G2O0fPlyzZs3T2PHjpUkrVmzRomJiVq3bp2mTJmi6upqrVy5Ui+//LJGjBghSSosLFRSUpK2bNmijIwMHTp0SEVFRdq5c6dSU1MlSStWrFBaWpoOHz6sfv36hbs1AADQzoQ96Lz++uvKyMjQz372M23btk3XXnutpk6dqsmTJ0uSjhw5ooqKCqWnpzv3cbvdGjp0qEpLSzVlyhSVlZUpEAgE1fh8PqWkpKi0tFQZGRnasWOHPB6PE3IkafDgwfJ4PCotLW006Pj9fvn9fud2TU2NJCkQCCgQCFxV3w33v9px2jJ6DB93tIno+Jd87A4m6G8b0WPLitTXC6857V+k+mvOeGEPOh999JGef/55zZw5U//yL/+i3bt3a8aMGXK7
3fr5z3+uiooKSVJiYmLQ/RITE3X06FFJUkVFhTp16qRu3bqF1DTcv6KiQgkJCSGPn5CQ4NRcaNGiRVqwYEHI9uLiYsXGxja/2UaUlJSEZZy2jB6v3pI7Izp8kzw1qL61pxBx9NgyNm3aFNHxec1p/8Ld37lz55pcG/agU19fr0GDBikvL0+SdPvtt+vAgQN6/vnn9fOf/9ypi4qKCrqfMSZk24UurGms/lLjzJ07VzNnznRu19TUKCkpSenp6eratevlm7uEQCCgkpISjRw5Ui6X66rGaqvoMXxScjdHbOzLcXcwempQvX7zTgf56y/9Ndde0WPL2p+bEZFxec1p/yLVX8NvZJoi7EGnZ8+euummm4K29e/fXxs2bJAkeb1eSd8ekenZs6dTU1lZ6Rzl8Xq9qq2tVVVVVdBRncrKSg0ZMsSpOXnyZMjjnzp1KuRoUQO32y232x2y3eVyhW0BwjlWW0WPV89f1/rffP31UW1iHpFEjy0j0q8HvOa0f+Hurzljhf2qq7vuukuHDx8O2vb++++rd+/ekqTk5GR5vd6gw1i1tbXatm2bE2IGDhwol8sVVHPixAnt37/fqUlLS1N1dbV2797t1OzatUvV1dVODQAA+H4L+xGdJ554QkOGDFFeXp7GjRun3bt366WXXtJLL70k6dtfN+Xk5CgvL099+/ZV3759lZeXp9jYWE2YMEGS5PF4NGnSJM2aNUvdu3dXfHy8Zs+erQEDBjhXYfXv31+jRo3S5MmT9eKLL0qSHnnkEWVmZnLFFQAAkBSBoHPHHXdo48aNmjt3rhYuXKjk5GQtX75cDz74oFMzZ84cnT9/XlOnTlVVVZVSU1NVXFysuLg4p2bZsmXq2LGjxo0bp/Pnz2v48OFavXq1oqOjnZq1a9dqxowZztVZWVlZKigoCHdLAACgnQp70JGkzMxMZWZmXnR/VFSUcnNzlZube9Gazp07Kz8/X/n5+RetiY+PV2Fh4dVMFQAAWIzPugIAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYq2NrTwC4Wn2efCNsY7mjjZbcKaXkbpa/Lips4wIAWgdHdAAAgLU4ogMAuCrhPKr6XZE8wvrx4jFhHQ9tF0d0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANaKeNBZtGiRoqKilJOT42wzxig3N1c+n08xMTEaNmyYDhw4EHQ/v9+v6dOnq0ePHurSpYuysrJ07NixoJqqqiplZ2fL4/HI4/EoOztbp0+fjnRLAACgnYho0NmzZ49eeukl3XLLLUHblyxZoqVLl6qgoEB79uyR1+vVyJEjdebMGacmJydHGzdu1Pr167V9+3adPXtWmZmZqqurc2omTJig8vJyFRUVqaioSOXl5crOzo5kSwAAoB2JWNA5e/asHnzwQa1YsULdunVzthtjtHz5cs2bN09jx45VSkqK1qxZo3PnzmndunWSpOrqaq1cuVLPPvusRowYodtvv12FhYXat2+ftmzZIkk6dOiQioqK9O///u9KS0tTWlqaVqxYoT//+c86fPhwpNoCAADtSMdIDfzYY49pzJgxGjFihP71X//V2X7kyBFVVFQoPT3d2eZ2uzV06FCVlpZqypQpKisrUyAQCKrx+XxKSUlRaWmpMjIytGPH
Dnk8HqWmpjo1gwcPlsfjUWlpqfr16xcyJ7/fL7/f79yuqamRJAUCAQUCgavqt+H+VztOW9ZWe3RHm/CN1cEE/W0jerQDPV6dtvI61lZfV8MlUv01Z7yIBJ3169fr3Xff1Z49e0L2VVRUSJISExODticmJuro0aNOTadOnYKOBDXUNNy/oqJCCQkJIeMnJCQ4NRdatGiRFixYELK9uLhYsbGxTejs8kpKSsIyTlvW1npccmf4x3xqUH34B21j6NEO9HhlNm3aFPYxr0Zbe10Nt3D3d+7cuSbXhj3ofPrpp3r88cdVXFyszp07X7QuKioq6LYxJmTbhS6saaz+UuPMnTtXM2fOdG7X1NQoKSlJ6enp6tq16yUf+3ICgYBKSko0cuRIuVyuqxqrrWqrPabkbg7bWO4ORk8Nqtdv3ukgf/2ln4/tFT3agR6vzv7cjLCOd6Xa6utquESqv4bfyDRF2INOWVmZKisrNXDgQGdbXV2d3n77bRUUFDjnz1RUVKhnz55OTWVlpXOUx+v1qra2VlVVVUFHdSorKzVkyBCn5uTJkyGPf+rUqZCjRQ3cbrfcbnfIdpfLFbYFCOdYbVVb69FfF/4XeX99VETGbUvo0Q70eGXa0muY1PZeV8Mt3P01Z6ywn4w8fPhw7du3T+Xl5c6fQYMG6cEHH1R5ebmuv/56eb3eoMNYtbW12rZtmxNiBg4cKJfLFVRz4sQJ7d+/36lJS0tTdXW1du/e7dTs2rVL1dXVTg0AAPh+C/sRnbi4OKWkpARt69Kli7p37+5sz8nJUV5envr27au+ffsqLy9PsbGxmjBhgiTJ4/Fo0qRJmjVrlrp37674+HjNnj1bAwYM0IgRIyRJ/fv316hRozR58mS9+OKLkqRHHnlEmZmZjZ6IDAAAvn8idtXVpcyZM0fnz5/X1KlTVVVVpdTUVBUXFysuLs6pWbZsmTp27Khx48bp/PnzGj58uFavXq3o6GinZu3atZoxY4ZzdVZWVpYKCgpavB8AANA2tUjQ2bp1a9DtqKgo5ebmKjc396L36dy5s/Lz85Wfn3/Rmvj4eBUWFoZplgAAwDZ81hUAALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFirY2tPAG1LnyffuOg+d7TRkjullNzN8tdFteCsACC8LvVa15Ka87r68eIxLTQru3BEBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgrbAHnUWLFumOO+5QXFycEhISdN999+nw4cNBNcYY5ebmyufzKSYmRsOGDdOBAweCavx+v6ZPn64ePXqoS5cuysrK0rFjx4JqqqqqlJ2dLY/HI4/Ho+zsbJ0+fTrcLQEAgHYq7EFn27Zteuyxx7Rz506VlJTom2++UXp6ur766iunZsmSJVq6dKkKCgq0Z88eeb1ejRw5UmfOnHFqcnJytHHjRq1fv17bt2/X2bNnlZmZqbq6OqdmwoQJKi8vV1FRkYqKilReXq7s7OxwtwQAANqpsL8zclFRUdDtVatWKSEhQWVlZfrJT34iY4yWL1+uefPmaezYsZKkNWvWKDExUevWrdOUKVNUXV2tlStX6uWXX9aIESMkSYWFhUpKStKWLVuUkZGhQ4cOqaioSDt37lRqaqok
acWKFUpLS9Phw4fVr1+/cLcGAADamYh/BER1dbUkKT4+XpJ05MgRVVRUKD093alxu90aOnSoSktLNWXKFJWVlSkQCATV+Hw+paSkqLS0VBkZGdqxY4c8Ho8TciRp8ODB8ng8Ki0tbTTo+P1++f1+53ZNTY0kKRAIKBAIXFWfDfe/2nFamzvaXHxfBxP0t43o0Q70aAd6DNYev79E6ntjc8aLaNAxxmjmzJn60Y9+pJSUFElSRUWFJCkxMTGoNjExUUePHnVqOnXqpG7duoXUNNy/oqJCCQkJIY+ZkJDg1Fxo0aJFWrBgQcj24uJixcbGNrO7xpWUlIRlnNay5M7L1zw1qD7yE2ll9GgHerQDPX5r06ZNLTCTyAj398Zz5841uTaiQWfatGl67733tH379pB9UVHBH15mjAnZdqELaxqrv9Q4c+fO1cyZM53bNTU1SkpKUnp6urp27XrJx76cQCCgkpISjRw5Ui6X66rGak0puZsvus/dweipQfX6zTsd5K+380M96dEO9GgHegy2PzejhWYVPpH63tjwG5mmiFjQmT59ul5//XW9/fbbuu6665ztXq9X0rdHZHr27Olsr6ysdI7yeL1e1dbWqqqqKuioTmVlpYYMGeLUnDx5MuRxT506FXK0qIHb7Zbb7Q7Z7nK5wrYA4RyrNTTlU8n99VHWf3o5PdqBHu1Aj99qz99bwv29sTljhf2qK2OMpk2bpldffVV/+ctflJycHLQ/OTlZXq836DBWbW2ttm3b5oSYgQMHyuVyBdWcOHFC+/fvd2rS0tJUXV2t3bt3OzW7du1SdXW1UwMAAL7fwn5E57HHHtO6dev0pz/9SXFxcc75Mh6PRzExMYqKilJOTo7y8vLUt29f9e3bV3l5eYqNjdWECROc2kmTJmnWrFnq3r274uPjNXv2bA0YMMC5Cqt///4aNWqUJk+erBdffFGS9MgjjygzM5MrrgAAgKQIBJ3nn39ekjRs2LCg7atWrdJDDz0kSZozZ47Onz+vqVOnqqqqSqmpqSouLlZcXJxTv2zZMnXs2FHjxo3T+fPnNXz4cK1evVrR0dFOzdq1azVjxgzn6qysrCwVFBSEuyUAANBOhT3oGHP5S+SioqKUm5ur3Nzci9Z07txZ+fn5ys/Pv2hNfHy8CgsLr2SaAADge4DPugIAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACs1bG1J2CzPk++0dpTAADge40jOgAAwFoEHQAAYC1+dQUAQDvQHk+HcEcbLbmzdefAER0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1mr3Qee5555TcnKyOnfurIEDB+qvf/1ra08JAAC0Ee066LzyyivKycnRvHnztHfvXv34xz/W6NGj9cknn7T21AAAQBvQroPO0qVLNWnSJD388MPq37+/li9frqSkJD3//POtPTUAANAGdGztCVyp2tpalZWV6cknnwzanp6ertLS0kbv4/f75ff7ndvV1dWSpC+//FKBQOCq5hMIBHTu3Dl98cUXcrlckqSO33x1VWO2NR3rjc6dq1fHQAfV1Ue19nQigh7tQI92oMf2r6G/735vDIczZ85Ikowxl59D2B61hX3++eeqq6tTYmJi0PbExERVVFQ0ep9FixZpwYIFIduTk5MjMkcb
TWjtCbQAerQDPdqBHtu/SPZ35swZeTyeS9a026DTICoqOAEbY0K2NZg7d65mzpzp3K6vr9eXX36p7t27X/Q+TVVTU6OkpCR9+umn6tq161WN1VbRox3o0Q70aAfbe4xUf8YYnTlzRj6f77K17Tbo9OjRQ9HR0SFHbyorK0OO8jRwu91yu91B26655pqwzqtr165WPlm/ix7tQI92oEc72N5jJPq73JGcBu32ZOROnTpp4MCBKikpCdpeUlKiIUOGtNKsAABAW9Juj+hI0syZM5Wdna1BgwYpLS1NL730kj755BM9+uijrT01AADQBrTroHP//ffriy++0MKFC3XixAmlpKRo06ZN6t27d4vPxe12a/78+SG/GrMJPdqBHu1Aj3awvce20F+Uacq1WQAAAO1Quz1HBwAA4HIIOgAAwFoEHQAAYC2CDgAAsBZB5wJPP/20hgwZotjY2Iu+meAnn3yie++9V126dFGPHj00Y8YM1dbWBtXs27dPQ4cOVUxMjK699lotXLgw5DM5tm3bpoEDB6pz5866/vrr9cILL4Q81oYNG3TTTTfJ7Xbrpptu0saNG8PWqyRt3bpVUVFRjf7Zs2ePU9fY/gvnG66eI6FPnz4h87/wc9Jacl3D7eOPP9akSZOUnJysmJgY3XDDDZo/f37I/Nv7OjbFc889p+TkZHXu3FkDBw7UX//619aeUqMWLVqkO+64Q3FxcUpISNB9992nw4cPB9U89NBDIes1ePDgoBq/36/p06erR48e6tKli7KysnTs2LGgmqqqKmVnZ8vj8cjj8Sg7O1unT5+OdIvKzc0Nmb/X63X2G2OUm5srn8+nmJgYDRs2TAcOHGg3/UmNv7ZERUXpsccek9Q+1/Dtt9/WvffeK5/Pp6ioKL322mtB+1ty3ZryunxZBkF++9vfmqVLl5qZM2caj8cTsv+bb74xKSkp5u677zbvvvuuKSkpMT6fz0ybNs2pqa6uNomJiWb8+PFm3759ZsOGDSYuLs78/ve/d2o++ugjExsbax5//HFz8OBBs2LFCuNyucx//dd/OTWlpaUmOjra5OXlmUOHDpm8vDzTsWNHs3PnzrD16/f7zYkTJ4L+PPzww6ZPnz6mvr7eqZNkVq1aFVR37ty5sPccKb179zYLFy4Mmv+ZM2ec/S25rpHw5ptvmoceeshs3rzZ/O1vfzN/+tOfTEJCgpk1a1ZQXXtfx8tZv369cblcZsWKFebgwYPm8ccfN126dDFHjx5t7amFyMjIMKtWrTL79+835eXlZsyYMaZXr17m7NmzTs3EiRPNqFGjgtbriy++CBrn0UcfNddee60pKSkx7777rrn77rvNrbfear755hunZtSoUSYlJcWUlpaa0tJSk5KSYjIzMyPe4/z5883NN98cNP/Kykpn/+LFi01cXJzZsGGD2bdvn7n//vtNz549TU1NTbvozxhjKisrg/orKSkxksxbb71ljGmfa7hp0yYzb948s2HDBiPJbNy4MWh/S61bU16Xm4KgcxGrVq1qNOhs2rTJdOjQwXz22WfOtj/84Q/G7Xab6upqY4wxzz33nPF4PObrr792ahYtWmR8Pp8THubMmWNuvPHGoLGnTJliBg8e7NweN26cGTVqVFBNRkaGGT9+/FX3dzG1tbUmISHBLFy4MGh7Y0/27wpXz5HSu3dvs2zZsovub8l1bSlLliwxycnJQdva+zpezp133mkeffTRoG033nijefLJJ1tpRk1XWVlpJJlt27Y52yZOnGh++tOfXvQ+p0+fNi6Xy6xfv97Z9tlnn5kOHTqYoqIiY4wxBw8eNJKCfkDasWOHkWT+7//+L/yNfMf8+fPNrbfe2ui++vp64/V6zeLFi51tX3/9tfF4POaFF14wxrT9/hrz+OOPmxtuuMH5emnva3jha0ZLrltTXpebgl9dNdOOHTuUkpIS9EFiGRkZ8vv9Kisrc2qGDh0a9AZJGRkZOn78uD7++GOnJj09PWjsjIwM
vfPOOwoEApesKS0tjURrkqTXX39dn3/+uR566KGQfdOmTVOPHj10xx136IUXXlB9fb2zL1w9R9Izzzyj7t2767bbbtPTTz8ddPizJde1pVRXVys+Pj5ke3tfx4upra1VWVlZyNzS09Mj+jUTLtXV1ZIUsmZbt25VQkKCfvjDH2ry5MmqrKx09pWVlSkQCAT17PP5lJKS4vS8Y8cOeTwepaamOjWDBw+Wx+Npkf+XDz74QD6fT8nJyRo/frw++ugjSdKRI0dUUVERNHe3262hQ4c682oP/X1XbW2tCgsL9Ytf/CLog6Lb+xp+V0uuW1Nel5uCoNNMFRUVIR8a2q1bN3Xq1Mn5gNHGahpuX67mm2++0eeff37Jmgs/yDScVq5cqYyMDCUlJQVtf+qpp/THP/5RW7Zs0fjx4zVr1izl5eU5+8PVc6Q8/vjjWr9+vd566y1NmzZNy5cv19SpUy85/0ita0v429/+pvz8/JCPQ2nv63gpn3/+uerq6lr8ayYcjDGaOXOmfvSjHyklJcXZPnr0aK1du1Z/+ctf9Oyzz2rPnj2655575Pf7JX27Fp06dVK3bt2CxvtuzxUVFUpISAh5zISEhIj/v6Smpuo//uM/tHnzZq1YsUIVFRUaMmSIvvjiC+exL7Vebb2/C7322ms6ffp00A+K7X0NL9SS69aU1+WmaNcfAdFUubm5WrBgwSVr9uzZo0GDBjVpvO8m9QbGmKDtF9aY/+9kznDUNPb4F7qSno8dO6bNmzfrP//zP0Nqf/3rXzv/vu222yRJCxcuDNoerp6bqjk9PvHEE862W265Rd26ddM//uM/Okd5LjaHSK1rU13JOh4/flyjRo3Sz372Mz388MNBtW1xHcPtSr9mWtO0adP03nvvafv27UHb77//fuffKSkpGjRokHr37q033nhDY8eOveh4l3veNlYTCaNHj3b+PWDAAKWlpemGG27QmjVrnBNyr2S92kp/F1q5cqVGjx4ddASiva/hxbTUuoWj7+9F0Jk2bZrGjx9/yZo+ffo0aSyv16tdu3YFbauqqlIgEHCSp9frDUmbDYcqL1fTsWNH5xvvxWouTLiNuZKeV61ape7duysrK+uy4w8ePFg1NTU6efKkEhMTw9Zzc1zNuja8yH744Yfq3r17i65rczS3x+PHj+vuu+92PuT2ctrCOoZLjx49FB0dfcVfM61l+vTpev311/X222/ruuuuu2Rtz5491bt3b33wwQeSvl2L2tpaVVVVBf30XFlZqSFDhjg1J0+eDBnr1KlTLf7/0qVLFw0YMEAffPCB7rvvPknf/tTes2dPp+a769We+jt69Ki2bNmiV1999ZJ17X0NG66aa4l1a8rrcpM0+Wye75nLnYx8/PhxZ9v69etDTlq95pprjN/vd2oWL14cckJn//79g8Z+9NFHQ05GHj16dFDNqFGjInIycn19vUlOTg65Sudi8vPzTefOnZ2TVsPVc0v57//+byPJuRqnJdc1Uo4dO2b69u1rxo8fH3Rlw6W093W80J133ml++ctfBm3r379/mzwZub6+3jz22GPG5/OZ999/v0n3+fzzz43b7TZr1qwxxvz/J32+8sorTs3x48cbPelz165dTs3OnTtb5WTdr7/+2lx77bVmwYIFzkmtzzzzjLPf7/c3elJre+hv/vz5xuv1mkAgcMm69raGusjJyC2xbk15XW5SD81r2X5Hjx41e/fuNQsWLDA/+MEPzN69e83evXudS5EbLncbPny4effdd82WLVvMddddF3S52+nTp01iYqJ54IEHzL59+8yrr75qunbt2ugluk888YQ5ePCgWblyZcgluv/zP/9joqOjzeLFi82hQ4fM4sWLw355eYMtW7YYSebgwYMh+15//XXz0ksvmX379pkPP/zQrFixwnTt2tXMmDEj7D1HQmlpqVm6dKnZu3ev+eijj8wrr7xifD6fycrKcmpacl0j4bPPPjN///d/b+655x5z7NixoEtZ
G7T3dWyKhsvLV65caQ4ePGhycnJMly5dzMcff9zaUwvxy1/+0ng8HrN169ZGL/c/c+aMmTVrliktLTVHjhwxb731lklLSzPXXnttyGW81113ndmyZYt59913zT333NPoZby33HKL2bFjh9mxY4cZMGBAi1x+PWvWLLN161bz0UcfmZ07d5rMzEwTFxfnrMfixYuNx+Mxr776qtm3b5954IEHGr1Mua3216Curs706tXL/OpXvwra3l7X8MyZM873PknO62fDD4YttW5NeV1uCoLOBSZOnGgkhfxpeE8EY74NQ2PGjDExMTEmPj7eTJs2LehyXGOMee+998yPf/xj43a7jdfrNbm5uUHvS2OMMVu3bjW333676dSpk+nTp495/vnnQ+bzxz/+0fTr18+4XC5z4403mg0bNkSk7wceeMAMGTKk0X1vvvmmue2228wPfvADExsba1JSUszy5ctDfnIJV8/hVlZWZlJTU43H4zGdO3c2/fr1M/PnzzdfffVVUF1Lrmu4rVq1qtHn7XcP2rb3dWyqf/u3fzO9e/c2nTp1Mv/wD/8QdLl2W3Kx9Vq1apUxxphz586Z9PR083d/93fG5XKZXr16mYkTJ5pPPvkkaJzz58+badOmmfj4eBMTE2MyMzNDar744gvz4IMPmri4OBMXF2cefPBBU1VVFfEeG95fxeVyGZ/PZ8aOHWsOHDjg7K+vr3eOhLjdbvOTn/zE7Nu3r93012Dz5s1Gkjl8+HDQ9va6hm+99Vajz82JEycaY1p23Zryunw5UcZc8JanAAAAluDycgAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACs9f8AgHerII9zuKcAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "y.hist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "3848fd5d-177a-4d03-a12b-f4301b1bd3ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = data.sort_values('var1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "8d8483bd-5b4d-4087-8d13-92cdfc662ed3",
   "metadata": {},
   "outputs": [],
   "source": [
    "data_unobserved = data.loc[data['var1']==n_levels-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "2ef433a9-a3f7-4f79-94ce-a99265858fdf",
   "metadata": {},
   "outputs": [],
   "source": [
    "data_observed = data.loc[data['var1']<n_levels]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "38ca1c00-b163-462c-9fa2-b91354b44429",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(data_observed[cat_vars], data_observed['y'], test_size=0.3, random_state=35)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "21495b2e-2742-4490-9986-a47f4265d8d9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Append only the hold-out rows that did not already go through train_test_split, so no\n",
    "# training row is copied into the test set and no test row is counted twice.\n",
    "# NOTE(review): data_observed is built with `data['var1']<n_levels`, which appears to keep the\n",
    "# level selected into data_unobserved (var1 == n_levels-1) -- confirm the intended hold-out filter.\n",
    "held_out = data_unobserved.loc[~data_unobserved.index.isin(data_observed.index)]\n",
    "if not held_out.empty:\n",
    "    X_test = pd.concat([X_test, held_out[cat_vars]], axis=0)\n",
    "    y_test = pd.concat([y_test, held_out['y']], axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "e3528929-b2f8-462d-bb96-636052fcd93d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Discard the row labels inherited from `data`; each split gets a fresh positional index.\n",
    "X_train, X_test = [frame.reset_index(drop=True) for frame in (X_train, X_test)]\n",
    "y_train, y_test = [target.reset_index(drop=True) for target in (y_train, y_test)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "0360b9a9-ee20-4089-87c9-fab640ff5e21",
   "metadata": {},
   "outputs": [],
   "source": [
    "X2, encoder = icfesl.f_get_dummies(X_train, ['var1','var2'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "924c7ed4-5b53-440b-b103-77ad85033a86",
   "metadata": {},
   "outputs": [],
   "source": [
    "X2_test = icfesl.f_get_dummies(X_test, ['var1','var2'], encoder)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "5fcaa6b4-8007-4e01-a0e9-4dcb8728a4a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "min_child_weight = np.floor(X2.shape[0]/X2.shape[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "9d51c73a-70d3-475e-9376-6c8c5b6cf498",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "np.float64(17.0)"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "min_child_weight"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "7c6871b4-c5c6-4b76-856a-cfcb3c4a7873",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Size of X2 data: 533.5237159729004 MB\n"
     ]
    }
   ],
   "source": [
    "X2_data_size = sys.getsizeof(X2)\n",
    "print(f\"Size of X2 data: {X2_data_size/1024**2} MB\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "59924c30-72c8-41e4-ac5c-9f63911b271c",
   "metadata": {},
   "source": [
    "### CatBoost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "a64d20c9-b58a-4edc-b9c2-44c6f4721c3b",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = CatBoostRegressor(\n",
    "    iterations=100,  \n",
    "    loss_function='RMSE', \n",
    "    random_seed=42,  \n",
    "    verbose=False\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "2cd0335b-e6d5-409f-b32d-9086c345f403",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.2496\n"
     ]
    }
   ],
   "source": [
    "# Time the fit with perf_counter(): a monotonic, high-resolution clock meant for short durations.\n",
    "start = time.perf_counter()\n",
    "model.fit(X_train, y_train, cat_features=cat_vars)\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "e940226e-b8a7-4285-a743-0684b8fd6abd",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "c26341be-4a74-483b-9810-c7f36596adb9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training RMSE: 1075.5715173912852\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "599550aa-a1d9-4164-ae75-4785ed139d29",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "ffd15889-01a3-4d87-8d81-1892ef32eb98",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing RMSE: 1136.2494298352337\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a14325db-bbee-43d1-976c-176f0018bddb",
   "metadata": {},
   "source": [
    "### TabNet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "377e0174-52ab-44b0-8fbc-92597f657406",
   "metadata": {},
   "outputs": [],
   "source": [
    "cat_idxs = [X_train.columns.get_loc(col) for col in cat_vars]\n",
    "# TabNet needs cat_dims[i] to exceed every integer code it will see in column cat_idxs[i].\n",
    "# Size from the largest code in train or test; nunique() of the training split alone is too small\n",
    "# whenever a level is missing from X_train. Assumes non-negative integer codes -- TODO confirm.\n",
    "cat_dims = [int(max(X_train[col].max(), X_test[col].max())) + 1 for col in cat_vars]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "bf5c1e81-c149-454d-a677-7048b07eecd7",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = TabNetRegressor(verbose=0, seed=200, cat_idxs=cat_idxs, cat_dims=cat_dims)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "ff2493dd-9bbe-42ac-b8e0-c8799070b26b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Stop training because you reached max_epochs = 50 with best_epoch = 44 and best_train_mse = 314125.91252\n",
      "24.2151\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.13/site-packages/pytorch_tabnet/callbacks.py:172: UserWarning: Best weights from best epoch are automatically used!\n",
      "  warnings.warn(wrn_msg)\n"
     ]
    }
   ],
   "source": [
    "# Time the fit with perf_counter(): a monotonic, high-resolution clock meant for short durations.\n",
    "# The eval_set is the training data itself, so the best epoch is chosen on training MSE.\n",
    "start = time.perf_counter()\n",
    "model.fit(X_train=X_train.to_numpy(), y_train=y_train.to_numpy().reshape(-1,1), eval_set=[(X_train.to_numpy(), y_train.to_numpy().reshape(-1,1))], eval_name=['train'], max_epochs=50)\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "994cbfd4-36d0-4879-b0be-5ab2eef8cf06",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_train.to_numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "2441371f-d15c-40d4-ad9d-590b03df2a0a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training RMSE:560.4693680437101\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training RMSE:{rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "c35b05c9-a34b-4fda-ae6e-f5bf8c8e9de2",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_test.to_numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "72114801-717a-4ba8-83a0-8cd697671770",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing RMSE:573.5064712598468\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing RMSE:{rmse}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "04f66670-54f6-4c90-96e9-d65a096663b0",
   "metadata": {},
   "source": [
    "### One-hot encoding"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9407c138-1151-49a5-b15f-067c12f1fee9",
   "metadata": {},
   "source": [
    "#### 1. OLS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "5876a562-9ad5-4796-a0b3-e6a250d0ad2d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "16.082\n"
     ]
    }
   ],
   "source": [
    "# Time the fit with perf_counter(): a monotonic, high-resolution clock meant for short durations.\n",
    "start = time.perf_counter()\n",
    "model = sm.OLS(y_train, sm.add_constant(X2, has_constant='skip')).fit(disp=False)\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "4cad784d-cd85-42a1-b674-2281c411ccd1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DF:1998.0; R2: 0.9998253180683524\n"
     ]
    }
   ],
   "source": [
    "print(f'DF:{model.df_model}; R2: {model.rsquared}') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "9d98efc3-7dd7-4684-8bb1-e6b18d630451",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(sm.add_constant(X2, has_constant='skip'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "d522aafa-4a4f-49af-b35c-252568e8752a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training RMSE: 48.47532185785384\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "dd3b6924-3608-4e29-9017-d332b8acc6dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(sm.add_constant(X2_test, has_constant='skip'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "77159d20-1353-4815-9365-506dd9985242",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing RMSE: 51.70409796255178\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c888aa95-d9f6-4694-80ac-05b2bbe8b61c",
   "metadata": {},
   "source": [
    "#### 2. xgboost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "ed98a66f-01db-4ea3-a8b3-a78e955be1f6",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = XGBRegressor(n_estimators=100, random_state=200)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "df42399f-ca2b-4525-8198-fe1fa9e3c0d6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.7312\n"
     ]
    }
   ],
   "source": [
    "# Time the fit with perf_counter(): a monotonic, high-resolution clock meant for short durations.\n",
    "start = time.perf_counter()\n",
    "model.fit(X2, y_train)\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "ae1430b7-aceb-4742-b934-a1eed4e907a8",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "40224e28-b679-452a-9534-678ae93c1867",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training RMSE: 3059.719348553363\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "16ffb892-5e5e-4601-a5a5-653e1b86997a",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X2_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "040f7a6c-00dd-45e3-9c25-60e92992708a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing RMSE: 3167.8420485503484\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "27a2a623-5281-47c0-acbe-0247e59a4334",
   "metadata": {},
   "source": [
    "### Target Encoding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "586c16d4-4a09-4b8d-8dbc-76a35d822258",
   "metadata": {},
   "outputs": [],
   "source": [
    "from category_encoders import TargetEncoder\n",
    "# Fit the encoder on the training split only, then apply that fitted encoder to both splits.\n",
    "enc_auto = TargetEncoder(cols=cat_vars)\n",
    "enc_auto.fit(X_train, y_train)\n",
    "X_t, X_t_test = (enc_auto.transform(split) for split in (X_train, X_test))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4e0a8ae9-bd27-4cfa-9c5d-dfd479d121e3",
   "metadata": {},
   "source": [
    "#### 1. OLS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "d2131938-e931-4f91-b99b-82c8f3d74226",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.0226\n"
     ]
    }
   ],
   "source": [
    "# Time the fit with perf_counter(): a monotonic, high-resolution clock meant for short durations.\n",
    "start = time.perf_counter()\n",
    "model = sm.OLS(y_train, sm.add_constant(X_t, has_constant='skip')).fit(disp=False)\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "a3053246-108a-46e7-98ae-89ee1ac35bbc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DF:2.0; R2: 0.9433851967588675\n"
     ]
    }
   ],
   "source": [
    "print(f'DF:{model.df_model}; R2: {model.rsquared}') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "5ccab110-ab74-4e63-9d09-0174472a644b",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(sm.add_constant(X_t, has_constant='skip'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "34ff4e5f-8b9a-41d3-94b7-79d64c7bf241",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training RMSE: 872.6934363623197\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "2ac968e0-4bfd-423d-944b-73c5710f7f69",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(sm.add_constant(X_t_test, has_constant='skip'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "82ae9a2a-4a84-4ae3-8abd-5520a9b0f785",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing RMSE: 958.2501832425994\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0fb66c13-ae7e-4dbd-b1ab-6564363315e4",
   "metadata": {},
   "source": [
    "#### 2. xgboost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "cc923abd-b206-440d-a64e-b1177cb8642a",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = XGBRegressor(n_estimators=100, random_state=200)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "647e7142-4e25-47d3-a749-b8b17a11da96",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.2605\n"
     ]
    }
   ],
   "source": [
    "# Time the fit with perf_counter(): a monotonic, high-resolution clock meant for short durations.\n",
    "start = time.perf_counter()\n",
    "model.fit(X_t, y_train)\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "bd191fae-0925-4ace-8690-42b41085b2f3",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_t)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "a75ecae2-9d64-47e0-9206-044b02010fc8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training RMSE: 691.4090589119431\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "af3cddbb-aa89-4942-9fce-f791b0270580",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_t_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "49e89411-0ebb-4dce-b2c9-1e8d6b27dc2e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "testing RMSE: 815.499414923999\n"
     ]
    }
   ],
   "source": [
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4f9efe23-76f4-49e5-9130-053c9680dd64",
   "metadata": {},
   "source": [
    "### ICFESL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "95eed9eb-f300-40a6-b9d4-dd749b7e3769",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m2025-12-01 05:49:36.335\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m397\u001b[0m - \u001b[1mrunning algorithm with L2 regularization factor = 0.01 ------>\u001b[0m\n",
      "\u001b[32m2025-12-01 05:49:45.165\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m453\u001b[0m - \u001b[1mRunning OLS with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-12-01 05:49:45.238\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m471\u001b[0m - \u001b[1mRunning xgbRegressor with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-12-01 05:49:45.539\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m485\u001b[0m - \u001b[1mCompleted: running algorithm with L2 regularization factor = 0.01 ------>\u001b[0m\n",
      "\u001b[32m2025-12-01 05:49:45.539\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m397\u001b[0m - \u001b[1mrunning algorithm with L2 regularization factor = 0.05 ------>\u001b[0m\n",
      "\u001b[32m2025-12-01 05:49:59.039\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m453\u001b[0m - \u001b[1mRunning OLS with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-12-01 05:49:59.107\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m471\u001b[0m - \u001b[1mRunning xgbRegressor with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-12-01 05:49:59.406\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m485\u001b[0m - \u001b[1mCompleted: running algorithm with L2 regularization factor = 0.05 ------>\u001b[0m\n",
      "\u001b[32m2025-12-01 05:49:59.406\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m397\u001b[0m - \u001b[1mrunning algorithm with L2 regularization factor = 0.1 ------>\u001b[0m\n",
      "\u001b[32m2025-12-01 05:50:16.077\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m453\u001b[0m - \u001b[1mRunning OLS with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-12-01 05:50:16.149\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m471\u001b[0m - \u001b[1mRunning xgbRegressor with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-12-01 05:50:16.494\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m485\u001b[0m - \u001b[1mCompleted: running algorithm with L2 regularization factor = 0.1 ------>\u001b[0m\n",
      "\u001b[32m2025-12-01 05:50:16.494\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m397\u001b[0m - \u001b[1mrunning algorithm with L2 regularization factor = 0.2 ------>\u001b[0m\n",
      "\u001b[32m2025-12-01 05:50:35.236\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m453\u001b[0m - \u001b[1mRunning OLS with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-12-01 05:50:35.310\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m471\u001b[0m - \u001b[1mRunning xgbRegressor with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-12-01 05:50:35.627\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m485\u001b[0m - \u001b[1mCompleted: running algorithm with L2 regularization factor = 0.2 ------>\u001b[0m\n",
      "\u001b[32m2025-12-01 05:50:35.627\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m397\u001b[0m - \u001b[1mrunning algorithm with L2 regularization factor = 0.5 ------>\u001b[0m\n",
      "\u001b[32m2025-12-01 05:50:56.608\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m453\u001b[0m - \u001b[1mRunning OLS with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-12-01 05:50:56.688\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m471\u001b[0m - \u001b[1mRunning xgbRegressor with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-12-01 05:50:57.007\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m485\u001b[0m - \u001b[1mCompleted: running algorithm with L2 regularization factor = 0.5 ------>\u001b[0m\n",
      "\u001b[32m2025-12-01 05:50:57.008\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m397\u001b[0m - \u001b[1mrunning algorithm with L2 regularization factor = 1 ------>\u001b[0m\n",
      "\u001b[32m2025-12-01 05:51:18.001\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m453\u001b[0m - \u001b[1mRunning OLS with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-12-01 05:51:18.073\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m471\u001b[0m - \u001b[1mRunning xgbRegressor with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-12-01 05:51:18.383\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m485\u001b[0m - \u001b[1mCompleted: running algorithm with L2 regularization factor = 1 ------>\u001b[0m\n",
      "\u001b[32m2025-12-01 05:51:18.383\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m397\u001b[0m - \u001b[1mrunning algorithm with L2 regularization factor = 5 ------>\u001b[0m\n",
      "\u001b[32m2025-12-01 05:51:42.075\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m453\u001b[0m - \u001b[1mRunning OLS with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-12-01 05:51:42.152\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m471\u001b[0m - \u001b[1mRunning xgbRegressor with ICFESL encoding\u001b[0m\n",
      "\u001b[32m2025-12-01 05:51:42.444\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36micfesl\u001b[0m:\u001b[36mregularized_search_algorun\u001b[0m:\u001b[36m485\u001b[0m - \u001b[1mCompleted: running algorithm with L2 regularization factor = 5 ------>\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "fit_info_panel, best_index, fit_figs, cluster_groups, criterions, inertias, gap_statss = icfesl.regularized_search_algorun(\n",
    "    X2, y_train, X2_test, y_test, cat_vars, 'regression', alphas = [0.01, 0.05, 0.1, 0.2, 0.5, 1, 5], cbine_column=False,\n",
    "    distance_threshold=0.002, figure=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "9139ee6e-1f9f-4834-961e-fa45bf6fe245",
   "metadata": {},
   "outputs": [],
   "source": [
    "decision_plot, summary_plot = fit_figs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "79c0959a-b17f-4c3e-a1aa-477505a69d25",
   "metadata": {},
   "outputs": [],
   "source": [
    "fit_info_panel.to_excel(\"simulation_regression_fit_info_case1.xlsx\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "8bb5be76-c3f2-48d7-88fa-1c5f8e07566c",
   "metadata": {},
   "outputs": [],
   "source": [
    "decision_plot.savefig('decision_plot_simulation_regression_case1.png')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "ffdb9c09-7e59-49c6-9d4d-b8eda8992216",
   "metadata": {},
   "outputs": [],
   "source": [
    "summary_plot.savefig('summary_plot_simulation_regression_case1.png')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "0c583112-9148-4c78-8d6c-2b581c772dd9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Experiment</th>\n",
       "      <th>dof</th>\n",
       "      <th>reg_fit_time</th>\n",
       "      <th>reg_training_rmse</th>\n",
       "      <th>reg_testing_rmse</th>\n",
       "      <th>xgb_fit_time</th>\n",
       "      <th>xgb_training_rmse</th>\n",
       "      <th>xgb_testing_rmse</th>\n",
       "      <th>var_inf</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0.0570</td>\n",
       "      <td>347.951895</td>\n",
       "      <td>346.959016</td>\n",
       "      <td>0.2822</td>\n",
       "      <td>403.597031</td>\n",
       "      <td>407.355877</td>\n",
       "      <td>2.824213e-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>34.0</td>\n",
       "      <td>0.0519</td>\n",
       "      <td>268.194672</td>\n",
       "      <td>269.568196</td>\n",
       "      <td>0.2815</td>\n",
       "      <td>327.816212</td>\n",
       "      <td>332.935669</td>\n",
       "      <td>2.167836e-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>38.0</td>\n",
       "      <td>0.0533</td>\n",
       "      <td>237.992753</td>\n",
       "      <td>240.695656</td>\n",
       "      <td>0.3260</td>\n",
       "      <td>304.461494</td>\n",
       "      <td>310.048265</td>\n",
       "      <td>1.913597e-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>40.0</td>\n",
       "      <td>0.0538</td>\n",
       "      <td>226.543392</td>\n",
       "      <td>227.949178</td>\n",
       "      <td>0.2993</td>\n",
       "      <td>302.400580</td>\n",
       "      <td>306.452927</td>\n",
       "      <td>1.819910e-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>42.0</td>\n",
       "      <td>0.0596</td>\n",
       "      <td>214.249461</td>\n",
       "      <td>216.122429</td>\n",
       "      <td>0.3014</td>\n",
       "      <td>296.659722</td>\n",
       "      <td>301.665604</td>\n",
       "      <td>1.753133e-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>42.0</td>\n",
       "      <td>0.0608</td>\n",
       "      <td>215.389883</td>\n",
       "      <td>216.160636</td>\n",
       "      <td>0.2911</td>\n",
       "      <td>303.336476</td>\n",
       "      <td>305.647742</td>\n",
       "      <td>1.797818e-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>44.0</td>\n",
       "      <td>0.0598</td>\n",
       "      <td>244.671154</td>\n",
       "      <td>254.822751</td>\n",
       "      <td>0.2732</td>\n",
       "      <td>321.266798</td>\n",
       "      <td>332.300975</td>\n",
       "      <td>2.234451e-13</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Experiment   dof  reg_fit_time  reg_training_rmse  reg_testing_rmse  \\\n",
       "0           0  26.0        0.0570         347.951895        346.959016   \n",
       "1           1  34.0        0.0519         268.194672        269.568196   \n",
       "2           2  38.0        0.0533         237.992753        240.695656   \n",
       "3           3  40.0        0.0538         226.543392        227.949178   \n",
       "4           4  42.0        0.0596         214.249461        216.122429   \n",
       "5           5  42.0        0.0608         215.389883        216.160636   \n",
       "6           6  44.0        0.0598         244.671154        254.822751   \n",
       "\n",
       "   xgb_fit_time  xgb_training_rmse  xgb_testing_rmse       var_inf  \n",
       "0        0.2822         403.597031        407.355877  2.824213e-13  \n",
       "1        0.2815         327.816212        332.935669  2.167836e-13  \n",
       "2        0.3260         304.461494        310.048265  1.913597e-13  \n",
       "3        0.2993         302.400580        306.452927  1.819910e-13  \n",
       "4        0.3014         296.659722        301.665604  1.753133e-13  \n",
       "5        0.2911         303.336476        305.647742  1.797818e-13  \n",
       "6        0.2732         321.266798        332.300975  2.234451e-13  "
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fit_info_panel"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9ba26263-8b9d-4e35-880e-7b077e7e9159",
   "metadata": {},
   "source": [
    "## CBind"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ca2a9cd3-2da2-4fd4-b673-7d62c346e657",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The grouping is computed from X2 alone and then reused unchanged for X2_test,\n",
    "# so both combined frames are produced from the same grouping object.\n",
    "x2_columns = X2.columns.tolist()\n",
    "cgrouping = icfesl.group_categorical_features(\n",
    "    X2,\n",
    "    x2_columns,\n",
    "    distance_threshold=0.002,\n",
    ")\n",
    "\n",
    "X4 = icfesl.combine_features(X2, cgrouping)\n",
    "X4_test = icfesl.combine_features(X2_test, cgrouping)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b055c561-7dce-4048-8ad6-0ca5c2c4ca68",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shape of X4_test, i.e. the output of icfesl.combine_features on X2_test.\n",
    "X4_test.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d7db5b54-7a14-401e-be2f-2670f66a5bf6",
   "metadata": {},
   "source": [
    "#### 1. OLS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11ace45f-93b7-4611-b2ba-4f1c186f2ecc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Time the design-matrix construction plus the OLS fit, in seconds.\n",
    "# perf_counter() is monotonic and high-resolution, so a sub-second interval is\n",
    "# not distorted by system clock adjustments the way time.time() can be.\n",
    "# No `disp` argument is passed: that flag belongs to iterative optimizers, and\n",
    "# OLS.fit (assuming `sm` is statsmodels.api) has no such parameter.\n",
    "start = time.perf_counter()\n",
    "model = sm.OLS(y_train, sm.add_constant(X4, has_constant='skip')).fit()\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6cd36506-6cb2-4002-bde7-75be46c788df",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Report the fitted model's `df_model` and `rsquared` attributes.\n",
    "print(f'DF:{model.df_model}; R2: {model.rsquared}') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "889e61bb-0c16-4dcf-ba9f-4117720d6bd6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict on X4, building the design matrix with the same add_constant call\n",
    "# that was used when the model was fitted.\n",
    "design_train = sm.add_constant(X4, has_constant='skip')\n",
    "y_pred = model.predict(design_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0f172ff-c995-4354-a9bd-c8490faafcba",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Score whatever is currently in `y_pred` against the training targets;\n",
    "# this relies on the X4 prediction cell having been run immediately before.\n",
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5a226b37-6efc-42f2-bdf5-473a1844b96e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict on X4_test; this overwrites the `y_pred` produced from X4.\n",
    "design_test = sm.add_constant(X4_test, has_constant='skip')\n",
    "y_pred = model.predict(design_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "189d19cb-6c55-4e55-9cdf-7c3652999a78",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Score whatever is currently in `y_pred` against the test targets;\n",
    "# this relies on the X4_test prediction cell having been run immediately before.\n",
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing RMSE: {rmse}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0f83b17f-caa2-48e7-a00c-fb108a9c74bb",
   "metadata": {},
   "source": [
    "#### 2. XGBoost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d5752ddb-6128-4743-a7b9-f6617ac74036",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rebinds `model`: from this cell on it is an unfitted XGBRegressor, no longer\n",
    "# the fitted OLS object from the section above. random_state is pinned.\n",
    "model = XGBRegressor(n_estimators=100, random_state=200)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d9f35ec7-a09b-497a-b2a5-a6ff2b8a9479",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Time the fit on (X4, y_train), in seconds. perf_counter() is monotonic and\n",
    "# high-resolution, so a sub-second interval is not distorted by system clock\n",
    "# adjustments the way time.time() can be.\n",
    "start = time.perf_counter()\n",
    "model.fit(X4, y_train)\n",
    "end = time.perf_counter()\n",
    "print(round(end - start, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "859f3057-694d-4d9b-b4c7-afd9900985fe",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict on X4, the same frame that was passed to model.fit.\n",
    "y_pred = model.predict(X4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "642b22c0-8f68-4ebf-8b8d-37e9096a591b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Score whatever is currently in `y_pred` against the training targets;\n",
    "# this relies on the X4 prediction cell having been run immediately before.\n",
    "rmse = root_mean_squared_error(y_train, y_pred)\n",
    "print(f\"training rmse: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "49110501-218a-44b5-901e-35765607adaf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict on X4_test; this overwrites the `y_pred` produced from X4.\n",
    "y_pred = model.predict(X4_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d581ff33-8ae3-4b70-b709-6570723daa99",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Score whatever is currently in `y_pred` against the test targets;\n",
    "# this relies on the X4_test prediction cell having been run immediately before.\n",
    "rmse = root_mean_squared_error(y_test, y_pred)\n",
    "print(f\"testing rmse: {rmse}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c29881f4-a3d9-4ef4-84a7-ceb656bd7acf",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ab84783b-284f-4d36-9806-2f619cb9412c",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3c7f16a8-e38e-4ee5-8daa-f660b9a76a8e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
