{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9adee767-0fe0-40c8-af36-5cea1a42bca2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "# Make the helper modules under ./tools importable. The membership check keeps\n",
    "# the cell idempotent: re-running it does not append a duplicate sys.path entry.\n",
    "root_path = os.path.join('.', 'tools')\n",
    "if root_path not in sys.path:\n",
    "    sys.path.append(root_path)\n",
    "\n",
    "import file_tools\n",
    "\n",
    "# Experiment layout, relative to the notebook's working directory.\n",
    "_EXP_DIR = os.path.join('.', 'experiments')\n",
    "_DATASET_DIR = os.path.join(_EXP_DIR, 'datasets')\n",
    "_RESULTS_DIR = os.path.join(_EXP_DIR, 'results')\n",
    "\n",
    "# Presumably creates the results directory when it is missing -- confirm in file_tools.\n",
    "file_tools.ensure_dir(_RESULTS_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "06fd4bd5-9440-4131-9d57-b767eae74455",
   "metadata": {},
   "outputs": [],
   "source": [
    "import dataset_tools\n",
    "\n",
    "import numpy as np\n",
    "from scipy.spatial import distance\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "# Features: taken as-is from dataset_tools.eeg_raw.\n",
    "X = dataset_tools.eeg_raw\n",
    "\n",
    "# Target: Euclidean distance between each observed face and the target face\n",
    "# paired with it (pairs are formed positionally by zip).\n",
    "face_pairs = zip(dataset_tools.observed_faces, dataset_tools.target_faces)\n",
    "y = [distance.euclidean(observed, target) for observed, target in face_pairs]\n",
    "\n",
    "# Standardise the target and keep it as a flat Python list.\n",
    "# NOTE(review): the scaler is fitted on every target value before any CV split is\n",
    "# made, so target statistics are shared across train and test folds.\n",
    "scaler = StandardScaler()\n",
    "y_column = np.array(y).reshape(-1, 1)\n",
    "y = scaler.fit_transform(y_column).flatten().tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "70f583a6-589a-4f42-860a-8b21230daccf",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import make_scorer, mean_squared_error\n",
    "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, ShuffleSplit\n",
    "from sklearn.neural_network import MLPRegressor\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.svm import SVR\n",
    "\n",
    "\n",
    "def rmse(y_true, y_pred):\n",
    "    \"\"\"Return the root-mean-squared error between targets and predictions.\"\"\"\n",
    "    mse = mean_squared_error(y_true, y_pred)\n",
    "    return np.sqrt(mse)\n",
    "\n",
    "\n",
    "# greater_is_better=False makes scikit-learn negate the RMSE, so every score\n",
    "# reported by the search is <= 0 and values closer to zero are better.\n",
    "rmse_scorer = make_scorer(rmse, greater_is_better=False)\n",
    "\n",
    "# --- SVR candidate: standardise the features, then fit a support vector regressor ---\n",
    "estimator = Pipeline(steps=[\n",
    "    ('scaler_x', StandardScaler()),\n",
    "    ('svr', SVR()),\n",
    "])\n",
    "param_grid = {\n",
    "    'svr__C': [0.01, 0.1],\n",
    "    'svr__gamma': ['scale', 'auto'],\n",
    "    'svr__kernel': ['linear', 'rbf'],\n",
    "}\n",
    "# Best SVR setting recorded from an earlier run:\n",
    "# {'svr__C': 0.01, 'svr__gamma': 'scale', 'svr__kernel': 'linear'}\n",
    "\n",
    "# --- MLP candidate (disabled): uncomment to search over it instead of the SVR ---\n",
    "# estimator = Pipeline([\n",
    "#     ('scaler_x', StandardScaler()),\n",
    "#     ('mlp', MLPRegressor(max_iter=1000))\n",
    "# ])\n",
    "# param_grid = {\n",
    "#     'mlp__hidden_layer_sizes': [2*(100,)],\n",
    "#     'mlp__activation': ['identity', 'relu'],\n",
    "#     'mlp__alpha': [0.1],\n",
    "#     'mlp__learning_rate': ['adaptive']\n",
    "# }\n",
    "# Best MLP setting recorded from an earlier run:\n",
    "# {'mlp__activation': 'identity',\n",
    "#  'mlp__alpha': 0.1,\n",
    "#  'mlp__hidden_layer_sizes': (100, 100),\n",
    "#  'mlp__learning_rate': 'adaptive'}\n",
    "\n",
    "# Ten random 90/10 train/test splits with a fixed seed for reproducibility.\n",
    "n_splits = 10\n",
    "test_size = 0.1\n",
    "ss = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=42)\n",
    "\n",
    "# Exhaustive search over param_grid, scored by (negated) RMSE on every split.\n",
    "grid_search = GridSearchCV(\n",
    "    estimator=estimator,\n",
    "    param_grid=param_grid,\n",
    "    scoring=rmse_scorer,\n",
    "    cv=ss,\n",
    "    n_jobs=-1,\n",
    "    verbose=2,\n",
    ")\n",
    "\n",
    "grid_search.fit(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "631223f8-3d9b-40e5-9326-279428ef82d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "# Tabulate the grid-search results (cv_results_) and export them as CSV.\n",
    "# NOTE(review): both output files land in the current working directory, while\n",
    "# _RESULTS_DIR is created in the setup cell but never used -- confirm which\n",
    "# location is intended.\n",
    "results_df = pd.DataFrame(grid_search.cv_results_)\n",
    "results_df.to_csv('grid_search_results.csv', index=False)\n",
    "\n",
    "# Serialise the complete GridSearchCV object so it can be restored later.\n",
    "with open('grid_search_results.pkl', 'wb') as pkl_file:\n",
    "    pickle.dump(grid_search, pkl_file)\n",
    "\n",
    "# Restoring it:\n",
    "# with open('grid_search_results.pkl', 'rb') as pkl_file:\n",
    "#     loaded_grid_search = pickle.load(pkl_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9181d496-fb96-4b91-9e41-bf17f1506e11",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "results = pd.DataFrame(grid_search.cv_results_)\n",
    "\n",
    "# mean_test_score is produced by rmse_scorer (greater_is_better=False), so the\n",
    "# plotted values are negated RMSEs: higher (closer to zero) is better.\n",
    "fig, ax = plt.subplots()\n",
    "ax.plot(results['mean_test_score'], marker='o')\n",
    "ax.set_xlabel('parameter combination (row index in cv_results_)')\n",
    "ax.set_ylabel('mean CV test score (negative RMSE)')\n",
    "ax.set_title('Grid search: mean test score per parameter combination')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "56094d05-e0f2-47fe-b343-7e8eb6c0c88f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hyperparameters of the combination with the best mean CV score, i.e. the\n",
    "# lowest RMSE, because the scorer negates the error.\n",
    "grid_search.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cb8ec999-eb70-4f30-bb66-70b541ab706b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# NOTE: these are in-sample predictions -- the refitted best estimator was fitted\n",
    "# on this same X, so the plot shows fit quality, not generalisation.\n",
    "y_pred = grid_search.predict(X)\n",
    "\n",
    "# Span the identity line over the actual data range instead of a fixed [-2, 2],\n",
    "# so points outside that window still have a reference.\n",
    "lo = min(min(y), min(y_pred))\n",
    "hi = max(max(y), max(y_pred))\n",
    "\n",
    "fig, ax = plt.subplots()\n",
    "ax.scatter(y, y_pred)\n",
    "ax.plot([lo, hi], [lo, hi], 'r', label='y_pred = y')\n",
    "ax.set_xlabel('true target (standardised)')\n",
    "ax.set_ylabel('predicted target')\n",
    "ax.set_title('Best estimator: predicted vs. true (in-sample)')\n",
    "ax.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b79a2345-4ab6-4757-9655-8a30754560ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Show the full cross-validation results as a table, best-ranked combination\n",
    "# first, instead of dumping the raw dict of arrays.\n",
    "pd.DataFrame(grid_search.cv_results_).sort_values('rank_test_score')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a18beb02-f0e7-4b2c-b606-8a2261a5899b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from skopt import BayesSearchCV\n",
    "from skopt.space import Real, Categorical, Integer\n",
    "\n",
    "import numpy as np\n",
    "# TransformedTargetRegressor was used below without ever being imported, which\n",
    "# raised a NameError; Pipeline, StandardScaler and SVR are imported here as well\n",
    "# so this cell no longer depends on earlier cells having been run.\n",
    "from sklearn.compose import TransformedTargetRegressor\n",
    "from sklearn.datasets import load_diabetes\n",
    "from sklearn.metrics import make_scorer, mean_squared_error\n",
    "from sklearn.model_selection import cross_val_score\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.svm import SVC, SVR\n",
    "\n",
    "# NOTE: this rebinds X and y to the diabetes toy data set, replacing the values\n",
    "# assigned in the data-loading cell; re-run that cell before returning to the\n",
    "# grid-search cells above.\n",
    "X, y = load_diabetes(return_X_y=True)\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75, random_state=0)\n",
    "\n",
    "\n",
    "def on_step(optim_result):\n",
    "    \"\"\"Callback passed to opt.fit: print the `fun` attribute of the optimisation result.\"\"\"\n",
    "    print(f\"Best score: {optim_result.fun}\")\n",
    "\n",
    "\n",
    "def rmse(y_true, y_pred):\n",
    "    \"\"\"Return the root-mean-squared error between targets and predictions.\"\"\"\n",
    "    return np.sqrt(mean_squared_error(y_true, y_pred))\n",
    "\n",
    "\n",
    "# greater_is_better=False makes scikit-learn negate the RMSE when scoring.\n",
    "rmse_scorer = make_scorer(rmse, greater_is_better=False)\n",
    "\n",
    "# Features are standardised by the pipeline; the target is standardised inside\n",
    "# TransformedTargetRegressor, i.e. the target scaler is fitted whenever the\n",
    "# pipeline is fitted rather than once on the whole data set.\n",
    "estimator = Pipeline([\n",
    "    ('scaler_x', StandardScaler()),\n",
    "    ('svr', TransformedTargetRegressor(\n",
    "        regressor=SVR(),\n",
    "        transformer=StandardScaler()\n",
    "    ))\n",
    "])\n",
    "\n",
    "# Parameter names are routed pipeline step -> wrapped regressor -> SVR argument.\n",
    "search_spaces = {\n",
    "    'svr__regressor__C': Real(1e-6, 1e+6, prior='log-uniform'),\n",
    "    'svr__regressor__gamma': Real(1e-6, 1e+1, prior='log-uniform'),\n",
    "    'svr__regressor__kernel': Categorical(['linear', 'rbf'])\n",
    "}\n",
    "\n",
    "opt = BayesSearchCV(\n",
    "    estimator,\n",
    "    search_spaces,\n",
    "    scoring=rmse_scorer,\n",
    "    n_iter=50,\n",
    "    n_jobs=-1,\n",
    "    random_state=0\n",
    ")\n",
    "\n",
    "# executes bayesian optimization\n",
    "_ = opt.fit(X_train, y_train, callback=on_step)\n",
    "\n",
    "# model can be saved, used for predictions or scoring\n",
    "# (the printed value is the negated RMSE on the held-out test split)\n",
    "print(opt.score(X_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "38eaa6d6-4b90-42a3-aee5-57612f51a41f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep the optimisation results of the fitted BayesSearchCV for the plotting\n",
    "# cell below, which iterates over them.\n",
    "results = opt.optimizer_results_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7ad494ec-9e94-4bf3-a7ae-3b83f325abe6",
   "metadata": {},
   "outputs": [],
   "source": [
    "from skopt.plots import plot_objective\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# One objective plot per entry in `results`. Per the skopt documentation,\n",
    "# optimizer_results_ holds one OptimizeResult per search space given to\n",
    "# BayesSearchCV, not one per iteration.\n",
    "for i, res in enumerate(results):\n",
    "    _ = plot_objective(res)\n",
    "    # plot_objective draws a grid of subplots; plt.title would label only the\n",
    "    # currently active axes, so put the title on the whole figure instead.\n",
    "    plt.suptitle(f\"Objective Plot for Run {i+1}\")\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "21bdcd28-93b3-4704-b2bf-d2f9fca80c28",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
